In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# NOTE(review): absolute, machine-specific location of the raw data file.
# Adjust it before running this notebook on another machine.
file_path = "/Users/iranblanco/Desktop/Masters/ANA 680/Week 1/HW1/breast-cancer-wisconsin.data"

# The file is read with header=None, so the column names are supplied here
column_names = [
    "Sample code number",
    "Clump Thickness",
    "Uniformity of Cell Size",
    "Uniformity of Cell Shape",
    "Marginal Adhesion",
    "Single Epithelial Cell Size",
    "Bare Nuclei",
    "Bland Chromatin",
    "Normal Nucleoli",
    "Mitoses",
    "Class"
]

# "?" is the missing-value marker in this file. Declaring it at read time turns
# it into NaN during parsing, so a column whose only non-numeric entries are "?"
# comes back numeric instead of as strings.
df = pd.read_csv(file_path, header=None, names=column_names, na_values="?")

# Fail fast if "Bare Nuclei" still holds anything that cannot be read as a number
df["Bare Nuclei"] = pd.to_numeric(df["Bare Nuclei"])

# Drop rows that have a missing value in any column (new frame, no in-place mutation)
df = df.dropna()

# Features (X): every column except "Sample code number" and "Class"; target (y): "Class"
X = df.drop(columns=["Sample code number", "Class"])
y = df["Class"]

# Split into training (75%) and testing (25%); the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Write the four splits as .npy files into the current working directory
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)

print("Data preprocessing completed and saved.")

Data preprocessing completed and saved.


In [3]:
# Read the four saved splits back from the .npy files in the working directory
X_train, X_test, y_train, y_test = (
    np.load(npy_name)
    for npy_name in ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy")
)

# Kernel SVM with an RBF kernel; fit() returns the fitted estimator itself
model = SVC(kernel="rbf").fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Store the accuracy as a one-row table and write it to CSV (no index column)
results = pd.DataFrame([{"Model": "Kernel SVM (RBF)", "Accuracy": accuracy}])
results.to_csv("results_kernel_svm.csv", index=False)

# Report the metrics
print("Kernel SVM (RBF) Results:")
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)

Kernel SVM (RBF) Results:
Accuracy: 0.9473684210526315
Confusion Matrix:
 [[101   2]
 [  7  61]]
