Load the dataset from the file Vertebral_column_3C.dat. Divide the data into a training set
and a testing set. Use the training set to train an SVM, and use the trained model to predict
the class labels of the testing data samples. Summarize the prediction results in a confusion
matrix and calculate the prediction accuracy. Use at least two kernels and two values for the
hyperparameter C, and compare the resulting prediction accuracies.

In [36]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Column names in file order: six numeric features followed by the class label.
columns = ['Pelvic incidence', 'Pelvic tilt', 'Lumbar lordosis angle',
           'Sacral slope', 'Pelvic radius', 'Grade of spondylolisthesis', 'Class']

# Integer code assigned to each class label that appears in the file.
class_codes = {"DH": 0, "SL": 1, "NO": 2}

# Load the space-delimited dataset (no header row) and drop incomplete rows.
file_path = 'Vertebral_column_3C.dat'
data = pd.read_csv(file_path, delimiter=' ', names=columns)
data = data.dropna()

# Encode the labels in a single pass. Mapping builds a fresh integer column
# instead of writing ints into the string-typed label column, which only works
# while that column is stored as `object` dtype.
encoded = data["Class"].map(class_codes)
unmapped = encoded.isna()
if unmapped.any():
    # Fail with the offending labels named rather than a bare cast error.
    unknown = sorted(data.loc[unmapped, "Class"].astype(str).unique())
    raise ValueError(f"Unexpected class labels in {file_path}: {unknown}")
data["Class"] = encoded.astype(int)

# Separate features from labels, then hold out 30% of the rows for testing.
# The fixed random_state keeps the split reproducible between runs.
X = data.drop("Class", axis=1)
y = data["Class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Baseline model: a linear-kernel SVM with C left at the library default.
svm1 = svm.SVC(kernel='linear')
svm1.fit(X_train, y_train)

# Predict the class labels of the held-out test samples.
y_pred = svm1.predict(X_test)

# Score the predictions against the true test labels.
accuracy1 = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# The message must be an f-string that interpolates `accuracy1`; as a plain
# string it printed the literal placeholder text instead of the score.
print(f"Initial Accuracy: {accuracy1:.4f}")
print("Initial Confusion Matrix:")
print(conf_matrix)


# PART 2 - compare four models: every pairing of kernel and C value below.

results = {}
kernels = ['linear', 'rbf']  # kernels under comparison
C_values = [0.1, 1.0]  # regularisation strengths under comparison

# Enumerate the grid kernel-major so each kernel is tried with every C in turn.
param_grid = [(k, c) for k in kernels for c in C_values]

for kernel, C_val in param_grid:
    print(f"\nTraining SVM with kernel='{kernel}', C={C_val}")

    # fit() returns the estimator itself, so construction and training chain.
    clf = svm.SVC(kernel=kernel, C=C_val).fit(X_train, y_train)

    # Evaluate on the held-out test set.
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Report this configuration's scores, one item per line.
    print(f"Accuracy: {accuracy:.4f}", "Confusion Matrix:", conf_matrix, sep="\n")

    # Keep the scores, keyed by a readable description of the configuration.
    key = f"Kernel: {kernel}, C: {C_val}"
    results[key] = (accuracy, conf_matrix)

 




Initial Accuracy: {accuracy:.4f}
Initial Confusion Matrix:
[[10  1  7]
 [ 0 50  1]
 [ 5  0 19]]

Training SVM with kernel='linear', C=0.1
Accuracy: 0.8495
Confusion Matrix:
[[10  1  7]
 [ 0 50  1]
 [ 5  0 19]]

Training SVM with kernel='linear', C=1.0
Accuracy: 0.8495
Confusion Matrix:
[[10  1  7]
 [ 0 50  1]
 [ 5  0 19]]

Training SVM with kernel='rbf', C=0.1
Accuracy: 0.7634
Confusion Matrix:
[[ 0  0 18]
 [ 0 48  3]
 [ 0  1 23]]

Training SVM with kernel='rbf', C=1.0
Accuracy: 0.8065
Confusion Matrix:
[[ 4  1 13]
 [ 0 50  1]
 [ 2  1 21]]
