In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd

file_path = "/content/drive/MyDrive/1-Unimib/Sig and Img Acq Modelling in Healthcare/epileptic_data.csv"
data = pd.read_csv(file_path)
data.head()

Unnamed: 0.1,Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,5
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,5
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,5


In [3]:
import pandas as pd

# Drop the first column
data = data.drop(data.columns[0], axis=1)

# Separate features (X) and labels (y)
X = data.drop('y', axis=1)  # Assuming the label column is named 'y'
y = data['y']
X.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X169,X170,X171,X172,X173,X174,X175,X176,X177,X178
0,135,190,229,223,192,125,55,-9,-33,-38,...,8,-17,-15,-31,-77,-103,-127,-116,-83,-51
1,386,382,356,331,320,315,307,272,244,232,...,168,164,150,146,152,157,156,154,143,129
2,-32,-39,-47,-37,-32,-36,-57,-73,-85,-94,...,29,57,64,48,19,-12,-30,-35,-35,-36
3,-105,-101,-96,-92,-89,-95,-102,-100,-87,-79,...,-80,-82,-81,-80,-77,-85,-77,-72,-69,-65
4,-9,-65,-98,-102,-78,-48,-16,0,-21,-59,...,10,4,2,-12,-32,-41,-65,-83,-89,-73


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA

In [5]:
# Standardize the features 
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [19]:
X

array([[ 0.88505134,  1.20992878,  1.46276429, ..., -0.63414367,
        -0.43329036, -0.23539922],
       [ 2.40057718,  2.36619038,  2.23944096, ...,  1.02342937,
         0.95424076,  0.85653664],
       [-0.12328657, -0.16915405, -0.22513147, ..., -0.13687176,
        -0.13859348, -0.14440456],
       ...,
       [ 0.1544592 ,  0.10184476, -0.01720228, ...,  0.0657205 ,
         0.07015014,  0.02545213],
       [-0.17159018, -0.08484331,  0.00725997, ...,  0.49546166,
         0.43852123,  0.40762968],
       [ 0.24502848,  0.31262161,  0.41088722, ...,  0.0657205 ,
         0.08856869,  0.19530882]])

In [6]:
# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
#The stratify parameter is set to y to ensure that the class proportions are maintained in both the train and test sets.

In [7]:
#Calculate sensitivity and specificity
def sensitivity_specificity(conf_matrix):
    # Calculate sensitivity (true positive rate) and specificity (true negative rate)
    sensitivity = conf_matrix[0, 0] / (conf_matrix[0, 0] + conf_matrix[1, 0])
    specificity = conf_matrix[1, 1] / (conf_matrix[1, 1] + conf_matrix[0, 1])

    return sensitivity, specificity


In [8]:
#Apply SVM with different parameters
# A list of kernels to be used with the SVM models
kernels = ['linear', 'poly', 'rbf']

# Iterate through the kernels and train SVM models with them
for kernel in kernels:
    print(f"Training SVM model with {kernel} kernel...")
    
    # Create the SVM model with the current kernel
    svm_model = SVC(kernel=kernel, random_state=42)
    
    # Train the SVM model on the training data
    svm_model.fit(X_train, y_train)
    
    # Predict the test data
    y_pred = svm_model.predict(X_test)
    
    # Calculate the confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    
    # Calculate sensitivity and specificity
    sensitivity, specificity = sensitivity_specificity(conf_matrix)
    
    # Print the results
    print(f"Sensitivity for {kernel} kernel: {sensitivity:.3f}")
    print(f"Specificity for {kernel} kernel: {specificity:.3f}")
    print("\n")


Training SVM model with linear kernel...
Sensitivity for linear kernel: 0.973
Specificity for linear kernel: 0.563


Training SVM model with poly kernel...
Sensitivity for poly kernel: 1.000
Specificity for poly kernel: 0.476


Training SVM model with rbf kernel...
Sensitivity for rbf kernel: 0.948
Specificity for rbf kernel: 0.821




In [9]:
def perform_cross_validation(model, X, y, n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)
    sensitivity_scores = []
    specificity_scores = []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        conf_matrix = confusion_matrix(y_test, y_pred)
        sensitivity, specificity = sensitivity_specificity(conf_matrix)
        sensitivity_scores.append(sensitivity)
        specificity_scores.append(specificity)

    return np.mean(sensitivity_scores), np.mean(specificity_scores)


for kernel in kernels:
    print(f"Performing cross-validation for SVM model with {kernel} kernel...")
    svm_model = SVC(kernel=kernel, random_state=42)
    mean_sensitivity, mean_specificity = perform_cross_validation(svm_model, X, y)
    print(f"Average sensitivity for {kernel} kernel: {mean_sensitivity:.3f}")
    print(f"Average specificity for {kernel} kernel: {mean_specificity:.3f}")
    print("\n")


Performing cross-validation for SVM model with linear kernel...
Average sensitivity for linear kernel: 0.959
Average specificity for linear kernel: 0.572


Performing cross-validation for SVM model with poly kernel...
Average sensitivity for poly kernel: 0.989
Average specificity for poly kernel: 0.448


Performing cross-validation for SVM model with rbf kernel...
Average sensitivity for rbf kernel: 0.946
Average specificity for rbf kernel: 0.829




In [10]:
# Apply PCA and obtain explained variance ratio
pca = PCA()
X_pca = pca.fit_transform(X)
explained_variance_ratio = pca.explained_variance_ratio_

# Determine the optimal number of components to reach 95% of explained variance
cumulative_variance_ratio = np.cumsum(explained_variance_ratio)
optimal_n_components = np.argmax(cumulative_variance_ratio > 0.95) + 1

print(f"Optimal number of components for PCA: {optimal_n_components}")


Optimal number of components for PCA: 39


In [11]:
for kernel in kernels:
    print(f"Performing cross-validation for SVM model with {kernel} kernel...")
    svm_model = SVC(kernel=kernel, random_state=42)
    mean_sensitivity, mean_specificity = perform_cross_validation(svm_model, X_pca, y)
    print(f"Average sensitivity for {kernel} kernel: {mean_sensitivity:.3f}")
    print(f"Average specificity for {kernel} kernel: {mean_specificity:.3f}")
    print("\n")

Performing cross-validation for SVM model with linear kernel...
Average sensitivity for linear kernel: 0.959
Average specificity for linear kernel: 0.572


Performing cross-validation for SVM model with poly kernel...
Average sensitivity for poly kernel: 0.989
Average specificity for poly kernel: 0.448


Performing cross-validation for SVM model with rbf kernel...
Average sensitivity for rbf kernel: 0.946
Average specificity for rbf kernel: 0.829




In [15]:
import matplotlib.pyplot as plt

def plot_performance_comparison(performance_data, title):
  labels = ['linear', 'poly', 'rbf']
  sensitivity = [perf_data['sensitivity'] for perf_data in performance_data]
  specificity = [perf_data['specificity'] for perf_data in performance_data]

  x=np.arange(len(labels))
  width = 0.35

  fig, ax = plt.subplots()
  rects1 = ax.bar(x-width/2, sensitivity, width, label="Sensitivity")
  rects2 = ax.bar(x-width/2, specificity, width, label="Specificity")

  ax.set_ylabel("Scores")
  ax.set_title(title)
  ax.set_xticks(x)
  ax.set_xticklabels(labels)
  ax.legend()

  fig.tight_layout()
  plt.show()