In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os

from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler,StandardScaler

from data_mining_process import *

import warnings
warnings.filterwarnings('ignore')

# make directory for visualization
if not os.path.exists('visualization'):
    os.makedirs('visualization')

In [2]:
# load cases from case features folder
Ent_Case = pd.read_csv('case_features/Window_Entropy_Case.csv', header=None)
Slope_Case = pd.read_csv('case_features/Window_Slope_Case.csv', header=None)
Mf_Case = pd.read_csv('case_features/Window_Mf_Case.csv', header=None)

In [3]:
# load controls from control features folder
Ent_Control = pd.read_csv('control_features/Window_Entropy_Control.csv', header=None)
Slope_Control = pd.read_csv('control_features/Window_Slope_Control.csv', header=None)
Mf_Control = pd.read_csv('control_features/Window_Mf_Control.csv', header=None)

In [9]:
col_names = ["Entropy ", "Slope ", "ID", "Hurst Exponent ",
    "Left Slope ", "Right Slope ","Left Tangent ",
    "Right Tangent", "Broadness", "Left Tangent Point", "Right Tangent Point"]
# combine Ent and Slope and Mfcc dataframes for each class
Case = pd.concat([ Ent_Case.T, Slope_Case.T, Mf_Case ], axis=1)
Case.columns = col_names
Control = pd.concat([ Ent_Control.T, Slope_Control.T, Mf_Control ], axis=1)
Control.columns = col_names
Case['Case'] = 1
Control['Case'] = 0
Case

Unnamed: 0,Entropy,Slope,ID,Hurst Exponent,Left Slope,Right Slope,Left Tangent,Right Tangent,Broadness,Left Tangent Point,Right Tangent Point,Case
0,0.285395,-1.608997,13918,-0.507037,0.825365,-0.710861,1.4,-1.0,0.523666,-0.749354,-0.225688,1
1,0.188718,-1.794542,13918,-0.569219,1.023791,-0.564038,1.8,-0.8,0.549938,-0.764571,-0.214633,1
2,0.322635,-1.840223,13918,-0.643782,0.971962,-0.867502,1.7,-0.9,0.436316,-0.849551,-0.413235,1
3,0.266751,-1.951984,13918,-0.447557,0.738082,-0.679130,1.1,-0.9,0.565467,-0.718530,-0.153063,1
4,0.136910,-2.188939,14241,-0.545745,0.916445,-0.574756,1.6,-0.8,0.566208,-0.763979,-0.197771,1
...,...,...,...,...,...,...,...,...,...,...,...,...
611,0.216837,-1.494053,85343,-0.619981,0.858294,-0.576277,1.8,-0.9,0.580076,-0.853001,-0.272926,1
612,0.132755,-2.492685,9979,-0.508295,0.849355,-0.599108,1.6,-0.8,0.569303,-0.743767,-0.174465,1
613,0.131332,-3.062884,9979,-0.620613,0.930732,-0.604266,1.9,-0.9,0.545865,-0.835498,-0.289633,1
614,0.213663,-1.883848,9979,-0.549001,0.876512,-0.498835,1.8,-0.8,0.629111,-0.777178,-0.148067,1


In [10]:
# combine Case and Control data
data = pd.concat([Case, Control], axis=0)

# check if any missing values
data.isna().sum()

Entropy                156
Slope                  156
ID                       0
Hurst Exponent           0
Left Slope               0
Right Slope              0
Left Tangent             0
Right Tangent            0
Broadness                0
Left Tangent Point       0
Right Tangent Point      0
Case                     0
dtype: int64

In [11]:
# suffle data
data = data.sample(frac=1, random_state= 42).reset_index(drop=True);
# replace NaN values with mean of the column
data = data.fillna(data.mean())


In [12]:
# a model using three features: entorpy, slope, and left tangent point
X = data.iloc[:, [0,1,9]]
y = data.iloc[:, 11]

In [14]:
target = data.columns[11]  # target variable name
features = [data.columns[0], data.columns[1], data.columns[9]]  # predictor features name

In [16]:
# model trained without class weights


def build_svm(mod_params):
    # mod_params is a list, and the order of its elems is C, gamma, kernel, and class_weight
    if len(mod_params) == 4:  # if true, class_weight is used
        return SVC(probability=True, C=mod_params[0], gamma=mod_params[1], kernel=mod_params[2],
                  class_weight=mod_params[3])
    else:
        return SVC(probability=True, C=mod_params[0], gamma=mod_params[1], kernel=mod_params[2])

# train and test svm model on different samples
#mod_params = [1.0, 'scale', 'rbf']  # default values
mod_params = [10, 0.01, 'linear']  # default values
performance_measures_dict, max_cm = repeat_sampling_and_training(build_svm, mod_params, data,
                                    target, features, num_repeat=1000, doMinMaxScaling=False)

In [17]:
print_performance_metrics(performance_measures_dict)
max_cm

Mean of Training Accuracy: 0.6566375634517767
Mean of Testing Accuracy: 0.6493765182186234
Standard deviation of Testing Accuracy: 0.026785341877492867
Mean of Sensitivity: 0.5841676889001778
Standard deviation of Sensitivity: 0.04462539280319894
Mean of Speicificity: 0.7184566501592445
Standard deviation of Speicificity: 0.04412410708793066
Mean of Case Precision: 0.6826309942758768
Standard deviation of Case Precision: 0.044082269852881606
Mean of Control Precision: 0.6255280705924535
Standard deviation of Control Precision: 0.03954007512610224
Mean of Case F1: 0.6279641619613001
Standard deviation of Case F1: 0.031376190370656414
Mean of Control F1: 0.6674629542254422
Standard deviation of Control F1: 0.029274904694266753
Mean of AUC: 0.7136191516551958
Standard deviation of AUC: 0.028814432739181467


Unnamed: 0,Predicted Negative(Absent),Predicted Positive(Present)
Actual Negative(Absent),90,32
Actual positive(Present),35,90


In [19]:
target = data.columns[11]  # target variable name
features = [col_name for col_name in data.columns[1:11]]  # predictor features name
features.pop(features.index('ID'))  # remove patient ID from feature names list
features

['Slope ',
 'Hurst Exponent ',
 'Left Slope ',
 'Right Slope ',
 'Left Tangent ',
 'Right Tangent',
 'Broadness',
 'Left Tangent Point',
 'Right Tangent Point']

In [20]:
# train and test models
#mod_params = [1.0, 'scale', 'rbf']  # default values
mod_params = [0.1, 0.001, 'linear']  # default values
performance_measures_dict, max_cm = repeat_sampling_and_training(build_svm, mod_params, data,
                                    target, features, num_repeat=1000, doMinMaxScaling=False)

In [21]:
print_performance_metrics(performance_measures_dict)
max_cm

Mean of Training Accuracy: 0.659915736040609
Mean of Testing Accuracy: 0.6487449392712551
Standard deviation of Testing Accuracy: 0.028020289420120793
Mean of Sensitivity: 0.5609978037778051
Standard deviation of Sensitivity: 0.053799972569885564
Mean of Speicificity: 0.7400168022366811
Standard deviation of Speicificity: 0.053246533558704776
Mean of Case Precision: 0.6866698177707972
Standard deviation of Case Precision: 0.05185668311624298
Mean of Control Precision: 0.6257139845925033
Standard deviation of Control Precision: 0.0398697163731432
Mean of Case F1: 0.6147645605543753
Standard deviation of Case F1: 0.03525099742913561
Mean of Control F1: 0.6761732343035469
Standard deviation of Control F1: 0.028778068636538465
Mean of AUC: 0.6994432767589241
Standard deviation of AUC: 0.029611623249373324


Unnamed: 0,Predicted Negative(Absent),Predicted Positive(Present)
Actual Negative(Absent),94,20
Actual positive(Present),48,85


Applying class weights without balancing the dataset to mitigate data imbalance

In [22]:
# a model using three features: entorpy, slope, and left tangent point
target = data.columns[11]  # target variable name
features = [data.columns[0], data.columns[1], data.columns[9]]  # predictor features name

In [23]:
#split data into training and testing data
X = data.loc[:, features]
y = data.loc[:, target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [26]:
from sklearn.utils import class_weight
import numpy as np


# Compute the class weights
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
# Create a dictionary mapping the class indices to their respective weights
class_weights_dict = dict(enumerate(class_weights))
class_weights_values = [{0: 1, 1: 1+i/2} for i in range(4, 8)]  # list of class_weights dictionary
class_weights_values.append(class_weights)

# Hyperparameter tuning including class weight
parameters = {'C': [0.1, 10, 100, 1000],
              'gamma': [10, 0.1, 0.01, 0.001],
              'kernel': ['rbf', 'linear'],
              'class_weight': class_weights_values
             }
# maximize auc of roc curve in gridsearch
gridsearch = GridSearchCV(SVC(probability=True), parameters, refit = True, verbose = 3, cv = 5, scoring='roc_auc')
gridsearch.fit(X_train, y_train)
print(gridsearch.best_params_)


Fitting 5 folds for each of 256 candidates, totalling 1280 fits
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.723 total time=   0.2s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.693 total time=   0.2s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.694 total time=   0.2s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.677 total time=   0.2s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.653 total time=   0.2s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.722 total time=   0.1s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.731 total time=   0.1s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.662 total time=   0.1s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.701 total time=   0.

[CV 4/5] END C=0.1, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.706 total time=   0.1s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.689 total time=   0.1s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.714 total time=   0.2s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.743 total time=   0.2s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.700 total time=   0.2s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.686 total time=   0.2s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.686 total time=   0.2s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=linear;, score=0.718 total time=   0.1s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=linear;, score=0.723 total time=   0.1s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 2.5}, gamma=

[CV 2/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.716 total time=   0.1s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.670 total time=   0.1s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.689 total time=   0.1s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.684 total time=   0.1s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.723 total time=   0.3s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.734 total time=   0.3s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.697 total time=   0.3s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.686 total time=   0.3s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.678 total time=   0.3s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 3.5}, 

[CV 5/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.660 total time=   0.3s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.709 total time=   0.1s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.711 total time=   0.2s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.672 total time=   0.2s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.687 total time=   0.2s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.684 total time=   0.1s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.728 total time=   0.3s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.740 total time=   0.3s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.690 total time=   0.3s
[CV 4/5] END C=0.1, class_weight={0: 1, 

[CV 4/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=rbf;, score=nan total time=   0.0s
[CV 5/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=rbf;, score=nan total time=   0.0s
[CV 1/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=linear;, score=nan total time=   0.0s
[CV 2/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=linear;, score=nan total time=   0.0s
[CV 3/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=linear;, score=nan total time=   0.0s
[CV 4/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=linear;, score=nan total time=   0.0s
[CV 5/5] END C=0.1, class_weight=[0.62101129 2.56592292], gamma=0.001, kernel=linear;, score=nan total time=   0.0s
[CV 1/5] END C=10, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.689 total time=   0.3s
[CV 2/5] END C=10, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.652 total tim

[CV 2/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=rbf;, score=0.743 total time=   0.2s
[CV 3/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=rbf;, score=0.684 total time=   0.2s
[CV 4/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=rbf;, score=0.704 total time=   0.2s
[CV 5/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=rbf;, score=0.693 total time=   0.2s
[CV 1/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.727 total time=   0.2s
[CV 2/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.739 total time=   0.1s
[CV 3/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.688 total time=   0.1s
[CV 4/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.708 total time=   0.2s
[CV 5/5] END C=10, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   0.1s
[CV 1/5] END C=10, class_weight={0: 1, 1

[CV 1/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.687 total time=   0.3s
[CV 2/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.685 total time=   0.3s
[CV 3/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.660 total time=   0.3s
[CV 4/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.668 total time=   0.3s
[CV 5/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.673 total time=   0.3s
[CV 1/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.725 total time=   0.2s
[CV 2/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.737 total time=   0.2s
[CV 3/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   0.2s
[CV 4/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.710 total time=   0.2s
[CV 5/5] END C=10, class_weight={0: 1, 1: 3

[CV 1/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.682 total time=   0.3s
[CV 2/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.680 total time=   0.3s
[CV 3/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.657 total time=   0.3s
[CV 4/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.664 total time=   0.3s
[CV 5/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.671 total time=   0.3s
[CV 1/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.724 total time=   0.2s
[CV 2/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   0.2s
[CV 3/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.692 total time=   0.2s
[CV 4/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.712 total time=   0.2s
[CV 5/5] END C=10, class_weight={0: 1, 1: 4

[CV 1/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.679 total time=   0.8s
[CV 2/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.668 total time=   0.6s
[CV 3/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.637 total time=   0.9s
[CV 4/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.648 total time=   0.7s
[CV 5/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.634 total time=   0.4s
[CV 1/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.727 total time=   8.5s
[CV 2/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.741 total time=   5.7s
[CV 3/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.683 total time=   5.1s
[CV 4/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.704 total time=   7.0s
[CV 5/5] END C=100, class_weight={0: 1, 1: 1.5}, gamma=10, ke

[CV 4/5] END C=100, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.708 total time=   0.4s
[CV 5/5] END C=100, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   0.7s
[CV 1/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.729 total time=   0.5s
[CV 2/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.682 total time=   0.5s
[CV 3/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.713 total time=   0.5s
[CV 4/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.665 total time=   0.5s
[CV 5/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.642 total time=   0.4s
[CV 1/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=linear;, score=0.726 total time=   0.3s
[CV 2/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=linear;, score=0.738 total time=   0.3s
[CV 3/5] END C=100, class_weight={0: 1, 1: 2.5}, gamma=

[CV 2/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.737 total time=   0.3s
[CV 3/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   0.3s
[CV 4/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.710 total time=   0.2s
[CV 5/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   0.4s
[CV 1/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.728 total time=   0.5s
[CV 2/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.725 total time=   0.5s
[CV 3/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.690 total time=   0.4s
[CV 4/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.667 total time=   0.4s
[CV 5/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.650 total time=   0.4s
[CV 1/5] END C=100, class_weight={0: 1, 1: 3.5}, 

[CV 5/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.688 total time=   0.3s
[CV 1/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.724 total time=   0.3s
[CV 2/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   0.3s
[CV 3/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.693 total time=   0.3s
[CV 4/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.712 total time=   0.3s
[CV 5/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   0.3s
[CV 1/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.733 total time=   0.5s
[CV 2/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.735 total time=   0.5s
[CV 3/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=10, kernel=rbf;, score=0.677 total time=   0.4s
[CV 4/5] END C=100, class_weight={0: 1, 

[CV 1/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.672 total time=   3.8s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.626 total time=   2.8s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.659 total time=   3.6s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.618 total time=   3.2s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=rbf;, score=0.646 total time=   3.1s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.726 total time=  14.6s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.741 total time=  12.1s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.684 total time=   9.1s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 1.5}, gamma=10, kernel=linear;, score=0.709 total time=  13.7s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 1.5}, ga

[CV 3/5] END C=1000, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.688 total time=   1.4s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.708 total time=   1.8s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 2.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   2.2s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.696 total time=   2.3s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.662 total time=   3.0s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.694 total time=   2.1s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.642 total time=   1.9s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=rbf;, score=0.651 total time=   1.5s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 2.5}, gamma=10, kernel=linear;, score=0.726 total time=   1.2s
[CV 2/5] END C=1000, class_weight={0: 1, 1:

[CV 5/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.695 total time=   0.2s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.725 total time=   1.0s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   1.0s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   1.0s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.710 total time=   0.9s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   0.8s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.720 total time=   2.2s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.684 total time=   2.0s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 3.5}, gamma=10, kernel=rbf;, score=0.666 total time=   1.7s
[CV 4/5] END C=1000, class_weig

[CV 2/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.740 total time=   0.3s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.691 total time=   0.3s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.708 total time=   0.3s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.694 total time=   0.3s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.724 total time=   0.8s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   0.9s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.692 total time=   0.8s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.712 total time=   0.9s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   0.9s
[CV 1/5] END C=1000, c

{'C': 1000, 'class_weight': {0: 1, 1: 2.5}, 'gamma': 0.001, 'kernel': 'rbf'}


In [40]:
# performance report
train_pred = gridsearch.predict(X_train)
print(f"Train Accuracy: {accuracy_score(y_train, train_pred)}")

y_pred = gridsearch.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cm = pd.DataFrame({"Predicted Negative(Absent)": cm[:, 0], "Predicted Positive(Present)": cm[:, 1]})
cm.index = ["Actual Negative(Absent)", "Actual positive(Present)"]
print(classification_report(y_test, y_pred))
y_prob = gridsearch.predict_proba(X_test)
print(f"AUC: {roc_auc_score(y_test, y_prob[:, 1])}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Present F1 score: {f1_score(y_test, y_pred)}")
print(f"Absent F1 score: {f1_score(y_test, y_pred,pos_label=0)}")
print(f"Sensitivity: {recall_score(y_test, y_pred)}")
print(f"Speicificity: {recall_score(y_test, y_pred,pos_label=0)}")
cm

              precision    recall  f1-score   support

           0       0.85      0.90      0.88       510
           1       0.47      0.36      0.41       123

    accuracy                           0.80       633
   macro avg       0.66      0.63      0.64       633
weighted avg       0.78      0.80      0.79       633

AUC: 0.7346484935437589


Unnamed: 0,Predicted Negative(Absent),Predicted Positive(Present)
Actual Negative(Absent),461,49
Actual positive(Present),79,44


In [55]:
# Using all features in the data
target = data.columns[11]  # target variable name
features = [col_name for col_name in data.columns[1:11]]  # predictor features name
features.pop(features.index('ID'))  # remove patient ID from feature names list
features.pop(features.index('Broadness'))  # not discriminatory according to wilcoxon ranksum test
features.pop(features.index('Right Tangent'))  # not discriminatory according to wilcoxon ranksum test
features

['Slope ',
 'Hurst Exponent ',
 'Left Slope ',
 'Right Slope ',
 'Left Tangent ',
 'Left Tangent Point',
 'Right Tangent Point']

In [56]:
# split data into training and testing data
X = data.loc[:,features]
y = data.loc[:, target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [57]:
# Compute the class weights
class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
# Create a dictionary mapping the class indices to their respective weights
class_weights_dict = dict(enumerate(class_weights))
class_weights_values = [{0: 1, 1: 1+i/2} for i in range(4, 8)]  # list of class_weights dictionary
class_weights_values.append(class_weights)

# Hyperparameter tuning including class weight
parameters = {'C': [0.1, 10, 100, 1000],
              'gamma': [10, 0.1, 0.01, 0.001],
              'kernel': ['rbf', 'linear'],
              'class_weight': class_weights_values
             }
# maximize auc of roc curve in gridsearch
gridsearch = GridSearchCV(SVC(probability=True), parameters, refit = True, verbose = 3, cv = 5, scoring='roc_auc')
gridsearch.fit(X_train, y_train)
print(gridsearch.best_params_)

Fitting 5 folds for each of 160 candidates, totalling 800 fits
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.688 total time=   1.2s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.649 total time=   1.1s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.694 total time=   1.2s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.707 total time=   1.2s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.679 total time=   1.1s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.697 total time=   0.6s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.689 total time=   0.6s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.684 total time=   0.6s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.733 total time=   0.6

[CV 3/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=0.001, kernel=linear;, score=0.686 total time=   0.6s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=0.001, kernel=linear;, score=0.734 total time=   0.6s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 3.5}, gamma=0.001, kernel=linear;, score=0.671 total time=   0.6s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.683 total time=   1.2s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.667 total time=   1.2s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.694 total time=   1.2s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.710 total time=   1.3s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.672 total time=   1.2s
[CV 1/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=linear;, score=0.693 total time=   0.6s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 4.0}, gam

[CV 1/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.691 total time=   0.7s
[CV 2/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.692 total time=   0.7s
[CV 3/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.689 total time=   0.7s
[CV 4/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.729 total time=   0.7s
[CV 5/5] END C=0.1, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.669 total time=   0.7s
[CV 1/5] END C=0.1, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 2/5] END C=0.1, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 3/5] END C=0.1, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 4/5] END C=0.1, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 5/5] END C=

[CV 1/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.692 total time=   1.2s
[CV 2/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.696 total time=   1.2s
[CV 3/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.689 total time=   1.2s
[CV 4/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.721 total time=   1.2s
[CV 5/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.677 total time=   1.2s
[CV 1/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.694 total time=   0.8s
[CV 2/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.683 total time=   0.7s
[CV 3/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.675 total time=   0.7s
[CV 4/5] END C=10, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   0.7s
[CV 5/5] END C=10, class_weight={0: 1, 1: 3

[CV 5/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.01, kernel=linear;, score=0.665 total time=   0.8s
[CV 1/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.687 total time=   1.3s
[CV 2/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.699 total time=   1.3s
[CV 3/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.690 total time=   1.3s
[CV 4/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.716 total time=   1.3s
[CV 5/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.673 total time=   1.3s
[CV 1/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.691 total time=   0.8s
[CV 2/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.684 total time=   0.8s
[CV 3/5] END C=10, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.675 total time=   0.8s
[CV 4/5] END C=10, class_weight={0: 1, 1: 4.

[CV 1/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.561 total time=   1.9s
[CV 2/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.594 total time=   1.9s
[CV 3/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.622 total time=   2.0s
[CV 4/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.577 total time=   2.0s
[CV 5/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=rbf;, score=0.557 total time=   1.9s
[CV 1/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.691 total time=   1.8s
[CV 2/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.683 total time=   1.5s
[CV 3/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.671 total time=   1.5s
[CV 4/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, kernel=linear;, score=0.737 total time=   1.5s
[CV 5/5] END C=100, class_weight={0: 1, 1: 3.0}, gamma=10, ke

[CV 4/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=0.001, kernel=linear;, score=0.741 total time=   1.6s
[CV 5/5] END C=100, class_weight={0: 1, 1: 3.5}, gamma=0.001, kernel=linear;, score=0.665 total time=   1.6s
[CV 1/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.576 total time=   2.0s
[CV 2/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.587 total time=   2.0s
[CV 3/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.604 total time=   2.0s
[CV 4/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.577 total time=   2.2s
[CV 5/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=rbf;, score=0.549 total time=   2.0s
[CV 1/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=linear;, score=0.690 total time=   1.6s
[CV 2/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=10, kernel=linear;, score=0.686 total time=   1.6s
[CV 3/5] END C=100, class_weight={0: 1, 1: 4.0}, gamma=

[CV 2/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.689 total time=   1.6s
[CV 3/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.675 total time=   1.6s
[CV 4/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.735 total time=   1.7s
[CV 5/5] END C=100, class_weight={0: 1, 1: 4.5}, gamma=0.001, kernel=linear;, score=0.664 total time=   1.6s
[CV 1/5] END C=100, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 2/5] END C=100, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 3/5] END C=100, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 4/5] END C=100, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 5/5] END C=100, class_weight=[0.62070658 2.57113821], gamma=10, kernel=rbf;, score=nan total time=   0.0s
[CV 1/5] END C

[CV 2/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.691 total time=   1.1s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.686 total time=   1.1s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.738 total time=   1.2s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=rbf;, score=0.675 total time=   1.1s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   7.1s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.683 total time=   6.8s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.672 total time=   6.9s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.736 total time=   7.9s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 3.0}, gamma=0.001, kernel=linear;, score=0.669 total time=   6.8s
[CV 1/5] END C=1000, c

[CV 4/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.01, kernel=linear;, score=0.739 total time=   7.3s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.01, kernel=linear;, score=0.665 total time=   7.7s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.698 total time=   1.2s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.693 total time=   1.2s
[CV 3/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.689 total time=   1.2s
[CV 4/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.739 total time=   1.3s
[CV 5/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=rbf;, score=0.675 total time=   1.2s
[CV 1/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.690 total time=   7.9s
[CV 2/5] END C=1000, class_weight={0: 1, 1: 4.0}, gamma=0.001, kernel=linear;, score=0.685 total time=   7.3s
[CV 3/5] END C=1000, class_

{'C': 100, 'class_weight': {0: 1, 1: 4.0}, 'gamma': 0.1, 'kernel': 'rbf'}


In [58]:
# performance report
train_pred = gridsearch.predict(X_train)
print(f"Train Accuracy: {accuracy_score(y_train, train_pred)}")

y_pred = gridsearch.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
cm = pd.DataFrame({"Predicted Negative(Absent)": cm[:, 0], "Predicted Positive(Present)": cm[:, 1]})
cm.index = ["Actual Negative(Absent)", "Actual positive(Present)"]
print(classification_report(y_test, y_pred))
y_prob = gridsearch.predict_proba(X_test)
print(f"AUC: {roc_auc_score(y_test, y_prob[:, 1])}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Present F1 score: {f1_score(y_test, y_pred)}")
print(f"Absent F1 score: {f1_score(y_test, y_pred,pos_label=0)}")
print(f"Sensitivity: {recall_score(y_test, y_pred)}")
print(f"Speicificity: {recall_score(y_test, y_pred,pos_label=0)}")
cm

Train Accuracy: 0.7660079051383399
              precision    recall  f1-score   support

           0       0.88      0.82      0.85       509
           1       0.42      0.54      0.48       124

    accuracy                           0.77       633
   macro avg       0.65      0.68      0.66       633
weighted avg       0.79      0.77      0.78       633

AUC: 0.7410799163445085
Test Accuracy: 0.7661927330173776
Present F1 score: 0.47517730496453897
Absent F1 score: 0.8495934959349594
Sensitivity: 0.5403225806451613
Speicificity: 0.8212180746561886


Unnamed: 0,Predicted Negative(Absent),Predicted Positive(Present)
Actual Negative(Absent),418,91
Actual positive(Present),57,67
