In [1]:
# Importing the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [2]:

# Reading the CKD dataset
dataset = pd.read_csv('CKD.csv')


In [3]:

# One-hot encoding categorical features (if any)
dataset = pd.get_dummies(dataset, drop_first=True)


In [4]:

# Separating independent and dependent variables
indep = dataset[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes']] 
dep = dataset['classification_yes']

In [5]:

# Splitting the dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size=1/3, random_state=0)


In [6]:

# Scaling the features
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [7]:

# Creating the SVM classifier and parameter grid
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

param_grid = {'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
             'gamma': ['auto', 'scale'],
             'C': [10, 100, 1000, 2000, 3000]}


In [8]:

# Performing grid search cross-validation
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='f1_weighted')
grid.fit(X_train, y_train)


Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [9]:

# Evaluating the best model on the testing set
grid_predictions = grid.predict(X_test)


In [10]:

# Printing performance metrics
from sklearn.metrics import confusion_matrix, classification_report, f1_score, roc_auc_score

cm = confusion_matrix(y_test, grid_predictions)
clf_report = classification_report(y_test, grid_predictions)
f1_macro = f1_score(y_test, grid_predictions, average='weighted')
roc_auc = roc_auc_score(y_test, grid_predictions)

print("Best parameters:", grid.best_params_)
print("F1-macro score:", f1_macro)
print("Confusion matrix:\n", cm)
print("Classification report:\n", clf_report)
print("ROC AUC score:", roc_auc)



Best parameters: {'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}
F1-macro score: 0.9924946382275899
Confusion matrix:
 [[51  0]
 [ 1 81]]
Classification report:
               precision    recall  f1-score   support

       False       0.98      1.00      0.99        51
        True       1.00      0.99      0.99        82

    accuracy                           0.99       133
   macro avg       0.99      0.99      0.99       133
weighted avg       0.99      0.99      0.99       133

ROC AUC score: 0.9939024390243902


In [11]:

# Displaying the results as a table
table = pd.DataFrame.from_dict(grid.cv_results_)
print(table)

    mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0        0.008243      0.006419         0.003017        0.003815      10   
1        0.006248      0.007653         0.006249        0.007653      10   
2        0.003124      0.006249         0.006249        0.007653      10   
3        0.006249      0.007653         0.009373        0.007653      10   
4        0.003124      0.006249         0.006249        0.007653      10   
5        0.006249      0.007653         0.006249        0.007653      10   
6        0.006248      0.007652         0.003124        0.006248      10   
7        0.003125      0.006249         0.006249        0.007653      10   
8        0.003124      0.006248         0.003124        0.006248     100   
9        0.003124      0.006248         0.006249        0.007653     100   
10       0.006249      0.007653         0.006249        0.007653     100   
11       0.003124      0.006248         0.003124        0.006249     100   
12       0.0