# Importing Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import seaborn as sns
sns.set()

# Reading the data

In [2]:
data = pd.read_csv('heart_failure_clinical_records_dataset.csv') 
data.head(5)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


# Scaling the data

In [3]:
Min_max = preprocessing.MinMaxScaler(feature_range = (0,1))

data['age'] = Min_max.fit_transform(data[['age']])
data['creatinine_phosphokinase'] = Min_max.fit_transform(data[['creatinine_phosphokinase']])
data['ejection_fraction'] = Min_max.fit_transform(data[['ejection_fraction']])
data['platelets'] = Min_max.fit_transform(data[['platelets']])
data['serum_creatinine'] = Min_max.fit_transform(data[['serum_creatinine']])
data['serum_sodium'] = Min_max.fit_transform(data[['serum_sodium']])
data['time'] = Min_max.fit_transform(data[['time']])

data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,0.636364,0,0.071319,0,0.090909,1,0.290823,0.157303,0.485714,1,0,0.0,1
1,0.272727,0,1.0,0,0.363636,0,0.288833,0.067416,0.657143,1,0,0.007117,1
2,0.454545,0,0.015693,0,0.090909,0,0.16596,0.089888,0.457143,1,1,0.010676,1
3,0.181818,1,0.011227,0,0.090909,0,0.224148,0.157303,0.685714,1,0,0.010676,1
4,0.454545,1,0.017479,1,0.090909,0,0.365984,0.247191,0.085714,0,0,0.014235,1


In [4]:
X = data.iloc[:,:12].to_numpy()
y = data.iloc[:,12:].to_numpy()

# LinearSVC Model

In [5]:
svc = LinearSVC(max_iter = 5000, dual = False)
svc.fit(X, np.ravel(y))
svc

LinearSVC(dual=False, max_iter=5000)

In [6]:
print("The training accuracy score is:", svc.score(X, np.ravel(y)))
print("===================================================")
print("The testing accuracy score is:", svc.score(X, np.ravel(y)))

The training accuracy score is: 0.8561872909698997
The testing accuracy score is: 0.8561872909698997


In [7]:
results = cross_validate(svc,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00099778 0.00199461 0.00099397 0.00099945 0.00099635]
The score time is: [0.00099897 0.00098848 0.00100732 0.         0.        ]
The test score is: [0.65       0.8        0.86666667 0.85       0.6779661 ]
The train score is: [0.86192469 0.83682008 0.83263598 0.83682008 0.89583333]


# SVC (Linear)

In [8]:
parameters = {'kernel':list(['linear']), 'C':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5, return_train_score = True)
clf.fit(X, np.ravel(y))
clf.cv_results_
# i = 1
# linear_values = []
# while(i <= 10):
#     svc_1 = svm.SVC(kernel ='linear', C = i)
#     svc_1.fit(X_train, np.ravel(y_train))
#     linear_values.append(svc_1.score(X_train, np.ravel(y_train)))
#     i = i + 1
    
# linear_values

{'mean_fit_time': array([0.0017827 , 0.00259318]),
 'std_fit_time': array([0.00074521, 0.00048932]),
 'mean_score_time': array([0.00079885, 0.00040193]),
 'std_score_time': array([0.00074709, 0.00049228]),
 'param_C': masked_array(data=[1, 10],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['linear', 'linear'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'}, {'C': 10, 'kernel': 'linear'}],
 'split0_test_score': array([0.65, 0.7 ]),
 'split1_test_score': array([0.76666667, 0.81666667]),
 'split2_test_score': array([0.88333333, 0.9       ]),
 'split3_test_score': array([0.8       , 0.81666667]),
 'split4_test_score': array([0.6779661, 0.6779661]),
 'mean_test_score': array([0.75559322, 0.78225989]),
 'std_test_score': array([0.08436589, 0.0823095 ]),
 'rank_test_score': array([2, 1]),
 'split0_train_score': array([0.87029289, 0.86192

In [9]:
clf.best_params_

{'C': 10, 'kernel': 'linear'}

In [10]:
svc_1 = svm.SVC(kernel ='linear', C = 1)
svc_1.fit(X, np.ravel(y))
results = cross_validate(svc_1,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00199366 0.00099707 0.00199509 0.00199842 0.00301123]
The score time is: [0.0009985  0.         0.         0.00199771 0.        ]
The test score is: [0.65       0.76666667 0.88333333 0.8        0.6779661 ]
The train score is: [0.87029289 0.82845188 0.80753138 0.85774059 0.90833333]


In [11]:
svc_1 = svm.SVC(kernel ='linear', C = 5)
svc_1.fit(X, np.ravel(y))
results = cross_validate(svc_1,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00099921 0.00199485 0.0029943  0.00199413 0.000983  ]
The score time is: [0.00099707 0.00099659 0.00199723 0.         0.        ]
The test score is: [0.7        0.78333333 0.93333333 0.85       0.6779661 ]
The train score is: [0.87029289 0.84518828 0.82008368 0.84100418 0.90833333]


In [12]:
svc_1 = svm.SVC(kernel ='linear', C = 10)
svc_1.fit(X, np.ravel(y))
results = cross_validate(svc_1,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00199318 0.00300932 0.00299025 0.00298929 0.00198555]
The score time is: [0.         0.00098395 0.         0.         0.00100684]
The test score is: [0.7        0.81666667 0.9        0.81666667 0.6779661 ]
The train score is: [0.86192469 0.84518828 0.82008368 0.84518828 0.9125    ]


# SVC (RBF)

In [13]:
parameters = {'kernel':list(['rbf']), 'C':[1, 10], 'gamma':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5, return_train_score = True)
clf.fit(X, np.ravel(y))
clf.cv_results_
# rbf_values = []
# for i in range(1,11):
#     for j in range(1,11): 
#         svc_2 = svm.SVC(kernel = 'rbf', C = i , gamma = j)
#         svc_2.fit(X_train, np.ravel(y_train))
#         rbf_values.append(svc_2.score(X_train, np.ravel(y_train)))
        
# rbf_values

{'mean_fit_time': array([0.00279198, 0.00328856, 0.00220027, 0.00258617]),
 'std_fit_time': array([0.00039848, 0.00123938, 0.00039603, 0.00049551]),
 'mean_score_time': array([0.00179648, 0.00070477, 0.00059814, 0.00039153]),
 'std_score_time': array([0.00116352, 0.0003974 , 0.00048838, 0.00047966]),
 'param_C': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_gamma': masked_array(data=[1, 10, 1, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'rbf', 'rbf', 'rbf'],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'gamma': 1, 'kernel': 'rbf'},
  {'C': 1, 'gamma': 10, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 1, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 10, 'kernel': 'rbf'}],
 'split0_test_score': array([0.68333333, 0.56666667, 0.

In [14]:
clf.best_params_

{'C': 1, 'gamma': 1, 'kernel': 'rbf'}

In [36]:
svc_2 = svm.SVC(kernel = 'rbf', C = 1 , gamma = 'scale')
svc_2.fit(X, np.ravel(y))
results = cross_validate(svc_2,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00199461 0.00099754 0.0039947  0.00196266 0.00099683]
The score time is: [0.00099754 0.00099754 0.00098157 0.00099754 0.0009973 ]
The test score is: [0.73333333 0.8        0.8        0.73333333 0.6779661 ]
The train score is: [0.89121339 0.84100418 0.84518828 0.85355649 0.8875    ]


In [16]:
svc_2 = svm.SVC(kernel = 'rbf', C = 1 , gamma = 10)
svc_2.fit(X, np.ravel(y))
results = cross_validate(svc_2,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00498676 0.00398636 0.00397015 0.00299168 0.00200534]
The score time is: [0.00199437 0.00103068 0.00197625 0.         0.00196934]
The test score is: [0.56666667 0.66666667 0.66666667 0.68333333 0.6779661 ]
The train score is: [0.9665272 0.958159  0.9623431 0.9707113 0.975    ]


In [17]:
svc_2 = svm.SVC(kernel = 'rbf', C = 10 , gamma = 1)
svc_2.fit(X, np.ravel(y))
results = cross_validate(svc_2,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00199604 0.00302219 0.00298452 0.00398898 0.00199413]
The score time is: [0.         0.00097036 0.0009706  0.00099707 0.00099754]
The test score is: [0.63333333 0.73333333 0.76666667 0.71666667 0.6779661 ]
The train score is: [0.958159   0.94142259 0.93723849 0.9539749  0.96666667]


In [18]:
svc_2 = svm.SVC(kernel = 'rbf', C = 50 , gamma = 10)
svc_2.fit(X, np.ravel(y))
results = cross_validate(svc_2,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00498676 0.004987   0.00299263 0.00498605 0.00395751]
The score time is: [0.0009973  0.00099659 0.         0.         0.        ]
The test score is: [0.46666667 0.63333333 0.61666667 0.6        0.6779661 ]
The train score is: [1. 1. 1. 1. 1.]


In [19]:
svc_2 = svm.SVC(kernel = 'rbf', C = 10 , gamma = 50)
svc_2.fit(X, np.ravel(y))
results = cross_validate(svc_2,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00298882 0.0049839  0.00398946 0.00601888 0.00498557]
The score time is: [0.00102949 0.00099707 0.00099707 0.00296521 0.0009985 ]
The test score is: [0.63333333 0.66666667 0.68333333 0.68333333 0.6779661 ]
The train score is: [1. 1. 1. 1. 1.]


# SVC (Polynomial)

In [20]:
parameters = {'kernel':list(['poly']), 'degree':[1, 10], 'gamma':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5, return_train_score = True)
clf.fit(X, np.ravel(y))
clf.cv_results_
# i = 2
# poly_values = []
# while(i <= 11):
#     svc_3 = svm.SVC(kernel = 'poly', degree = i)
#     svc_3.fit(X_train, y_train)
#     poly_values.append(svc_3.score(X_train, np.ravel(y_train)))
#     i = i + 1
    
# # np.ravel(y_train).shape
# poly_values

{'mean_fit_time': array([0.00239067, 0.0035913 , 0.00299187, 0.00359659]),
 'std_fit_time': array([0.00048344, 0.00048716, 0.00063098, 0.00049434]),
 'mean_score_time': array([0.00099726, 0.0013979 , 0.00099783, 0.0013905 ]),
 'std_score_time': array([1.92750768e-06, 7.98929031e-04, 1.27414601e-06, 4.81263299e-04]),
 'param_degree': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_gamma': masked_array(data=[1, 10, 1, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['poly', 'poly', 'poly', 'poly'],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'degree': 1, 'gamma': 1, 'kernel': 'poly'},
  {'degree': 1, 'gamma': 10, 'kernel': 'poly'},
  {'degree': 10, 'gamma': 1, 'kernel': 'poly'},
  {'degree': 10, 'gamma': 10, 'kernel': 'poly'}],
 'spli

In [21]:
clf.best_params_

{'degree': 1, 'gamma': 10, 'kernel': 'poly'}

In [22]:
svc_3 = svm.SVC(kernel = 'poly', degree = 3)
svc_3.fit(X, np.ravel(y))
results = cross_validate(svc_3,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00498652 0.00199485 0.00399613 0.00298667 0.0029943 ]
The score time is: [0.00199533 0.00099778 0.00100827 0.00099421 0.00099635]
The test score is: [0.65       0.78333333 0.75       0.76666667 0.6779661 ]
The train score is: [0.90794979 0.87029289 0.84100418 0.87029289 0.90416667]


In [23]:
svc_3 = svm.SVC(kernel = 'poly', degree = 5)
svc_3.fit(X, np.ravel(y))
results = cross_validate(svc_3,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00398898 0.00398803 0.00398469 0.00199461 0.00199199]
The score time is: [0.00099754 0.00200033 0.00099802 0.0009973  0.00100684]
The test score is: [0.66666667 0.71666667 0.73333333 0.7        0.6779661 ]
The train score is: [0.94560669 0.93305439 0.92468619 0.93723849 0.9375    ]


In [24]:
svc_3 = svm.SVC(kernel = 'poly', degree = 10)
svc_3.fit(X, np.ravel(y))
results = cross_validate(svc_3,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00299788 0.00498676 0.00299168 0.00299263 0.00199413]
The score time is: [0. 0. 0. 0. 0.]
The test score is: [0.63333333 0.58333333 0.58333333 0.73333333 0.6779661 ]
The train score is: [0.9748954 0.9707113 0.9707113 0.9790795 0.975    ]


# SVC (Sigmoid)

In [25]:
parameters = {'kernel':list(['sigmoid']), 'gamma':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5, return_train_score = True)
clf.fit(X, np.ravel(y))
clf.cv_results_

# i = 2
# sigmoid_values = []
# while(i <= 11):
#     svc_4 = svm.SVC(kernel = 'sigmoid', gamma = i)
#     svc_4.fit(X_train, y_train)
#     sigmoid_values.append(svc_4.score(X_train, np.ravel(y_train)))
#     i = i + 1
# sigmoid_values

{'mean_fit_time': array([0.00278673, 0.00119305]),
 'std_fit_time': array([0.00040184, 0.00039339]),
 'mean_score_time': array([0.00099807, 0.00119672]),
 'std_score_time': array([1.43368686e-06, 3.98707686e-04]),
 'param_gamma': masked_array(data=[1, 10],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['sigmoid', 'sigmoid'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'gamma': 1, 'kernel': 'sigmoid'},
  {'gamma': 10, 'kernel': 'sigmoid'}],
 'split0_test_score': array([0.58333333, 0.66666667]),
 'split1_test_score': array([0.68333333, 0.68333333]),
 'split2_test_score': array([0.55      , 0.68333333]),
 'split3_test_score': array([0.55      , 0.68333333]),
 'split4_test_score': array([0.71186441, 0.6779661 ]),
 'mean_test_score': array([0.61570621, 0.67892655]),
 'std_test_score': array([0.06856011, 0.00647281]),
 'rank_test_score': array([2, 1]),
 'split0_

In [26]:
clf.best_params_

{'gamma': 10, 'kernel': 'sigmoid'}

In [27]:
svc_4 = svm.SVC(kernel = 'sigmoid', gamma = 1)
svc_4.fit(X, np.ravel(y))
results = cross_validate(svc_4,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00299072 0.00199628 0.0039897  0.00299144 0.00199223]
The score time is: [0.00099754 0.0009954  0.         0.         0.00099659]
The test score is: [0.58333333 0.68333333 0.55       0.55       0.71186441]
The train score is: [0.57740586 0.53138075 0.56066946 0.58158996 0.7       ]


In [28]:
svc_4 = svm.SVC(kernel = 'sigmoid', gamma = 5)
svc_4.fit(X, np.ravel(y))
results = cross_validate(svc_4,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00299025 0.00299358 0.00299311 0.00199437 0.00299191]
The score time is: [0.00099778 0.00101733 0.         0.         0.00099683]
The test score is: [0.66666667 0.68333333 0.68333333 0.68333333 0.6779661 ]
The train score is: [0.68200837 0.67782427 0.67782427 0.67782427 0.67916667]


In [29]:
svc_4 = svm.SVC(kernel = 'sigmoid', gamma = 10)
svc_4.fit(X, np.ravel(y))
results = cross_validate(svc_4,X, np.ravel(y),cv=5, return_train_score= True)
print("The fit time is:", results['fit_time'])
print("The score time is:", results['score_time'])
print("The test score is:", results['test_score'])
print("The train score is:", results['train_score'])

The fit time is: [0.00298905 0.0029974  0.00399351 0.00298643 0.00498557]
The score time is: [0.00100756 0.         0.00299335 0.00099444 0.00199819]
The test score is: [0.66666667 0.68333333 0.68333333 0.68333333 0.6779661 ]
The train score is: [0.68200837 0.67782427 0.67782427 0.67782427 0.67916667]


# Best Kernel & their parameters

In [30]:
parameters = {'kernel':('linear', 'rbf', 'poly', 'sigmoid'), 'C':[1, 10], 'degree':[1, 10], 'gamma':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5, return_train_score = True)
clf.fit(X, np.ravel(y))
clf.cv_results_

{'mean_fit_time': array([0.0029984 , 0.00299196, 0.00099854, 0.0031919 , 0.00139604,
        0.0033905 , 0.00180745, 0.00219064, 0.00100241, 0.00197906,
        0.00267377, 0.00180039, 0.00118651, 0.00298672, 0.00260296,
        0.00199499, 0.00178757, 0.0019856 , 0.00200152, 0.00201578,
        0.00220494, 0.00259147, 0.01057262, 0.0017941 , 0.00218992,
        0.00200734, 0.00218868, 0.00240488, 0.00179944, 0.00238686,
        0.00258861, 0.00202241]),
 'std_fit_time': array([6.40593506e-04, 1.09049380e-03, 1.56994606e-06, 7.46238215e-04,
        4.88352086e-04, 4.87196250e-04, 7.46877075e-04, 4.02571995e-04,
        1.44148433e-05, 6.31323455e-04, 1.38759794e-03, 4.02152897e-04,
        3.88680688e-04, 6.31664843e-04, 7.99427679e-04, 6.21797420e-04,
        4.10765574e-04, 2.09300554e-05, 6.21528638e-04, 6.30732688e-04,
        7.48951404e-04, 5.01594673e-04, 6.09022610e-03, 3.99681453e-04,
        7.54059967e-04, 1.82028174e-05, 3.84338115e-04, 8.22969498e-04,
        4.02249030e-0

In [33]:
# df = pd.DataFrame(clf.cv_results_)
# df.to_csv('C:\\Users\\yashg\\Desktop\\Semester 8\\Machine Learning\\SVC_Results.csv')

In [34]:
clf.best_params_

{'C': 1, 'degree': 1, 'gamma': 10, 'kernel': 'poly'}

# 