In [83]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import plot_model
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow. keras.layers import Dense,Dropout

from sklearn import svm
import pandas as pd
import numpy as np

## Import Data as dataframe

In [84]:
cleveland_data = pd.read_csv(open('processed.cleveland.data'), delimiter=',', header=None, na_values = '?')
# Converted all the classes 1,2,3,4 into 1
cleveland_data[13]= np.where(cleveland_data[13] > 1, 1 ,0)
cleveland_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,0
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,0
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,1
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,1
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,0


## There were total 6 null values ( due to ?). Droped all those as number of observations were much higher than number of null values 

In [85]:
cleveland_data = cleveland_data.dropna()

## Converted dataframe into numpy array

In [86]:
dataset = cleveland_data.to_numpy()
dataset

array([[63.,  1.,  1., ...,  0.,  6.,  0.],
       [67.,  1.,  4., ...,  3.,  3.,  1.],
       [67.,  1.,  4., ...,  2.,  7.,  0.],
       ...,
       [68.,  1.,  4., ...,  2.,  7.,  1.],
       [57.,  1.,  4., ...,  1.,  7.,  1.],
       [57.,  0.,  2., ...,  1.,  3.,  0.]])

In [87]:
# Split the dataset into Predictors and Target variable
x = dataset[:,0:13]
y = dataset[:,13]

print(x.shape,y.shape)

(297, 13) (297,)


# SVM Model with 10 fold cross validation with scoring accuracy, f1 and roc_auc

In [88]:
scoring = ['accuracy', 'f1','roc_auc']
clf = svm.SVC(kernel='linear', C=1, random_state=42)
cross_validate(clf, x, y, cv=10,scoring=scoring)

{'fit_time': array([0.44081974, 0.29421258, 0.50664425, 0.61638141, 0.5006659 ,
        0.20744014, 0.3201437 , 0.26731229, 0.53357315, 0.45375848]),
 'score_time': array([0.00199485, 0.00199533, 0.00199533, 0.00198674, 0.00199866,
        0.00199413, 0.00199461, 0.00199437, 0.00199437, 0.00199461]),
 'test_accuracy': array([0.9       , 0.8       , 0.83333333, 0.83333333, 0.96666667,
        0.76666667, 0.9       , 0.72413793, 0.86206897, 0.93103448]),
 'test_f1': array([0.82352941, 0.625     , 0.70588235, 0.66666667, 0.94736842,
        0.66666667, 0.82352941, 0.33333333, 0.75      , 0.875     ]),
 'test_roc_auc': array([0.92613636, 0.84659091, 0.88068182, 0.93181818, 0.98412698,
        0.75132275, 0.96825397, 0.85119048, 0.875     , 0.9702381 ])}

# Deep learning Model 

### Model Generation

In [89]:
model = Sequential() 
model.add(layers.Dense(64, input_dim=13, activation='relu')) 
model.add(layers.Dense(32, activation = 'relu')) 
model.add(layers.Dropout(0.2)) 
model.add(layers.Dense(16, activation = 'relu')) 
model.add(layers.Dense(1, activation='sigmoid'))

In [90]:
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                896       
_________________________________________________________________
dense_5 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 17        
Total params: 3,521
Trainable params: 3,521
Non-trainable params: 0
_________________________________________________________________
None


### Created functions for F1 score

In [91]:
from keras import backend as K

def recall(y_true, y_pred):
    y_true = K.ones_like(y_true) 
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    
    recall = true_positives / (all_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    y_true = K.ones_like(y_true) 
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_score(y_true, y_pred):
    precision_m = precision(y_true, y_pred)
    recall_m = recall(y_true, y_pred)
    return 2*((precision_m*recall_m)/(precision_m+recall_m+K.epsilon()))

### Train the model with 10 fold cross validations with the matrics accuracy, F1 score and AUC

In [92]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=101)
cvscores = []
for train, test in kfold.split(x, y):
   # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy',f1_score, keras.metrics.AUC()])
    # Fit the model
    model.fit(x[train], y[train], epochs=150, batch_size=10, verbose=0)
    # evaluate the model
    scores = model.evaluate(x[test], y[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]*100))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]*100))
    cvscores.append(scores[1] * 100)


accuracy: 80.00%
f1_score: 57.14%
auc_54: 89.77%
accuracy: 86.67%
f1_score: 57.14%
auc_55: 98.58%
accuracy: 83.33%
f1_score: 53.66%
auc_56: 89.77%
accuracy: 86.67%
f1_score: 50.00%
auc_57: 93.18%
accuracy: 70.00%
f1_score: 57.14%
auc_58: 77.78%
accuracy: 83.33%
f1_score: 57.14%
auc_59: 92.59%
accuracy: 80.00%
f1_score: 60.47%
auc_60: 86.77%
accuracy: 96.55%
f1_score: 38.89%
auc_61: 99.40%
accuracy: 86.21%
f1_score: 24.24%
auc_62: 96.73%
accuracy: 86.21%
f1_score: 43.24%
auc_63: 92.86%
