In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score,roc_curve,f1_score
from tensorflow.keras.utils import plot_model
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow. keras.layers import Dense,Dropout
from tensorflow.keras.optimizers import RMSprop 

from sklearn.svm import SVC
import pandas as pd
import numpy as np

## Import Data as dataframe

In [2]:
cleveland_data = pd.read_csv(open('processed.cleveland.data'), delimiter=',', header=None, na_values = '?')
cleveland_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1


In [3]:
cleveland_data[13].unique()

array([0, 2, 1, 3, 4], dtype=int64)

### So target class contains 5 classes

## There were total 6 null values ( due to ?). Droped all those as number of observations were much higher than number of null values 

In [4]:
cleveland_data = cleveland_data.dropna()

## Converted dataframe into numpy array

In [5]:
dataset = cleveland_data.to_numpy()
dataset

array([[63.,  1.,  1., ...,  0.,  6.,  0.],
       [67.,  1.,  4., ...,  3.,  3.,  2.],
       [67.,  1.,  4., ...,  2.,  7.,  1.],
       ...,
       [68.,  1.,  4., ...,  2.,  7.,  2.],
       [57.,  1.,  4., ...,  1.,  7.,  3.],
       [57.,  0.,  2., ...,  1.,  3.,  1.]])

In [7]:
# Split the dataset into Predictors and Target variable
X = dataset[:,0:13]
y = dataset[:,13]
print(X.shape,y.shape)


# Train-test-split( 70:30)
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3,random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(297, 13) (297,)
(207, 13) (90, 13) (207,) (90,)


# SVM Model with scoring accuracy, and f1 score

In [8]:
clf = SVC(kernel='linear', C=1, random_state=42,probability=True)
clf.fit(X_train, y_train)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [9]:
y_pred = clf.predict(X_test)
y_pred

array([0., 0., 0., 4., 0., 0., 0., 0., 2., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 2., 0., 2., 0., 0., 0., 2., 0., 3., 0., 0., 0., 0., 0., 3., 0.,
       0., 0., 2., 0., 4., 0., 0., 2., 3., 1., 0., 0., 0., 1., 0., 0., 0.,
       4., 0., 0., 1., 0., 0., 0., 4., 3., 0., 0., 0., 0., 0., 1., 3., 0.,
       2., 0., 1., 4., 4., 0., 2., 0., 0., 0., 0., 0., 0., 1., 4., 3., 3.,
       0., 0., 4., 0., 4.])

In [10]:
confusion_matrix(y_test, y_pred)

array([[47,  1,  0,  1,  0],
       [ 9,  1,  2,  4,  1],
       [ 2,  2,  1,  1,  4],
       [ 0,  2,  5,  0,  4],
       [ 2,  0,  0,  1,  0]], dtype=int64)

In [11]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.78      0.96      0.86        49
         1.0       0.17      0.06      0.09        17
         2.0       0.12      0.10      0.11        10
         3.0       0.00      0.00      0.00        11
         4.0       0.00      0.00      0.00         3

    accuracy                           0.54        90
   macro avg       0.21      0.22      0.21        90
weighted avg       0.47      0.54      0.50        90



## Decision Tree with scoring accuracy, and f1

In [12]:
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 500).fit(X_train, y_train)

In [13]:
dtree_pred = dtree_model.predict(X_test)
dtree_pred

array([0., 1., 0., 3., 0., 1., 0., 0., 4., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 3., 3., 3., 1., 0., 2., 0., 2., 1., 0., 0., 0., 0., 1., 2.,
       0., 0., 1., 1., 4., 4., 0., 1., 3., 1., 0., 4., 1., 2., 1., 0., 0.,
       4., 0., 0., 4., 0., 0., 0., 2., 4., 0., 0., 1., 0., 0., 1., 0., 0.,
       2., 1., 2., 4., 3., 0., 2., 0., 1., 1., 0., 0., 0., 1., 2., 1., 3.,
       0., 0., 1., 1., 2.])

In [14]:
confusion_matrix(y_test, dtree_pred)

array([[36, 10,  1,  1,  1],
       [ 7,  4,  2,  2,  2],
       [ 0,  4,  3,  2,  1],
       [ 0,  3,  4,  1,  3],
       [ 0,  1,  0,  1,  1]], dtype=int64)

In [15]:
print(classification_report(y_test, dtree_pred))

              precision    recall  f1-score   support

         0.0       0.84      0.73      0.78        49
         1.0       0.18      0.24      0.21        17
         2.0       0.30      0.30      0.30        10
         3.0       0.14      0.09      0.11        11
         4.0       0.12      0.33      0.18         3

    accuracy                           0.50        90
   macro avg       0.32      0.34      0.32        90
weighted avg       0.55      0.50      0.52        90



## Random Forest with scoring accuracy, and f1

In [16]:
from sklearn.ensemble import RandomForestClassifier
rfmodel = RandomForestClassifier(max_depth=500, random_state=123).fit(X_train, y_train)

In [17]:
rf_pred =  rfmodel.predict(X_test)
rf_pred

array([0., 0., 0., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 4., 2., 0., 2., 3., 1., 0., 0., 0., 0., 0., 0., 0.,
       2., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       2., 0., 0., 2., 1., 0., 2., 0., 0., 0., 0., 0., 0., 0., 2., 3., 1.,
       0., 0., 0., 0., 2.])

In [18]:
confusion_matrix(y_test, rf_pred)

array([[47,  2,  0,  0,  0],
       [11,  2,  3,  1,  0],
       [ 6,  1,  2,  1,  0],
       [ 3,  3,  4,  0,  1],
       [ 2,  1,  0,  0,  0]], dtype=int64)

In [19]:
print(classification_report(y_test, rf_pred))

              precision    recall  f1-score   support

         0.0       0.68      0.96      0.80        49
         1.0       0.22      0.12      0.15        17
         2.0       0.22      0.20      0.21        10
         3.0       0.00      0.00      0.00        11
         4.0       0.00      0.00      0.00         3

    accuracy                           0.57        90
   macro avg       0.23      0.26      0.23        90
weighted avg       0.44      0.57      0.49        90



### Naive Bayes with GaussianNB

In [20]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB().fit(X_train, y_train)

In [21]:
gnb_pred = gnb.predict(X_test)
gnb_pred

array([0., 0., 0., 4., 0., 0., 0., 1., 4., 0., 1., 0., 0., 0., 0., 0., 0.,
       1., 2., 0., 1., 1., 4., 0., 4., 0., 4., 1., 0., 0., 0., 0., 4., 0.,
       0., 0., 0., 0., 4., 4., 0., 2., 4., 1., 0., 1., 0., 1., 0., 0., 0.,
       4., 0., 0., 4., 4., 0., 0., 4., 1., 0., 0., 0., 0., 0., 1., 4., 0.,
       2., 4., 1., 4., 4., 0., 4., 0., 0., 0., 0., 0., 0., 4., 4., 3., 2.,
       0., 4., 4., 0., 4.])

In [22]:
confusion_matrix(y_test, gnb_pred)

array([[44,  4,  0,  0,  1],
       [ 5,  3,  1,  1,  7],
       [ 0,  2,  1,  0,  7],
       [ 1,  1,  2,  0,  7],
       [ 0,  2,  0,  0,  1]], dtype=int64)

In [23]:
print(classification_report(y_test, gnb_pred))

              precision    recall  f1-score   support

         0.0       0.88      0.90      0.89        49
         1.0       0.25      0.18      0.21        17
         2.0       0.25      0.10      0.14        10
         3.0       0.00      0.00      0.00        11
         4.0       0.04      0.33      0.08         3

    accuracy                           0.54        90
   macro avg       0.28      0.30      0.26        90
weighted avg       0.56      0.54      0.54        90



# Deep learning Model 

In [24]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train = encoder.transform(y_train)
dummy_y_train = np_utils.to_categorical(encoded_y_train)

Using TensorFlow backend.


In [25]:
dummy_y_train

array([[0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       ...,
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.]], dtype=float32)

In [26]:
encoder = LabelEncoder()
encoder.fit(y_test)
encoded_y_test = encoder.transform(y_test)
dummy_y_test = np_utils.to_categorical(encoded_y_test)

In [184]:
dummy_y_test[:5]

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.]], dtype=float32)

### Model Generation

In [43]:
model = Sequential() 
model.add(layers.Dense(64, input_dim=13, activation='relu')) 
model.add(layers.Dense(32, activation = 'relu')) 
model.add(layers.Dropout(0.2)) 
model.add(layers.Dense(16, activation = 'relu')) 
model.add(layers.Dense(5, activation='sigmoid'))

In [44]:
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 64)                896       
_________________________________________________________________
dense_5 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 85        
Total params: 3,589
Trainable params: 3,589
Non-trainable params: 0
_________________________________________________________________
None


In [45]:
model.compile(loss ='categorical_crossentropy',optimizer = RMSprop(), metrics = ['categorical_accuracy'])
model.fit(X_train, dummy_y_train, epochs=500, batch_size=100,verbose=0)

<tensorflow.python.keras.callbacks.History at 0x1b70b6b4e88>

In [46]:
loss, accuracy = model.evaluate(X_test, dummy_y_test,verbose=0)
print('Model Loss: %.2f, Accuracy: %.2f' % ((loss*100),(accuracy*100)))

Model Loss: 104.03, Accuracy: 56.67


In [47]:
deep_pred = model.predict_classes(X_test)
deep_pred

array([0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 2, 0, 0, 2,
       0, 3], dtype=int64)

In [48]:
confusion_matrix(y_test, deep_pred)

array([[49,  0,  0,  0,  0],
       [14,  0,  2,  1,  0],
       [ 7,  0,  2,  1,  0],
       [ 7,  0,  3,  0,  1],
       [ 3,  0,  0,  0,  0]], dtype=int64)

In [49]:
print(classification_report(y_test, deep_pred))

              precision    recall  f1-score   support

         0.0       0.61      1.00      0.76        49
         1.0       0.00      0.00      0.00        17
         2.0       0.29      0.20      0.24        10
         3.0       0.00      0.00      0.00        11
         4.0       0.00      0.00      0.00         3

    accuracy                           0.57        90
   macro avg       0.18      0.24      0.20        90
weighted avg       0.37      0.57      0.44        90

