# Objective
- Train a base SVM model without oversampling or feature selection, then fine-tune it
- Train a base SVM model with oversampling and feature selection, then fine-tune it

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
import models.uci_heart_disease_dataset as uci
warnings.filterwarnings("ignore")

In [8]:
# Read the preprocessed Cleveland file, labelling the columns with the
# project's standard feature names.
csv_path = uci.UCIHeartDiseaseDataFile.cleveland_preprocessed
column_names = uci.get_standard_features()
data = pd.read_csv(csv_path, names=column_names)

print(f'Data shape: {data.shape}.')
data.head(3)

Data shape: (299, 14).


Unnamed: 0,Age,Gender,Chest Pain,BP Systolic,Cholesterol,Blood Sugar,Rest ECG,Exe. Max Heartrate,Exe. Induced Angina,Exe. ST Depression,Exe. ST Segment Slope,Major Vessels,Thalassemia,Target
0,63,1,1,145,233.0,1,2,150,0,2.3,3,0,6,0
1,67,1,4,160,286.0,0,2,108,1,1.5,2,3,3,1
2,67,1,4,120,229.0,0,2,129,1,2.6,2,2,7,1


In [9]:
# Separate the label column from the feature columns.
target_column = uci.UCIHeartDiseaseData.target
y = data[target_column]
X = data.drop(columns=target_column)

# Keep the feature names in their own variable for later reference.
columns = X.columns
X

Unnamed: 0,Age,Gender,Chest Pain,BP Systolic,Cholesterol,Blood Sugar,Rest ECG,Exe. Max Heartrate,Exe. Induced Angina,Exe. ST Depression,Exe. ST Segment Slope,Major Vessels,Thalassemia
0,63,1,1,145,233.0,1,2,150,0,2.3,3,0,6
1,67,1,4,160,286.0,0,2,108,1,1.5,2,3,3
2,67,1,4,120,229.0,0,2,129,1,2.6,2,2,7
3,37,1,3,130,250.0,0,0,187,0,3.5,3,0,3
4,41,0,2,130,204.0,0,2,172,0,1.4,1,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,68,1,4,144,193.0,1,0,141,0,3.4,2,2,7
295,57,1,4,130,131.0,0,0,115,1,1.2,2,1,7
296,57,0,2,130,236.0,0,2,174,0,0.0,2,1,3
297,56,1,4,120,100.0,0,0,120,1,1.5,2,0,7


In [None]:
# Display the target labels that were separated from the features above.
y

In [None]:
def build_base_model():
    """Fit a default ``SVC`` on the current training split and report test results.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``,
    so a train/test split cell must have been run before calling this.
    Prints the accuracy (as a percentage) followed by the classification
    report; nothing is returned.
    """
    classifier = SVC()
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    accuracy_pct = 100 * accuracy_score(y_test, predictions)
    print("SVM - Accuracy: {:.3f}.".format(accuracy_pct))

    print("\nClassification Report")
    print(classification_report(y_test, predictions))

## Feature Selection

In [None]:
from sklearn.feature_selection import SelectKBest, chi2

# Chi-squared scoring of every feature against the target. k=13 keeps all
# thirteen feature columns, so this cell ranks features rather than dropping any.
chi_squared = SelectKBest(score_func=chi2, k=13).fit(X, y)

feature_score = pd.DataFrame(
    {
        "Score": chi_squared.scores_,
        "P_Value": np.round(chi_squared.pvalues_, 3),
    },
    index=X.columns,
)
feature_score.nlargest(13, "Score")

## Class Balancing

In [None]:
from collections import Counter
# Requires the imbalanced-learn package (imported as `imblearn`).
from imblearn.over_sampling import SMOTE

# SMOTE synthesises minority-class samples at random. Seed it (0, matching the
# train/test splits) so the balanced set is the same on every
# Restart & Run All instead of changing from run to run.
# NOTE(review): resampling happens before the train/test split, so synthetic
# points derived from rows that later land in the test set can end up in the
# training data -- consider oversampling the training split only.
X_b, y_b = SMOTE(random_state=0).fit_resample(X, y)

# Show the class counts after balancing, as a small bar chart and as raw numbers.
fig, ax = plt.subplots(figsize=(3, 3))
sns.countplot(x=y_b, ax=ax)
print(Counter(y_b))
X_b.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the oversampled data for testing; the seed is fixed so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_b,
    y_b,
    test_size=0.20,
    random_state=0,
)

In [10]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column. X is rebound here from a DataFrame to the
# array returned by the scaler.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics influence the scaling -- confirm this is
# acceptable or fit on the training split only.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [11]:
# Peek at the first five scaled rows.
X[:5]

array([[ 0.94044585,  0.68768191, -2.24829037,  0.74975985, -0.26286667,
         2.43997713,  1.01690995,  0.02912357, -0.70356236,  1.06947501,
         2.26429883, -0.71830622,  0.64724282],
       [ 1.38414338,  0.68768191,  0.86873662,  1.59635425,  0.74772238,
        -0.40983991,  1.01690995, -1.79044733,  1.42133811,  0.380309  ,
         0.64073684,  2.48726932, -0.900962  ],
       [ 1.38414338,  0.68768191,  0.86873662, -0.66123081, -0.33913754,
        -0.40983991,  1.01690995, -0.88066188,  1.42133811,  1.32791226,
         0.64073684,  1.41874414,  1.16331109],
       [-1.94358809,  0.68768191, -0.17027238, -0.09683454,  0.06128454,
        -0.40983991, -0.99670645,  1.63207888, -0.70356236,  2.10322401,
         2.26429883, -0.71830622, -0.900962  ],
       [-1.49989056, -1.4541607 , -1.20928137, -0.09683454, -0.81583048,
        -0.40983991,  1.01690995,  0.98223213, -0.70356236,  0.29416325,
        -0.98282515, -0.71830622, -0.900962  ]])

In [12]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split of the scaled features, with a fixed seed so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# First five rows of the held-out features.
X_test[:5]

In [None]:
# First five held-out labels.
y_test.head(5)

# Base model
- classification report
- confusion matrix

In [13]:
# Baseline: an SVC with default hyper-parameters, evaluated on the held-out split.
svc = SVC().fit(X_train, y_train)
y_pred = svc.predict(X_test)

acc = 100 * accuracy_score(y_test, y_pred)
print("SVM - Accuracy: {:.3f}.".format(acc))

print("\nClassification Report")
print(classification_report(y_test, y_pred))

SVM - Accuracy: 85.000.

Classification Report
              precision    recall  f1-score   support

           0       0.78      0.97      0.86        29
           1       0.96      0.74      0.84        31

    accuracy                           0.85        60
   macro avg       0.87      0.85      0.85        60
weighted avg       0.87      0.85      0.85        60



In [None]:
# Per the classification report above, the 60 test records split into
# 29 of class 0 and 31 of class 1 (close to, but not exactly, balanced).
# The predictions split 24 (class 1) : 36 (class 0); count them to compare.
predicted_labels = pd.DataFrame(y_pred)
predicted_labels.value_counts()

- <b>Precision</b> - 96% (23/24) of the disease predictions were correct (per the classification report above)
- <b>Recall</b> - 74% (23/31) of the actual disease cases were predicted as disease

In [None]:
# Confusion matrix of the latest predictions: rows are the true labels,
# columns are the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [None]:
# Draw the confusion matrix as an annotated heat map on an explicit Axes.
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm,
    annot=True,
    linewidths=.5,
    square=True,
    cmap='Blues',
    cbar=False,
    ax=ax,
)
ax.set_ylabel('Truth')
ax.set_xlabel('Predicted')
ax.set_title("Confusion Matrix for Predicting Heart Disease (True=1, False=0)", size=12);

### GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from numpy import arange

grid_svc = SVC()

# Hyper-parameter grid explored exhaustively by the search.
parameters = {
    'kernel': ['rbf', 'poly', 'linear', 'sigmoid'],
    'C': arange(1, 10, 1),
    'gamma': ['scale', 'auto'],
    # SVC accepts None, 'balanced' or an actual dict of weights here. The
    # string 'dict' is not a valid value: every candidate using it fails to
    # fit, which wasted half of the grid while the notebook's warnings filter
    # hid the failures. None (all classes weighted equally) is the intended
    # alternative to 'balanced'.
    'class_weight': [None, 'balanced'],
}

# Exhaustive search with 5-fold cross-validation, scored on accuracy.
# (Pass n_jobs=-1 to evaluate candidates in parallel.)
grid_search_svc = GridSearchCV(grid_svc, parameters, scoring='accuracy', cv=5)
grid_search_svc.fit(X_train, y_train)

best_parameters_svc = grid_search_svc.best_params_
best_score_svc = grid_search_svc.best_score_
print(best_parameters_svc)
print(best_score_svc)

# Evaluate the refitted best estimator on the held-out split.
y_pred = grid_search_svc.predict(X_test)

svc_acc = accuracy_score(y_test, y_pred) * 100
# For a single-label problem, micro-averaged precision, recall and F1 are all
# equal to accuracy, so they carried no extra information. Report them for the
# positive (disease = 1) class instead.
svc_pre = precision_score(y_test, y_pred, average='binary')
svc_recall = recall_score(y_test, y_pred, average='binary')
svc_f1_ = f1_score(y_test, y_pred, average='binary')

print("\nSVM - Accuracy: {:.3f}.".format(svc_acc))
print("SVM - Precision: {:.3f}.".format(svc_pre))
print("SVM - Recall: {:.3f}.".format(svc_recall))
print("SVM - F1_Score: {:.3f}.".format(svc_f1_))
print("\nClassification Report")
print(classification_report(y_test, y_pred))

### RandomizedSearchCV

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from numpy import arange

rand_svc = SVC()

# Hyper-parameter space that the randomized search samples candidates from.
parameters = {
    'kernel': ['rbf', 'poly', 'linear', 'sigmoid'],
    'C': arange(1, 10, 1),
    'gamma': ['scale', 'auto'],
    # SVC accepts None, 'balanced' or an actual dict of weights here. The
    # string 'dict' is not a valid value: any sampled candidate using it fails
    # to fit, while the notebook's warnings filter hides the failure. None (all
    # classes weighted equally) is the intended alternative to 'balanced'.
    'class_weight': [None, 'balanced'],
}

# Randomized search with 5-fold cross-validation, scored on accuracy.
# random_state pins which candidates are sampled, so the reported best
# parameters are reproducible; 0 matches the seed used for the data splits.
rand_search_svc = RandomizedSearchCV(
    rand_svc,
    parameters,
    scoring='accuracy',
    cv=5,
    random_state=0,
)
rand_search_svc.fit(X_train, y_train)

best_parameters_svc = rand_search_svc.best_params_
best_score_svc = rand_search_svc.best_score_
print(best_parameters_svc)
print(best_score_svc)

# Evaluate the refitted best estimator on the held-out split.
y_pred = rand_search_svc.predict(X_test)

svc_acc = accuracy_score(y_test, y_pred) * 100
# For a single-label problem, micro-averaged precision, recall and F1 are all
# equal to accuracy, so they carried no extra information. Report them for the
# positive (disease = 1) class instead.
svc_pre = precision_score(y_test, y_pred, average='binary')
svc_recall = recall_score(y_test, y_pred, average='binary')
svc_f1_ = f1_score(y_test, y_pred, average='binary')

print("\nSVM - Accuracy: {:.3f}.".format(svc_acc))
print("SVM - Precision: {:.3f}.".format(svc_pre))
print("SVM - Recall: {:.3f}.".format(svc_recall))
print("SVM - F1_Score: {:.3f}.".format(svc_f1_))
print("\nClassification Report")
print(classification_report(y_test, y_pred))