In [105]:
# numerical arrays and tabular data handling
import numpy as np
import pandas as pd
# visualisation
import matplotlib.pyplot as plt
# import seaborn as sns
# data preprocessing
from sklearn.preprocessing import StandardScaler
# data splitting
from sklearn.model_selection import train_test_split
# search for best hyperparameters
from sklearn.model_selection import GridSearchCV
# classification algorithms
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import xgboost as xgb
import lightgbm as lgb
# evaluation metrics and result-display helpers
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, classification_report, auc, roc_auc_score, ConfusionMatrixDisplay, RocCurveDisplay

In [106]:
# Read the heart-disease table from the project's Dataset folder and preview
# its first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer='../Dataset/heart-saver.csv')
data.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,thalach,exang,thal,target
0,63,1,3,145,233,1,150,0,1,1
1,37,1,2,130,250,0,187,0,2,1
2,41,0,1,130,204,0,172,0,2,1
3,56,1,1,120,236,0,178,0,2,1
4,57,0,0,120,354,0,163,1,2,1


In [107]:
# Separate the feature matrix from the label column.
# The label is picked by name rather than by position, so a reordered CSV or
# an extra column cannot silently change which column is treated as the label;
# a missing 'target' column fails loudly with a KeyError instead.
X = data.drop(columns='target')
y = data['target']

# Quick visual check of both pieces.
print("//Independent features//")
print(X.head())
print("\n\n//Dependent feature//")
print(y.head())

//Independent features//
   age  sex  cp  trestbps  chol  fbs  thalach  exang  thal
0   63    1   3       145   233    1      150      0     1
1   37    1   2       130   250    0      187      0     2
2   41    0   1       130   204    0      172      0     2
3   56    1   1       120   236    0      178      0     2
4   57    0   0       120   354    0      163      1     2


//Dependent feature//
0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64


In [108]:
# Show (number of rows, number of feature columns) of the feature matrix.
print((len(X.index), len(X.columns)))

(301, 9)


In [109]:
# Hold out 25% of the rows for the final evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [110]:
# Learn the standardisation statistics from the training rows only, then apply
# the same fitted transform to both splits so the test rows never influence it.
# Note: X_train / X_test are rebound to the scaled arrays from here on.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [111]:
# Tune tree depth and split criterion by 10-fold cross-validated accuracy.
# The estimator is seeded because a decision tree breaks ties between equally
# good splits at random; without a seed the selected parameters and best score
# can change between runs of the notebook.
gridparameters_dt = {'max_depth': [1, 2, 3, 4, 5], 'criterion': ['gini', 'entropy']}
gridsearch_dt = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    gridparameters_dt,
    cv=10,
    scoring='accuracy',
    n_jobs=-1,  # use every available core for the search
)
gridsearch_dt.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("\n//Best parameters//")
print(gridsearch_dt.best_params_)
print("\n//Best score//")
print(gridsearch_dt.best_score_)


//Best parameters//
{'criterion': 'gini', 'max_depth': 1}

//Best score//
0.7604743083003952


In [112]:
# Train the final decision tree with the parameters chosen by the grid search
# and predict the held-out rows.
# best_params_ only carries the searched keys, so the seed is passed explicitly;
# otherwise random tie-breaking between splits could change the test metrics
# from one run to the next.
dt = DecisionTreeClassifier(random_state=0, **gridsearch_dt.best_params_)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

In [113]:
# Held-out evaluation of the tuned decision tree.
dt_acc = accuracy_score(y_test, y_pred_dt)
# Rows of the matrix are true classes, columns are predicted classes.
dt_con_matrix = confusion_matrix(y_test, y_pred_dt)

# Report: confusion matrix, accuracy as a percentage, per-class report.
print(
    'Confusion Matrix', '\n', dt_con_matrix, '\n'
)
print(
    'Accuracy of Decision Tree Classification :', 100 * dt_acc, '\n'
)
print(
    'Classification Report', '\n', classification_report(y_test, y_pred_dt)
)

Confusion Matrix 
 [[29  9]
 [10 28]] 

Accuracy of Decision Tree Classification : 75.0 

Classification Report 
               precision    recall  f1-score   support

           0       0.74      0.76      0.75        38
           1       0.76      0.74      0.75        38

    accuracy                           0.75        76
   macro avg       0.75      0.75      0.75        76
weighted avg       0.75      0.75      0.75        76



In [114]:
# Tune forest size, split criterion and tree depth by 10-fold cross-validated
# accuracy.
# A random forest draws bootstrap samples and feature subsets at random, so the
# estimator is seeded; without it the selected parameters and best score differ
# on every run of the notebook.
gridparameters_rf = {
    'n_estimators': [60, 70, 80, 90, 100],
    'criterion': ['gini', 'entropy'],
    'max_depth': [1, 2, 3, 4, 5],
}
gridsearch_rf = GridSearchCV(
    RandomForestClassifier(random_state=0),
    gridparameters_rf,
    cv=10,
    scoring='accuracy',
    n_jobs=-1,  # use every available core for the search
)
gridsearch_rf.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("\n//Best parameters//")
print(gridsearch_rf.best_params_)
print("\n//Best score//")
print(gridsearch_rf.best_score_)


//Best parameters//
{'criterion': 'gini', 'max_depth': 3, 'n_estimators': 70}

//Best score//
0.8041501976284584


In [115]:
# Train the final random forest with the parameters chosen by the grid search
# and predict the held-out rows.
# best_params_ only carries the searched keys, so the seed is passed explicitly;
# an unseeded forest resamples its bootstraps on every run, which makes the
# reported test accuracy impossible to reproduce.
rf = RandomForestClassifier(random_state=0, **gridsearch_rf.best_params_)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [116]:
# Held-out evaluation of the tuned random forest.
rf_acc = accuracy_score(y_test, y_pred_rf)
# Rows of the matrix are true classes, columns are predicted classes.
rf_con_matrix = confusion_matrix(y_test, y_pred_rf)

# Report: confusion matrix, accuracy as a percentage, per-class report.
print(
    'Confusion Matrix', '\n', rf_con_matrix, '\n'
)
print(
    'Accuracy of Random Forest Classification :', 100 * rf_acc, '\n'
)
print(
    'Classification Report', '\n', classification_report(y_test, y_pred_rf)
)

Confusion Matrix 
 [[30  8]
 [ 5 33]] 

Accuracy of Random Forest Classification : 82.89473684210526 

Classification Report 
               precision    recall  f1-score   support

           0       0.86      0.79      0.82        38
           1       0.80      0.87      0.84        38

    accuracy                           0.83        76
   macro avg       0.83      0.83      0.83        76
weighted avg       0.83      0.83      0.83        76



In [117]:
# Tune the neighbour count (1 through 10) and the neighbour-search backend by
# 10-fold cross-validated accuracy.
gridparameters_knn = {
    'n_neighbors': list(range(1, 11)),
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
}
gridsearch_knn = GridSearchCV(
    KNeighborsClassifier(),
    gridparameters_knn,
    cv=10,
    scoring='accuracy',
    n_jobs=-1,  # use every available core for the search
)
gridsearch_knn.fit(X_train, y_train)

# Winning configuration and its mean cross-validated accuracy.
print("\n//Best parameters//")
print(gridsearch_knn.best_params_)
print("\n//Best score//")
print(gridsearch_knn.best_score_)


//Best parameters//
{'algorithm': 'ball_tree', 'n_neighbors': 6}

//Best score//
0.7826086956521741


In [118]:
# Build the final k-nearest-neighbours model from the grid search's chosen
# parameters, fit it on the training split, and predict the held-out rows.
# fit() returns the estimator itself, so construction and fitting are chained.
knn = KNeighborsClassifier(**gridsearch_knn.best_params_).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

In [119]:
# Held-out evaluation of the tuned k-nearest-neighbours model.
knn_acc = accuracy_score(y_test, y_pred_knn)
# Rows of the matrix are true classes, columns are predicted classes.
knn_con_matrix = confusion_matrix(y_test, y_pred_knn)

# Report: confusion matrix, accuracy as a percentage, per-class report.
print(
    'Confusion Matrix', '\n', knn_con_matrix, '\n'
)
print(
    'Accuracy of KNN Classification :', 100 * knn_acc, '\n'
)
print(
    'Classification Report', '\n', classification_report(y_test, y_pred_knn)
)

Confusion Matrix 
 [[32  6]
 [ 9 29]] 

Accuracy of KNN Classification : 80.26315789473685 

Classification Report 
               precision    recall  f1-score   support

           0       0.78      0.84      0.81        38
           1       0.83      0.76      0.79        38

    accuracy                           0.80        76
   macro avg       0.80      0.80      0.80        76
weighted avg       0.80      0.80      0.80        76

