In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
import xgboost as xgb
from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from keras.models import Sequential
from keras.layers import Dense
from prettytable import PrettyTable
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Cleveland heart-disease data.
# - The raw string keeps the backslash of the Windows path literal ("\p" is an
#   invalid escape sequence in a normal string literal).
# - header=None keeps the first record as data: the file is presumably the
#   header-less UCI original (column names are assigned separately), so the
#   default header=0 would silently consume one patient row as column labels.
# NOTE(review): the path is machine-specific; consider a configurable data directory.
df = pd.read_csv(r"C:\processed.cleveland.data", header=None)
df.head()

In [None]:
# Readable labels for the 14 columns, in file order; 'target' is the label column
# that is separated from the features later on.
feature_names = [
    'age', 'sex', 'chest pain', 'blood pressure', 'cholestrol', 'blood sugar',
    'restecg', 'heart rate(max)', 'exercise induced agina', 'oldpeak',
    'slope', 'num vessels', 'thalesemia', 'target',
]
df.columns = feature_names

In [None]:
# Plain-text dump of the frame to check the renamed columns.
print(df)

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Remove, in place, every record that carries the '?' marker in any column.
rows_with_marker = df.isin(['?']).any(axis=1)
df.drop(df.index[rows_with_marker], inplace=True)

In [None]:
# Convert the two columns that still need it to numeric dtype
# (presumably they were read as text because of the '?' markers).
for numeric_col in ('num vessels', 'thalesemia'):
    df[numeric_col] = pd.to_numeric(df[numeric_col])

# Collapse target codes 2, 3 and 4 into class 1, leaving a binary 0/1 label.
df['target'] = df['target'].replace({2: 1, 3: 1, 4: 1})

In [None]:
# Preview the first rows after the cleaning steps.
df.head()

In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
correlation = df.corr()
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title('Correlation Heatmap ')
sns.heatmap(correlation, square=True, annot=True, fmt='.2f', linecolor='white', ax=ax)
# Rotate the tick labels so the long column names stay readable.
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.set_yticklabels(ax.get_yticklabels(), rotation=30)
plt.show()

In [None]:
# List the distinct label values left after the recoding of the target column.
df['target'].unique()

In [None]:
# Separate the label vector (Y) from the feature matrix (X).
Y = df['target']
X = df.drop(columns='target')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features with statistics learned from the training split only,
# then apply the same transformation to the test split.
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
x_test = scale.transform(x_test)

In [None]:
# Report the shape of each split: train features, test features, train labels, test labels.
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

### Classification Models Implementation

### Logistic Regression

In [None]:
# Baseline logistic regression with default settings, evaluated on the test split.
log_reg = LogisticRegression().fit(x_train, y_train)
y_log_reg_pred = log_reg.predict(x_test)

In [None]:
# Confusion matrix of the baseline logistic regression, drawn as an annotated grid
# (rows = actual class, columns = predicted class).
con_mat_lr = confusion_matrix(y_true=y_test, y_pred=y_log_reg_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_lr, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_lr):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the baseline logistic regression.
print(confusion_matrix(y_test, y_log_reg_pred), ": is the confusion matrix")
print(classification_report(y_test, y_log_reg_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_log_reg_pred), caption)

### Logistic Regression Hyperparameter Tuning

In [None]:
# Search space for logistic regression.
# Each solver supports only some penalties, so the grid is split into one
# sub-grid per compatible (solver, penalty) family instead of a full cross
# product: incompatible pairs (e.g. lbfgs + l1) cannot be fitted and only waste
# search time. 'elasticnet' is left out because it additionally requires an
# l1_ratio, and the string 'none' because newer scikit-learn releases reject it
# (the largest C values are effectively unregularised anyway).
lr_C_values = np.logspace(-4, 4, 20)
lr_max_iters = [100, 1000, 2500, 5000]
param_grid = [
    {'solver': ['lbfgs', 'newton-cg', 'sag'],
     'penalty': ['l2'],
     'C': lr_C_values,
     'max_iter': lr_max_iters},
    {'solver': ['liblinear', 'saga'],
     'penalty': ['l1', 'l2'],
     'C': lr_C_values,
     'max_iter': lr_max_iters},
]

In [None]:
# 3-fold cross-validated grid search over the logistic-regression grid, on all cores.
clf = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=3, n_jobs=-1, verbose=True)
clf.fit(x_train, y_train)

In [None]:
# Estimator refitted with the best parameter combination found by the search.
clf.best_estimator_

In [None]:
# Mean cross-validated score of the best parameter combination.
clf.best_score_

In [None]:
# Test-set predictions from the tuned logistic regression.
clf_lr_pred = clf.predict(x_test)

In [None]:
# Confusion matrix of the tuned logistic regression
# (rows = actual class, columns = predicted class).
con_mat_lr = confusion_matrix(y_true=y_test, y_pred=clf_lr_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_lr, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_lr):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned logistic regression.
print(confusion_matrix(y_test, clf_lr_pred), ": is the confusion matrix")
print(classification_report(y_test, clf_lr_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, clf_lr_pred), caption)

### Naive Bayes

In [None]:
# Baseline Gaussian Naive Bayes with default settings.
nav_bay = GaussianNB().fit(x_train, y_train)
y_nav_bay_pred = nav_bay.predict(x_test)

In [None]:
# Confusion matrix of the baseline Naive Bayes model
# (rows = actual class, columns = predicted class).
con_mat_nav = confusion_matrix(y_true=y_test, y_pred=y_nav_bay_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_nav, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_nav):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for Naive Bayes before hyperparameter tuning.
print(confusion_matrix(y_test, y_nav_bay_pred), ": is the confusion matrix")
print(classification_report(y_test, y_nav_bay_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_nav_bay_pred), caption)

### Naive Bayes Hyperparameter Tuning

In [None]:
# Cross-validation scheme: stratified 5-fold, repeated 3 times, with a fixed seed.
cv_method = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=999)

In [None]:
from sklearn.preprocessing import PowerTransformer

In [None]:
# Grid search over 100 log-spaced var_smoothing values (from 1 down to 1e-9) for
# Gaussian Naive Bayes, scored by accuracy under the cv_method splitter.
nb_classifier = GaussianNB()
params_NB = {'var_smoothing': np.logspace(0, -9, num=100)}
gs_NB = GridSearchCV(nb_classifier, params_NB, scoring='accuracy', cv=cv_method, verbose=1)

In [None]:
Data_transformed = PowerTransformer().fit_transform(x_test)

In [None]:
gs_NB.fit(Data_transformed, y_test)

In [None]:
# Best var_smoothing value found by the Naive Bayes grid search.
gs_NB.best_params_

In [None]:
# Mean cross-validated accuracy of the best var_smoothing value.
gs_NB.best_score_

In [None]:
# Refit Gaussian Naive Bayes with a fixed var_smoothing and predict the test split.
# NOTE(review): 0.43 is hardcoded -- presumably copied from gs_NB.best_params_;
# confirm it still matches the search output. The search ran on power-transformed
# features, whereas this model is fitted on x_train directly.
nb = GaussianNB(var_smoothing = 0.43)
nb.fit(x_train,y_train)
gs_nb_pred = nb.predict(x_test)

In [None]:
# Confusion matrix of the tuned Naive Bayes predictions on the test split.
con_matrix_nav = confusion_matrix(y_true=y_test, y_pred=gs_nb_pred)

In [None]:
# Draw con_matrix_nav as an annotated grid (rows = actual class, columns = predicted class).
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_nav, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_nav):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for Naive Bayes after hyperparameter tuning.
print(confusion_matrix(y_test, gs_nb_pred), ": is the confusion matrix")
print(classification_report(y_test, gs_nb_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, gs_nb_pred), caption)

### Support Vector Machine

In [None]:
# Baseline support vector classifier with a linear kernel.
sup_vec = svm.SVC(kernel='linear').fit(x_train, y_train)
y_pred_sup_vec = sup_vec.predict(x_test)

In [None]:
# Confusion matrix of the baseline SVM
# (rows = actual class, columns = predicted class).
con_mat_sup = confusion_matrix(y_true=y_test, y_pred=y_pred_sup_vec)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_sup, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_sup):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the SVM before hyperparameter tuning.
print(confusion_matrix(y_test, y_pred_sup_vec), ": is the confusion matrix")
print(classification_report(y_test, y_pred_sup_vec))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_pred_sup_vec), caption)

### SVM Hyperparameter Tuning

In [None]:
# Search space for the RBF-kernel SVM: regularisation strength C and kernel width gamma.
# Note: this rebinds param_grid, which earlier held the logistic-regression grid.
param_grid = {'C' : [0.1,1, 10, 100],
             'gamma' : [1, 0.1, 0.01, 0.001],
             'kernel' : ['rbf']}

In [None]:
# 3-fold grid search over the SVM grid, scored by accuracy; refit=True retrains
# the best configuration so SVM_grid can be used directly for prediction.
SVM_grid = GridSearchCV(svm.SVC(), param_grid, scoring = 'accuracy', refit = True, verbose = 4, cv = 3)

In [None]:
# Run the SVM grid search on the training split.
SVM_grid.fit(x_train, y_train)

In [None]:
# Best (C, gamma, kernel) combination found by the search.
SVM_grid.best_params_

In [None]:
# Mean cross-validated accuracy of the best SVM configuration.
SVM_grid.best_score_

In [None]:
# Test-set predictions from the tuned SVM.
svm_pred_grid = SVM_grid.predict(x_test)

In [None]:
# Confusion matrix of the tuned SVM
# (rows = actual class, columns = predicted class).
con_matrix_sup = confusion_matrix(y_true=y_test, y_pred=svm_pred_grid)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_sup, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_sup):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned SVM.
print(confusion_matrix(y_test, svm_pred_grid), ": is the confusion matrix")
print(classification_report(y_test, svm_pred_grid))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, svm_pred_grid), caption)

### K Nearest Neighbors

In [None]:
# Baseline k-nearest-neighbours classifier with default settings.
k_neig = KNeighborsClassifier().fit(x_train, y_train)
y_knn_pred = k_neig.predict(x_test)

In [None]:
# Confusion matrix of the baseline KNN model
# (rows = actual class, columns = predicted class).
con_mat_knn = confusion_matrix(y_true=y_test, y_pred=y_knn_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_knn, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_knn):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the baseline KNN model.
print(confusion_matrix(y_test, y_knn_pred), ": is the confusion matrix")
print(classification_report(y_test, y_knn_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_knn_pred), caption)

### K Nearest Neighbors Hyperparameter Tuning

In [None]:
# 10-fold cross-validated search over k = 1 .. 49 neighbours.
knn_gs = KNeighborsClassifier()
gs = {'n_neighbors': np.arange(1, 50)}
knn_grid = GridSearchCV(estimator=knn_gs, param_grid=gs, cv=10)
knn_grid.fit(x_train, y_train)

In [None]:
# Best number of neighbours found by the search.
print(knn_grid.best_params_)

In [None]:
# Mean cross-validated score of the best number of neighbours.
print(knn_grid.best_score_)

In [None]:
# Refit KNN with a fixed neighbour count and predict the test split.
# This rebinds k_neig, replacing the baseline model fitted earlier.
# NOTE(review): 15 is hardcoded -- presumably copied from knn_grid.best_params_;
# confirm it still matches the search output.
k_neig = KNeighborsClassifier(n_neighbors = 15)
k_neig.fit(x_train,y_train)
knn_pred=k_neig.predict(x_test)

In [None]:
# Confusion matrix of the tuned KNN model
# (rows = actual class, columns = predicted class).
con_matrix_knn = confusion_matrix(y_true=y_test, y_pred=knn_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_knn, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_knn):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned KNN model.
print(confusion_matrix(y_test, knn_pred), ": is the confusion matrix")
print(classification_report(y_test, knn_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, knn_pred), caption)

### Decision Tree

In [None]:
# Baseline decision tree with default settings.
# random_state pins the tree's internal randomness (e.g. tie-breaking between
# equally good splits) so the reported metrics are repeatable across runs.
dec_tree = DecisionTreeClassifier(random_state=42)
dec_tree.fit(x_train, y_train)
y_pred_dec_tree = dec_tree.predict(x_test)

In [None]:
# Confusion matrix of the baseline decision tree
# (rows = actual class, columns = predicted class).
con_mat_dc = confusion_matrix(y_true=y_test, y_pred=y_pred_dec_tree)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_dc, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_dc):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the baseline decision tree.
print(confusion_matrix(y_test, y_pred_dec_tree), ": is the confusion matrix")
print(classification_report(y_test, y_pred_dec_tree))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_pred_dec_tree), caption)

### Decision Tree Hyperparameter Tuning

In [None]:
# Search space for the decision tree: split criterion plus size/depth limits.
# Note: this rebinds param_grid, which earlier held the SVM grid.
param_grid = {'criterion': ['gini', 'entropy'],
              'min_samples_split': [2, 10, 20],
              'max_depth': [5, 10, 20, 25, 30],
              'min_samples_leaf': [1, 5, 10],
              'max_leaf_nodes': [2, 5, 10, 20]}

In [None]:
# 3-fold grid search over the decision-tree grid, scored by accuracy.
# The estimator is seeded so that repeated searches rank candidates identically.
dt = DecisionTreeClassifier(random_state=42)
grid_search_cv = GridSearchCV(dt, param_grid, cv = 3, scoring = 'accuracy')
grid_search_cv.fit(x_train, y_train)

In [None]:
# Best decision-tree hyperparameters found by the search.
grid_search_cv.best_params_

In [None]:
# Mean cross-validated accuracy of the best decision-tree configuration.
grid_search_cv.best_score_

In [None]:
# Print each winning hyperparameter of the decision-tree search on its own line.
for param_name, param_value in grid_search_cv.best_params_.items():
    print(f"{param_name}: {param_value}")

In [None]:
# Refit a decision tree with fixed hyperparameters and predict the test split.
# random_state makes the fitted tree, and therefore the metrics, repeatable.
# NOTE(review): the hyperparameters are hardcoded -- presumably copied from
# grid_search_cv.best_params_; confirm they still match the search output.
best_model = DecisionTreeClassifier(criterion = 'gini', max_depth = 5,
                                    min_samples_leaf = 5, max_leaf_nodes = 10, min_samples_split = 2,
                                    random_state = 42)
best_model.fit(x_train, y_train)
preds_dc = best_model.predict(x_test)

In [None]:
# Confusion matrix of the tuned decision tree
# (rows = actual class, columns = predicted class).
con_matrix_dc = confusion_matrix(y_true=y_test, y_pred=preds_dc)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_dc, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_dc):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned decision tree.
print(confusion_matrix(y_test, preds_dc), ": is the confusion matrix")
print(classification_report(y_test, preds_dc))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, preds_dc), caption)

### Random Forest

In [None]:
# Baseline random forest with default settings.
# random_state fixes the bootstrap samples and feature sub-sampling so the
# reported metrics are repeatable across runs.
ran_fos = RandomForestClassifier(random_state=42)
ran_fos.fit(x_train, y_train)
Y_rf_pred = ran_fos.predict(x_test)

In [None]:
# Confusion matrix of the baseline random forest
# (rows = actual class, columns = predicted class).
con_mat_rf = confusion_matrix(y_true=y_test, y_pred=Y_rf_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_rf, cmap=plt.cm.Oranges, alpha=0.3)
for x in range(con_mat_rf.shape[0]):
    for y in range(con_mat_rf.shape[1]):
        ax.text(x=y, y=x,s=con_mat_rf[x, y], va='center', ha='center', size='xx-large')
# Axis label spelled 'Prediction Classes' to match every other confusion-matrix
# plot in this notebook (it previously read 'Predictions Classes').
plt.xlabel('Prediction Classes', fontsize=18)
plt.ylabel('Actual Classes', fontsize=18)
plt.title('Confusion Matrix', fontsize=18, fontweight = 'bold')
plt.show()

In [None]:
# Test-set metrics for the baseline random forest.
print(confusion_matrix(y_test, Y_rf_pred), ": is the confusion matrix")
print(classification_report(y_test, Y_rf_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, Y_rf_pred), caption)

### Random Forest Hyperparameter Tuning

In [None]:
# Candidate values for each random-forest hyperparameter ...
n_estimators = [100, 300, 500, 800, 1200]
max_depth = [5, 10, 15, 25, 30]
min_samples_split = [2, 5, 10, 15, 100]
min_samples_leaf = [1, 2, 5, 10]
# ... gathered into the grid handed to the search.
hyper_rf = {
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}

In [None]:
# 3-fold grid search over the random-forest grid, on all cores.
# The estimator is seeded so that repeated searches rank candidates identically.
rf = RandomForestClassifier(random_state=42)
gridrf = GridSearchCV(rf, hyper_rf, cv = 3, verbose = 1, n_jobs = -1)
bestrf = gridrf.fit(x_train, y_train)

In [None]:
# Best random-forest hyperparameters found by the search.
bestrf.best_params_

In [None]:
# Refit a random forest with fixed hyperparameters and predict the test split.
# random_state makes the fitted forest, and therefore the metrics, repeatable.
# NOTE(review): the hyperparameters are hardcoded -- presumably copied from
# bestrf.best_params_; confirm they still match the search output.
rf = RandomForestClassifier(max_depth = 30, min_samples_leaf = 1, min_samples_split = 100,
                            n_estimators = 100, random_state = 42)
rf.fit(x_train, y_train)
preds_rf = rf.predict(x_test)

In [None]:
# Confusion matrix of the tuned random forest
# (rows = actual class, columns = predicted class).
con_matrix_rf = confusion_matrix(y_true=y_test, y_pred=preds_rf)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_rf, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_rf):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned random forest.
print(confusion_matrix(y_test, preds_rf), ": is the confusion matrix")
print(classification_report(y_test, preds_rf))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, preds_rf), caption)

### XGBoost

In [None]:
# Baseline XGBoost classifier with default settings.
xg_boost = xgb.XGBClassifier().fit(x_train, y_train)
y_pred_xg_boost = xg_boost.predict(x_test)

In [None]:
# Confusion matrix of the baseline XGBoost model
# (rows = actual class, columns = predicted class).
con_mat_xg = confusion_matrix(y_true=y_test, y_pred=y_pred_xg_boost)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_xg, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_xg):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the baseline XGBoost model.
print(confusion_matrix(y_test, y_pred_xg_boost), ": is the confusion matrix")
print(classification_report(y_test, y_pred_xg_boost))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, y_pred_xg_boost), caption)

### XGBoost Hyperparameter Tuning

In [None]:
# Search space for XGBoost.
# 'max_features' has been removed: it is a scikit-learn tree option, not an
# XGBoost parameter, so it had no effect on the fitted models and only
# multiplied the number of candidates (and the search time) by six.
params = {'learning_rate': [0.001,0.01,0.1,0.25,0.5,0.4],
         'max_depth': [1,2,3,4,5,6],
         'n_estimators': [20,40,50,70,100]}

In [None]:
# 5-fold cross-validated grid search over the XGBoost grid, on all cores.
XG_grid_search = GridSearchCV(estimator=xg_boost, param_grid=params, cv=5, verbose=True, n_jobs=-1)
XG_grid_search.fit(x_train, y_train)

In [None]:
# Best XGBoost hyperparameters found by the search.
XG_grid_search.best_params_

In [None]:
# Mean cross-validated score of the best XGBoost configuration.
XG_grid_search.best_score_

In [None]:
# Refit XGBoost with fixed hyperparameters and predict the test split.
# The former max_features=1 argument was dropped: XGBoost does not use a
# parameter of that name, so it never influenced the model.
# NOTE(review): the remaining values are hardcoded -- presumably copied from
# XG_grid_search.best_params_; confirm they still match the search output.
XG_clf = xgb.XGBClassifier(learning_rate = 0.1, max_depth = 1, n_estimators = 100)
XG_clf.fit(x_train, y_train)
XG_clf_pred = XG_clf.predict(x_test)

In [None]:
# Confusion matrix of the tuned XGBoost model
# (rows = actual class, columns = predicted class).
con_matrix_xg = confusion_matrix(y_true=y_test, y_pred=XG_clf_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_xg, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_xg):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned XGBoost model.
print(confusion_matrix(y_test, XG_clf_pred), ": is the confusion matrix")
print(classification_report(y_test, XG_clf_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, XG_clf_pred), caption)

### AdaBoost Classifier

In [None]:
# Baseline AdaBoost classifier with default settings.
adab = AdaBoostClassifier().fit(x_train, y_train)
adab_pred = adab.predict(x_test)

In [None]:
# Confusion matrix of the baseline AdaBoost model
# (rows = actual class, columns = predicted class).
con_mat_adab = confusion_matrix(y_true=y_test, y_pred=adab_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_mat_adab, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_mat_adab):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the baseline AdaBoost model.
print(confusion_matrix(y_test, adab_pred), ": is the confusion matrix")
print(classification_report(y_test, adab_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, adab_pred), caption)

### AdaBoost Hyperparameter Tuning

In [None]:
# 5-fold grid search over ensemble size and learning rate for AdaBoost,
# scored by accuracy and run on all cores.
adb_clf = AdaBoostClassifier()
search_grid = {
    'n_estimators': [500, 1000, 2000],
    'learning_rate': [.001, 0.01, .1],
}
ada_clf = GridSearchCV(
    estimator=adb_clf,
    param_grid=search_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Run the AdaBoost grid search on the training split.
ada_clf.fit(x_train, y_train)

In [None]:
# Best AdaBoost hyperparameters found by the search.
ada_clf.best_params_

In [None]:
# Mean cross-validated accuracy of the best AdaBoost configuration.
ada_clf.best_score_

In [None]:
# Test-set predictions from the tuned AdaBoost model.
adab_clf_pred = ada_clf.predict(x_test)

In [None]:
# Confusion matrix of the tuned AdaBoost model
# (rows = actual class, columns = predicted class).
con_matrix_adab = confusion_matrix(y_true=y_test, y_pred=adab_clf_pred)
fig, ax = plt.subplots(figsize=(5, 5))
ax.matshow(con_matrix_adab, cmap=plt.cm.Oranges, alpha=0.3)
for (row, col), count in np.ndenumerate(con_matrix_adab):
    ax.text(x=col, y=row, s=count, va='center', ha='center', size='xx-large')
ax.set_xlabel('Prediction Classes', fontsize=18)
ax.set_ylabel('Actual Classes', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18, fontweight='bold')
plt.show()

In [None]:
# Test-set metrics for the tuned AdaBoost model.
print(confusion_matrix(y_test, adab_clf_pred), ": is the confusion matrix")
print(classification_report(y_test, adab_clf_pred))
for score_fn, caption in [(accuracy_score, ": is the accuracy score"), (precision_score, ": is the precision score"),
                          (recall_score, ": is the recall score"), (f1_score, ": is the f1 score")]:
    print(score_fn(y_test, adab_clf_pred), caption)

### Neural Network Implementation

In [None]:
# Small feed-forward network: one hidden ReLU layer (11 units) followed by a
# single sigmoid unit for the binary label.
# The input width is taken from the training matrix instead of a hardcoded 13,
# so the model keeps working if the feature set changes.
model = Sequential()
model.add(Dense(11, activation='relu', input_dim=x_train.shape[1]))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the network on the training split for 300 epochs.
model.fit(x_train,y_train,epochs=300)

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels with a vectorised 0.5 threshold.
# This gives the same labels as rounding each probability individually, without
# the per-element Python loop or the temporary list.
pred_NN = (model.predict(x_test).ravel() > 0.5).astype(int)
print(accuracy_score(y_test, pred_NN), ": is the accuracy score")

### Comparing performance metrics of all ML models without hyperparameter tuning

In [None]:
# Summary table for the models before hyperparameter tuning: one row per algorithm.
mytable = PrettyTable(['Algorithm_Name','Accuracy_Score','Precision_Score','Recall_score','F1_Score'])

In [None]:
# Rows are [algorithm, accuracy, precision, recall, F1].
# NOTE(review): these figures are hardcoded -- presumably transcribed from the
# metric cells above; they will not update if the models are re-run.
baseline_rows = [
    ['Logistic Regression', '0.90', '0.90', '0.83', '0.87'],
    ['Naive Bayes', '0.93', '1.0', '0.83', '0.90'],
    ['Support Vector Machine', '0.90', '0.95', '0.79', '0.86'],
    ['K Nearest Neighbors', '0.88', '0.84', '0.88', '0.86'],
    ['Decision Tree', '0.80', '0.73', '0.79', '0.76'],
    ['Random Forest', '0.90', '0.90', '0.83', '0.87'],
    ['XGBoost', '0.86', '0.83', '0.83', '0.83'],
    ['AdaBoost', '0.83', '0.82', '0.75', '0.78'],
    ['Neural Networks', '0.85', 'NA', 'NA', 'NA'],
]
for table_row in baseline_rows:
    mytable.add_row(table_row)

In [None]:
# Render the pre-tuning summary table.
print(mytable)

In [None]:
# Data for the pre-tuning grouped bar chart: w is the bar width and each list
# holds one metric per algorithm, in the order given by Algorithms.
# NOTE(review): the values are hardcoded and duplicate the summary table above
# (the neural network is left out); keep the two in sync.
w = 0.1
Algorithms = ["LR", "NB", "SVM", "KNN", "DT", "RF", "XGB", "ADB"]
Accuracy_Score = [0.90,0.93,0.90,0.88,0.80,0.90,0.86,0.83]
Precision_Score = [0.90,1.00,0.95,0.84,0.73,0.90,0.83,0.82]
Recall_Score = [0.83,0.83,0.79,0.88,0.79,0.83,0.83,0.75]
F1_Score = [0.87,0.90,0.86,0.86,0.76,0.87,0.83,0.78]

In [None]:
# x positions of the four metric bars per algorithm: each series sits one bar
# width (w) to the right of the previous one.
bar1 = np.arange(len(Algorithms))
bar2 = [left + w for left in bar1]
bar3 = [left + w for left in bar2]
bar4 = [left + w for left in bar3]

In [None]:
# Grouped bar chart comparing the four metrics across algorithms before tuning.
plt.figure(figsize=(12, 8))
for positions, scores, series_label in (
    (bar1, Accuracy_Score, 'Accuracy Score'),
    (bar2, Precision_Score, 'Precision Score'),
    (bar3, Recall_Score, 'Recall Score'),
    (bar4, F1_Score, 'F1 Score'),
):
    plt.bar(positions, scores, w, label=series_label)
plt.title("Performance Metrics Comparison of all classification models before Hyperparameter Tuning", fontweight='bold', fontsize=18)
plt.xlabel("Algorithms", fontweight='bold', fontsize=14)
plt.ylabel("Scores", fontweight='bold', fontsize=14)
# One tick per algorithm, placed under the second bar of each group.
plt.xticks(bar1 + w, Algorithms, fontsize=12)
plt.yticks(fontsize=12)
plt.legend(loc='best', fontsize=12)
plt.show()

### Comparison of Performance Metrics of all ML models with Hyperparameter Tuning

In [None]:
# Summary table for the models after hyperparameter tuning: one row per algorithm.
pmwht = PrettyTable(['Algorithm_Name','Accuracy_Score','Precision_Score','Recall_score','F1_Score'])

In [None]:
# Rows are [algorithm, accuracy, precision, recall, F1].
# NOTE(review): these figures are hardcoded -- presumably transcribed from the
# tuned-model metric cells above; they will not update if the models are re-run.
tuned_rows = [
    ['Logistic Regression', '0.93', '0.95', '0.86', '0.91'],
    ['Naive Bayes', '0.93', '1.0', '0.83', '0.90'],
    ['Support Vector Machine', '0.92', '1.0', '0.79', '0.88'],
    ['K Nearest Neighbor', '0.87', '0.86', '0.79', '0.83'],
    ['Decision Tree', '0.80', '0.80', '0.67', '0.72'],
    ['Random Forest', '0.92', '0.95', '0.83', '0.89'],
    ['XGBoost', '0.90', '0.91', '0.83', '0.87'],
    ['AdaBoost', '0.93', '0.95', '0.88', '0.91'],
    ['Neural Networks', '0.85', 'NA', 'NA', 'NA'],
]
for table_row in tuned_rows:
    pmwht.add_row(table_row)

In [None]:
# Render the post-tuning summary table.
print(pmwht)

In [None]:
w = 0.15
algorithms = ["LR", "NB", "SVM", "KNN", "DT", "RF", "XGB", "ADB"]
accuracy_score = [0.93,0.93,0.92,0.87,0.80,0.92,0.90,0.93]
precision_score = [0.95,1.0,1.0,0.86,0.80,0.95,0.91,0.95]
recall_score = [0.86,0.83,0.79,0.79,0.67,0.83,0.83,0.88]
f1_score = [0.91,0.90,0.88,0.83,0.72,0.89,0.87,0.91]

In [None]:
bar_1 = np.arange(len(algorithms))
bar_2 = [i+w for i in bar1]
bar_3 = [i+w for i in bar2]
bar_4 = [i+w for i in bar3]

In [None]:
plt.figure(figsize = (12,10))
plt.bar(bar_1, accuracy_score, w, label = 'Accuracy Score')
plt.bar(bar_2, precision_score, w, label = 'Precision Score')
plt.bar(bar_3, recall_score, w, label = 'Recall Score')
plt.bar(bar_4, f1_score, w, label = 'F1 Score')
plt.xlabel("Algorithms", fontweight = 'bold', fontsize = 14)
plt.ylabel("Scores", fontweight = 'bold', fontsize = 14)
plt.title("Performance metrics comparison of Classification models after hyperparameter tuning", 
          fontweight = 'bold', fontsize = 20)
plt.xticks(bar1+w, Algorithms, fontsize = 12)
plt.yticks(fontsize = 12)
plt.legend(loc = 'best', fontsize = 12)
plt.show()

### Graphical representation of Accuracy scores of all Classification models

In [None]:
# Accuracy per algorithm (including the neural network) for the final bar plot.
# NOTE(review): the values are hardcoded -- presumably the post-tuning accuracies
# from the summary table; keep the two in sync.
df1 = pd.DataFrame({'Algorithms':['LR','NB','SVM','KNN','DT','RF','XGB','ADB','NN'],
                   'Accuracy Scores': [0.93,0.93,0.92,0.87,0.80,0.92,0.90,0.93,0.85]})

In [None]:
# Bar plot of accuracy per algorithm, with the value printed on top of each bar;
# the figure is also written to a PNG file in the working directory.
fig, ax = plt.subplots(figsize=(10, 8))
splot = sns.barplot(x="Algorithms", y="Accuracy Scores", data=df1, ax=ax)
ax.set_xlabel("Algorithms", fontsize=14, fontweight='bold')
ax.set_ylabel("Accuracy", size=14, fontweight='bold')
ax.set_title("Comparison of Accuracy of all Classification Algorithms", fontsize=18)
ax.bar_label(splot.containers[0], fontsize=10, fontweight='bold')
fig.savefig("annotate_barplot_with_Matplotlib_bar_label_Python.png")