In [2]:
import ipynb
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import OrdinalEncoder
from numpy import asarray
from statistics import mean

In [3]:
import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation and convergence-type warnings from
# the libraries used below - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [4]:
from ipynb.fs.full.TrainingFunction import Train_Models_CV10
from ipynb.fs.full.TrainingFunction import SavePredictionsToFile

from ipynb.fs.full.ParameterTuning import ParameterTuning

In [4]:
# Names of the indicator columns that are treated as categorical, and of the
# label column.
target = "target"
categorical_features = [
    'sex_0', 'sex_1',
    'cp_0', 'cp_1', 'cp_2', 'cp_3',
    'exang_0', 'exang_1',
    'slope_0', 'slope_1', 'slope_2',
    'ca_0', 'ca_1', 'ca_2', 'ca_3', 'ca_4',
    'thal_0', 'thal_1', 'thal_2', 'thal_3',
]

# Read the preprocessed heart data. The first CSV column is consumed as the
# index and then dropped again, leaving a fresh 0..n-1 index.
preprocessed_df = (
    pd.read_csv("../data/preprocessedHeart.csv", index_col=0)
    .reset_index(drop=True)
)

# Convert the categorical features and the label to the pandas 'category' dtype.
categorical_block = preprocessed_df[categorical_features].astype('category')
preprocessed_df[categorical_features] = categorical_block
preprocessed_df[target] = preprocessed_df[target].astype('category')

# Print the class balance of the label column.
target_counts = preprocessed_df[target].value_counts()
print('Revenue distribution:\n' + str(target_counts))

Revenue distribution:
1    165
0    165
Name: target, dtype: int64


In [5]:
# Bare expression: the frame is rendered as this cell's output so the loaded
# columns and a few rows can be checked by eye.
preprocessed_df

Unnamed: 0,thalach,oldpeak,chol,age,trestbps,target,sex_0,sex_1,cp_0,cp_1,...,slope_2,ca_0,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3
0,0.234095,1.777495,-0.836098,-0.150692,140,0,0,1,1,0,...,0,1,0,0,0,0,0,0,0,1
1,-0.990359,1.518686,0.149501,1.172774,135,0,0,1,1,0,...,0,0,1,0,0,0,0,0,0,1
2,-0.771706,0.138373,2.062724,0.069886,132,0,0,1,1,0,...,0,0,1,0,0,0,0,0,0,1
3,-0.509323,0.742260,-0.758796,0.731619,140,0,0,1,1,0,...,1,0,1,0,0,0,0,0,0,1
4,-3.439267,-0.034166,-0.179032,1.393352,120,0,0,1,1,0,...,0,1,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,0.846322,-0.896862,-0.121055,0.180175,120,1,0,1,0,1,...,0,1,0,0,0,0,0,0,1,0
326,0.715131,0.138373,1.850144,0.069886,132,1,1,0,0,1,...,1,1,0,0,0,0,0,0,1,0
327,1.414819,-0.896862,-1.725070,-1.474158,120,1,0,1,0,1,...,1,1,0,0,0,0,0,0,1,0
328,1.021244,-0.896862,-1.377212,-1.805024,138,1,0,1,0,0,...,1,0,0,0,0,1,0,0,1,0


# Models

The hyperparameters used below are taken from "MarketingCampaign_ParameterTuning".

In [6]:
# One estimator per model family, each built with fixed hyperparameters.
# NOTE(review): none of the estimators is given a random_state, so the
# tree-based models and the MLP may score differently from run to run.
knn_model = KNeighborsClassifier(
    n_neighbors=11,
    weights='distance',
    metric='manhattan',
)
svm_model = SVC(kernel='rbf')
dt_model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=9,
    min_samples_split=8,
    min_samples_leaf=4,
)
rf_model = RandomForestClassifier(
    n_estimators=300,
    max_depth=10,
    max_features=3,
    min_samples_split=8,
    min_samples_leaf=3,
    bootstrap=True,
)
mlp_model = MLPClassifier(
    hidden_layer_sizes=(10, 30, 10),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    learning_rate='constant',
)
gde_model = GradientBoostingClassifier(
    n_estimators=250,
    learning_rate=1,
    max_depth=5,
)

# The order of this list fixes the column order of the results table below.
models = [knn_model, svm_model, dt_model, rf_model, mlp_model, gde_model]

# Class names of the estimators, in the same order; later cells use them as
# keys into the `stats` dictionary.
model_names = [type(estimator).__name__ for estimator in models]

In [7]:
# Evaluate every model with the project helper Train_Models_CV10 (imported from
# the TrainingFunction notebook). Judging by the printed result further down,
# `stats` maps each model's class name to a list of per-fold scores - the
# helper's implementation is not visible here, so confirm against its source.
stats = Train_Models_CV10 (preprocessed_df, target, models)

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10


In [8]:
# Dump the raw per-model score lists as plain text for inspection.
print(stats)

{'KNeighborsClassifier': [0.8484848484848485, 0.8484848484848485, 0.9393939393939394, 0.7878787878787878, 0.8484848484848485, 0.8787878787878788, 0.9090909090909091, 0.8181818181818182, 0.9090909090909091, 0.9090909090909091], 'SVC': [0.5454545454545454, 0.42424242424242425, 0.45454545454545453, 0.5454545454545454, 0.6666666666666666, 0.6060606060606061, 0.48484848484848486, 0.6060606060606061, 0.5454545454545454, 0.7575757575757576], 'DecisionTreeClassifier': [0.8484848484848485, 0.8181818181818182, 0.7878787878787878, 0.8484848484848485, 0.8484848484848485, 0.8181818181818182, 0.8181818181818182, 0.7575757575757576, 0.8181818181818182, 0.8787878787878788], 'RandomForestClassifier': [0.8484848484848485, 0.7878787878787878, 0.9090909090909091, 0.8484848484848485, 0.8484848484848485, 0.8181818181818182, 0.8787878787878788, 0.7878787878787878, 0.9393939393939394, 0.8787878787878788], 'MLPClassifier': [0.8484848484848485, 0.7575757575757576, 0.5151515151515151, 0.8181818181818182, 0.84848

In [9]:
# Hand the collected scores to the project helper SavePredictionsToFile together
# with the target path ../accuracy/accuracyHeart.csv.
# NOTE(review): presumably this writes `stats` as CSV to that path - the helper
# lives in the TrainingFunction notebook, verify there.
SavePredictionsToFile("../accuracy/accuracyHeart.csv", stats)

# Overall Results

In [11]:
# To copy-paste into the report
#
# Builds the body of a LaTeX table: one row per fold, followed by an average
# row and a standard-deviation row, with one column per entry of `model_names`.
# Each row has the form "<label> & v1 & v2 ... \\ \hline ".

# Row terminator. Written with fully escaped backslashes: the previous literal
# relied on the invalid escape sequence "\ " (backslash-space), which yields the
# same characters but triggers a Python warning and is slated to become an error.
row_end = ' \\\\ \\hline \n'
cell_format = ' & %.5f'

# Score lists in column order; every list holds one value per fold.
score_columns = [stats[name] for name in model_names]


def _latex_row(label, values):
    """Return one LaTeX table row: `label`, then each value to 5 decimals.

    Parameters:
        label: text placed in the first cell of the row.
        values: iterable of numbers, one per model column.

    Returns:
        The formatted row as a string, including the row terminator.
    """
    return label + ''.join(cell_format % (value,) for value in values) + row_end


# Column guide for the reader of the notebook output (not part of the table).
print("      KNN       SVM       DT         RF        MLP       GDE")

# zip(*...) turns the per-model columns into per-fold rows, so the number of
# folds and the number of models both follow the data instead of being fixed.
table_rows = [
    _latex_row(str(fold_number), fold_scores)
    for fold_number, fold_scores in enumerate(zip(*score_columns), start=1)
]
table_rows.append(_latex_row('\\hline avg', [mean(column) for column in score_columns]))
# np.std with default arguments, i.e. the population standard deviation.
table_rows.append(_latex_row('std', [np.std(column) for column in score_columns]))

outputString = ''.join(table_rows)

print(outputString)

      KNN       SVM       DT         RF        MLP       GDE
1 & 0.84848 & 0.54545 & 0.84848 & 0.84848 & 0.84848 & 0.84848 \\ \hline 
2 & 0.84848 & 0.42424 & 0.81818 & 0.78788 & 0.75758 & 0.84848 \\ \hline 
3 & 0.93939 & 0.45455 & 0.78788 & 0.90909 & 0.51515 & 0.87879 \\ \hline 
4 & 0.78788 & 0.54545 & 0.84848 & 0.84848 & 0.81818 & 0.84848 \\ \hline 
5 & 0.84848 & 0.66667 & 0.84848 & 0.84848 & 0.84848 & 0.90909 \\ \hline 
6 & 0.87879 & 0.60606 & 0.81818 & 0.81818 & 0.51515 & 0.84848 \\ \hline 
7 & 0.90909 & 0.48485 & 0.81818 & 0.87879 & 0.81818 & 0.84848 \\ \hline 
8 & 0.81818 & 0.60606 & 0.75758 & 0.78788 & 0.75758 & 0.84848 \\ \hline 
9 & 0.90909 & 0.54545 & 0.81818 & 0.93939 & 0.87879 & 0.87879 \\ \hline 
10 & 0.90909 & 0.75758 & 0.87879 & 0.87879 & 0.87879 & 0.84848 \\ \hline 
avg & 0.86970 & 0.56364 & 0.82424 & 0.85455 & 0.76364 & 0.86061 \\ \hline 
std & 0.04505 & 0.09506 & 0.03264 & 0.04655 & 0.13055 & 0.02010 \\ \hline 

