# Initial Steps

## Importing Libraries

In [2]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

### setting plot size

In [3]:
matplotlib.rcParams['figure.figsize'] = (15, 8)

## importing dataset

In [4]:
df = pd.read_csv("heart_2020_cleaned.csv")

In [5]:
df.head(7)

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No
5,Yes,28.87,Yes,No,No,6.0,0.0,Yes,Female,75-79,Black,No,No,Fair,12.0,No,No,No
6,No,21.63,No,No,No,15.0,0.0,No,Female,70-74,White,No,Yes,Fair,4.0,Yes,No,Yes


In [6]:
df.HeartDisease.value_counts()

No     292422
Yes     27373
Name: HeartDisease, dtype: int64

# EDA


In [7]:
# paste EDA from EDA notebook here

In [8]:
df_num = df.select_dtypes(np.number)

# Modelling

In [9]:
from sklearn.model_selection import train_test_split, KFold, GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

from sklearn.metrics import precision_score,recall_score
from sklearn.metrics import f1_score

<a id="29"></a>

<font color='DeepSkyBlue'>


## PREPROCESSING

In [10]:
df["id"]=range(1,319796)
df=df.set_index("id")

In [11]:
df

Unnamed: 0_level_0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,No,16.60,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
2,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
3,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
4,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
5,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319791,Yes,27.41,Yes,No,No,7.0,0.0,Yes,Male,60-64,Hispanic,Yes,No,Fair,6.0,Yes,No,No
319792,No,29.84,Yes,No,No,0.0,0.0,No,Male,35-39,Hispanic,No,Yes,Very good,5.0,Yes,No,No
319793,No,24.24,No,No,No,0.0,0.0,No,Female,45-49,Hispanic,No,Yes,Good,6.0,No,No,No
319794,No,32.81,No,No,No,0.0,0.0,No,Female,25-29,Hispanic,No,No,Good,12.0,No,No,No


In [12]:
df_cat=df.select_dtypes(include=["object"])

In [13]:
df_categorical=df_cat.columns
df_categorical

Index(['HeartDisease', 'Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking',
       'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity',
       'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'],
      dtype='object')

In [14]:
for var in df_categorical:
    Dummy=pd.get_dummies(df[var], drop_first= True).add_prefix(var)
    df=df.merge(Dummy,on="id")
    df=df.drop([var],axis=1)

In [15]:
df.head(5)

Unnamed: 0_level_0,BMI,PhysicalHealth,MentalHealth,SleepTime,HeartDiseaseYes,SmokingYes,AlcoholDrinkingYes,StrokeYes,DiffWalkingYes,SexMale,...,DiabeticYes,DiabeticYes (during pregnancy),PhysicalActivityYes,GenHealthFair,GenHealthGood,GenHealthPoor,GenHealthVery good,AsthmaYes,KidneyDiseaseYes,SkinCancerYes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,16.6,3.0,30.0,5.0,0,1,0,0,0,0,...,1,0,1,0,0,0,1,1,0,1
2,20.34,0.0,0.0,7.0,0,0,0,1,0,0,...,0,0,1,0,0,0,1,0,0,0
3,26.58,20.0,30.0,8.0,0,1,0,0,0,1,...,1,0,1,1,0,0,0,1,0,0
4,24.21,0.0,0.0,6.0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
5,23.71,28.0,0.0,8.0,0,0,0,0,1,0,...,0,0,1,0,0,0,1,0,0,0


In [16]:
y=df.HeartDiseaseYes

In [17]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
BMI,319795.0,28.325399,6.3561,12.02,24.03,27.34,31.42,94.85
PhysicalHealth,319795.0,3.37171,7.95085,0.0,0.0,0.0,2.0,30.0
MentalHealth,319795.0,3.898366,7.955235,0.0,0.0,0.0,3.0,30.0
SleepTime,319795.0,7.097075,1.436007,1.0,6.0,7.0,8.0,24.0
HeartDiseaseYes,319795.0,0.085595,0.279766,0.0,0.0,0.0,0.0,1.0
SmokingYes,319795.0,0.412477,0.492281,0.0,0.0,0.0,1.0,1.0
AlcoholDrinkingYes,319795.0,0.068097,0.251912,0.0,0.0,0.0,0.0,1.0
StrokeYes,319795.0,0.03774,0.190567,0.0,0.0,0.0,0.0,1.0
DiffWalkingYes,319795.0,0.13887,0.345812,0.0,0.0,0.0,0.0,1.0
SexMale,319795.0,0.475273,0.499389,0.0,0.0,0.0,1.0,1.0


In [18]:
X=df.drop(["HeartDiseaseYes"],axis=1)
y= df.HeartDiseaseYes

In [19]:
X.head(5)

Unnamed: 0_level_0,BMI,PhysicalHealth,MentalHealth,SleepTime,SmokingYes,AlcoholDrinkingYes,StrokeYes,DiffWalkingYes,SexMale,AgeCategory25-29,...,DiabeticYes,DiabeticYes (during pregnancy),PhysicalActivityYes,GenHealthFair,GenHealthGood,GenHealthPoor,GenHealthVery good,AsthmaYes,KidneyDiseaseYes,SkinCancerYes
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,16.6,3.0,30.0,5.0,1,0,0,0,0,0,...,1,0,1,0,0,0,1,1,0,1
2,20.34,0.0,0.0,7.0,0,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,0
3,26.58,20.0,30.0,8.0,1,0,0,0,1,0,...,1,0,1,1,0,0,0,1,0,0
4,24.21,0.0,0.0,6.0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
5,23.71,28.0,0.0,8.0,0,0,0,1,0,0,...,0,0,1,0,0,0,1,0,0,0


<a id="30"></a>

<font color='DeepSkyBlue'>


## NORMALIZATION

In [20]:
from sklearn.preprocessing import MinMaxScaler

In [21]:
# scaler=MinMaxScaler()
# columns=X.columns
# s=scaler.fit_transform(X)

# X_scaled_data=pd.DataFrame(s,columns=columns)
# X_scaled_data.head(5)

In [22]:
# X_scaled_data.columns

In [23]:
# X_scaled_data.columns

In [24]:
y.value_counts()

0    292422
1     27373
Name: HeartDiseaseYes, dtype: int64

In [25]:
# X_train,X_test_,y_train,y_test_=train_test_split(X,
#                                                y,
#                                                test_size=0.10,
#                                                random_state=1)

In [26]:
# y_test_.value_counts()

In [27]:
stratified_sample = df.groupby('HeartDiseaseYes', group_keys=False).apply(lambda x: x.sample(7500))
X_test = stratified_sample.drop("HeartDiseaseYes", axis=1)
y_test= stratified_sample.HeartDiseaseYes

In [28]:
df_stratified = pd.concat((X_test,y_test), axis=1)
df_stratified["HeartDiseaseYes"].value_counts()

0    7500
1    7500
Name: HeartDiseaseYes, dtype: int64

In [29]:
df_stratified.columns

Index(['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime', 'SmokingYes',
       'AlcoholDrinkingYes', 'StrokeYes', 'DiffWalkingYes', 'SexMale',
       'AgeCategory25-29', 'AgeCategory30-34', 'AgeCategory35-39',
       'AgeCategory40-44', 'AgeCategory45-49', 'AgeCategory50-54',
       'AgeCategory55-59', 'AgeCategory60-64', 'AgeCategory65-69',
       'AgeCategory70-74', 'AgeCategory75-79', 'AgeCategory80 or older',
       'RaceAsian', 'RaceBlack', 'RaceHispanic', 'RaceOther', 'RaceWhite',
       'DiabeticNo, borderline diabetes', 'DiabeticYes',
       'DiabeticYes (during pregnancy)', 'PhysicalActivityYes',
       'GenHealthFair', 'GenHealthGood', 'GenHealthPoor', 'GenHealthVery good',
       'AsthmaYes', 'KidneyDiseaseYes', 'SkinCancerYes', 'HeartDiseaseYes'],
      dtype='object')

In [30]:
df_all = df.merge(df_stratified.drop_duplicates(), on= ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime', 'SmokingYes',
       'AlcoholDrinkingYes', 'StrokeYes', 'DiffWalkingYes', 'SexMale',
       'AgeCategory25-29', 'AgeCategory30-34', 'AgeCategory35-39',
       'AgeCategory40-44', 'AgeCategory45-49', 'AgeCategory50-54',
       'AgeCategory55-59', 'AgeCategory60-64', 'AgeCategory65-69',
       'AgeCategory70-74', 'AgeCategory75-79', 'AgeCategory80 or older',
       'RaceAsian', 'RaceBlack', 'RaceHispanic', 'RaceOther', 'RaceWhite',
       'DiabeticNo, borderline diabetes', 'DiabeticYes',
       'DiabeticYes (during pregnancy)', 'PhysicalActivityYes',
       'GenHealthFair', 'GenHealthGood', 'GenHealthPoor', 'GenHealthVery good',
       'AsthmaYes', 'KidneyDiseaseYes', 'SkinCancerYes', 'HeartDiseaseYes'], how="left", indicator='True')

df_all

Unnamed: 0,BMI,PhysicalHealth,MentalHealth,SleepTime,HeartDiseaseYes,SmokingYes,AlcoholDrinkingYes,StrokeYes,DiffWalkingYes,SexMale,...,DiabeticYes (during pregnancy),PhysicalActivityYes,GenHealthFair,GenHealthGood,GenHealthPoor,GenHealthVery good,AsthmaYes,KidneyDiseaseYes,SkinCancerYes,True
0,16.60,3.0,30.0,5.0,0,1,0,0,0,0,...,0,1,0,0,0,1,1,0,1,left_only
1,20.34,0.0,0.0,7.0,0,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,left_only
2,26.58,20.0,30.0,8.0,0,1,0,0,0,1,...,0,1,1,0,0,0,1,0,0,left_only
3,24.21,0.0,0.0,6.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,left_only
4,23.71,28.0,0.0,8.0,0,0,0,0,1,0,...,0,1,0,0,0,1,0,0,0,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319790,27.41,7.0,0.0,6.0,1,1,0,0,1,1,...,0,0,1,0,0,0,1,0,0,left_only
319791,29.84,0.0,0.0,5.0,0,1,0,0,0,1,...,0,1,0,0,0,1,1,0,0,left_only
319792,24.24,0.0,0.0,6.0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,left_only
319793,32.81,0.0,0.0,12.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,left_only


In [31]:
df.columns

Index(['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime', 'HeartDiseaseYes',
       'SmokingYes', 'AlcoholDrinkingYes', 'StrokeYes', 'DiffWalkingYes',
       'SexMale', 'AgeCategory25-29', 'AgeCategory30-34', 'AgeCategory35-39',
       'AgeCategory40-44', 'AgeCategory45-49', 'AgeCategory50-54',
       'AgeCategory55-59', 'AgeCategory60-64', 'AgeCategory65-69',
       'AgeCategory70-74', 'AgeCategory75-79', 'AgeCategory80 or older',
       'RaceAsian', 'RaceBlack', 'RaceHispanic', 'RaceOther', 'RaceWhite',
       'DiabeticNo, borderline diabetes', 'DiabeticYes',
       'DiabeticYes (during pregnancy)', 'PhysicalActivityYes',
       'GenHealthFair', 'GenHealthGood', 'GenHealthPoor', 'GenHealthVery good',
       'AsthmaYes', 'KidneyDiseaseYes', 'SkinCancerYes'],
      dtype='object')

In [32]:
train_dataset = df_all[df_all['True'] == 'left_only']
X_train = train_dataset.drop(["HeartDiseaseYes", "True"], axis=1)
y_train = train_dataset.HeartDiseaseYes

<a id="31"></a>

<font color='DeepSkyBlue'>


## OverSampling


In [34]:
from imblearn.over_sampling import SMOTE
oversample = SMOTE()
X_train_os, y_train_os = oversample.fit_resample(X_train, y_train)

In [35]:
y_train_os.value_counts()

0    283537
1    283537
Name: HeartDiseaseYes, dtype: int64

In [36]:
models=[KNeighborsClassifier(),LogisticRegression(),XGBClassifier(),RandomForestClassifier(), GaussianNB()]
scores=dict()

performance_table = ['Model', 'Accuracy', 'Precision', 'Percentage_mislabbled'
                     'Total_Ones', 'Mislabble_Ones', 'Percent_Mislabbled_Ones', 'Recall_Score', 'F1_Score']

Model = []
Accuracy = []
Precision = []
Percentage_mislabbled = []
Percent_Mislabbled_zeroes =[]
Percent_Mislabbled_Ones =[]
Recall_Score =[]
F1_Score = []


for m in models:
    m.fit(X_train_os,y_train_os)
    y_pred=m.predict(X_test)
    percent_mislabbled = (((y_test != y_pred).sum())/X_test.shape[0])*100
    

    y_pred_df = pd.DataFrame(np.array(y_pred), columns= ['ypred'])
    y_test_df = pd.DataFrame(y_test.values, columns = ['y_test'])
    df_test = pd.concat([y_pred_df, y_test_df], axis=1)
    df_test1 = df_test[df_test['y_test'] == 1]
    df_test0 = df_test[df_test['y_test'] == 0]
    percent_how_many_ones_mislabbled = ((df_test1.y_test != df_test1.ypred).sum()) / (len(df_test[df_test['y_test'] == 1])) * 100
    percent_how_many_zeroes_mislabbled = ((df_test0.y_test != df_test0.ypred).sum()) / (len(df_test[df_test['y_test'] == 0])) * 100
    
    Model.append(m)    
    Accuracy.append(accuracy_score(y_test,y_pred))
    Precision.append(precision_score(y_test,y_pred))
    Percentage_mislabbled.append(percent_mislabbled)
    Percent_Mislabbled_zeroes.append(percent_how_many_zeroes_mislabbled)    
    Percent_Mislabbled_Ones.append(percent_how_many_ones_mislabbled)
    Recall_Score.append(recall_score(y_test,y_pred))
    F1_Score.append(f1_score(y_test,y_pred))

performance_table = {'Model':Model , 'Accuracy':Accuracy, 'Precision': Precision, 'Percentage_mislabbled':Percentage_mislabbled, 'Percent_Mislabbled_zeroes': Percent_Mislabbled_zeroes,
                                            'Percent_Mislabbled_Ones': Percent_Mislabbled_Ones , 'Recall_Score': Recall_Score, 
                                            'F1_Score' :F1_Score}

performance_table_oversample = pd.DataFrame(performance_table)
performance_table_oversample

# Class Weightage

In [None]:
classweight = {0: 1, 1: 9}

In [54]:
from sklearn.tree import DecisionTreeClassifier
models=[KNeighborsClassifier(),LogisticRegression(class_weight=classweight),RandomForestClassifier(class_weight=classweight), GaussianNB()]

scores=dict()

performance_table = ['Model', 'Accuracy', 'Precision', 'Percentage_mislabbled'
                                            'Total_Ones', 'Mislabble_Ones', 'Percent_Mislabbled_Ones', 'Recall_Score', 'F1_Score']

Model = []
Accuracy = []
Precision = []
Percentage_mislabbled = []
Percent_Mislabbled_zeroes =[]
Percent_Mislabbled_Ones =[]
Recall_Score =[]
F1_Score = []


for m in models:
    m.fit(X_train,y_train)
    y_pred=m.predict(X_test)
    percent_mislabbled = (((y_test != y_pred).sum())/X_test.shape[0])*100
    

    y_pred_df = pd.DataFrame(np.array(y_pred), columns= ['ypred'])
    y_test_df = pd.DataFrame(y_test.values, columns = ['y_test'])
    df_test = pd.concat([y_pred_df, y_test_df], axis=1)
    df_test1 = df_test[df_test['y_test'] == 1]
    df_test0 = df_test[df_test['y_test'] == 0]
    percent_how_many_ones_mislabbled = ((df_test1.y_test != df_test1.ypred).sum()) / (len(df_test[df_test['y_test'] == 1])) * 100
    percent_how_many_zeroes_mislabbled = ((df_test0.y_test != df_test0.ypred).sum()) / (len(df_test[df_test['y_test'] == 0])) * 100
    
    

    Model.append(m)    
    Accuracy.append(accuracy_score(y_test,y_pred))
    Precision.append(precision_score(y_test,y_pred))
    Percentage_mislabbled.append(percent_mislabbled)
    Percent_Mislabbled_zeroes.append(percent_how_many_zeroes_mislabbled)    
    Percent_Mislabbled_Ones.append(percent_how_many_ones_mislabbled)
    Recall_Score.append(recall_score(y_test,y_pred))
    F1_Score.append(f1_score(y_test,y_pred))

performance_table = {'Model':Model , 'Accuracy':Accuracy, 'Precision': Precision, 'Percentage_mislabbled':Percentage_mislabbled, 'Percent_Mislabbled_zeroes': Percent_Mislabbled_zeroes,
                                            'Percent_Mislabbled_Ones': Percent_Mislabbled_Ones , 'Recall_Score': Recall_Score, 
                                            'F1_Score' :F1_Score}

performance_table = pd.DataFrame(performance_table)
performance_table

Unnamed: 0,Model,Accuracy,Precision,Percentage_mislabbled,Percent_Mislabbled_zeroes,Percent_Mislabbled_Ones,Recall_Score,F1_Score
0,KNeighborsClassifier(),0.5176,0.883721,48.24,0.533333,95.946667,0.040533,0.077511
1,"LogisticRegression(class_weight={0: 1, 1: 100})",0.6256,0.573709,37.44,72.64,2.24,0.9776,0.723077
2,"(DecisionTreeClassifier(max_features='sqrt', r...",0.5256,0.803797,47.44,1.653333,93.226667,0.067733,0.124939
3,GaussianNB(),0.739,0.741414,26.1,25.6,26.6,0.734,0.737688


In [None]:
performance_table.to_csv("classweightage_performance.csv")

# SMOTETOMEK

In [None]:
from imblearn.combine import SMOTETomek
st = SMOTETomek(0.75)
X_train_st, y_train_st = st.fit_resample(X_train, y_train)

In [None]:
models=[LogisticRegression(),XGBClassifier(),RandomForestClassifier(), GaussianNB()]
scores=dict()

performance_table = ['Model', 'Accuracy', 'Precision', 'Percentage_mislabbled'
                                            'Total_Ones', 'Mislabble_Ones', 'Percent_Mislabbled_Ones', 'Recall_Score', 'F1_Score']

Model = []
Accuracy = []
Precision = []
Percentage_mislabbled = []
Percent_Mislabbled_zeroes =[]
Percent_Mislabbled_Ones =[]
Recall_Score =[]
F1_Score = []


for m in models:
    m.fit(X_train_st,y_train_st)
    y_pred=m.predict(X_test)
    percent_mislabbled = (((y_test != y_pred).sum())/X_test.shape[0])*100
    

    y_pred_df = pd.DataFrame(np.array(y_pred), columns= ['ypred'])
    y_test_df = pd.DataFrame(y_test.values, columns = ['y_test'])
    df_test = pd.concat([y_pred_df, y_test_df], axis=1)
    df_test1 = df_test[df_test['y_test'] == 1]
    df_test0 = df_test[df_test['y_test'] == 0]
    percent_how_many_ones_mislabbled = ((df_test1.y_test != df_test1.ypred).sum()) / (len(df_test[df_test['y_test'] == 1])) * 100
    percent_how_many_zeroes_mislabbled = ((df_test0.y_test != df_test0.ypred).sum()) / (len(df_test[df_test['y_test'] == 0])) * 100
    
    

    Model.append(m)    
    Accuracy.append(accuracy_score(y_test,y_pred))
    Precision.append(precision_score(y_test,y_pred))
    Percentage_mislabbled.append(percent_mislabbled)
    Percent_Mislabbled_zeroes.append(percent_how_many_zeroes_mislabbled)    
    Percent_Mislabbled_Ones.append(percent_how_many_ones_mislabbled)
    Recall_Score.append(recall_score(y_test,y_pred))
    F1_Score.append(f1_score(y_test,y_pred))

performance_table = {'Model':Model , 'Accuracy':Accuracy, 'Precision': Precision, 'Percentage_mislabbled':Percentage_mislabbled, 
                     'Percent_Mislabbled_zeroes': Percent_Mislabbled_zeroes,
                                            'Percent_Mislabbled_Ones': Percent_Mislabbled_Ones , 'Recall_Score': Recall_Score, 
                                            'F1_Score' :F1_Score}

performance_table = pd.DataFrame(performance_table)
performance_table