In [2]:
#Add needed imports
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder
from sklearn.dummy import DummyClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix, precision_score, recall_score,f1_score
from sklearn.tree import DecisionTreeClassifier  
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn import svm

import os
#Read data
proccessed_data_path =os.path.join(os.path.pardir,os.path.pardir,'data','processed')
train_path = os.path.join(proccessed_data_path,'dataset7.csv')
df = pd.read_csv(train_path)
labels=df['Churn']
x = df.drop(columns=['Churn'],axis = 'columns')
y=np.ravel(labels)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)
oversample = SMOTE()
x_train, y_train = oversample.fit_resample(x_train, y_train)

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [5]:
svm_model = svm.SVC(gamma='auto')
rf_model=RandomForestClassifier()
dt_model=DecisionTreeClassifier(criterion='entropy',max_depth = 7,min_samples_leaf=30) 
lr_model= LogisticRegression(random_state=0)
mlp_model =MLPClassifier(activation='relu', solver='sgd',learning_rate='adaptive')
xgb_model = XGBClassifier( learning_rate=0.05, max_depth=7)
gmb_model= GradientBoostingClassifier(n_estimators=20,learning_rate=0.75,max_features=4,max_depth=5,random_state=0)

model_params = {
    'svm': {
        'model': svm_model,
        'params' : {
            'C': [15,10],
            'kernel': ['rbf','linear']
        }  
     },
    'rf': {
        'model': rf_model,
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
     'dt': {
        'model': dt_model,
        'params' : {}
    },
    'lr' : {
        'model':lr_model,
        'params': {
            'C': [1,5,10]
        }
    },
    'mlp' : {
        'model':mlp_model,
        'params': {}
    },
    'xg_boost' : {
        'model':xgb_model,
        'params': {}
    },
    'gbm' : {
        'model':gmb_model,
        'params': {}
    }
}

In [6]:
scores = []
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=100)
for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=cv, return_train_score=False)
    clf.fit(x_train,y_train)
    conf_matrix =confusion_matrix(y_test,clf.predict(x_test))
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_,
        'precision':precision_score(y_test,clf.predict(x_test)),
        'recall':recall_score(y_test,clf.predict(x_test)),
        'f1_score':f1_score(y_test,clf.predict(x_test)),
        'true positives':conf_matrix[0][0],
        'true negatives':conf_matrix[1][1],
        'false postives':conf_matrix[0][1],
        'false negatives':conf_matrix[1][0]
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','precision','recall','f1_score','true positives','true negatives','false postives','false negatives','best_params'])
print(df)



      model  best_score  precision    recall  f1_score  true positives  \
0       svm    0.707143   0.686441  0.810000  0.743119             189   
1        rf    0.698571   0.780000  0.650000  0.709091             245   
2        dt    0.692381   0.708609  0.713333  0.710963             212   
3        lr    0.708333   0.710145  0.816667  0.759690             200   
4       mlp    0.690714   0.689150  0.783333  0.733229             194   
5  xg_boost    0.723095   0.777049  0.790000  0.783471             232   
6       gbm    0.695000   0.740864  0.743333  0.742097             222   

   true negatives  false postives  false negatives  \
0             243             111               57   
1             195              55              105   
2             214              88               86   
3             245             100               55   
4             235             106               65   
5             237              68               63   
6             223            