In [1]:
#Add needed imports
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder
from sklearn.dummy import DummyClassifier
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix, precision_score, recall_score,f1_score
from sklearn.tree import DecisionTreeClassifier  
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn import svm

import os
#Read data
proccessed_data_path =os.path.join(os.path.pardir,os.path.pardir,'data','processed')
train_path = os.path.join(proccessed_data_path,'dataset8.csv')
df = pd.read_csv(train_path)
labels=df['Churn']
x = df.drop(columns=['Churn'],axis = 'columns')
y=np.ravel(labels)
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)
oversample = SMOTE()
x_train, y_train = oversample.fit_resample(x_train, y_train)

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [4]:
svm_model = svm.SVC(gamma='auto')
rf_model=RandomForestClassifier()
dt_model=DecisionTreeClassifier(criterion='entropy',max_depth = 7,min_samples_leaf=30) 
lr_model= LogisticRegression(random_state=0)
mlp_model =MLPClassifier(activation='relu', solver='sgd',learning_rate='adaptive')
xgb_model = XGBClassifier( learning_rate=0.05, max_depth=7)
gmb_model= GradientBoostingClassifier(n_estimators=20,learning_rate=0.75,max_features=4,max_depth=5,random_state=0)

model_params = {
    'svm': {
        'model': svm_model,
        'params' : {
            'C': [15,10],
            'kernel': ['rbf','linear']
        }  
     },
    'rf': {
        'model': rf_model,
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
     'dt': {
        'model': dt_model,
        'params' : {}
    },
    'lr' : {
        'model':lr_model,
        'params': {
            'C': [1,5,10]
        }
    },
    'mlp' : {
        'model':mlp_model,
        'params': {}
    },
    'xg_boost' : {
        'model':xgb_model,
        'params': {}
    },
    'gbm' : {
        'model':gmb_model,
        'params': {}
    }
}

In [5]:
scores = []
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=100)
for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=cv, return_train_score=False)
    clf.fit(x_train,y_train)
    conf_matrix =confusion_matrix(y_test,clf.predict(x_test))
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_,
        'precision':precision_score(y_test,clf.predict(x_test)),
        'recall':recall_score(y_test,clf.predict(x_test)),
        'f1_score':f1_score(y_test,clf.predict(x_test)),
        'true positives':conf_matrix[0][0],
        'true negatives':conf_matrix[1][1],
        'false postives':conf_matrix[0][1],
        'false negatives':conf_matrix[1][0]
    })
    
df = pd.DataFrame(scores,columns=['model','best_score','precision','recall','f1_score','true positives','true negatives','false postives','false negatives','best_params'])
print(df)



      model  best_score  precision    recall  f1_score  true positives  \
0       svm    0.925954   0.433824  0.517544  0.472000             709   
1        rf    0.918641   0.634146  0.684211  0.658228             741   
2        dt    0.851255   0.662069  0.842105  0.741313             737   
3        lr    0.759283   0.271845  0.736842  0.397163             561   
4       mlp    0.826576   0.423423  0.824561  0.559524             658   
5  xg_boost    0.925490   0.716418  0.842105  0.774194             748   
6       gbm    0.903908   0.516340  0.692982  0.591760             712   

   true negatives  false postives  false negatives                 best_params  
0              59              77               55  {'C': 15, 'kernel': 'rbf'}  
1              78              45               36        {'n_estimators': 10}  
2              96              49               18                          {}  
3              84             225               30                   {'C': 10}  
4 