In [60]:
from Datapipeline import Datapipeline, print_score
import numpy as np
import pandas as pd
from statistics import mean
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score

In [61]:
df = pd.read_csv('../train.csv')
labels = df['Churn']
df = df.drop('Churn', axis='columns')
pl = Datapipeline()

In [62]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
scores_acc = []
scores_pre = []
scores_recall = []
scores_f1 = []
models = []
  
for train_index, test_index in skf.split(df, labels):
    x_train_fold, x_test_fold = df.iloc[train_index], df.iloc[test_index]
    y_train_fold, y_test_fold = labels.iloc[train_index], labels.iloc[test_index]
    model = pl.fit(x_train_fold, y_train_fold)
    models.append(model)
    pred = pl.predict(x_test_fold)
    scores_acc.append(accuracy_score(pred, y_test_fold))
    scores_pre.append(precision_score(pred, y_test_fold))
    scores_recall.append(recall_score(pred, y_test_fold))
    scores_f1.append(f1_score(pred, y_test_fold))

In [63]:
for i in range(len(models)):
    print("Score in fold {}:\n\tAcc:{:.2f}\tPre:{:.2f}\tRecall:{:.2f}\tF1:{:.2f}".format(i,scores_acc[i], scores_pre[i], scores_recall[i], scores_f1[i]))

Score in fold 0:
	Acc:0.96	Pre:0.89	Recall:0.87	F1:0.88
Score in fold 1:
	Acc:0.95	Pre:0.90	Recall:0.84	F1:0.87
Score in fold 2:
	Acc:0.96	Pre:0.88	Recall:0.88	F1:0.88
Score in fold 3:
	Acc:0.94	Pre:0.84	Recall:0.79	F1:0.82
Score in fold 4:
	Acc:0.97	Pre:0.92	Recall:0.89	F1:0.90
Score in fold 5:
	Acc:0.96	Pre:0.89	Recall:0.90	F1:0.89
Score in fold 6:
	Acc:0.94	Pre:0.84	Recall:0.83	F1:0.84
Score in fold 7:
	Acc:0.95	Pre:0.82	Recall:0.86	F1:0.84
Score in fold 8:
	Acc:0.93	Pre:0.85	Recall:0.78	F1:0.81
Score in fold 9:
	Acc:0.95	Pre:0.87	Recall:0.84	F1:0.86


In [64]:
print("Mean score:\n\tAcc:{:.2f}\tPre:{:.2f}\tRecall:{:.2f}\tF1:{:.2f}".format(mean(scores_acc), mean(scores_pre), mean(scores_recall), mean(scores_f1)))

Mean score:
	Acc:0.95	Pre:0.87	Recall:0.85	F1:0.86


In [65]:
df_test = pd.read_csv('../test.csv')
labels_test = df_test['Churn']
customers_test = df_test.drop('Churn', axis='columns')

model_max = models[np.argmax(scores_f1)]
pred_test = model_max.predict(customers_test)

In [66]:
print_score(labels_test, pred_test)

Accuracy 0.9578152753108348
Precision 0.8796791443850267
Recall 0.8680738786279684
F1 0.8738379814077025
