In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score, fbeta_score, confusion_matrix, ConfusionMatrixDisplay

from aml import AutoMLClassifier

import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


In [2]:
# sample data
cols = ['Attrition_Flag', 'Customer_Age', 'Gender',
       'Dependent_count', 'Education_Level', 'Marital_Status',
       'Income_Category', 'Card_Category', 'Months_on_book',
       'Total_Relationship_Count', 'Months_Inactive_12_mon',
       'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
       'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
       'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio']

churn_data = pd.read_csv('data/BankChurners.csv', usecols=cols)

In [3]:
#churn_data.info()

In [4]:
label = 'Attrition_Flag'

y = churn_data[label]
X = churn_data.drop(label, axis=1)

# convert to list
labels = list(y.unique())
y = y.apply(lambda x: labels.index(x))

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,random_state=1234, test_size=0.2)

In [8]:
def show_test_scores(m):
    for pipe, *_ in m.best_models:
        print("Model Type:", str(type(pipe[-1]))) #last step is a classifier
        y_pred = pipe.predict(X_test)
        roc_auc = roc_auc_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        print("ROC AUC: ", roc_auc)
        print("Precision: ", precision)
        print("Recall :", recall)
        print("F1 score:", 2*precision*recall/(precision+recall))
        print()

In [11]:
aml = AutoMLClassifier('roc_auc_score', 300, try_LR=True, try_DT=False, try_RF=False, try_GB=False, try_SVC=False, try_KM=False)

In [12]:
%%time
aml.fit(X_train, y_train)

CPU times: user 5.9 s, sys: 689 ms, total: 6.59 s
Wall time: 24.6 s
        nan        nan        nan        nan 0.84586           nan
        nan        nan        nan        nan        nan        nan
 0.84882509        nan 0.84882509        nan        nan        nan
        nan        nan        nan        nan 0.84882509 0.84844195
        nan        nan 0.84882509 0.84882509        nan        nan
        nan        nan 0.84875156 0.84844195        nan        nan
        nan 0.8484419         nan        nan        nan        nan
        nan        nan 0.84867945        nan        nan        nan
        nan        nan        nan 0.84882509        nan        nan
 0.84882509        nan        nan        nan        nan        nan
 0.84875156        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.84875156        nan        nan        nan        nan        nan
 0.84882509 0.84792719        nan        nan        nan      

In [13]:
show_test_scores(aml)

Model Type: <class 'sklearn.linear_model._logistic.LogisticRegression'>
ROC AUC:  0.854049654049654
Precision:  0.5175600739371534
Recall : 0.8615384615384616
F1 score: 0.6466512702078523

