In [161]:
from decision_tree import DecisionTreeModel
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import f1_score, roc_auc_score
import pandas as pd
import numpy as np

In [162]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [163]:
decision_tree_model = DecisionTreeModel(name='DecisionTree')
print(decision_tree_model)
decision_tree_model._is_train()

DecisionTree


False

In [164]:
df = pd.read_csv('../data/WA_Fn-UseC_-Telco-Customer-Churn-encoded.csv')
df.shape

(7043, 41)

In [165]:
estimator = DecisionTreeModel().model
cross_val_score(estimator, df.iloc[:,:-1], df.iloc[:,-1], cv = 5, scoring='roc_auc').mean()

0.6558016438696729

In [166]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1], df.iloc[:,-1], test_size=0.2, random_state=42)

In [167]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5634, 40)
X_test shape: (1409, 40)
y_train shape: (5634,)
y_test shape: (1409,)


In [168]:
decision_tree_model.fit(X_train, y_train)
y_pred = decision_tree_model.predict(X_test)

roc_auc_score(y_pred, y_test)

0.633155716207303

In [169]:
grid_model = DecisionTreeModel("Model with grid search")
grid_search_parameters = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_depth': range(3, 10),
    'min_samples_split': range(5, 10),
    'min_impurity_decrease': np.linspace(1e-4,1,10)
}
grid_model.gs_parameter_tune(df.iloc[:,:-1], df.iloc[:,-1], grid_search_parameters, scoring='f1_macro')

Best Parameters: {'criterion': 'gini', 'max_depth': 5, 'min_impurity_decrease': 0.0001, 'min_samples_split': 5}
Validation Accuracy: 0.7302465000830413


In [170]:
hyper_model = DecisionTreeModel("Model with hyper parameters")
hyper_params = {'criterion': 'log_loss', 'max_depth': 5, 'min_samples_split': 2}
hyper_model.hyper_parameter(hyper_params)
hyper_model.fit(X_train, y_train)

y_pred_hyper = hyper_model.predict(X_test)

f1_score(y_pred_hyper, y_test)

0.6180836707152496