In [2]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [4]:
df=pd.read_csv("tel_churn.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,0,29.85,29.85,0,True,False,False,True,True,...,False,False,True,False,True,False,False,False,False,False
1,1,0,56.95,1889.5,0,False,True,True,False,True,...,False,False,False,True,False,False,True,False,False,False
2,2,0,53.85,108.15,1,False,True,True,False,True,...,False,False,False,True,True,False,False,False,False,False
3,3,0,42.3,1840.75,0,False,True,True,False,True,...,True,False,False,False,False,False,False,True,False,False
4,4,0,70.7,151.65,1,True,False,True,False,True,...,False,False,True,False,True,False,False,False,False,False


In [6]:
df=df.drop('Unnamed: 0',axis=1)

In [8]:
x=df.drop('Churn',axis=1)
y=df['Churn']

In [10]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

## test Decision Tree Classifier

In [13]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [15]:
model_dt.fit(x_train,y_train)

In [17]:
y_pred=model_dt.predict(x_test)
y_pred

array([0, 0, 0, ..., 0, 0, 0], shape=(1407,))

In [19]:
model_dt.score(x_test,y_test)

0.7995735607675906

In [21]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.85      0.89      0.87      1044
           1       0.63      0.53      0.58       363

    accuracy                           0.80      1407
   macro avg       0.74      0.71      0.72      1407
weighted avg       0.79      0.80      0.79      1407



In [23]:
# this looks like imbalanced dataset so use smote tecniqe

In [27]:
from imblearn.combine import SMOTEENN

sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x, y)


In [33]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [35]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [37]:

model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.9255499153976311
              precision    recall  f1-score   support

           0       0.92      0.91      0.91       522
           1       0.93      0.94      0.93       660

    accuracy                           0.93      1182
   macro avg       0.93      0.92      0.92      1182
weighted avg       0.93      0.93      0.93      1182



In [39]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[473  49]
 [ 39 621]]


## test Random Forest Classifier

In [42]:
from sklearn.ensemble import RandomForestClassifier

In [46]:
model_rf=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [48]:
model_rf.fit(x_train,y_train)

In [50]:
y_pred=model_rf.predict(x_test)

In [52]:
model_rf.score(x_test,y_test)

0.8187633262260128

In [54]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.84      0.93      0.88      1044
           1       0.71      0.50      0.59       363

    accuracy                           0.82      1407
   macro avg       0.78      0.72      0.74      1407
weighted avg       0.81      0.82      0.81      1407



In [63]:
from imblearn.combine import SMOTEENN

sm = SMOTEENN()
X_resampled1, y_resampled1 = sm.fit_resample(x, y)

In [65]:
xr_train1,xr_test1,yr_train1,yr_test1=train_test_split(X_resampled1, y_resampled1,test_size=0.2)

In [67]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [69]:
model_rf_smote.fit(xr_train1,yr_train1)

In [71]:
yr_predict1 = model_rf_smote.predict(xr_test1)

In [73]:
model_score_r1 = model_rf_smote.score(xr_test1, yr_test1)

In [75]:
print(model_score_r1)
print(metrics.classification_report(yr_test1, yr_predict1))

0.9344262295081968
              precision    recall  f1-score   support

           0       0.96      0.89      0.92       514
           1       0.92      0.97      0.94       645

    accuracy                           0.93      1159
   macro avg       0.94      0.93      0.93      1159
weighted avg       0.94      0.93      0.93      1159



In [77]:
print(metrics.confusion_matrix(yr_test1, yr_predict1))

[[459  55]
 [ 21 624]]


In [79]:
import pickle

In [81]:
filename = 'model.sav'

In [83]:
pickle.dump(model_rf_smote, open(filename, 'wb'))

In [85]:
load_model = pickle.load(open(filename, 'rb'))

In [87]:
model_score_r1 = load_model.score(xr_test1, yr_test1)

In [89]:
model_score_r1

0.9344262295081968