**Importing Libraries**

In [None]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from imblearn.combine import SMOTEENN

In [None]:
from google.colab import files
uploaded = files.upload()

**Loading the dataset**

In [None]:
df=pd.read_csv("tel_churn.csv", index_col=0)
df.head()

Unnamed: 0,SeniorCitizen,MonthlyCharges,TotalCharges,Churn,gender_Female,gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_1 - 12,tenure_group_13 - 24,tenure_group_25 - 36,tenure_group_37 - 48,tenure_group_49 - 60,tenure_group_61 - 72
0,0,29.85,29.85,0,1,0,0,1,1,0,...,0,0,1,0,1,0,0,0,0,0
1,0,56.95,1889.5,0,0,1,1,0,1,0,...,0,0,0,1,0,0,1,0,0,0
2,0,53.85,108.15,1,0,1,1,0,1,0,...,0,0,0,1,1,0,0,0,0,0
3,0,42.3,1840.75,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,1,0,0
4,0,70.7,151.65,1,1,0,1,0,1,0,...,0,0,1,0,1,0,0,0,0,0


In [None]:
x=df.drop('Churn',axis=1)
x.shape

(7032, 50)

In [None]:
y=df['Churn']

In [None]:
#Splitting the dataset
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

# Decision Tree Classifier

In [None]:
model_dt=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [None]:
model_dt.fit(x_train,y_train)

In [None]:
y_pred=model_dt.predict(x_test)
y_pred

array([0, 1, 0, ..., 0, 0, 0])

In [None]:
#Accuracy
model_dt.score(x_test,y_test)

0.7924662402274343

In [None]:
print(classification_report(y_test, y_pred, labels=[0,1]))

              precision    recall  f1-score   support

           0       0.84      0.89      0.86      1026
           1       0.64      0.54      0.59       381

    accuracy                           0.79      1407
   macro avg       0.74      0.71      0.72      1407
weighted avg       0.78      0.79      0.79      1407



As it's an imbalanced dataset, we shouldn't consider Accuracy as our metrics to measure the model, because Accuracy is cursed in imbalanced datasets.
Hence, we need to check recall, precision & f1 score for the minority class, and it's quite evident that the precision, recall & f1 score is too low for Class 1, i.e. churned customers.

**SMOTEENN** (UpSampling + ENN)

In [None]:
sm = SMOTEENN()
X_resampled, y_resampled = sm.fit_resample(x,y)


In [None]:
xr_train,xr_test,yr_train,yr_test=train_test_split(X_resampled, y_resampled,test_size=0.2)

In [None]:
model_dt_smote=DecisionTreeClassifier(criterion = "gini",random_state = 100,max_depth=6, min_samples_leaf=8)

In [None]:
model_dt_smote.fit(xr_train,yr_train)
yr_predict = model_dt_smote.predict(xr_test)
model_score_r = model_dt_smote.score(xr_test, yr_test)
print(model_score_r)
print(metrics.classification_report(yr_test, yr_predict))

0.9459002535925612
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       551
           1       0.94      0.96      0.95       632

    accuracy                           0.95      1183
   macro avg       0.95      0.94      0.95      1183
weighted avg       0.95      0.95      0.95      1183



In [None]:
print(metrics.confusion_matrix(yr_test, yr_predict))

[[512  39]
 [ 25 607]]


Now we can see quite better results, i.e. Accuracy: 95 %, and a very good recall, precision & f1 score for minority class.
Let's try with some other classifier.

# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
model_rf_smote=RandomForestClassifier(n_estimators=100, criterion='gini', random_state = 100,max_depth=6, min_samples_leaf=8)

In [None]:
model_rf_smote.fit(xr_train,yr_train)

In [None]:
yr_predict1 = model_rf_smote.predict(xr_test)
model_score_r1 = model_rf_smote.score(xr_test, yr_test)

print(model_score_r1)
print(metrics.classification_report(yr_test, yr_predict))

0.9298393913778529
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       551
           1       0.94      0.96      0.95       632

    accuracy                           0.95      1183
   macro avg       0.95      0.94      0.95      1183
weighted avg       0.95      0.95      0.95      1183



In [None]:
#Confusion matrix
print(metrics.confusion_matrix(yr_test, yr_predict))

[[512  39]
 [ 25 607]]


Decision tree Classifier has given higher Accuracy than the random forest Classifier