In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

# Silence warnings for the rest of the session: no category or message is
# given, so the 'ignore' filter applies to every Warning subclass.
# NOTE(review): this also hides deprecation notices from the libraries used
# below; consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [12]:
# Load the customer table and print a quick overview, in this order:
# first rows, (rows, columns) shape, column index, summary statistics.
df = pd.read_csv("telco_customer.csv")
for overview in (df.head(), df.shape, df.columns, df.describe()):
    print(overview)

   SeniorCitizen  tenure  MonthlyCharges Churn
0              0       1           29.85    No
1              0      34           56.95    No
2              0       2           53.85   Yes
3              0      45           42.30    No
4              0       2           70.70   Yes
(5517, 4)
Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'Churn'], dtype='object')
       SeniorCitizen       tenure  MonthlyCharges
count    5517.000000  5517.000000     5517.000000
mean        0.197571    32.875657       76.844254
std         0.398203    24.593647       21.924661
min         0.000000     0.000000       23.450000
25%         0.000000     9.000000       59.950000
50%         0.000000    30.000000       79.450000
75%         0.000000    56.000000       94.450000
max         1.000000    72.000000      118.750000


In [13]:
# Predictors are the three numeric columns; the target is the Churn label.
X = df[['SeniorCitizen', 'tenure', 'MonthlyCharges']]
y = df['Churn']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class ratio should be preserved in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [19]:
# Single decision tree with no depth limit. class_weight='balanced' weights
# each class inversely to its frequency in y_train; the seed makes the fitted
# tree reproducible.
clf = DecisionTreeClassifier(
    random_state=42,
    class_weight='balanced',
)
clf.fit(X=X_train, y=y_train)

In [20]:
# Labels predicted by the fitted decision tree for the held-out rows.
y_pred = clf.predict(X=X_test)

In [21]:
# Share of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Model Accuracy: {accuracy}')

Model Accuracy: 0.677536231884058


In [None]:
# Visualise the fitted decision tree.
# The tree was trained without a depth limit, so drawing every node yields an
# unreadable (and slow to render) figure. Only the top levels are drawn here;
# max_depth below limits the drawing, not the fitted model.
fig, ax = plt.subplots(figsize=(12, 8))
tree.plot_tree(
    clf,
    max_depth=3,
    # Take the names from the training frame so they cannot drift from the
    # columns the tree was actually fitted on.
    feature_names=list(X_train.columns),
    # Display labels, listed in the ascending order of the fitted classes
    # ('No' then 'Yes').
    class_names=['No Churn', 'Churn'],
    filled=True,
    ax=ax,
)
ax.set_title('Decision Tree for Predicting Customer Churn (top 3 levels)')
plt.show()

In [26]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the held-out set again with the decision tree so this cell does not
# rely on y_pred from an earlier cell.
y_pred = clf.predict(X_test)

# Confusion matrix: rows are true labels, columns are predicted labels.
dt_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", dt_confusion)

# Per-class precision, recall, F1-score and support.
dt_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", dt_report)

Confusion Matrix:
 [[863 260]
 [274 259]]

Classification Report:
               precision    recall  f1-score   support

          No       0.76      0.77      0.76      1123
         Yes       0.50      0.49      0.49       533

    accuracy                           0.68      1656
   macro avg       0.63      0.63      0.63      1656
weighted avg       0.68      0.68      0.68      1656



In [27]:
from sklearn.ensemble import RandomForestClassifier

# Random forest trained on the same split, with the same class weighting and
# seed as the single decision tree.
rf = RandomForestClassifier(random_state=42, class_weight='balanced').fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

# Print the same two summaries as for the decision tree, in the same order.
for heading, summarize in (
    ("Confusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, summarize(y_test, y_pred_rf))


Confusion Matrix:
 [[941 182]
 [278 255]]

Classification Report:
               precision    recall  f1-score   support

          No       0.77      0.84      0.80      1123
         Yes       0.58      0.48      0.53       533

    accuracy                           0.72      1656
   macro avg       0.68      0.66      0.66      1656
weighted avg       0.71      0.72      0.71      1656



In [None]:
# We implemented a Decision Tree and a Random Forest classifier to predict customer churn and evaluated both on the held-out test set using accuracy, precision, recall, and F1-score.
# The Random Forest (with class weighting) outperformed the Decision Tree in overall accuracy (72% vs. 68%) and in precision on the churn class (0.58 vs. 0.50), while recall on the churn class was essentially unchanged (0.48 vs. 0.49).
# In other words, the Random Forest is somewhat conservative: it flags fewer customers as churners, but the ones it flags are more often correct.
# Further improvements could include hyperparameter tuning, cross-validation, and resampling techniques such as SMOTE to improve recall on the churn class.