# Decision Tree Classifier for Telco Customer Churn

This notebook implements a **Decision Tree Classifier** as the third modeling method for the final project. We will use the cleaned dataset to predict customer churn.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Set the global plot style for every figure in this notebook.
# `sns.set_theme` is the preferred interface; `sns.set` is only a legacy alias for it.
sns.set_theme(style="whitegrid")

## 1. Load Data

In [None]:
# Read the cleaned churn dataset from its CSV file into a DataFrame
DATA_FILE = 'telco_customer_churn_cleaned.csv'
df = pd.read_csv(DATA_FILE)

# Preview the leading rows (rendered as this cell's output)
df.head()

## 2. Data Preprocessing
scikit-learn's `DecisionTreeClassifier` only accepts numeric input, so we convert the categorical (text) variables into integer codes before training.

In [None]:
# Identify categorical (object-dtype) columns
categorical_cols = df.select_dtypes(include=['object']).columns

# Fit a separate LabelEncoder per column and keep it, so each integer code can
# later be mapped back to its original label, e.g.
# label_encoders['Churn'].classes_ or .inverse_transform(...).
# Reusing a single encoder would keep only the mapping of the last column.
label_encoders = {}

for col in categorical_cols:
    # 'customerID' is a row identifier that is dropped before modelling,
    # so encoding it is wasted work and produces meaningless codes.
    if col == 'customerID':
        continue
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Verify changes
df.head()

## 3. Train-Test Split

In [None]:
# Define features (X) and target (y)
# We drop 'customerID' as it's not a predictor, and 'Churn' as it's the target
X = df.drop(['customerID', 'Churn'], axis=1)
y = df['Churn']

# Split data into 80% training and 20% testing.
# stratify=y keeps the churn / no-churn proportion the same in both partitions;
# without it a random split can shift the class ratio between train and test
# (churn targets are typically imbalanced) and distort the evaluation metrics.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

## 4. Model Building

In [None]:
# Hyperparameters for the tree:
#   max_depth    - capped to limit overfitting and keep the tree interpretable
#   random_state - fixed seed so the fitted tree is reproducible
tree_params = {"max_depth": 5, "random_state": 42}
dt_model = DecisionTreeClassifier(**tree_params)

# Fit on the training partition (fit returns the estimator, shown as cell output)
dt_model.fit(X_train, y_train)

## 5. Model Evaluation

In [None]:
# Predict labels for the held-out test partition
y_pred = dt_model.predict(X_test)

# Overall fraction of correct predictions
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Per-class precision / recall / F1 summary
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix drawn as an annotated heatmap
# (rows = true labels, columns = predicted labels)
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix - Decision Tree')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

## 6. Feature Importance

In [None]:
# Get feature importances from the fitted tree, aligned with the training columns
importances = dt_model.feature_importances_
feature_names = X.columns

# Build a Feature/Importance table sorted from most to least important
feature_imp_df = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
feature_imp_df = feature_imp_df.sort_values(by='Importance', ascending=False)

# Plot the ten most important features as horizontal bars.
# Passing `palette` without `hue` is deprecated since seaborn 0.13, so the
# y variable is also assigned to `hue` to colour each bar from the palette.
# dodge=False keeps full-width bars on older seaborn versions, and the
# redundant legend (if one was created) is removed explicitly rather than via
# `legend=False`, which older seaborn versions do not accept.
plt.figure(figsize=(10, 6))
ax = sns.barplot(
    x='Importance',
    y='Feature',
    hue='Feature',
    data=feature_imp_df.head(10),
    palette='viridis',
    dodge=False,
)
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title('Top 10 Feature Importances - Decision Tree')
plt.show()

## 7. Tree Visualization (Optional)
Visualizing the top levels of the tree to understand the decision logic.

In [None]:
# Draw only the upper part of the fitted tree (plot_tree's max_depth limits
# how deep the drawing goes; the model itself is unchanged).
plt.figure(figsize=(20, 10))
plot_tree(
    dt_model,
    # Convert the pandas Index to a plain list: some scikit-learn releases
    # validate feature_names as a list and reject an Index.
    feature_names=list(X.columns),
    # class_names must be given in ascending order of the encoded classes.
    # NOTE(review): assumes encoded Churn 0 = no churn and 1 = churn - confirm.
    class_names=['No Churn', 'Churn'],
    filled=True,
    max_depth=3,
    fontsize=10,
)
plt.title('Decision Tree Visualization (Top 3 Levels)')
plt.show()