In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import os
%matplotlib inline

# Report the directory the kernel is running in; relative file paths used
# later in the notebook are resolved against it.
current_dir = os.getcwd()
print("Working directory:", current_dir)


In [None]:
# Read the churn data from a path relative to the working directory
# (customer_churn.csv must be in that folder), then preview the first rows.
csv_path = "customer_churn.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:
# Remove the CustomerID column (presumably a row identifier rather than a
# feature) before modelling.
# Rebinding `df` instead of using inplace=True, combined with errors='ignore',
# keeps this cell idempotent: re-running it after the column is already gone
# is a no-op rather than a KeyError.
df = df.drop(columns=['CustomerID'], errors='ignore')


In [None]:
# Integer-encode the categorical columns so the tree can consume them.
# A separate LabelEncoder is fitted per column and kept in `encoders`, so each
# column's code -> category mapping stays available afterwards
# (e.g. encoders['Gender'].classes_ or .inverse_transform(...)).
# Refitting one shared encoder would leave only the last column's mapping.
label_cols = ['Gender', 'Subscription Type', 'Contract Length']
encoders = {}
for col in label_cols:
    le = LabelEncoder()  # after the loop, `le` is the encoder of the last column
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
df.head()


In [None]:
# Separate the target column from the predictors, then hold out 20% of the
# rows as a test set. The fixed random_state makes the split reproducible.
target_col = "Churn"
y = df[target_col]
X = df.drop(columns=target_col)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train a decision tree with default (unrestricted) depth on the training
# split; random_state is fixed so repeated runs build the same tree.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
clf  # show the fitted estimator as the cell output


In [None]:
# Score the fitted tree on the held-out test split and print the standard
# classification metrics.
y_pred = clf.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", report)


In [None]:
# Draw the fitted tree.
# feature_names is passed as a plain list: some scikit-learn releases validate
# this parameter strictly and reject a pandas Index such as X.columns.
# NOTE(review): class_names are matched to clf.classes_ in ascending order, so
# this labelling assumes the smaller Churn value means "not churned" — confirm.
plt.figure(figsize=(16, 8))
plot_tree(
    clf,
    feature_names=list(X.columns),
    class_names=['Not Churned', 'Churned'],
    filled=True,
    rounded=True,
)
plt.title("Decision Tree - Customer Churn")
plt.show()


In [None]:
# Rank the features by the tree's importance scores (highest first) and show
# them as a horizontal bar chart.
importance = pd.Series(clf.feature_importances_, index=X.columns)
importance = importance.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(x=importance, y=importance.index, ax=ax)
ax.set_title("Feature Importance")
ax.set_xlabel("Importance Score")
plt.show()


In [None]:
# Fit a second tree capped at depth 3 on the same training split and report
# its accuracy on the same test split.
pruned_tree = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
y_pruned_pred = pruned_tree.predict(X_test)
pruned_accuracy = accuracy_score(y_test, y_pruned_pred)
print("Pruned Tree Accuracy:", pruned_accuracy)
