In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw churn dataset from the working directory.
df = pd.read_csv('churn.csv')

# Preview the first five rows (five is also head()'s default).
df.head(5)

Unnamed: 0,State,Account Length,Area Code,Phone,Int'l Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,...,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Churn?
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,...,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False.
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,...,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False.
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,...,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False.
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.9,...,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False.
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,...,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False.


In [3]:
# Print each column's dtype and non-null count to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3333 entries, 0 to 3332
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   State           3333 non-null   object 
 1   Account Length  3333 non-null   int64  
 2   Area Code       3333 non-null   int64  
 3   Phone           3333 non-null   object 
 4   Int'l Plan      3333 non-null   object 
 5   VMail Plan      3333 non-null   object 
 6   VMail Message   3333 non-null   int64  
 7   Day Mins        3333 non-null   float64
 8   Day Calls       3333 non-null   int64  
 9   Day Charge      3333 non-null   float64
 10  Eve Mins        3333 non-null   float64
 11  Eve Calls       3333 non-null   int64  
 12  Eve Charge      3333 non-null   float64
 13  Night Mins      3333 non-null   float64
 14  Night Calls     3333 non-null   int64  
 15  Night Charge    3333 non-null   float64
 16  Intl Mins       3333 non-null   float64
 17  Intl Calls      3333 non-null   i

There are no null values: every column listed above reports 3333 non-null entries.

In [4]:
# Remove the 'Phone' column, which this analysis treats as irrelevant.
df = df.drop(columns=['Phone'])

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder


In [6]:
# Convert 'Churn?' to binary: 1 when the (whitespace-stripped) label is
# 'True.', otherwise 0. The conversion is skipped when the column is already
# numeric, so re-running this cell no longer fails on the converted values.
if not pd.api.types.is_numeric_dtype(df['Churn?']):
    df['Churn?'] = df['Churn?'].str.strip().eq('True.').astype(int)

# Encode categorical features as integer codes.
# A separate encoder is fitted and stored per column: refitting one shared
# LabelEncoder would discard every mapping except the last column's, making
# it impossible to translate codes back to the original labels.
label_encoders = {}
for col in ['State', "Int'l Plan", 'VMail Plan']:
    label_enc = LabelEncoder()
    df[col] = label_enc.fit_transform(df[col])
    label_encoders[col] = label_enc


In [7]:
# Separate the target from the feature matrix.
y = df['Churn?']
X = df.drop(columns=['Churn?'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
# NOTE(review): the split is not stratified on y, so the class ratio in the
# test set can drift from the full data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Decision tree with class weights balanced across the two classes and a
# fixed seed for reproducibility.
clf = DecisionTreeClassifier(
    class_weight='balanced',
    random_state=42,
)

# Fit on the training split; as the cell's last expression this also displays
# the fitted estimator.
clf.fit(X_train, y_train)


In [9]:
# Predict on the held-out split and compute the metrics before printing them.
y_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print(test_report)


Accuracy: 0.9190404797601199
              precision    recall  f1-score   support

           0       0.96      0.95      0.95       566
           1       0.72      0.75      0.74       101

    accuracy                           0.92       667
   macro avg       0.84      0.85      0.84       667
weighted avg       0.92      0.92      0.92       667



### 🔍 Model Insight

Used `DecisionTreeClassifier(class_weight='balanced')`.

- **Class 0 (No Churn)**: F1 = 0.95 → ✅ Excellent
- **Class 1 (Churn)**: F1 = 0.74 → ⚠️ Needs improvement

📌 The model handles non-churners well but struggles with churners. To get a more reliable estimate of how well it generalizes than a single train/test split can give, we’ll apply **cross-validation** in the next step.


In [10]:
from sklearn.model_selection import cross_val_score

# Fresh (unfitted) balanced decision tree; this rebinds `clf`.
clf = DecisionTreeClassifier(
    class_weight='balanced',
    random_state=42,
)

# 5-fold cross-validation over the full dataset, scored with F1.
scores = cross_val_score(estimator=clf, X=X, y=y, scoring='f1', cv=5)

# Report the per-fold scores and their average.
print("Cross-validated F1 scores (churn class focus):", scores)
print("Mean F1 score:", scores.mean())


Cross-validated F1 scores (churn class focus): [0.73267327 0.68926554 0.77777778 0.62983425 0.7184466 ]
Mean F1 score: 0.7095994875826136


### 🔄 Cross-Validation Results

Using 5-fold cross-validation, the Decision Tree with `class_weight='balanced'` achieved a mean F1-score of **~0.71** for the churn class.  
Per-fold scores range from about 0.63 to 0.78, so performance is moderate and varies noticeably between folds, which leaves clear room for improvement.


In [11]:
# Model classes
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    GradientBoostingClassifier,
    AdaBoostClassifier,
    ExtraTreesClassifier,
    StackingClassifier
)
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# Preprocessing and pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Model evaluation and selection
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, f1_score

# Utilities
from scipy.stats import mode


In [12]:
# Candidate base learners as (short name, unfitted estimator) pairs.
# Every estimator that accepts a seed gets its own fixed random_state.
base_models = [
    # Tree-based models
    ('dt', DecisionTreeClassifier(random_state=1, max_depth=5)),
    ('rf', RandomForestClassifier(random_state=2, n_estimators=50)),
    ('gb', GradientBoostingClassifier(random_state=3, n_estimators=50)),
    ('ada', AdaBoostClassifier(random_state=4, n_estimators=50)),
    ('et', ExtraTreesClassifier(random_state=5, n_estimators=50)),
    # Linear, probabilistic, distance- and kernel-based models
    ('lr', LogisticRegression(random_state=6, max_iter=1000)),
    ('nb', GaussianNB()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
    ('svc', SVC(random_state=7, probability=True)),
    ('sgd', SGDClassifier(random_state=8, max_iter=1000, tol=1e-3)),
]

In [13]:
# Identify categorical and numeric columns in X.
# Columns are routed by dtype family instead of comparing against 'object',
# so text stored with a string/category dtype would still be one-hot encoded.
# NOTE(review): the categorical features were label-encoded earlier in this
# notebook, so categorical_cols is expected to be empty here and every
# feature (including the State code) is standardised as a number; confirm
# that this is intended.
numeric_cols = X.select_dtypes(include='number').columns.tolist()
categorical_cols = [c for c in X.columns if c not in numeric_cols]

# Shuffled, stratified folds; the same splitter (and seed) is reused for
# every model so all models are scored on identical folds.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

model_f1_scores = []

for name, model in base_models:
    # Out-of-fold labels and predictions, pooled across the five folds.
    y_true_all = []
    y_pred_all = []
    print(f"\n===== Model: {name} =====")

    # Define preprocessing pipeline per model. Because preprocessing lives
    # inside the pipeline, it is re-fitted on each fold's training rows only.
    preprocessor = ColumnTransformer(transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', model)
    ])

    for train_idx, test_idx in kf.split(X, y):
        # kf.split yields positional indices, so select rows with .iloc on
        # both X and y. Plain y[train_idx] is a label-based lookup that is
        # only correct while y has a default 0..n-1 index.
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        y_true_all.extend(y_test)
        y_pred_all.extend(y_pred)

    print(classification_report(y_true_all, y_pred_all, target_names=['No Churn', 'Churn']))

    # F1 for the churn class (1), computed once over the pooled out-of-fold
    # predictions (this is not an average of per-fold F1 scores).
    f1 = f1_score(y_true_all, y_pred_all, pos_label=1)
    model_f1_scores.append((name, f1))

# Sort by F1-score descending
model_f1_scores.sort(key=lambda x: x[1], reverse=True)

print("\n=== Models Ranked by Mean F1-Score (Churn Class) ===")
for name, f1 in model_f1_scores:
    print(f"{name}: {f1:.4f}")


===== Model: dt =====
              precision    recall  f1-score   support

    No Churn       0.95      0.98      0.96      2850
       Churn       0.84      0.70      0.76       483

    accuracy                           0.94      3333
   macro avg       0.89      0.84      0.86      3333
weighted avg       0.93      0.94      0.93      3333


===== Model: rf =====
              precision    recall  f1-score   support

    No Churn       0.95      0.99      0.97      2850
       Churn       0.93      0.71      0.80       483

    accuracy                           0.95      3333
   macro avg       0.94      0.85      0.89      3333
weighted avg       0.95      0.95      0.95      3333


===== Model: gb =====
              precision    recall  f1-score   support

    No Churn       0.95      0.99      0.97      2850
       Churn       0.92      0.72      0.81       483

    accuracy                           0.95      3333
   macro avg       0.94      0.85      0.89      3333
weigh

Selected base models (churn-class F1 from the cross-validation above):\
gb: 0.8065
rf: 0.8024
dt: 0.7607
et: 0.7120

In [17]:
# Load and preprocess data.
# The CSV is re-read so this experiment starts from the raw columns; the
# rename is chained rather than applied in place.
df = pd.read_csv("churn.csv").rename(columns={'Churn?': 'Churn'})
X = df.drop(columns=['Churn', 'State', 'Phone'])
# LabelEncoder assigns integer codes in sorted label order.
y = LabelEncoder().fit_transform(df['Churn'])

# Route columns by dtype family rather than comparing against 'object', so
# text columns held in a string/category dtype are still one-hot encoded
# instead of being sent to StandardScaler.
numeric_cols = X.select_dtypes(include='number').columns.tolist()
categorical_cols = [c for c in X.columns if c not in numeric_cols]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])

# Stratified 70/30 split so both parts keep the same class ratio.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=21
)

# Fit the preprocessing on the training rows only, then apply it to the test
# rows, so no test-set statistics leak into the scaler/encoder.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Base learners kept for stacking (the other candidates evaluated earlier in
# the notebook are left out).
base_models = [
    ('dt', DecisionTreeClassifier(max_depth=5, random_state=1)),
    ('rf', RandomForestClassifier(n_estimators=50, random_state=2)),
    ('gb', GradientBoostingClassifier(n_estimators=50, random_state=3)),
    ('et', ExtraTreesClassifier(n_estimators=50, random_state=5)),
]

# Define 5 meta-models
meta_models = {
    'naive_bayes': GaussianNB(),
    'knn': KNeighborsClassifier(),
    'svm': SVC(probability=True, random_state=42),
    'logistic_regression': LogisticRegression(max_iter=1000, random_state=42),
    'random_forest': RandomForestClassifier(n_estimators=100, random_state=42)
}

# Train stacking models with different meta-models and collect predictions
predictions = []

for name, meta in meta_models.items():
    # passthrough=False: the meta-model sees only the base models' outputs,
    # not the original features.
    stacking = StackingClassifier(
        estimators=base_models,
        final_estimator=meta,
        cv=5,
        n_jobs=-1,
        passthrough=False
    )
    stacking.fit(X_train, y_train)
    preds = stacking.predict(X_test)
    predictions.append(preds)
    print(f"\nMeta-model: {name}")
    print(classification_report(y_test, preds))

# Convert to numpy array (shape: 5 x n_samples)
predictions = np.array(predictions)

# Majority vote across meta-model predictions for each sample
final_preds, _ = mode(predictions, axis=0)
# Flatten so the result is 1-D regardless of whether mode() keeps the
# reduced axis.
final_preds = final_preds.flatten()

print("\nFinal Majority Vote Ensemble Classification Report:")
print(classification_report(y_test, final_preds))




Meta-model: naive_bayes
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       855
           1       0.73      0.84      0.78       145

    accuracy                           0.93      1000
   macro avg       0.85      0.89      0.87      1000
weighted avg       0.94      0.93      0.93      1000


Meta-model: knn
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       855
           1       0.82      0.79      0.81       145

    accuracy                           0.94      1000
   macro avg       0.89      0.88      0.89      1000
weighted avg       0.94      0.94      0.94      1000


Meta-model: svm
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       855
           1       0.83      0.81      0.82       145

    accuracy                           0.95      1000
   macro avg       0.90      0.89      0.90      1000
weighted avg   

These three are the best meta-models:

    'naive_bayes': GaussianNB(),
    'logistic_regression': LogisticRegression(max_iter=1000, random_state=42),
    'svm': SVC(probability=True, random_state=42)


In [18]:
# Define 3 meta-models (the knn and random_forest meta-models are left out).
# NOTE(review): if these three were chosen from their X_test scores in the
# previous run, the reports below are optimistic - confirm how they were picked.
meta_models = dict(
    naive_bayes=GaussianNB(),
    svm=SVC(probability=True, random_state=42),
    logistic_regression=LogisticRegression(max_iter=1000, random_state=42),
)

# Fit one stacking ensemble per meta-model and keep its hard predictions.
predictions = []

for name, meta in meta_models.items():
    stacking = StackingClassifier(
        estimators=base_models,
        final_estimator=meta,
        cv=5,
        n_jobs=-1,
        passthrough=False,
    ).fit(X_train, y_train)

    preds = stacking.predict(X_test)
    predictions.append(preds)

    print(f"\nMeta-model: {name}")
    print(classification_report(y_test, preds))

# Stack the per-model prediction vectors into a (3, n_samples) array.
predictions = np.vstack(predictions)

# Majority vote: the most frequent label per sample (column).
vote = mode(predictions, axis=0)
final_preds = np.ravel(vote.mode)

print("\nFinal Majority Vote Ensemble Classification Report:")
print(classification_report(y_test, final_preds))




Meta-model: naive_bayes
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       855
           1       0.73      0.84      0.78       145

    accuracy                           0.93      1000
   macro avg       0.85      0.89      0.87      1000
weighted avg       0.94      0.93      0.93      1000


Meta-model: svm
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       855
           1       0.83      0.81      0.82       145

    accuracy                           0.95      1000
   macro avg       0.90      0.89      0.90      1000
weighted avg       0.95      0.95      0.95      1000


Meta-model: logistic_regression
              precision    recall  f1-score   support

           0       0.97      0.98      0.97       855
           1       0.85      0.79      0.82       145

    accuracy                           0.95      1000
   macro avg       0.91      0.88      0.89      1000

In [19]:
# Collect each stacking ensemble's predicted churn probability.
meta_probs = []

for name, meta in meta_models.items():
    stacking = StackingClassifier(
        estimators=base_models,
        final_estimator=meta,
        cv=5,
        n_jobs=-1,
        passthrough=False,
    ).fit(X_train, y_train)

    # Hard predictions are only used for this meta-model's own report.
    preds = stacking.predict(X_test)

    # Column 1 of predict_proba is the probability of the positive class (churn=1).
    probs = stacking.predict_proba(X_test)[:, 1]
    meta_probs.append(probs)

    print(f"\nMeta-model: {name}")
    print(classification_report(y_test, preds))

# Stack into one array with a row per meta-model: shape (3, n_samples).
meta_probs = np.vstack(meta_probs)

# Soft voting: average the churn probability across meta-models.
avg_probs = meta_probs.mean(axis=0)

# Predict churn when the averaged probability reaches the 0.5 threshold.
final_preds = np.where(avg_probs >= 0.5, 1, 0)

print("\nFinal Ensemble (Soft Voting) Classification Report:")
print(classification_report(y_test, final_preds))


Meta-model: naive_bayes
              precision    recall  f1-score   support

           0       0.97      0.95      0.96       855
           1       0.73      0.84      0.78       145

    accuracy                           0.93      1000
   macro avg       0.85      0.89      0.87      1000
weighted avg       0.94      0.93      0.93      1000


Meta-model: svm
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       855
           1       0.83      0.81      0.82       145

    accuracy                           0.95      1000
   macro avg       0.90      0.89      0.90      1000
weighted avg       0.95      0.95      0.95      1000


Meta-model: logistic_regression
              precision    recall  f1-score   support

           0       0.97      0.98      0.97       855
           1       0.85      0.79      0.82       145

    accuracy                           0.95      1000
   macro avg       0.91      0.88      0.89      1000