In [3]:
import pandas as pd

data = pd.read_csv("churn.csv")
data.head()

Unnamed: 0,CustomerID,Age,Subscription_Duration_Months,Contract_Type,Monthly_Logins,Last_Purchase_Days_Ago,App_Usage_Time_Min,Monthly_Spend,Discount_Usage_Percentage,Customer_Support_Calls,Satisfaction_Score,Is_Churn
0,CUST-00001,35,25,Monthly,14,16,9.0,171.14,0.1,3,3,0
1,CUST-00002,50,33,Annual,13,27,26.1,28.98,0.33,1,4,0
2,CUST-00003,58,16,Monthly,10,11,30.5,229.94,0.33,1,5,0
3,CUST-00004,56,50,Monthly,7,3,18.5,46.95,0.15,0,5,0
4,CUST-00005,35,43,Monthly,13,14,27.7,168.75,0.32,1,5,0


# Define features and target
            


In [12]:
X = data.drop("Is_Churn", axis=1)   # independent variables
y = data["Is_Churn"]                # target variable


####ðŸ”¹ Step 3: Split into Train/Test Sets

In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


### ðŸ”¹ Step 4: Preprocess Data


In [14]:
numeric_features = X.select_dtypes(include=['int64','float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns


In [15]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])


### ðŸ”¹ Step 5: Build a Baseline Model

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

model.fit(X_train, y_train)
y_pred = model.predict(X_test)


### ðŸ”¹ Step 6: Evaluate the Model

In [17]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("ROC-AUC:", roc_auc_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9386666666666666
Precision: 0.7582938388625592
Recall: 0.5460750853242321
F1 Score: 0.6349206349206349
ROC-AUC: 0.7636175204973581
Confusion Matrix:
 [[2656   51]
 [ 133  160]]


### ðŸ”¹ Step 7: Try Advanced Models

In [18]:
## Swap out the classifier in the pipeline.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

rf_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=200, random_state=42))
])

gb_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', GradientBoostingClassifier())
])

rf_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)


### ðŸ”¹ Step 8: Hyperparameter Tuning

In [20]:
ðŸ”¹ Step 1: Make Predictions

Best Parameters: {'classifier__max_depth': None, 'classifier__n_estimators': 200}
Best Score: 0.4793107133769781


In [21]:
# Predictions
rf_pred = rf_model.predict(X_test)
gb_pred = gb_model.predict(X_test)


### Evaluate Performance

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Random Forest results
print("Random Forest Results:")
print("Accuracy:", accuracy_score(y_test, rf_pred))
print("Precision:", precision_score(y_test, rf_pred))
print("Recall:", recall_score(y_test, rf_pred))
print("F1 Score:", f1_score(y_test, rf_pred))
print("ROC-AUC:", roc_auc_score(y_test, rf_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_pred))

# Gradient Boosting results
print("\nGradient Boosting Results:")
print("Accuracy:", accuracy_score(y_test, gb_pred))
print("Precision:", precision_score(y_test, gb_pred))
print("Recall:", recall_score(y_test, gb_pred))
print("F1 Score:", f1_score(y_test, gb_pred))
print("ROC-AUC:", roc_auc_score(y_test, gb_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, gb_pred))


Random Forest Results:
Accuracy: 0.932
Precision: 0.856
Recall: 0.3651877133105802
F1 Score: 0.5119617224880383
ROC-AUC: 0.6792691429500813
Confusion Matrix:
 [[2689   18]
 [ 186  107]]

Gradient Boosting Results:
Accuracy: 0.94
Precision: 0.8121546961325967
Recall: 0.5017064846416383
F1 Score: 0.620253164556962
ROC-AUC: 0.7445732275443138
Confusion Matrix:
 [[2673   34]
 [ 146  147]]


In [23]:
import joblib

# Save model to file
joblib.dump(model, "model.pkl")

# Later, load it back
loaded_model = joblib.load("model.pkl")
