In [18]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import joblib


In [12]:
train = pd.read_csv('../data/processed/train_data.csv')
test = pd.read_csv('../data/processed/test_data.csv')

In [13]:
X_train = train.drop(columns=['CHURN'])
y_train = train['CHURN'].map({'Yes':1,'No':0})
X_test = test.drop(columns=['CHURN'])
y_test = test['CHURN'].map({'Yes':1,'No':0})

In [24]:
models = {
    'LogisticRegression': LogisticRegression(max_iter=1000,class_weight='balanced'),
    'RandomForest': RandomForestClassifier(n_estimators=100,random_state=42),
    'XGBoost': XGBClassifier(use_label_encoder=False,eval_metric='logloss')    
}

In [25]:
model_scores = {}


for name,model in models.items():
    print(f'Training {name}...')
    pipeline = Pipeline([
        ('scaler',StandardScaler()),
        ('classifier',model)
    ])
    pipeline.fit(X_train,y_train)
    y_pred = pipeline.predict(X_test)
    print(f'{name} Classification Report:')
    print(classification_report(y_test,y_pred))

Training LogisticRegression...
LogisticRegression Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1603
           1       0.50      0.02      0.04        88

    accuracy                           0.95      1691
   macro avg       0.72      0.51      0.51      1691
weighted avg       0.93      0.95      0.92      1691

Training RandomForest...
RandomForest Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1603
           1       0.00      0.00      0.00        88

    accuracy                           0.95      1691
   macro avg       0.47      0.50      0.49      1691
weighted avg       0.90      0.95      0.92      1691

Training XGBoost...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1603
           1       0.20      0.02      0.04        88

    accuracy                           0.94      1691
   macro avg       0.57      0.51      0.51      1691
weighted avg       0.91      0.94      0.92      1691



In [26]:
import os
os.makedirs("../models", exist_ok=True)
joblib.dump(pipeline, f"../models/{name.lower().replace(' ', '_')}_pipeline.joblib")


['../models/xgboost_pipeline.joblib']

In [27]:
model_scores[name] = f1_score(y_test,y_pred)

In [29]:
best_model_name = max(model_scores, key=model_scores.get)
print(f"\n Best Model : {best_model_name} with F1 Score : {model_scores[best_model_name]}")




 Best Model : XGBoost with F1 Score : 0.04081632653061224


In [30]:
best_model_path = "../models/best_model_pipeline.joblib"
best_model = joblib.load(f"../models/{best_model_name.lower().replace(' ', '_')}_pipeline.joblib")
joblib.dump(best_model, best_model_path)
print(f" Best model saved as {best_model_path}")


 Best model saved as ../models/best_model_pipeline.joblib
