In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
!pip install fpdf
from fpdf import FPDF
import joblib

In [None]:
# Load the training and evaluation CSV files and keep only complete rows:
# any row containing at least one missing value is discarded.
TRAIN_CSV = "/content/training_dataset.csv"
TEST_CSV = "/content/churn_data.csv"

train_df = pd.read_csv(TRAIN_CSV).dropna()
test_df = pd.read_csv(TEST_CSV).dropna()


# Separate the 'churned' target column from the feature columns.
y_train = train_df['churned']
X_train = train_df.drop(columns=['churned'])
y_test = test_df['churned']

# The scaler and the model are fitted on the training column layout, so the
# evaluation features must expose the same columns in the same order.
# Fail early with a readable message when a training feature is absent from
# the evaluation file, then select the training columns in training order
# (columns that exist only in the evaluation file are left out).
missing_features = X_train.columns.difference(test_df.columns)
if len(missing_features) > 0:
    raise ValueError(
        f"Evaluation data is missing training features: {list(missing_features)}"
    )
X_test = test_df[X_train.columns]


# Standardise the features. The scaler statistics are learned from the
# training features only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Oversample the scaled training data with SMOTE (the evaluation data is not
# resampled). SMOTE generates its synthetic samples at random, so the seed is
# fixed to make a fresh "Restart & Run All" produce the same training set.
SMOTE_SEED = 42
smote = SMOTE(random_state=SMOTE_SEED)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)


# Hyperparameters of the XGBoost classifier, collected in one mapping so they
# can be read and tuned in a single place.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.05,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "objective": 'binary:logistic',
    "eval_metric": 'logloss',
    # NOTE(review): this flag is version-dependent in xgboost — confirm it is
    # still accepted by the installed release.
    "use_label_encoder": False,
}
xgb_model = xgb.XGBClassifier(**xgb_params)

# Train on the SMOTE-resampled, scaled training data.
xgb_model.fit(X_train_resampled, y_train_resampled)


In [None]:
# Predict a label for every scaled evaluation row and report accuracy, i.e.
# the fraction of predictions that equal the true 'churned' label.
y_pred = xgb_model.predict(X_test_scaled)
is_correct = y_test == y_pred
test_acc = is_correct.mean()
print(f"Test Accuracy: {test_acc:.4f}")



# Persist the trained classifier.
joblib.dump(xgb_model, "/content/churn_xgboost_model.pkl")
print("XGBoost model saved successfully!")

# The model was fitted on standardised features, so new data must go through
# the same fitted scaler before prediction. Save the scaler next to the model;
# without it the saved model cannot be applied correctly to raw feature values.
joblib.dump(scaler, "/content/churn_scaler.pkl")
print("Feature scaler saved successfully!")


# Count the evaluation labels in a fixed [0, 1] order.
# value_counts() sorts by frequency, not by label, so pairing its raw output
# with the fixed labels below would swap the bars whenever class 1 is the
# majority, and would fail when only one class is present. Reindexing pins the
# order and fills an absent class with 0.
# NOTE(review): assumes 'churned' is encoded 0 = retained, 1 = churned — confirm
# against the dataset.
churn_counts = y_test.astype(int).value_counts().reindex([0, 1], fill_value=0)

# Draw the two-bar chart and write it to disk; the figure is left open so the
# notebook still renders it inline.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(['Retained', 'Churned'], churn_counts.to_numpy(), color=['blue', 'red'])
ax.set_xlabel("Customer Status")
ax.set_ylabel("Count")
ax.set_title("Churned vs Retained Customers")
fig.savefig("/content/churn_bar_chart.png")
print("Bar chart saved successfully!")


# Build a one-page PDF report: a centred title line followed by the bar chart
# image that was previously written to disk.
REPORT_TITLE = "Churn Prediction Results (XGBoost)"
REPORT_CHART_PATH = "/content/churn_bar_chart.png"
REPORT_PDF_PATH = "/content/churn_report.pdf"

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)

# Title cell; ln=True moves the cursor to the next line afterwards.
pdf.cell(w=200, h=10, txt=REPORT_TITLE, ln=True, align='C')

# Chart image placed at a fixed position and width on the page.
pdf.image(REPORT_CHART_PATH, x=10, y=20, w=180)

pdf.output(REPORT_PDF_PATH)
print("PDF report saved.")
