# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1. Load & Prepare Data
def load_and_prepare_data(filename):
    """Load the churn CSV and return a numeric, model-ready DataFrame.

    Args:
        filename: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame in which ``TotalCharges`` is numeric (values that cannot
        be parsed are replaced by the column median), ``Churn`` is mapped
        from ``"Yes"``/``"No"`` to ``1``/``0``, ``customerID`` is removed
        when present, and the remaining categorical columns are one-hot
        encoded with the first level of each dropped.

    Raises:
        KeyError: If the ``TotalCharges`` or ``Churn`` column is missing.
    """
    df = pd.read_csv(filename)

    # Unparseable entries (e.g. blank strings) become NaN via errors="coerce".
    total_charges = pd.to_numeric(df["TotalCharges"], errors="coerce")
    # Assign the filled Series back to the frame. The chained
    # ``df[col].fillna(..., inplace=True)`` form operates on a temporary
    # object under pandas Copy-on-Write and would leave the NaNs in ``df``.
    df["TotalCharges"] = total_charges.fillna(total_charges.median())

    # Any label other than "Yes"/"No" maps to NaN.
    df["Churn"] = df["Churn"].map({"Yes": 1, "No": 0})

    # The identifier carries no predictive signal; skip silently if absent.
    df = df.drop(columns="customerID", errors="ignore")

    # One-hot encode the remaining categorical columns; drop_first avoids
    # a redundant (perfectly collinear) indicator per feature.
    return pd.get_dummies(df, drop_first=True)

# 2. Split Data
def split_data(df):
    """Separate the ``Churn`` target from the features and split 80/20.

    Returns the four-element result of ``train_test_split``:
    ``X_train, X_test, y_train, y_test``. The split uses a fixed
    ``random_state`` of 42, so repeated runs give the same partition.
    """
    target = df["Churn"]
    features = df.drop(columns="Churn")
    return train_test_split(
        features,
        target,
        test_size=0.2,
        random_state=42,
    )

# 3. Scale Data
def scale_data(X_train, X_test):
    """Standardize both feature sets using statistics from the training set.

    The scaler is fitted on ``X_train`` only and then applied to both
    inputs, so no information from the test set influences the scaling.

    Returns:
        ``(scaled_train, scaled_test)`` as produced by
        ``StandardScaler.transform``.
    """
    standardizer = StandardScaler().fit(X_train)
    scaled_train = standardizer.transform(X_train)
    scaled_test = standardizer.transform(X_test)
    return scaled_train, scaled_test

# 4. Train Logistic Regression
def train_logistic(X_train, y_train, X_test):
    """Fit a logistic regression on the training data and predict ``X_test``.

    The model is created with ``max_iter=1000``. Only the predictions are
    returned; the fitted model itself is discarded.
    """
    classifier = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return predictions

# 5. Train Random Forest
def train_random_forest(X_train, y_train, X_test):
    """Fit a 100-tree random forest on the training data and predict ``X_test``.

    ``random_state=42`` is passed to the forest so repeated runs are
    reproducible. Only the predictions are returned; the fitted model
    itself is discarded.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    predictions = forest.fit(X_train, y_train).predict(X_test)
    return predictions


# 6. Main Program
# Build the modelling table, then the standardized train/test partitions.
data = load_and_prepare_data("customer_churn.csv")
X_train, X_test, y_train, y_test = split_data(data)
X_train, X_test = scale_data(X_train, X_test)

# Logistic Regression: predict the hold-out set and report its metrics.
lr_pred = train_logistic(X_train, y_train, X_test)
lr_accuracy = accuracy_score(y_test, lr_pred)
lr_report = classification_report(y_test, lr_pred)
print("\n--- Logistic Regression ---")
print("Accuracy:", lr_accuracy)
print(lr_report)

# Random Forest: same evaluation on the same hold-out set.
rf_pred = train_random_forest(X_train, y_train, X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_report = classification_report(y_test, rf_pred)
print("\n--- Random Forest ---")
print("Accuracy:", rf_accuracy)
print(rf_report)

# Confusion Matrix (Random Forest), drawn as an annotated heatmap.
cm = confusion_matrix(y_test, rf_pred)
ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Customer Churn Confusion Matrix")
plt.show()
