Load Dataset

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import warnings; warnings.filterwarnings('ignore')

# Load the raw table and drop the identifier column so it is never used as a feature.
# A plain reassignment (instead of inplace=True) keeps the step easy to re-run.
df = pd.read_csv('dataset.csv')
df = df.drop(columns='customerID')

# Coerce TotalCharges to numeric FIRST, then impute with the median of the numeric
# series. Taking the median of the raw column fails when it was read as strings
# (which is the only reason the coercion is needed in the first place).
# NOTE(review): the median is computed on the full dataset, before the split.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges.fillna(total_charges.median())

# Binary target: 1 where Churn == 'Yes', 0 otherwise.
df['Churn'] = (df['Churn'] == 'Yes').astype(int)

# Churn is already an int column at this point, so it is normally absent from the
# object columns. Mask it out rather than calling Index.drop, which raises KeyError
# when the label is missing.
object_cols = df.select_dtypes('object').columns
cat_cols = object_cols[object_cols != 'Churn']
df = pd.get_dummies(df, columns=cat_cols)

X = df.drop('Churn', axis=1)
y = df['Churn']

# Stratify on the target so both splits keep the same churn ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply the same transform to the
# test split. Both X_train and X_test become NumPy arrays from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(f"Dataset shape: {df.shape}")


EDA

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('dark_background')

# Figure 1: a 2x2 grid holding exactly four panels, the churn count plot plus one
# boxplot per numeric feature.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
panels = axes.flatten()

sns.countplot(x='Churn', data=df, ax=panels[0])
panels[0].set_title('Churn Distribution')

num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
# Start at panel 1 so the first boxplot does not draw over the count plot.
for ax, col in zip(panels[1:], num_cols):
    sns.boxplot(x='Churn', y=col, data=df, ax=ax)
    ax.set_title(f'{col} vs Churn')

fig.tight_layout()
plt.show()

# Figure 2: the correlation heatmap gets its own figure, since the 2x2 grid above
# is already full. X_train is a NumPy array after scaling, so the column names are
# restored from X. Cell annotations are switched off because the one-hot encoded
# feature matrix can have many columns, which makes per-cell numbers unreadable.
fig_corr, ax_corr = plt.subplots(figsize=(16, 14))
sns.heatmap(
    pd.DataFrame(X_train, columns=X.columns).corr(),
    annot=False,
    cmap='coolwarm',
    ax=ax_corr,
)
ax_corr.set_title('Feature Correlation (training split)')
fig_corr.tight_layout()
plt.show()


Classical ML

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Baseline classifiers, all trained on the scaled training split.
models = {
    # Raise the iteration cap above the default: warnings are silenced at the top
    # of the notebook, so a non-converged fit would otherwise go unnoticed.
    'Logistic': LogisticRegression(max_iter=1000),
    # Fixed seed (same value as the train/test split) so the forest is reproducible.
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC()
}

# Fit each model and report per-class precision/recall/F1 on the held-out split.
# The loop variable is `clf`, not `model`, so it cannot be confused with the Keras
# network that is bound to `model` in the next section.
for name, clf in models.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"\n{name} Results:")
    print(classification_report(y_test, y_pred))


Neural Network

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Seed TensorFlow's global generator so weight initialisation and dropout masks are
# repeatable across Restart & Run All (same seed value as the train/test split).
tf.random.set_seed(42)

# Small fully connected binary classifier: three ReLU hidden layers with dropout
# after the first two, and a single sigmoid unit for the output.
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# validation_split holds out 20% of the training data for per-epoch validation.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# The network has one output unit, so predict() yields a single column; threshold
# at 0.5 and flatten to a 1-D label vector that lines up with y_test.
y_pred_nn = (model.predict(X_test) > 0.5).astype(int).ravel()
print("\nNeural Network Results:")
print(classification_report(y_test, y_pred_nn))
