# Customer Churn Prediction - E-commerce Project

This project aims to predict customer churn using behavioral and transactional data.

In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Load the churn dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="ecommerce_churn_data.csv")
# Preview the first five rows
df.head(n=5)

In [None]:
# Drop customer_id as it's not useful for prediction
df = df.drop(columns=['customer_id'])

# Report the number of missing values in each column
print(df.isnull().sum())

# Impute missing cart_abandon_rate values with the column mean.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: the in-place form is chained
# assignment, which emits a FutureWarning in pandas 2.x and leaves df
# unchanged once Copy-on-Write is enabled.
df['cart_abandon_rate'] = df['cart_abandon_rate'].fillna(
    df['cart_abandon_rate'].mean()
)

In [None]:
# Define features and target: every column except is_churned is a feature
X = df.drop('is_churned', axis=1)
y = df['is_churned']

# Train-test split: hold out 20% of the rows for evaluation.
# stratify=y keeps the proportion of each is_churned class the same in the
# train and test sets, so the evaluation metrics are not skewed by an
# unlucky random draw of the minority class. random_state fixes the shuffle
# for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the
# training split (fit returns the fitted estimator itself)
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)

# Predicted labels for the held-out test rows
y_pred = model.predict(X_test)


In [None]:
# Evaluation: print the confusion matrix, then the classification report,
# each on its own line
print(
    confusion_matrix(y_true=y_test, y_pred=y_pred),
    classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

In [None]:
# Feature Importance: pair each importance score from the fitted forest
# with the name of the feature column it belongs to
feat_importance = pd.Series(
    data=model.feature_importances_,
    index=X.columns,
)

# Horizontal bar chart, sorted ascending so the largest bar is drawn at the top
feat_importance.sort_values(ascending=True).plot(
    kind='barh',
    figsize=(10, 6),
)
plt.title("Feature Importance in Churn Prediction")
plt.show()

## Key Insights
- Customers with more complaints and high cart abandonment are more likely to churn.
- Recency and frequency of purchases are strong predictors.
- Recommendation: run retention campaigns for the high-risk segments identified by the model's output.