# 🧠 Customer Data Analysis — RFM, Churn Prediction, Segmentation

In [None]:
# STEP 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
# Plot styling: apply seaborn's 'whitegrid' style to every figure below.
sns.set(style='whitegrid')
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# STEP 2: Load Data
# Read the customer table, asking pandas to parse both date columns while
# loading so later date arithmetic works on datetime values.
date_columns = ['signup_date', 'last_purchase_date']
df = pd.read_csv('customer_data.csv', parse_dates=date_columns)

# Quick sanity check: dimensions, column summary, and the first rows.
print('Shape:', df.shape)
df.info()
df.head()

In [None]:
# STEP 3: EDA
# Count of customers per value of the 'gender' column.
_, gender_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='gender', palette='pastel', ax=gender_ax)
gender_ax.set_title('Gender Distribution')
plt.show()

# Count of customers per value of the 'churn_status' column.
_, churn_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='churn_status', palette='coolwarm', ax=churn_ax)
churn_ax.set_title('Churn Status')
# NOTE(review): the tick relabelling assumes exactly two bars, drawn in the
# order 0 (active) then 1 (churned) — confirm against the data.
churn_ax.set_xticks([0, 1])
churn_ax.set_xticklabels(['Active', 'Churned'])
plt.show()

In [None]:
# STEP 4: RFM-like analysis
# Recency is measured against the day after the latest purchase in the data,
# so the most recent buyer gets a Recency of at least 1 day rather than 0.
reference_date = df['last_purchase_date'].max() + pd.Timedelta(days=1)
rfm = df[['customer_id', 'last_purchase_date', 'total_orders', 'total_spent']].copy()
rfm['Recency'] = (reference_date - rfm['last_purchase_date']).dt.days
rfm['Frequency'] = rfm['total_orders']
rfm['Monetary'] = rfm['total_spent']


def _quintile_score(values, labels):
    """Return integer quintile scores for a numeric Series.

    The values are ranked with ``method='first'`` before binning so that ties
    are broken by order of appearance. Binning the raw values instead makes
    ``pd.qcut`` raise ``ValueError`` (duplicate bin edges) whenever many rows
    share the same value, e.g. several customers with identical spend or the
    same last purchase date. The trade-off is that tied values may be split
    across adjacent quintiles.

    Args:
        values: Series to score.
        labels: Five score labels, ordered from the lowest-valued quintile
            to the highest-valued quintile.

    Returns:
        Series of ints aligned with ``values``.
    """
    return pd.qcut(values.rank(method='first'), 5, labels=labels).astype(int)


# Recency labels are reversed: a smaller Recency (more recent purchase)
# earns the higher score. Frequency and Monetary score upward with value.
rfm['R_score'] = _quintile_score(rfm['Recency'], [5, 4, 3, 2, 1])
rfm['F_score'] = _quintile_score(rfm['Frequency'], [1, 2, 3, 4, 5])
rfm['M_score'] = _quintile_score(rfm['Monetary'], [1, 2, 3, 4, 5])
# Combined score is the plain sum of the three component scores.
rfm['RFM_score'] = rfm[['R_score','F_score','M_score']].sum(axis=1)

def rfm_label(x):
    """Translate a combined RFM score into a segment name.

    The score is compared against descending lower bounds and the first one
    it reaches wins: 13 -> 'Champions', 10 -> 'Loyal', 7 -> 'Potential',
    5 -> 'At Risk'. Anything that reaches none of them is 'Lost'.
    """
    tiers = (
        (13, 'Champions'),
        (10, 'Loyal'),
        (7, 'Potential'),
        (5, 'At Risk'),
    )
    for lower_bound, segment_name in tiers:
        if x >= lower_bound:
            return segment_name
    return 'Lost'

# Label each customer from its combined score, then chart how many
# customers land in each segment.
rfm['segment'] = rfm['RFM_score'].apply(rfm_label)
segment_counts = rfm['segment'].value_counts()
segment_counts.plot.bar(figsize=(8, 4), color='coral')
plt.title('Customer Segments by RFM')
plt.show()

In [None]:
# STEP 5: Churn Prediction
# Fit a random-forest classifier on four customer attributes and report how
# well it predicts churn_status on a held-out 20% of the rows.
features = ['age','loyalty_score','total_orders','total_spent']
X = df[features]
y = df['churn_status'].astype(int)

# Stratify on the target so the train and test sets keep the same class
# proportions as the full data. A purely random split can leave one class
# under-represented in the small test set and distort the metrics below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fixed random_state keeps the forest reproducible between runs.
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))