# Demand Forecasting & RFM Segmentation

We build a simple baseline forecast of monthly sales for one product category, then segment customers by their RFM (Recency, Frequency, Monetary) scores.

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt

# Load the raw sales export, parse the order date, and derive its calendar
# parts as separate columns.
df = pd.read_csv(
    r"/mnt/data/Supermart-Grocery-Sales-Retail-Analytics/data/raw/supermart_sales.csv"
)
df['Order Date'] = pd.to_datetime(df['Order Date'])
df = df.assign(
    Year=df['Order Date'].dt.year,
    Month=df['Order Date'].dt.month,
    Day=df['Order Date'].dt.day,
)

# Forecasting baseline: regress one category's monthly sales totals on a
# running time index plus the calendar month and year.
cat = 'Beverages'
# NOTE(review): newer pandas releases reportedly prefer the 'ME' alias over
# 'M' for month-end grouping -- confirm against the installed version.
ts = (
    df.loc[df['Category'] == cat]
    .groupby(pd.Grouper(key='Order Date', freq='M'))['Sales']
    .sum()
    .reset_index()
)
# Features: position in the series, calendar month, calendar year.
ts = ts.assign(
    t=np.arange(len(ts)),
    month=ts['Order Date'].dt.month,
    year=ts['Order Date'].dt.year,
)
X = ts.loc[:, ['t', 'month', 'year']]
y = ts.loc[:, 'Sales']
# shuffle=False preserves row order, so the final 20% of rows are held out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)
m = Ridge(alpha=1.0)
m.fit(X_train, y_train)
preds = m.predict(X_test)
mae = mean_absolute_error(y_test, preds)
print("Forecasting baseline (Ridge) MAE:", round(mae,2))

# Draw the full monthly series and overlay the predictions for the held-out
# rows on the same axes.
fig, ax = plt.subplots()
ax.plot(ts['Order Date'], ts['Sales'], label='Actual')
ax.plot(ts.loc[X_test.index, 'Order Date'], preds, label='Predicted')
ax.legend()
ax.set_title('Monthly Sales – Actual vs Predicted (Ridge baseline)')
fig.tight_layout()
plt.show()


In [None]:

# RFM Segmentation: score every customer on Recency, Frequency and Monetary
# value, then bucket the combined score into named segments.


def _quartile_bins(values, labels):
    """Split *values* into four equal-sized bins labelled with *labels*.

    The values are ranked with ``method='first'`` before binning so that
    every input is distinct. Without this, heavily tied values can collapse
    two quantile edges into one, which ``pd.qcut`` rejects with
    ``ValueError: Bin edges must be unique``.
    """
    return pd.qcut(values.rank(method='first'), 4, labels=labels)


# Snapshot is one day past the latest order, so the customer with the most
# recent order gets Recency == 1.
snapshot_date = df['Order Date'].max() + pd.Timedelta(days=1)
rfm = df.groupby('Customer ID').agg(
    Recency=('Order Date', lambda x: (snapshot_date - x.max()).days),
    Frequency=('Order ID', 'nunique'),
    Monetary=('Sales', 'sum'),
)

# Score into quartiles. Recency labels are reversed so that a smaller number
# of days since the last order earns the higher score.
r_labels = [4,3,2,1]
f_labels = [1,2,3,4]
m_labels = [1,2,3,4]
r_quartiles = _quartile_bins(rfm['Recency'], r_labels)
f_quartiles = _quartile_bins(rfm['Frequency'], f_labels)
m_quartiles = _quartile_bins(rfm['Monetary'], m_labels)

rfm = rfm.assign(R=r_quartiles.astype(int),
                 F=f_quartiles.astype(int),
                 M=m_quartiles.astype(int))
rfm['RFM_Score'] = rfm[['R','F','M']].sum(axis=1)
display(rfm.head())

# NOTE(review): seg_map is not consulted by the segmentation below, which
# uses only the total RFM_Score. It is kept so the name stays defined for any
# other cell that may reference it -- confirm whether it can be removed.
seg_map = {
    (4,4): "Champions",
    (3,4): "Loyal",
    (4,3): "Potential Loyalist"
}
# Conditions are checked in order and the first match wins, so each row gets
# the highest tier whose threshold its score reaches.
rfm['Segment'] = np.select(
    [rfm['RFM_Score'] >= 10, rfm['RFM_Score'] >= 8, rfm['RFM_Score'] >= 6],
    ['Champions', 'Loyal', 'Potential Loyalist'],
    default='Others',
)
rfm.head()
