## Superstore Marketing Analysis Notebook
This notebook loads the Superstore customer data, engineers features, builds RFM (Recency, Frequency, Monetary) segments, plots the results, and exports the enriched data and figures.

In [None]:
# Imports and basic setup
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

# Display settings
# Let pandas show up to 100 columns before it truncates a printed frame.
pd.options.display.max_columns = 100
print('Imports ready')

In [None]:
# Load dataset
# The file should be in the working directory.
# Decode as UTF-8 (pandas' default): every pure-ASCII file reads exactly as
# before, but a non-ASCII character in the CSV no longer raises
# UnicodeDecodeError the way the stricter 'ascii' codec does.
df = pd.read_csv('superstore_data.csv', encoding='utf-8')
print(df.head())
print(df.describe())
print('Data loaded')

In [None]:
# Feature engineering: parse dates, total spend, kids, frequency
# Dt_Customer -> datetime; values that cannot be parsed become NaT.
df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], errors='coerce')

# TotalSpend is the row-wise sum over the six Mnt* amount columns.
spend_cols = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
df['TotalSpend'] = df.loc[:, spend_cols].sum(axis='columns')

# Kids = Kidhome + Teenhome.
df['Kids'] = df['Kidhome'].add(df['Teenhome'])

# Frequency is the row-wise sum of web, catalog and store purchase counts.
purchase_cols = ['NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases']
df['Frequency'] = df.loc[:, purchase_cols].sum(axis='columns')

print(df.head())
print('Engineered features')

In [None]:
# RFM scoring and segmentation
# Each dimension gets a quartile score from 1 to 4. A smaller Recency is
# better, so its labels run 4 -> 1; Frequency and Monetary score higher as
# they grow. Recency is binned on its raw values, whereas Frequency and
# TotalSpend are ranked first (ties broken by row order) and then binned.
recency_labels = [4, 3, 2, 1]
ascending_labels = [1, 2, 3, 4]

frequency_rank = df['Frequency'].rank(method='first')
monetary_rank = df['TotalSpend'].rank(method='first')

df['R'] = pd.qcut(df['Recency'], q=4, labels=recency_labels)
df['F'] = pd.qcut(frequency_rank, q=4, labels=ascending_labels)
df['M'] = pd.qcut(monetary_rank, q=4, labels=ascending_labels)

# Combined score: row-wise sum of the three quartile scores (range 3..12).
df['RFM_Score'] = df.loc[:, ['R', 'F', 'M']].astype(int).sum(axis='columns')

# Simple segment map
def seg_map(score):
    """Translate a combined RFM score into a segment label.

    Args:
        score: Numeric RFM score (sum of the R, F and M quartile scores).

    Returns:
        'Champions' for scores of 11 or more, 'Loyal' for 9-10,
        'Potential' for 7-8, and 'At Risk' for anything lower.
    """
    # Tiers are checked from the highest floor down; the first match wins.
    tiers = ((11, 'Champions'), (9, 'Loyal'), (7, 'Potential'))
    for floor, label in tiers:
        if score >= floor:
            return label
    return 'At Risk'

# Label every customer with the segment that matches their RFM score.
df['Segment'] = df['RFM_Score'].map(seg_map)

# Preview the scoring inputs next to the resulting score and segment.
preview_cols = ['Income', 'TotalSpend', 'Frequency', 'Recency', 'RFM_Score', 'Segment']
print(df[preview_cols].head())
print('RFM computed')

In [None]:
# Plot 1: Spend by Segment
# Box plot of TotalSpend per segment, ordered from the lowest score tier to
# the highest.
segment_order = ['At Risk', 'Potential', 'Loyal', 'Champions']
spend_ax = sns.boxplot(x='Segment', y='TotalSpend', data=df, order=segment_order)
spend_ax.set_title('Customer Spend by RFM Segment')
plt.tight_layout()
plt.show()
print('Shown: Spend by Segment')

In [None]:
# Plot 2: Channel mix by Segment (stacked)
# Average the purchase-count columns within each segment, then rescale every
# segment's row to sum to 1 so the stacked bars show shares rather than counts.
channels = ['NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases', 'NumDealsPurchases']
segment_order = ['At Risk', 'Potential', 'Loyal', 'Champions']

seg_mean = df.groupby('Segment')[channels].mean().reindex(segment_order)
row_totals = seg_mean.sum(axis='columns')
seg_norm = seg_mean.div(row_totals, axis='index')

mix_ax = seg_norm.plot.bar(stacked=True, figsize=(8, 4), colormap='tab20')
mix_ax.set_title('Channel Mix by Segment')
plt.tight_layout()
plt.show()
print('Shown: Channel Mix by Segment')

In [None]:
# Plot 3: Income vs Spend colored by Response
# One point per row of df; the hue separates the values of the Response column.
response_ax = sns.scatterplot(x='Income', y='TotalSpend', hue='Response', data=df, alpha=0.6)
response_ax.set_title('Income vs Spend by Campaign Response')
plt.tight_layout()
plt.show()
print('Shown: Income vs Spend by Response')

In [None]:
# Exports: enriched CSV and PNGs
# Every figure is built on its own explicit Figure/Axes and closed by handle,
# so a saved PNG never depends on which figure pyplot currently treats as
# active. File names, figure sizes and dpi match the previous exports.
segment_order = ['At Risk', 'Potential', 'Loyal', 'Champions']


def _save_and_close(fig, path, dpi=150):
    """Tighten the layout of `fig`, write it to `path`, then release it.

    Args:
        fig: matplotlib Figure to export.
        path: Output image file name.
        dpi: Resolution passed to `Figure.savefig`.
    """
    fig.tight_layout()
    fig.savefig(path, dpi=dpi)
    # Close this specific figure so it does not linger in pyplot's registry.
    plt.close(fig)


# Spend distribution per segment (8x4 inches).
fig, ax = plt.subplots(figsize=(8, 4))
sns.boxplot(data=df, x='Segment', y='TotalSpend', order=segment_order, ax=ax)
ax.set_title('Customer Spend by RFM Segment')
_save_and_close(fig, 'plot_spend_by_segment.png')

# Channel shares per segment (8x4 inches). `channels` is the purchase-count
# column list defined in the Plot 2 cell.
seg_mean = df.groupby('Segment')[channels].mean().reindex(segment_order)
seg_norm = seg_mean.div(seg_mean.sum(axis=1), axis=0)
fig, ax = plt.subplots(figsize=(8, 4))
seg_norm.plot(kind='bar', stacked=True, colormap='tab20', ax=ax)
ax.set_title('Channel Mix by Segment')
_save_and_close(fig, 'plot_channel_mix_by_segment.png')

# Income vs spend colored by Response (default figure size, as before).
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='Income', y='TotalSpend', hue='Response', alpha=0.6, ax=ax)
ax.set_title('Income vs Spend by Campaign Response')
_save_and_close(fig, 'plot_income_vs_spend_response.png')

# Save enriched data
df.to_csv('superstore_marketing_enriched.csv', index=False)
print('Saved exports')