## Computed RFM Segments and Visualized Marketing Insights

In [None]:
# Load optional context from paste.txt and preview its first 800 characters.
# context_text is always a str: it stays '' when the file cannot be read.
# context_loaded records whether the read actually succeeded, so the success
# message is only printed when it is true.
context_text = ''
context_loaded = False
try:
    with open('paste.txt', 'r', encoding='utf-8') as f:
        context_text = f.read()
    context_loaded = True
except (OSError, UnicodeDecodeError) as exc:
    # Missing/unreadable file or bytes that are not valid UTF-8: the context
    # is optional, so report the problem and continue with an empty string.
    context_text = ''
    print(f'Could not load context from paste.txt: {exc}')

if context_loaded:
    print(context_text[:800])
    print('Loaded context from paste.txt')

In [None]:
# Load Superstore dataset and show head/describe for verification
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Read the Superstore CSV (decoded as ASCII), then echo the first rows,
# the numeric summary and a confirmation line, one item per output block.
df = pd.read_csv('superstore_data.csv', encoding='ascii')
print(df.head(), df.describe(), 'Loaded superstore_data.csv', sep='\n')

In [None]:
# Feature engineering for RFM calculations
import pandas as pd
from datetime import datetime

# Parse the Dt_Customer column when it exists; values that cannot be parsed
# are coerced to NaT instead of raising.
if 'Dt_Customer' in df.columns:
    df['Dt_Customer'] = pd.to_datetime(df['Dt_Customer'], errors='coerce')

spend_cols = [
    'MntWines', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
purch_cols = ['NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases']

# Build each aggregate in turn: first create any missing source column as a
# constant 0, then store the row-wise sum of the source columns.
for total_name, source_cols in (('TotalSpend', spend_cols), ('Frequency', purch_cols)):
    for col in source_cols:
        if col not in df.columns:
            df[col] = 0
    df[total_name] = df[source_cols].sum(axis=1)

print(df[['TotalSpend', 'Frequency']].head(), 'Engineered features for RFM', sep='\n')

In [None]:
# Compute RFM scores and segments
import pandas as pd

# Recency is scored in reverse: the lowest quartile of Recency gets 4 and the
# highest gets 1 (lower recency is better).
df['R'] = pd.qcut(df['Recency'], q=4, labels=[4, 3, 2, 1])

# Frequency and Monetary are scored 1-4 with higher values scoring higher.
# Values are ranked with method='first' before binning, so ties receive
# distinct ranks.
for score_col, value_col in (('F', 'Frequency'), ('M', 'TotalSpend')):
    ranked = df[value_col].rank(method='first')
    df[score_col] = pd.qcut(ranked, q=4, labels=[1, 2, 3, 4])

# Combined score: the three quartile labels cast to int and added per row.
df['RFM_Score'] = df[['R', 'F', 'M']].astype(int).sum(axis=1)

def segment_map(score):
    """Translate a combined RFM score into a segment label.

    Returns 'Champions' for scores of at least 11, 'Loyal' for scores of at
    least 9, 'Potential' for scores of at least 7, and 'At Risk' for
    anything else.
    """
    # Thresholds are checked from highest to lowest; the first one met wins.
    tiers = ((11, 'Champions'), (9, 'Loyal'), (7, 'Potential'))
    for floor, label in tiers:
        if score >= floor:
            return label
    return 'At Risk'

# Label every row by passing its RFM_Score through segment_map, then show a
# few rows of the inputs next to the resulting score and segment.
df['Segment'] = df['RFM_Score'].map(segment_map)
print(
    df.loc[:, ['Recency', 'Frequency', 'TotalSpend', 'RFM_Score', 'Segment']].head(),
    'Computed RFM segments',
    sep='\n',
)

In [None]:
# Visual 1: Spend by Segment
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of TotalSpend for each segment, with the categories drawn in the
# fixed order At Risk -> Potential -> Loyal -> Champions.
sns.boxplot(
    x='Segment',
    y='TotalSpend',
    data=df,
    order=['At Risk', 'Potential', 'Loyal', 'Champions'],
).set_title('Customer Spend by RFM Segment')
plt.tight_layout()
plt.show()
print('Displayed Spend by Segment')

In [None]:
# Visual 2: Channel Mix by Segment (stacked bar)
channels = [
    'NumWebPurchases',
    'NumCatalogPurchases',
    'NumStorePurchases',
    'NumDealsPurchases',
]
# Any purchase-count column absent from the data is created as a constant 0.
for col in channels:
    if col not in df.columns:
        df[col] = 0

# Mean purchase count per segment, with rows arranged in a fixed segment order.
seg_mean = (
    df.groupby('Segment')[channels]
    .mean()
    .reindex(['At Risk', 'Potential', 'Loyal', 'Champions'])
)
# Divide each row by its own total so the stacked bars show shares.
seg_norm = seg_mean.div(seg_mean.sum(axis=1), axis=0)

seg_norm.plot.bar(stacked=True, figsize=(8, 4), colormap='tab20')
plt.title('Channel Mix by Segment')
plt.tight_layout()
plt.show()
print('Displayed Channel Mix by Segment')

In [None]:
# Visual 3: Income vs TotalSpend colored by Response
# The scatter needs both columns; if either is missing, print a notice and
# draw nothing.
if not ('Income' in df.columns and 'Response' in df.columns):
    print('Income or Response not found; skipping scatter plot')
else:
    sns.scatterplot(
        x='Income',
        y='TotalSpend',
        hue='Response',
        data=df,
        alpha=0.6,
    ).set_title('Income vs TotalSpend by Campaign Response')
    plt.tight_layout()
    plt.show()
print('Visuals complete')

In [None]:
# Export enriched dataset and plots
import matplotlib.pyplot as plt

# Figure 1: spend distribution per segment, saved to PNG and then closed.
box_fig, box_ax = plt.subplots(figsize=(8, 4))
sns.boxplot(
    x='Segment',
    y='TotalSpend',
    data=df,
    order=['At Risk', 'Potential', 'Loyal', 'Champions'],
    ax=box_ax,
)
box_ax.set_title('Customer Spend by RFM Segment')
box_fig.tight_layout()
box_fig.savefig('rfm_spend_by_segment.png', dpi=150)
plt.close(box_fig)

# Figure 2: mean purchase counts per segment, normalised so each row sums
# to 1, drawn as stacked bars. pandas creates the figure; it is reached
# through the returned axes.
channels = [
    'NumWebPurchases',
    'NumCatalogPurchases',
    'NumStorePurchases',
    'NumDealsPurchases',
]
seg_mean = (
    df.groupby('Segment')[channels]
    .mean()
    .reindex(['At Risk', 'Potential', 'Loyal', 'Champions'])
)
seg_norm = seg_mean.div(seg_mean.sum(axis=1), axis=0)
ax = seg_norm.plot(kind='bar', stacked=True, figsize=(8, 4), colormap='tab20')
ax.set_title('Channel Mix by Segment')
ax.figure.tight_layout()
ax.figure.savefig('rfm_channel_mix_by_segment.png', dpi=150)
plt.close(ax.figure)

# Figure 3: only produced when both Income and Response columns exist.
if 'Income' in df.columns and 'Response' in df.columns:
    scatter_fig, scatter_ax = plt.subplots()
    sns.scatterplot(
        x='Income',
        y='TotalSpend',
        hue='Response',
        data=df,
        alpha=0.6,
        ax=scatter_ax,
    )
    scatter_ax.set_title('Income vs TotalSpend by Campaign Response')
    scatter_fig.tight_layout()
    scatter_fig.savefig('rfm_income_vs_spend_response.png', dpi=150)
    plt.close(scatter_fig)

# Write the frame, including the engineered columns, without the index.
df.to_csv('superstore_rfm_enriched.csv', index=False)
print('Saved enriched CSV and plots')