In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load Data
# Read the customer spending CSV into the working frame used by every cell
# below. The path is relative, so it resolves against the kernel's working
# directory.
DATA_PATH = 'data/customer_spending.csv'
df = pd.read_csv(DATA_PATH)


In [None]:
# 1. True Value Scatter
# Feature Engineering
# Estimate an annualised spend per row: the purchase amount multiplied by the
# number of purchases per year implied by the stated purchase frequency.
freq_map = {
    'Weekly': 52,
    'Bi-Weekly': 26,
    'Fortnightly': 26,
    'Monthly': 12,
    'Quarterly': 4,
    'Every 3 Months': 4,  # same cadence as 'Quarterly'; harmless if the label never occurs
    'Annually': 1,
}
purchase_freq = df['Frequency of Purchases']
purchases_per_year = purchase_freq.map(freq_map)

# Labels missing from freq_map fall back to one purchase per year. Report them
# first so a typo or a new label is noticed instead of silently understating
# Annual_Value. Missing (NaN) frequencies also fall back to 1 but are not
# reported as unmapped labels.
unmapped_labels = purchase_freq[purchases_per_year.isna() & purchase_freq.notna()].unique()
if len(unmapped_labels) > 0:
    print(f"Unmapped purchase frequencies (treated as 1 per year): {list(unmapped_labels)}")

# Both columns are added to df itself; the correlation cell further down reads
# 'Annual_Value' from df.
df['multiplier'] = purchases_per_year.fillna(1)
df['Annual_Value'] = df['Purchase Amount (USD)'] * df['multiplier']

# Filter
# Keep only the rows with more than 10 previous purchases.
has_history = df['Previous Purchases'].gt(10)
df_scatter = df.loc[has_history]

# Plot
# Annual value against age, coloured by subscription status.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df_scatter,
    x='Age',
    y='Annual_Value',
    hue='Subscription Status',
    ax=ax,
)
ax.set_title('True Value Scatter')
ax.legend(title='Subscription Status')
plt.show()


In [None]:
# 2. Category Performance Matrix
# Per-category summary table, indexed by 'Category':
#   'x' - mean review rating
#   'y' - total purchase amount (USD)
# The short column names are the ones the plotting code in this cell refers to.
category_aggs = {
    'x': pd.NamedAgg(column='Review Rating', aggfunc='mean'),
    'y': pd.NamedAgg(column='Purchase Amount (USD)', aggfunc='sum'),
}
stats = df.groupby('Category').agg(**category_aggs)

# Calculate Discount Ratio
def get_ratio(g):
    """Return the percentage of rows in `g` whose 'Discount Applied' is 'Yes'.

    `g` is a DataFrame (here: one group of a groupby) that has a
    'Discount Applied' column. The result is on a 0-100 scale.
    """
    discounted = g['Discount Applied'].eq('Yes')
    yes_share = discounted.sum() / len(discounted)
    return yes_share * 100

# Percentage of discounted purchases per category, used as the bubble size.
# Only the 'Discount Applied' column is handed to apply(): get_ratio reads
# nothing else, and letting apply() operate on the grouping column is
# deprecated in pandas 2.2+.
stats['size'] = df.groupby('Category')[['Discount Applied']].apply(get_ratio)

# Mean of the per-category revenue totals. stats['y'] already holds those
# totals, so the groupby/sum does not need to be repeated.
avg_rev = stats['y'].mean()

# Plot
# Bubble chart with one bubble per category: mean review rating on the x axis,
# total purchase amount on the y axis, bubble size from the percentage of
# purchases with a discount applied.
# The columns are renamed for plotting only, so the axes and the size legend
# get readable titles instead of the internal names 'x', 'y' and 'size'.
axis_titles = {
    'x': 'Average Review Rating',
    'y': 'Total Purchase Amount (USD)',
    'size': 'Discounted Purchases (%)',
}
plot_data = stats.rename(columns=axis_titles)

plt.figure(figsize=(12, 8))
ax = sns.scatterplot(
    data=plot_data,
    x=axis_titles['x'],
    y=axis_titles['y'],
    size=axis_titles['size'],
    sizes=(100, 1000),
)

# Name each bubble; without this the categories cannot be told apart.
for category, row in plot_data.iterrows():
    ax.annotate(
        str(category),
        (row[axis_titles['x']], row[axis_titles['y']]),
        xytext=(10, 10),
        textcoords='offset points',
    )

# Dashed reference line at the average of the per-category totals.
ax.axhline(y=avg_rev, color='r', linestyle='--')
ax.set_title('Category Performance Matrix')
plt.show()


In [None]:
# 3. Correlation Heatmap
# Pairwise Pearson correlations (the DataFrame.corr default) between the
# columns below. 'Annual_Value' is the engineered column added to df in the
# True Value Scatter cell, so that cell has to run before this one.
cols = ['Age', 'Previous Purchases', 'Review Rating', 'Annual_Value']
corr = df[cols].corr()

plt.figure(figsize=(8, 6))
# Pin the colour scale to the full correlation range [-1, 1]. With limits
# inferred from the data, the 1.0 diagonal stretches the scale and the neutral
# midpoint of 'coolwarm' no longer sits at zero correlation, which makes
# near-zero values look strongly negative.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Drivers of Value')
plt.show()
