In [None]:
# %% [markdown]
# # Price Elasticity Modeling
# 
# This notebook trains and evaluates price elasticity models for each SKU.

# %%
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

from src.data import make_dataset, process_data
from src.features import build_features
from src.models import train_model

# %%
# Read the raw dataset (load_raw_data() returns a pair; only the first element
# is kept), then run outlier correction followed by cleaning.
df, _ = make_dataset.load_raw_data()
df = process_data.clean_data(process_data.correct_outliers(df))

# %%
# Split df with time_based_split, then pass both partitions through
# create_product_clusters.
train_df, test_df = build_features.time_based_split(df)
train_df, test_df = build_features.create_product_clusters(
    train_df,
    test_df,
)

# Column alignment: engineer the training features first, capture the resulting
# column index, and hand it to the test-side call through `full_columns`.
# NOTE(review): presumably engineer_features conforms test_df to these columns
# — confirm in src.features.build_features.
train_df = build_features.engineer_features(train_df)
full_feature_columns = train_df.columns
test_df = build_features.engineer_features(
    test_df,
    full_columns=full_feature_columns,
)

# %%
# Fit the SKU models on the engineered train/test frames; the reporting and
# plotting cells below all read from results_df.
results_df = train_model.train_sku_models(
    train_df,
    test_df,
)

# %%
# Display model results
# The statistic computed below is the median of each metric column across SKUs,
# so the heading says "Median" (it previously said "Average", which did not
# match the .median() call).
print("Median Model Performance Across All SKUs:")
print(results_df[['TrainMAE', 'TestMAE', 'TrainR2', 'TestR2']].median())

# %%
# Box plot of the 'Elasticity' column of results_df, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=results_df, y='Elasticity', ax=ax)
ax.set_title('Distribution of Price Elasticity Estimates')
ax.set_ylabel('Elasticity Coefficient')
plt.show()

# %%
# Highlight significant elasticities
# Keep only rows whose elasticity p-value is below 0.05, ordered by the
# elasticity estimate (ascending). Rows with a NaN p-value fail the comparison
# and are therefore excluded.
significant = results_df[results_df['ElasticityPval'] < 0.05].sort_values('Elasticity')

if significant.empty:
    # Nothing passed the filter: report it instead of drawing an empty chart.
    print("No SKUs with statistically significant elasticity (p < 0.05).")
else:
    # Demand is "elastic" when the magnitude of the elasticity exceeds 1 and
    # "inelastic" otherwise; the sign only gives the direction of the response.
    # (The previous version labelled bars by sign, which mislabels e.g. -0.3
    # as elastic and +2.0 as inelastic.)
    demand_type = np.where(significant['Elasticity'].abs() > 1, 'Elastic', 'Inelastic')

    plt.figure(figsize=(12, 6))
    # dodge=False: hue is not nested inside x here, so bars should stay
    # full-width and centred on their tick rather than being offset per hue level.
    sns.barplot(data=significant, x='StockCode', y='Elasticity',
                hue=demand_type, dodge=False)
    plt.title('Statistically Significant Price Elasticities (p < 0.05)')
    plt.xticks(rotation=45)
    # Zero line separates negative from positive elasticity estimates.
    plt.axhline(0, color='black', linestyle='--')
    plt.show()