In [30]:
from sklearn.linear_model import ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
import pandas as pd
import numpy as np

In [2]:
scope_data = pd.read_csv("scope_data.csv")

In [4]:
scope_data = scope_data.dropna()

In [5]:
def season(month):
    if month in [3, 4, 5]:
        return "Spring"
    elif month in (6,7,8):
        return "Summer"
    elif month in (9,10,11):
        return "Fall"
    else:
        return "Winter"

In [7]:
scope_data['season'] = scope_data["month"].apply(season)

In [9]:
scope_data

Unnamed: 0,fisc_wk_strt_dt,month,year,week_of_month,mkt_lvl,value,total_wgt,total_sales,avg_unit_price,avg_unit_price_adj,log_price,log_units,season
0,5/5/2013,5,2013,18,North,236.526,1642565.196,3474335.31,2.115189,0.894273,-0.111744,14.311770,Spring
1,5/5/2013,5,2013,18,South,236.526,1422200.660,2850561.14,2.004331,0.847404,-0.165578,14.167716,Spring
2,5/5/2013,5,2013,18,West,236.526,694843.260,1440919.45,2.073733,0.876746,-0.131538,13.451442,Spring
3,5/12/2013,5,2013,19,North,236.526,1587125.754,3533834.04,2.226562,0.941360,-0.060429,14.277435,Spring
4,5/12/2013,5,2013,19,South,236.526,1376157.075,2888754.41,2.099146,0.887491,-0.119357,14.134805,Spring
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1786,2/19/2023,2,2023,8,South,320.569,1618429.450,3117399.82,1.926188,0.600865,-0.509384,14.296967,Winter
1787,2/19/2023,2,2023,8,West,320.569,544322.140,1164748.93,2.139815,0.667505,-0.404208,13.207297,Winter
1788,2/26/2023,2,2023,9,North,320.569,465500.305,984571.92,2.115083,0.659790,-0.415833,13.050868,Winter
1790,2/26/2023,2,2023,9,South,320.569,686842.140,1314373.48,1.913647,0.596953,-0.515916,13.439860,Winter


In [23]:
def fit_elasticnet_tidymodels(data):
    
    # Prepare the data
    X = data[['log_price', 'month', 'week_of_month', 'year']]
    y = data['log_units']
    
    # Define pipeline for the model
 
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
#     categorical_transformer = Pipeline(steps=[
#         ('onehot', OneHotEncoder(handle_unknown='ignore'))
#     ])

#     preprocessor = ColumnTransformer(
#         transformers=[
#             ('num', numeric_transformer, X.columns),
#             ('cat', categorical_transformer, [])
#         ])
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, X.columns)
        ])
    

    elasticnet_model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('elasticnet', ElasticNet())
    ])
    
    # Define the hyperparameter grid for tuning
    param_grid = {
        'elasticnet__alpha': np.logspace(-6, -1, num=20),
        'elasticnet__l1_ratio': [0.5]
    }
    
    # Create a 10-fold cross-validation object
    sales_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    
    # Perform cross-validated grid search
    grid_search = GridSearchCV(
        elasticnet_model, 
        param_grid=param_grid, 
        cv=sales_cv, 
        scoring={
            'rmse': make_scorer(mean_squared_error, squared=False),
            'r2': make_scorer(r2_score)
        },
        refit='rmse'
    )
    
    # Fit the model
    grid_search.fit(X, y)
    
    # Extract the best model
    best_model = grid_search.best_estimator_
    
    # Fit the best model on the whole dataset
    best_model.fit(X, y)
    
    # Extract the log_price coefficient
    coefficients = pd.DataFrame(best_model.named_steps['elasticnet'].coef_, index=X.columns, columns=['estimate'])
    normalized_stats = pd.DataFrame(best_model.named_steps['preprocessor'].named_transformers_['num'].named_steps['scaler'].var_, index=X.columns[:4], columns=['sd'])
    coefs_denormalized = coefficients.join(normalized_stats)
    coefs_denormalized['beta_denormalized'] = coefs_denormalized['estimate'] / coefs_denormalized['sd']
    log_price_beta = coefs_denormalized.loc['log_price', 'beta_denormalized']
    
    result = {'price_elasticity': log_price_beta}
    
    return result


In [69]:
spring = scope_data[scope_data["season"] == "Spring"]

In [70]:
df= spring.groupby("mkt_lvl").apply(lambda x : pd.DataFrame({'data': [x]})).reset_index().drop(columns=['level_1'])

In [94]:
df

Unnamed: 0,mkt_lvl,data
0,North,fisc_wk_strt_dt month year week_of_mon...
1,South,fisc_wk_strt_dt month year week_of_mon...
2,West,fisc_wk_strt_dt month year week_of_mon...


In [95]:
price_elasticities_tidymodels_spring = df.assign(elasticities=df['data'].map(fit_elasticnet_tidymodels))

In [96]:
price_elasticities_tidymodels_spring

Unnamed: 0,mkt_lvl,data,elasticities
0,North,fisc_wk_strt_dt month year week_of_mon...,{'price_elasticity': -0.4315148326126413}
1,South,fisc_wk_strt_dt month year week_of_mon...,{'price_elasticity': 0.0}
2,West,fisc_wk_strt_dt month year week_of_mon...,{'price_elasticity': 0.0}


In [73]:
price_elasticities_tidymodels_spring["elasticities"]= price_elasticities_tidymodels_spring["elasticities"].str['price_elasticity']

In [75]:
price_elasticities_tidymodels_spring = price_elasticities_tidymodels_spring[["mkt_lvl","elasticities"]]

In [87]:
price_elasticities_tidymodels_spring.pivot(columns = "mkt_lvl", values = "elasticities").fillna(0)

mkt_lvl,North,South,West
0,-0.431515,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0


In [84]:
price_elasticities_tidymodels_spring.T

Unnamed: 0,0,1,2
mkt_lvl,North,South,West
elasticities,-0.431515,0.0,0.0


In [92]:
mkt = price_elasticities_tidymodels_spring["mkt_lvl"]
elasticity_spring = price_elasticities_tidymodels_spring["elasticities"]


In [93]:
elasticity_spring

0   -0.431515
1    0.000000
2    0.000000
Name: elasticities, dtype: float64