In [123]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import mean_squared_error, r2_score,make_scorer
from sklearn.compose import ColumnTransformer
import warnings

# ignore future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
scope_data = pd.read_csv("scope_data.csv")

In [3]:
scope_data = scope_data.dropna()

In [4]:
def season(month):
    if month in [3, 4, 5]:
        return "Spring"
    elif month in (6,7,8):
        return "Summer"
    elif month in (9,10,11):
        return "Fall"
    else:
        return "Winter"

In [5]:
scope_data['season'] = scope_data["month"].apply(season)

In [70]:
def fit_elasticnet_tidymodels(data):
    
    # Prepare the data
    X = data[['log_price', 'month', 'week_of_month', 'year']]
    y = data['log_units']
    
    # Define pipeline for the model
 
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, X.columns)
        ])
    

    elasticnet_model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('elasticnet', ElasticNet())
    ])
    
    # Define the hyperparameter grid for tuning
    param_grid = {
        'elasticnet__alpha': np.logspace(-6, -1, num=20),
        'elasticnet__l1_ratio': [0.5], "elasticnet__max_iter":[5000]
    }
    
    # Create a 10-fold cross-validation object
    sales_cv = KFold(n_splits=10, shuffle=True, random_state=42)
    
    # Perform cross-validated grid search
    grid_search = GridSearchCV(
        elasticnet_model, 
        param_grid=param_grid, 
        cv=sales_cv, 
        scoring={
            'rmse': make_scorer(mean_squared_error, squared=False),
            'r2': make_scorer(r2_score)
        },
        refit='rmse'
    )
    
    # Fit the model
    grid_search.fit(X, y,)
    
    # Extract the best model
    best_model = grid_search.best_estimator_
    
    # Fit the best model on the whole dataset
    best_model.fit(X, y)
    
    # Extract the log_price coefficient
    coefficients = pd.DataFrame(best_model.named_steps['elasticnet'].coef_, index=X.columns, columns=['estimate'])
    normalized_stats = pd.DataFrame(best_model.named_steps['preprocessor'].named_transformers_['num'].named_steps['scaler'].var_, index=X.columns[:4], columns=['sd'])
    coefs_denormalized = coefficients.join(normalized_stats)
    coefs_denormalized['beta_denormalized'] = coefs_denormalized['estimate'] / coefs_denormalized['sd']
    log_price_beta = coefs_denormalized.loc['log_price', 'beta_denormalized']
    
    predict = best_model.predict(X)
    mse = mean_squared_error(y, predict)
    rmse = np.sqrt(mse)
    
    result = {'price_elasticity': log_price_beta, "rmse":rmse}
    
    return result

In [124]:
combined_data = pd.DataFrame()

seasons = scope_data["season"].unique()

for s in seasons:
    season_df = scope_data[scope_data["season"] == s]
    df= season_df.groupby("mkt_lvl").apply(lambda x : pd.DataFrame({'data': [x]})).reset_index().drop(columns=['level_1'])
    applied = df["data"].apply(fit_elasticnet_tidymodels)
    for i, j in zip(df["mkt_lvl"], applied):
        j["mkt_lvl"] = i
        j["season"] = s
         
    for i in applied:
        dictionary = {"Season":i["season"], "mkt_lvl": i["mkt_lvl"], 
                              "ElasticNet_elast":i['price_elasticity'],
                             "ElasticNet_rmse":i['rmse']}
        combined_data = combined_data.append(dictionary, ignore_index=True)
        

In [125]:
combined_data

Unnamed: 0,Season,mkt_lvl,ElasticNet_elast,ElasticNet_rmse
0,Spring,North,-0.431515,0.714021
1,Spring,South,0.0,1.040513
2,Spring,West,0.0,0.597929
3,Summer,North,-0.366245,0.69514
4,Summer,South,-0.0,0.622237
5,Summer,West,0.0,0.578115
6,Fall,North,-0.0,0.682106
7,Fall,South,-0.0,0.600466
8,Fall,West,0.0,0.58434
9,Winter,North,-6.377328,0.806628
