In [1]:
import pandas as pd
import numpy as np
import math

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer

In [2]:
# Read the raw inputs (paths are relative to the notebook's working directory).
# `df` supplies `cal_mo_id`, `cal_wk` supplies `fisc_wk_id`, and `data` supplies
# the `month` / `value` columns used in the cells below.
df = pd.read_csv("elastic_df.csv")
cal_wk = pd.read_csv("cal_wk.csv")
data = pd.read_excel('chicken_db.xlsx')

In [3]:
# Join the main frame to the calendar table (merge's default `how` is an inner join,
# so rows without a match on either side are discarded).
# NOTE(review): the join pairs a column named `cal_mo_id` with one named
# `fisc_wk_id`; confirm both columns really hold the same kind of key.
scope_data1 = df.merge(cal_wk, left_on = "cal_mo_id", right_on ="fisc_wk_id")

In [4]:
# Parse `month` with the "%Y %b" format (year followed by abbreviated month name)
# and re-encode it as an integer in YYYYMM form, e.g. "2023 Jan" -> 202301.
data["month"] = pd.to_datetime(data["month"], format = "%Y %b").dt.strftime("%Y%m").astype(int)

In [5]:
# Rename the re-encoded column to `cal_mo_id`, the key name the next merge joins on.
data = data.rename(columns={'month': 'cal_mo_id'})

In [6]:
# Left-join `value` onto the merged rows by `cal_mo_id`; rows with no matching
# `cal_mo_id` in `data` are kept and receive NaN in `value`.
scope_data = scope_data1.merge(data[['cal_mo_id', 'value']], on='cal_mo_id', how='left')

In [7]:
# Convert the week-start column to datetime so the `.dt` accessors used below work.
scope_data['fisc_wk_strt_dt'] = pd.to_datetime(scope_data['fisc_wk_strt_dt'])

In [8]:
# Calendar month number and year of each row's week-start date.
scope_data['month'] = scope_data['fisc_wk_strt_dt'].dt.month
scope_data['year'] = scope_data['fisc_wk_strt_dt'].dt.year

In [9]:
# Week-of-month index from the start date's day of month:
# days 1-7 -> 1, 8-14 -> 2, 15-21 -> 3, 22-28 -> 4, 29-31 -> 5.
# Computed with the vectorised `.dt.day` accessor rather than a per-row lambda.
scope_data['week_of_month'] = (scope_data['fisc_wk_strt_dt'].dt.day - 1) // 7 + 1

In [10]:
# Collapse to one row per (week start, month, year, week of month, market level,
# value) combination, totalling weight and sales within each combination.
# NOTE(review): `value` is a grouping key, and groupby's default `dropna=True`
# excludes rows whose key is missing — confirm that is intended for rows that
# got no `value` from the left merge.
scope_data = scope_data.groupby(
    ['fisc_wk_strt_dt', 'month', 'year', 'week_of_month', 'mkt_lvl', 'value'],
    as_index=False,
)[['total_wgt', 'total_sales']].sum()

In [11]:
# Derived price metrics.
# Average price per unit of weight: total sales divided by total weight.
scope_data['avg_unit_price'] = scope_data['total_sales'].div(scope_data['total_wgt'])
# Price relative to `value`, scaled by 100
# (presumably `value` is a price index with base 100 — TODO confirm).
scope_data['avg_unit_price_adj'] = scope_data['avg_unit_price'].div(scope_data['value']).mul(100)


In [12]:
# Natural-log transforms of adjusted price and total weight (the regression
# below fits `log_units` on `log_price`). The logarithm is undefined for zero
# or negative inputs: np.log would emit a RuntimeWarning and return -inf for 0,
# which a plain dropna does not remove. Non-positive inputs are therefore
# masked to NaN first, so the result is an ordinary missing value.
scope_data['log_price'] = np.log(
    scope_data['avg_unit_price_adj'].where(scope_data['avg_unit_price_adj'] > 0)
)
scope_data['log_units'] = np.log(
    scope_data['total_wgt'].where(scope_data['total_wgt'] > 0)
)

In [13]:
# NOTE(review): this replaces the `scope_data` frame built in the cells above
# with the contents of "scope_data.csv". Nothing shown in this notebook writes
# that file, so confirm it is the intended (and up-to-date) input for the
# modelling cells that follow.
scope_data = pd.read_csv("scope_data.csv")

In [14]:
# Keep only complete rows: a row with a missing value in any column is dropped.
scope_data = scope_data.dropna()

In [15]:
def season(month):
    """Map a month number to a season name.

    Months 3-5 give "Spring", 6-8 give "Summer" and 9-11 give "Fall".
    Every other value (12, 1, 2, and anything outside 1-12) gives "Winter".
    """
    season_months = (
        ("Spring", (3, 4, 5)),
        ("Summer", (6, 7, 8)),
        ("Fall", (9, 10, 11)),
    )
    for label, months in season_months:
        if month in months:
            return label
    # Fallback for every month not listed above.
    return "Winter"
    

In [16]:
# Inspect which distinct month values are present before deriving seasons from them.
scope_data["month"].unique()

array([ 5,  6,  7,  8,  9, 10, 11, 12,  1,  2,  3,  4], dtype=int64)

In [17]:
# Label every row with the season returned by `season()` for its month.
scope_data['season'] = scope_data['month'].map(season)

In [18]:
def fit_elasticnet_tidymodels(data, param_grids=None, n_splits=10, random_state=42):
    """Cross-validate four linear models of ``log_units`` on price and calendar features.

    The predictors ``log_price``, ``month``, ``week_of_month`` and ``year`` are
    mean-imputed and standardised; the target ``log_units`` is standardised
    inside a ``TransformedTargetRegressor``. Ordinary, Lasso, Ridge and Elastic
    Net regressions are each evaluated with shuffled K-fold cross-validation
    through ``GridSearchCV`` (scored on negative MSE and R^2, refit on negative
    MSE).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``log_price``, ``month``, ``week_of_month``,
        ``year`` and ``log_units``.
    param_grids : dict, optional
        Maps a model name ('Linear Regression', 'Lasso Regression',
        'Ridge Regression', 'Elastic Net Regression') to a ``GridSearchCV``
        parameter grid. Models that are not listed - and every model when this
        argument is omitted - are cross-validated at their default
        hyperparameters. Grid keys must be addressed through the pipeline,
        because the estimator sits inside a ``TransformedTargetRegressor``
        inside the 'regressor' pipeline step, e.g.
        ``{'Lasso Regression': {'regressor__regressor__alpha': np.logspace(-6, -1, 20)}}``.
        A bare ``'alpha'`` key is not a parameter of the pipeline and is
        rejected by ``GridSearchCV``.
    n_splits : int, default 10
        Number of cross-validation folds.
    random_state : int or None, default 42
        Seed for the fold shuffle. With an integer seed every model is scored
        on the same folds and repeated runs give the same numbers; pass
        ``None`` for an unseeded shuffle.

    Returns
    -------
    dict
        ``{model name: {...}}`` where each inner dict holds
        ``best_params`` (selected grid point, empty when no grid was given),
        ``best_score`` (mean cross-validated negative MSE of that point),
        ``mse`` (the same score with the sign flipped),
        ``r2`` (mean cross-validated R^2 of that point) and
        ``coefficients`` (DataFrame with columns ``variable`` / ``coef`` taken
        from the refit estimator, i.e. on the standardised scale).

    Raises
    ------
    ValueError
        If ``param_grids`` names a model this function does not fit.
    """
    feature_cols = ['log_price', 'month', 'week_of_month', 'year']
    X = data[feature_cols]
    y = data['log_units']

    # Preprocessing applied to every predictor: mean imputation, then scaling.
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, feature_cols)
        ])

    # Candidate estimators, all at their library defaults unless a grid is supplied.
    regressors = {
        'Linear Regression': LinearRegression(),
        'Lasso Regression': Lasso(),
        'Ridge Regression': Ridge(),
        'Elastic Net Regression': ElasticNet(),
    }

    param_grids = dict(param_grids or {})
    unknown = sorted(set(param_grids) - set(regressors))
    if unknown:
        raise ValueError(
            f"param_grids contains unknown model name(s): {unknown}; "
            f"expected a subset of {list(regressors)}"
        )

    # A seeded splitter yields identical folds on every split() call, so the
    # four models are compared on the same partitions and runs are repeatable.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    results = {}
    for name, regressor in regressors.items():
        pipe = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('regressor', TransformedTargetRegressor(regressor=regressor, transformer=StandardScaler()))
        ])
        clf = GridSearchCV(
            pipe,
            param_grids.get(name, {}),
            scoring=['neg_mean_squared_error', 'r2'],
            refit='neg_mean_squared_error',
            cv=kf,
        )
        clf.fit(X, y)

        best_idx = clf.best_index_
        # The fitted inner estimator lives on the TransformedTargetRegressor as `regressor_`.
        fitted = clf.best_estimator_.named_steps["regressor"].regressor_
        results[name] = {
            'best_params': clf.best_params_,
            'best_score': clf.best_score_,
            'mse': -clf.cv_results_['mean_test_neg_mean_squared_error'][best_idx],
            'r2': clf.cv_results_['mean_test_r2'][best_idx],
            'coefficients': pd.DataFrame({'variable': X.columns, 'coef': fitted.coef_})
        }

    return results


In [19]:
# Run the model comparison separately for every (season, market level) slice
# and record, per slice, the model with the lowest cross-validated RMSE.
rows = []

seasons = scope_data['season'].unique()

# The loop variable is deliberately NOT called `season`: that name is the
# function applied to the `month` column, and rebinding it to a string here
# would break any later re-run of that cell.
for season_name in seasons:
    season_rows = scope_data[scope_data["season"] == season_name]
    # Iterating a groupby yields (key, sub-frame) pairs in sorted key order.
    for group, region in season_rows.groupby("mkt_lvl"):
        result = fit_elasticnet_tidymodels(region)
        # RMSE per model = square root of its mean cross-validated MSE.
        models_rmses = {name: np.sqrt(scores["mse"]) for name, scores in result.items()}
        best = min(models_rmses, key=models_rmses.get)
        rmse = models_rmses[best]
        rows.append([season_name, group, best, rmse])

In [20]:
# Tabulate the winning model and its RMSE for each (season, market level) pair collected in `rows`.
pd.DataFrame(rows, columns = ["season", "mkt_lvl", "best_model", "rmse"])

Unnamed: 0,season,mkt_lvl,best_model,rmse
0,Spring,North,Ridge Regression,0.669645
1,Spring,South,Ridge Regression,0.9975
2,Spring,West,Ridge Regression,0.541028
3,Summer,North,Ridge Regression,0.643813
4,Summer,South,Ridge Regression,0.571214
5,Summer,West,Ridge Regression,0.52689
6,Fall,North,Ridge Regression,0.638027
7,Fall,South,Ridge Regression,0.542382
8,Fall,West,Ridge Regression,0.525815
9,Winter,North,Linear Regression,0.776496
