In [28]:
import pandas as pd
import statsmodels.api as sm

In [29]:
summer_df = pd.read_csv('SUMMER_TRAINING_DATA.csv')
winter_df = pd.read_csv('WINTER_TRAINING_DATA.csv')
shoulder_df = pd.read_csv('SHOULDER_TRAINING_DATA.csv')

In [30]:
# Extract the day of the week
summer_df['dateTimeStmp'] = pd.to_datetime(summer_df['dateTimeStmp'])
summer_df['DayOfWeek'] = summer_df['dateTimeStmp'].dt.day_name()
winter_df['dateTimeStmp'] = pd.to_datetime(winter_df['dateTimeStmp'])
winter_df['DayOfWeek'] = winter_df['dateTimeStmp'].dt.day_name()
shoulder_df['dateTimeStmp'] = pd.to_datetime(shoulder_df['dateTimeStmp'])
shoulder_df['DayOfWeek'] = shoulder_df['dateTimeStmp'].dt.day_name()

In [31]:
# Create dictionary, and traverse weekdays
seasons = {
    'Summer': summer_df,
    'Winter': winter_df,
    'Shoulder': shoulder_df
}

days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Define independent and dependent varibles
dependent_var = 'raw_MM_Wh'
independent_vars = ['D-3', 'D-2', 'D-1', 'D+1', 'D+2', 'D+3']
results = {day: {} for day in days}

In [32]:
for day in days:
    for season, df in seasons.items():
        # Filter the data for the corresponding week of the season
        df_day = df[df['DayOfWeek'] == day].copy()
        if df_day.empty:
            continue
        
        # Data on independent and dependent variables
        X = df_day[independent_vars]
        X = sm.add_constant(X)
        y = df_day[dependent_var]
        
        # Perform OLS regression
        model = sm.OLS(y, X).fit()
        
        # Save regression results
        results[day][season] = {
            'params': model.params,
            'tvalues': model.tvalues,
            'r_squared': model.rsquared
        }


In [33]:
# Generate 7 tables
def create_result_table_for_day(results_day):
    season_order = ['Summer', 'Winter', 'Shoulder']
    rows = []
    
    for season in season_order:
        if season in results_day:
            res = results_day[season]
            row = {}
            for param in res['params'].index:
                row[f'coef_{param}'] = res['params'][param]
                row[f't_{param}'] = res['tvalues'][param]
                row['R_squared'] = res['r_squared']
        else:
            row = {}
            for param in ['const'] + independent_vars:
                row[f'coef_{param}'] = None
                row[f't_{param}'] = None
            row['R_squared'] = None
        row['Season'] = season
        rows.append(row)
    
    df_result = pd.DataFrame(rows)
    df_result.set_index('Season', inplace=True)
    return df_result

# Generate a table for each day and save it in the dictionary
tables = {}
for day in days:
    tables[day] = create_result_table_for_day(results[day])

In [34]:
for day in days:
    print(f"====== {day} ======")
    print(tables[day])
    print()

           coef_const   t_const  R_squared  coef_D-3     t_D-3  coef_D-2  \
Season                                                                     
Summer    1036.012227  6.458778   0.782749  0.115118  6.029591 -0.092982   
Winter    -139.120569 -2.292964   0.868061  0.103803  8.088442  0.087218   
Shoulder   381.151518  5.299355   0.866298  0.120037  8.346367  0.017473   

             t_D-2  coef_D-1      t_D-1  coef_D+1      t_D+1  coef_D+2  \
Season                                                                   
Summer   -4.152924  0.311613  16.848212  0.594105  31.165121 -0.040467   
Winter    6.399878  0.161685  12.668057  0.302617  19.876039  0.230215   
Shoulder  1.098692  0.314512  22.533116  0.411539  22.914647  0.136910   

              t_D+2  coef_D+3     t_D+3  
Season                                   
Summer    -1.773758  0.068701  4.086566  
Winter    16.354019  0.129762  9.629910  
Shoulder   8.007763 -0.044488 -3.091342  

          coef_const   t_const  R_squ

In [35]:
# Export
# for day, table in tables.items():
#     filename = f'{day}_regression_results.csv'
#     table.to_csv(filename, index=True)