In [21]:
# Persist the regression results to disk so they can be reloaded later.
# NOTE(review): `results` is not built in this cell; it is expected to already
# hold the combined regression results from earlier cells. On a fresh kernel
# this cell raises NameError unless those cells have run first.
import joblib

# joblib.dump pickles the object to 'results.pkl' in the current working directory.
joblib.dump(results, 'results.pkl')

print("Saved results to results.pkl")

Saved results to results.pkl


In [22]:
import pandas as pd
import numpy as np
import joblib

# Loads the saved regression results from 'results.pkl' into `results`.
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# unpickling; only load files that come from a trusted source.
results = joblib.load('results.pkl')

In [25]:
# Reload the source data in timestamp order and rebuild the neighbouring-day
# regressor columns ('D-3' ... 'D+3', skipping the day itself).

df = pd.read_excel('combined_data.xlsx', parse_dates=['dateTimeStmp'])
df = df.sort_values('dateTimeStmp')

for d in (-3, -2, -1, 1, 2, 3):
    # Offsets are applied by row position, 96 rows per day offset
    # (presumably 15-minute data with no gaps in the timestamps — TODO confirm).
    shift_steps = 96 * d
    # Negative d pulls the reading from earlier rows, positive d from later rows.
    neighbour_reading = df['raw_MM_Wh'].shift(-shift_steps)
    # Rows with no neighbour at that offset (and NaN readings) become 0.
    df[f'D{d:+}'] = neighbour_reading.fillna(0)

# Output column, filled in by the backfill loop.
df['backfilled_MM_Wh'] = np.nan

def get_season_from_month(m):
    """Map a month number to the season label used to key the regressions.

    Args:
        m: Month number (1-12).

    Returns:
        'Summer' for months 6-8, 'Winter' for months 11, 12, 1, 2 and 3,
        and 'Shoulder' for every other value.
    """
    summer_months = (6, 7, 8)
    winter_months = (11, 12, 1, 2, 3)

    if m in summer_months:
        return 'Summer'
    return 'Winter' if m in winter_months else 'Shoulder'

def predict_with_stored_params(row, day, season, results):
    """Compute a prediction by hand from stored regression coefficients.

    Args:
        row: Mapping-like object with a ``.get`` method (e.g. a dict or a
            pandas Series) holding the regressors 'D-3' ... 'D+3'.
        day: Key into ``results`` (the caller passes a weekday name).
        season: Key into ``results[day]`` (the caller passes a season label).
        results: Nested mapping ``results[day][season]['params']`` whose
            ``params`` entry supports ``.get`` and may contain 'const' and
            one coefficient per regressor name.

    Returns:
        None if there is no stored regression for ``(day, season)`` or it has
        no 'params' entry. 0 if any regressor is unavailable (absent from
        ``row``, equal to 0, NaN or None). Otherwise
        ``const + sum(coef * value)`` over the six regressors.
    """
    # 1) Retrieve the regression information for this (weekday, season)
    if day not in results or season not in results[day]:
        return None
    param_info = results[day][season]
    if 'params' not in param_info:
        return None

    params = param_info['params']  # Including 'const', 'D-3', 'D-2', ...

    # 2) Add up manually
    y_pred = params.get('const', 0)
    for var in ['D-3', 'D-2', 'D-1', 'D+1', 'D+2', 'D+3']:
        xval = row.get(var, 0)
        # A zero regressor means "no data", so no prediction is possible.
        # NaN/None are handled the same way so they cannot leak into the
        # result as NaN or raise a TypeError in the multiplication below.
        if pd.isna(xval) or xval == 0:
            return 0
        y_pred += params.get(var, 0) * xval

    return y_pred

# 3) Loop backfill
# Keep every usable reading as-is; for the rest, estimate a value from the
# stored regression for that row's (weekday, season).
for idx, row_data in df.iterrows():
    raw_val = row_data['raw_MM_Wh']
    # A reading of 0 is treated as a gap to fill. NaN must be checked
    # explicitly: `NaN != 0` is True, so a NaN reading would otherwise be
    # copied through as if it were valid, even though the regressor columns
    # already treat NaN as missing (they are filled with 0).
    if pd.notna(raw_val) and raw_val != 0:
        df.at[idx, 'backfilled_MM_Wh'] = raw_val
        continue

    dt = row_data['dateTimeStmp']
    day_name = dt.day_name()    # "Monday","Tuesday",...
    season = get_season_from_month(dt.month)
    y_pred = predict_with_stored_params(row_data, day_name, season, results)

    # None means no stored regression for this (weekday, season); fall back to 0.
    df.at[idx, 'backfilled_MM_Wh'] = y_pred if y_pred is not None else 0

# Save result
df.to_csv('Combined_data_backfilled.csv', index=False)
print("Backfill completed and saved to Combined_data_backfilled.csv")

Backfill completed and saved to Combined_data_backfilled.csv
