## Model Revision

In this section we revise our exponential smoothing (ES) model.  

*Key changes*:
- We update the level and trend once a day instead of once an hour. This reduces the number of updates and makes the model more robust.
- We use the previous time period's seasonality indices instead of the averaged ones. This is to make the model more robust to recent changes in the data.

*Note*:
- One-year forecasts are preferred here, since the model is no longer updated on the new data.
- The $\alpha$ and $\beta$ values have to be found again for the new model.

In [None]:
# Seasonalities are to be taken from the previous period, but how?
# Calculating a seasonality index requires data over a longer time frame:
# for example, the daily seasonality is fixed for all Mondays of a month,
# and the hourly seasonality is likewise fixed for all Mondays of a month.

# data\New SI Data\S.I. Hour of the Day.xlsx
# data\New SI Data\S.I. Day of the Week.xlsx

# using scaled ones now as they were already calculated
# will update them to unscaled ones later

## Data Preprocessing

Helper functions to load the demand data as daily and hourly time series.

In [1]:
def filepath(year:int,demandtype:str) -> str:
    """
    Build the relative path of the yearly demand workbook for `year`.

    Works for the following file structure-

    ├── yourfile
    ├── data/
    │   ├── Yearly Energy Demand Data/
    │   │   ├── System Demand (Actual)/
    │   │   ├── NEM Demand (Actual)/
    │   │   └── NEM Demand (Forecast)/

    Parameters
    ----------
    year : int
        Year used as the workbook's file name.
    demandtype : str
        One of 'system', 'nem_actual' or 'nem_forecast'.

    Returns
    -------
    str
        Path of the form
        "data/Yearly Energy Demand Data/<folder>/<year><suffix>.xlsx".

    Raises
    ------
    ValueError
        If `demandtype` is not one of the supported values. (Previously the
        function returned None here, which only failed later when the
        path was opened.)
    """
    # demandtype -> (sub-folder, suffix appended to the year in the file name)
    layout = {
        'system': ("System Demand (Actual)", ""),
        'nem_actual': ("NEM Demand (Actual)", "[nem_actual]"),
        'nem_forecast': ("NEM Demand (Forecast)", "[nem_forecast]"),
    }
    try:
        folder, suffix = layout[demandtype]
    except (KeyError, TypeError):
        raise ValueError(
            f"unknown demandtype {demandtype!r}; expected one of {list(layout)}"
        ) from None
    return f"data/Yearly Energy Demand Data/{folder}/{year}{suffix}.xlsx"

def dailytimeseries(startyear:int, endyear:int, demandtype:str):
    """
    Return per-day demand totals for the years startyear..endyear.

    Requires that `filepath` can resolve a workbook for every year from
    startyear-1 up to and including endyear.

    demandtype is one of ['system','nem_actual','nem_forecast'].

    The result is a pandas Series: every workbook column is summed over its
    rows, and the column label (minus its last four characters) is parsed
    as a day-first date to form the index.
    """
    import warnings
    warnings.filterwarnings("ignore")
    import pandas as pd

    # begin with the workbook of the year before `startyear`, then attach
    # every requested year column-wise on the shared row index
    combined = pd.read_excel(filepath(startyear-1,demandtype), index_col=0)
    for yr in range(startyear, endyear+1):
        yearly = pd.read_excel(filepath(yr,demandtype), index_col=0)
        combined = combined.merge(yearly, left_index=True, right_index=True)

    # one total per column
    daily = combined.sum(axis=0)
    daily.index = pd.to_datetime([label[:-4] for label in daily.index], dayfirst=True)

    # keep only the requested years (drops the extra leading year read above)
    in_range = (daily.index.year >= startyear) & (daily.index.year <= endyear)
    return daily[in_range]

def hourlytimeseries(startyear:int, endyear:int, demandtype:str):
    """
    Return hourly demand for the years startyear..endyear.

    Requires that `filepath` can resolve a workbook for every year from
    startyear-1 up to and including endyear.

    demandtype is one of ['system','nem_actual','nem_forecast'].

    Returns
    -------
    pandas.DataFrame
        One row per hour. With pandas' default `melt` naming the single
        column is 'value' and the datetime index is named 'variable'. Each
        row is the sum of two consecutive half-hour rows and carries the
        timestamp of the first of the two.

    Raises
    ------
    IndexError
        If the number of half-hour rows in the requested range is odd, so
        the last row has no partner to be summed with.
    """
    import warnings
    warnings.filterwarnings("ignore")

    import pandas as pd
    import datetime as dt

    # collect all years of data in one dataframe
    bigdf = pd.read_excel(filepath(startyear-1,demandtype), index_col=0)
    for year in range(startyear,endyear+1):
        temp_df = pd.read_excel(filepath(year,demandtype), index_col=0)
        bigdf = bigdf.merge(temp_df, left_index=True, right_index=True)

    # Shift every half-hour label back by 30 minutes so the slots start at
    # 00:00 and end at 23:30. The shifted labels are the same for every
    # column, so they are computed once instead of once per column.
    delta = dt.timedelta(minutes=30)
    shifted_slots = [
        (dt.datetime.strptime(slot, "%H:%M") - delta).strftime(" %H:%M")
        for slot in bigdf.index
    ]
    # column-major order, i.e. the same order in which melt() stacks the values
    datelist = [column[:11] + slot for column in bigdf.columns for slot in shifted_slots]

    # turn it into a timeseries with a proper date and time index
    bigdf = bigdf.melt()
    bigdf['variable'] = pd.to_datetime(datelist, dayfirst=True)
    bigdf.set_index('variable', inplace=True)
    in_range = (bigdf.index.year >= startyear) & (bigdf.index.year <= endyear)
    bigdf = bigdf[in_range]

    # turning it into hourly data: add each odd-positioned row onto the
    # even-positioned row before it and keep the even row's timestamp
    if len(bigdf) % 2:
        raise IndexError(
            "odd number of half-hourly rows; cannot pair them into hours"
        )
    first_half = bigdf.iloc[0::2]
    second_half = bigdf.iloc[1::2]
    return pd.DataFrame(
        first_half.to_numpy() + second_half.to_numpy(),
        index=first_half.index,
        columns=first_half.columns,
    )

## Seasonalities

In [38]:
import pandas as pd

# WE ADD ALL THE DATA FROM THE YEARS 2004-2022 INTO ONE BIG DATAFRAME
# UPDATE YOUR FILE PATH ACCORDINGLY
df = pd.read_excel(filepath(2004,'system'), index_col=0)
for year in range(2005, 2023):
    temp_df = pd.read_excel(filepath(year,'system'), index_col=0)
    df = df.merge(temp_df, left_index=True, right_index=True)
df.columns = pd.to_datetime(df.columns, dayfirst=True) # converting into datetime format
df.drop(df.columns[-1], axis=1, inplace=True) # removing Jan 01 2023 column
hourlyall = df.copy() # starts out half-hourly, collapsed to hourly just below

# changing data from half-hourly to hourly by adding consecutive half-hour data:
# within the first 48 rows, every odd-positioned row absorbs the row before it,
# and the even-positioned rows are then dropped
for i in range(1, 48, 2):
    hourlyall.iloc[i] = hourlyall.iloc[i] + hourlyall.iloc[i-1]
hourlyall.drop(hourlyall.index[0:48:2], inplace=True)

# "<year><month name><weekday name>" key of every column, computed once so the
# column selection is not rebuilt for each hour
column_keys = hourlyall.columns.strftime("%Y%B%A")

dfdict = {}
for year in hourlyall.columns.year.unique():
    for month in hourlyall.columns.month_name().unique():
        for day in hourlyall.columns.day_name().unique():
            # all days of this year and month that fall on this weekday
            matching_days = hourlyall.loc[:, column_keys == str(year)+month+day]
            # average hourly demand for a given day of the week, month and year
            hourly_avg = matching_days.mean(axis=1)
            # S.I. for the hour of the day (for particular day of the week, month and year):
            # each hourly average divided by the mean of the hourly averages
            dfdict[(year,month,day)] = hourly_avg.to_numpy() / (sum(hourly_avg)/len(hourly_avg))
hourlydf = pd.DataFrame(dfdict, index=hourlyall.index)

# Write one sheet per year in a single pass. The workbook is created (or
# replaced) as a whole, so no error has to be swallowed to handle a missing
# file or an already existing sheet.
with pd.ExcelWriter('data/Model Revision/HourlySI.xlsx', engine='openpyxl') as writer:
    for year in range(2004,2023):
        year_temp = hourlydf.xs(year, axis=1, level=0)
        year_temp.to_excel(writer, sheet_name=str(year))

In [21]:
def input_seasonality(date):
    """
    Look up the seasonality indices to apply for `date`.

    Parameters
    ----------
    date : datetime-like
        Needs `.year`, `.month` and `.weekday()` (e.g. a pandas Timestamp,
        `datetime.datetime` or `datetime.date`).

    Returns
    -------
    (hourly, monthly)
        hourly : the column for this weekday of the *previous* month, read
            from 'data/Model Revision/HourlySI.xlsx' (for January: December
            of the previous year's sheet).
        monthly : the rows of column `year-1` of
            'data/Model Revision/MonthlySI.xlsx' whose index equals this
            month's name.
    """
    import pandas as pd

    # English names, independent of the process locale; HourlySI.xlsx is
    # written with pandas' month_name()/day_name() labels.
    # NOTE(review): MonthlySI.xlsx is assumed to use English month names too.
    month_names = ('January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December')
    day_names = ('Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday')

    year = date.year
    month = month_names[date.month - 1]
    day = day_names[date.weekday()]

    # index -1 wraps around, so January maps to December
    previous_month = month_names[date.month - 2]
    # the month before January lives in the previous year's sheet
    sheet_year = year - 1 if date.month == 1 else year

    # Monthly seasonality
    monthly = pd.read_excel('data/Model Revision/MonthlySI.xlsx', index_col=0)
    monthly = monthly[year-1]
    monthly = monthly[monthly.index == month]

    # Hourly seasonality
    hourly = pd.read_excel('data/Model Revision/HourlySI.xlsx', sheet_name=str(sheet_year), header=[0,1], index_col=0).xs(previous_month, axis=1, level=0)
    hourly = hourly[day]

    return hourly, monthly
    

In [22]:
def updated_es_model(series,
          alpha:float,
          beta:float,
          future_steps: int = 365):
    """
    Exponential smoothing with a daily level/trend update and hourly output.

    For every day in `series` a one-step-ahead daily forecast is built from
    the previous level and trend, re-seasonalised with the monthly index,
    and then spread over 24 hours with the hourly indices returned by
    `input_seasonality`.

    Parameters
    ----------
    series : pandas.Series
        Daily values indexed by timestamp.
    alpha : float
        Smoothing weight of the level.
    beta : float
        Smoothing weight of the trend.
    future_steps : int, default 365
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Column 'Forecast' with 24 rows per input day, indexed by the day's
        timestamp plus 0..23 hours.
    """
    import numpy as np
    import pandas as pd

    forecasts = []
    index = []

    # input data we have is now daily since we are updating L and T daily
    l_t1 = np.mean(series.iloc[:365])
    t_t1 = 0

    # input_seasonality() only looks at the year, month and weekday of the
    # date, so its result is reused for days sharing them instead of
    # re-reading the workbooks for every single day.
    seasonality_cache = {}

    for timestamp, observed in zip(series.index, series.values):
        key = (timestamp.year, timestamp.month, timestamp.weekday())
        if key not in seasonality_cache:
            hourly_si, monthly_si = input_seasonality(timestamp)
            seasonality_cache[key] = (hourly_si, monthly_si.values[0])
        i_sh, i_sm = seasonality_cache[key]

        # forecast for this day uses the level and trend from the day before
        forecast_t = (l_t1 + t_t1) * i_sm

        # update level and trend with the deseasonalised observation
        l_t = alpha * (observed/i_sm) + (1 - alpha) * (l_t1 + t_t1)
        t_t = beta * (l_t - l_t1) + (1 - beta) * t_t1
        l_t1 = l_t
        t_t1 = t_t

        hourly_forecast = forecast_t / 24

        for i in range(24):
            # positional lookup: plain `i_sh[i]` depends on pandas falling
            # back to positions when the index labels are not integers
            forecasts.append(hourly_forecast * i_sh.iloc[i])
            index.append(timestamp + pd.Timedelta(hours=i))

    return pd.DataFrame(forecasts, index=index, columns=['Forecast'])

In [23]:
# daily 'system' totals for 2013-2018: the series the model is run over
inputseries = dailytimeseries(2013,2018,'system')
# hourly 'system' values for 2018: the actuals the forecasts are scored against
testseries = hourlytimeseries(2018,2018,'system')

In [24]:
# run the revised model over the full daily series with fixed alpha and beta
forecasts = updated_es_model(inputseries, alpha=0.19, beta=0.88, future_steps=365)

In [25]:
# display the hourly forecast table
forecasts

Unnamed: 0,Forecast
2013-01-01 00:00:00,9174.155594
2013-01-01 01:00:00,8836.648941
2013-01-01 02:00:00,8610.700844
2013-01-01 03:00:00,8467.302411
2013-01-01 04:00:00,8418.982350
...,...
2018-12-31 19:00:00,12540.326124
2018-12-31 20:00:00,12334.361553
2018-12-31 21:00:00,12089.360632
2018-12-31 22:00:00,11533.521303


In [26]:
from sklearn.metrics import mean_absolute_percentage_error

# MAPE of the last 365*24 hourly forecasts against the 2018 hourly actuals (compared by position)
mean_absolute_percentage_error(testseries, forecasts['Forecast'].iloc[-365*24:])

0.0501677336816102

In [35]:
# import pandas as pd

# pd.DataFrame({'Actual':list(testseries.value.iloc[-365*24:]),'Forecast':list(forecasts['Forecast'].iloc[-365*24:])}, index=testseries.index[-365*24:]).to_excel('data/Model Revision/ES_Forecast_2018.xlsx')

With previous-year seasonalities for both the monthly and the daily seasonality, the MAPE is 0.049 (using the previously calculated alpha and beta values).

With daily seasonalities taken from the previous month, the MAPE is 0.05.

PREVIOUS YEAR FORECAST CORRECT MAPE - gets higher, so let's not report them now