## Method - 1  
1. Use the seasonality indexes from 5 years before the forecast year.
2. Train the linear regression (demand vs. GDP) on data from 2004 up to 5 years before the forecast year.
3. Use the known GDP of the forecast year to predict that year's total demand.

In [1]:
import pandas as pd

# Yearly demand and GDP table; the demand figures are already corrected to MWh.
# Forward slashes keep the path portable and avoid the invalid "\h" / "\T"
# escape sequences that the backslash spelling produced.
data = pd.read_excel("data/hourly-revision-data/Total Yearly Demand.xlsx", index_col=0)
data.head()

Unnamed: 0_level_0,Total Yearly Demand,GDP (in current billion US$)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2004,36482620.0,115.03357
2005,38020660.0,127.80781
2006,39359260.0,148.627255
2007,41734240.0,180.941741
2008,41307380.0,193.617346


In [2]:
from sklearn.linear_model import LinearRegression

# calculating MAPE without sklearn
import numpy as np

def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true``.

    Both arguments are converted to NumPy arrays; the result is expressed in
    percent (i.e. already multiplied by 100).
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

# Function defining the input seasonality
###############################################################################
############# WE ARE USING THE UPDATED SEASONALITY INDEXES ####################
###############################################################################
def input_seasonality(year):
    """Load the seasonality indexes stored for ``year``.

    Returns a ``(hourly, monthly)`` tuple:

    * ``monthly`` is the column labelled ``year`` of ``MonthlySI.xlsx``
      (first spreadsheet column used as the index).
    * ``hourly`` is the sheet named ``str(year)`` of ``HourlySI.xlsx``, read
      with a two-row column header and the first column as the index.
    """
    # Monthly seasonality: one column per year, pick the requested one.
    monthly_table = pd.read_excel(
        'data/hourly-revision-data/MonthlySI.xlsx',
        index_col=0,
    )
    monthly = monthly_table[year]

    # Hourly seasonality: one sheet per year, two-level column header.
    hourly = pd.read_excel(
        'data/hourly-revision-data/HourlySI.xlsx',
        sheet_name=str(year),
        header=[0, 1],
        index_col=0,
    )

    return hourly, monthly

# function to make predictions
def prediction_with_gdp(year:int, series):
    """Forecast hourly demand for ``year`` from that year's known GDP (Method 1).

    Steps:
      1. Fit demand ~ GDP by linear regression on the rows 2004 .. ``year - 5``
         of "Total Yearly Demand.xlsx".
      2. Predict the yearly demand from the GDP recorded for ``year`` and
         spread it evenly over 365*24 hours.
      3. Scale every hour by the hourly and monthly seasonality indexes of
         ``year - 5``.

    Parameters
    ----------
    year : int
        Year to forecast; must be present in the yearly demand/GDP table.
    series
        Observed hourly data for ``year``. Its index entries must support
        ``strftime`` and it must expose a ``value`` column/attribute
        (presumably the frame returned by ``hourlytimeseries`` - confirm).

    Returns
    -------
    pandas.DataFrame
        Columns ``'Original'`` and ``'Forecast'`` for the first 365*24 rows of
        ``series``, indexed like those rows.
    """
    hours_per_year = 365 * 24

    # Linear Regression Fitting
    # (forward slashes: portable and free of invalid "\h" / "\T" escapes)
    demandgdp = pd.read_excel("data/hourly-revision-data/Total Yearly Demand.xlsx", index_col=0)
    demandgdp.columns = ['demand','gdp']

    # Only data up to 5 years before the forecast year is used for the fit.
    training = demandgdp.loc[2004:year-5]
    X_fit, y_fit = training.gdp, training.demand

    linreg = LinearRegression()
    linreg.fit(X_fit.values.reshape(-1,1), y_fit.values.reshape(-1,1))

    # predict() returns a (1, 1) array here; .item() extracts the scalar
    # explicitly instead of relying on the deprecated int(ndarray) conversion.
    forecastyearly = int(linreg.predict([[demandgdp.loc[year].gdp]]).item())
    forecasted = [forecastyearly / hours_per_year] * hours_per_year

    # Seasonality adjustment, using the indexes from 5 years ago.
    hourly, monthly = input_seasonality(year - 5)

    # Only the first 365*24 rows are forecast (the returned frame is truncated
    # the same way), so a leap-year series no longer overruns `forecasted`.
    for i, timestamp in enumerate(series.index[:hours_per_year]):
        month_name = timestamp.strftime('%B')
        i_sh = hourly.xs([month_name, timestamp.strftime('%A')], axis=1, level=[0,1]).loc[timestamp.strftime('%H:%M')].values[0]
        i_sm = monthly.loc[month_name]
        forecasted[i] = forecasted[i] * i_sh * i_sm

    return pd.DataFrame(
        {'Original': list(series.iloc[:hours_per_year].value), 'Forecast': forecasted},
        index=series.index[:hours_per_year],
    )
    

In [3]:
from hourly_revision_utils_ts import hourlytimeseries
# Method 1 back-test for 2018: fetch the series with the project helper
# hourlytimeseries (presumably the hourly 'system' demand for 2018 - confirm)
# and forecast it from the 2018 GDP plus the seasonality indexes of 2013.
inputseries = hourlytimeseries(2018,2018,'system')
forecasts = prediction_with_gdp(2018, inputseries)

In [4]:
# Mean absolute percentage error (in %) of the forecast held in `forecasts`.
MAPE(forecasts['Original'], forecasts['Forecast'])

5.128668063170858

In [5]:
# Method 1 back-test for 2017 (regression fitted on 2004-2012, seasonality of 2012);
# the cell output is the MAPE in percent.
inputseries = hourlytimeseries(2017,2017,'system')
forecasts = prediction_with_gdp(2017, inputseries)
MAPE(forecasts['Original'], forecasts['Forecast'])

5.46777111595436

In [6]:
# Method 1 back-test for 2019 (regression fitted on 2004-2014, seasonality of 2014);
# the cell output is the MAPE in percent.
inputseries = hourlytimeseries(2019,2019,'system')
forecasts = prediction_with_gdp(2019, inputseries)
MAPE(forecasts['Original'], forecasts['Forecast'])

5.619257132250321

## Method - 2
1. Use seasonality data from 5 years ago to predict.
2. Use GDP data from 5 years ago to predict yearly demand for the next year.

Source- [here](https://www.imf.org/en/Publications/WEO/weo-database/2013/October/weo-report?c=576,&s=NGDPD,&sy=2004&ey=2018&ssm=0&scsm=1&scc=0&ssd=1&ssc=0&sic=0&sort=country&ds=.&br=1)

In [7]:
# function to make predictions
def prediction_with_forecasted_gdp(year:int, series, demandgdp):
    """Forecast hourly demand for ``year`` from a supplied GDP table (Method 2).

    Steps:
      1. Fit demand ~ GDP by linear regression on rows 2004 .. ``year - 5`` of
         ``demandgdp`` and print intercept, coefficient, R^2 and R.
      2. Predict the yearly demand from the GDP value stored for ``year`` and
         spread it evenly over the hours of the year (366*24 when ``series``
         has more than 365*24 rows, otherwise 365*24).
      3. Scale every hour by the hourly and monthly seasonality indexes of
         ``year - 5``.

    Parameters
    ----------
    year : int
        Year to forecast; must be a row label of ``demandgdp``.
    series
        Observed hourly data for ``year``. Its index entries must support
        ``strftime`` and it must expose a ``value`` column/attribute.
    demandgdp : pandas.DataFrame
        Two-column frame ordered as (GDP, demand) and indexed by year.
        NOTE: its columns are renamed to ``['gdp', 'demand']`` IN PLACE; the
        ``smf.ols(formula='demand ~ gdp', ...)`` cells later in this notebook
        depend on that side effect, so it is kept deliberately.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Original'`` and ``'Forecast'``, indexed like ``series``.
    """
    # Linear Regression Fitting (in-place rename, see docstring NOTE)
    demandgdp.columns = ['gdp','demand']

    # Only data up to 5 years before the forecast year is used for the fit.
    training = demandgdp.loc[2004:year-5]
    X_fit = training.gdp.values.reshape(-1,1)
    y_fit = training.demand.values.reshape(-1,1)

    linreg = LinearRegression()
    linreg.fit(X_fit, y_fit)

    # print model summary (score computed once and reused for R)
    r_squared = linreg.score(X_fit, y_fit)
    print(f'Intercept: {linreg.intercept_}')
    print(f'Coefficient: {linreg.coef_}')
    print(f'R^2: {r_squared}')
    print(f'R: {np.sqrt(r_squared)}')

    # predict() returns a (1, 1) array here; .item() extracts the scalar
    # explicitly instead of relying on the deprecated int(ndarray) conversion.
    forecastyearly = int(linreg.predict([[demandgdp.loc[year].gdp]]).item())

    # Choose the leap-year length up front instead of catching the IndexError
    # of a too-short list with a bare `except`.
    hours_in_year = 366*24 if len(series) > 365*24 else 365*24
    forecasted = [forecastyearly / hours_in_year] * hours_in_year

    # Seasonality adjustment, using the indexes from 5 years ago.
    hourly, monthly = input_seasonality(year - 5)

    for i, timestamp in enumerate(series.index):
        month_name = timestamp.strftime('%B')
        i_sh = hourly.xs([month_name, timestamp.strftime('%A')], axis=1, level=[0,1]).loc[timestamp.strftime('%H:%M')].values[0]
        i_sm = monthly.loc[month_name]
        forecasted[i] = forecasted[i] * i_sh * i_sm

    return pd.DataFrame({'Original': list(series.value), 'Forecast': forecasted}, index=series.index)
    

2018 forecast from 2014 report

In [8]:
# October 2014 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2014 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct14.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2014.columns = ['GDP']
# Attach the observed yearly demand for the same index labels.
report2014['Energy'] = data['Total Yearly Demand'].loc[report2014.index]

In [9]:
# Method 2 back-test for 2018 using the GDP figures of the 2014 WEO report.
# Side effect: the call renames report2014's columns to ['gdp', 'demand'] in place.
inputseries = hourlytimeseries(2018,2018,'system')
forecasts = prediction_with_forecasted_gdp(2018, inputseries, report2014)
print('MAPE: ',MAPE(forecasts['Original'], forecasts['Forecast']))
# Persist the original/forecast pairs for later comparison.
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2018].xlsx')

Intercept: [30554988.95100892]
Coefficient: [[57839.63313991]]
R^2: 0.9793676726542911
R: 0.989630068588405
MAPE:  5.460854118686638


In [10]:
# import package
import statsmodels.formula.api as smf 

# Re-fit the demand ~ GDP regression with statsmodels on 2004-2013 (the same
# window the 2018 forecast used) to obtain the full OLS summary.
# Requires report2014's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
				data=report2014.loc[2004:2013]).fit() 

# model summary 
print(model.summary()) 

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.979
Model:                            OLS   Adj. R-squared:                  0.977
Method:                 Least Squares   F-statistic:                     379.7
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           5.00e-08
Time:                        11:09:41   Log-Likelihood:                -145.96
No. Observations:                  10   AIC:                             295.9
Df Residuals:                       8   BIC:                             296.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   3.055e+07   6.36e+05     48.020      0.0

2019 forecast from 2015 report

In [11]:
# October 2015 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2015 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct15.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2015.columns = ['GDP']
# Attach the observed yearly demand for the same index labels.
report2015['Energy'] = data['Total Yearly Demand'].loc[report2015.index]

# Method 2 back-test for 2019, then report the error and persist the result.
inputseries = hourlytimeseries(2019, 2019, 'system')
forecasts = prediction_with_forecasted_gdp(2019, inputseries, report2015)
print('MAPE: ', MAPE(forecasts['Original'], forecasts['Forecast']))
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2019].xlsx')

Intercept: [30457047.3578641]
Coefficient: [[58397.36052253]]
R^2: 0.9797450493251547
R: 0.9898207157486424
MAPE:  5.584735278918744


In [12]:
# Re-fit the demand ~ GDP regression with statsmodels on 2004-2014 (the same
# window the 2019 forecast used) to obtain the full OLS summary.
# Requires report2015's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
				data=report2015.loc[2004:2014]).fit() 

# model summary 
print(model.summary()) 

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.980
Model:                            OLS   Adj. R-squared:                  0.977
Method:                 Least Squares   F-statistic:                     435.3
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           6.25e-09
Time:                        11:09:58   Log-Likelihood:                -161.38
No. Observations:                  11   AIC:                             326.8
Df Residuals:                       9   BIC:                             327.6
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   3.046e+07   6.31e+05     48.255      0.0

2017 forecast from 2013 report

In [13]:
# October 2013 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2013 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct13.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2013.columns = ['GDP']
# Attach the observed yearly demand for the same index labels.
report2013['Energy'] = data['Total Yearly Demand'].loc[report2013.index]

# Method 2 back-test for 2017, then report the error and persist the result.
inputseries = hourlytimeseries(2017, 2017, 'system')
forecasts = prediction_with_forecasted_gdp(2017, inputseries, report2013)
print('MAPE: ', MAPE(forecasts['Original'], forecasts['Forecast']))
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2017].xlsx')

Intercept: [30085191.06341361]
Coefficient: [[61684.83248427]]
R^2: 0.9760928309762319
R: 0.9879741044056933
MAPE:  5.113402099973538


In [14]:
# Re-fit the demand ~ GDP regression with statsmodels on 2004-2012 (the same
# window the 2017 forecast used) to obtain the full OLS summary.
# Requires report2013's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
				data=report2013.loc[2004:2012]).fit() 

# model summary 
print(model.summary()) 

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.976
Model:                            OLS   Adj. R-squared:                  0.973
Method:                 Least Squares   F-statistic:                     285.8
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           6.21e-07
Time:                        11:10:17   Log-Likelihood:                -131.41
No. Observations:                   9   AIC:                             266.8
Df Residuals:                       7   BIC:                             267.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   3.009e+07   7.24e+05     41.576      0.0

2020 forecast from 2016 report

In [15]:
# October 2016 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2016 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct16.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2016.columns = ['GDP']
# Attach the observed yearly demand for the same index labels.
report2016['Energy'] = data['Total Yearly Demand'].loc[report2016.index]

# Method 2 back-test for 2020, then report the error and persist the result.
inputseries = hourlytimeseries(2020, 2020, 'system')
forecasts = prediction_with_forecasted_gdp(2020, inputseries, report2016)
print('MAPE: ', MAPE(forecasts['Original'], forecasts['Forecast']))
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2020].xlsx')

Intercept: [29912407.34859338]
Coefficient: [[61859.2932725]]
R^2: 0.9632628646401199
R: 0.9814595583314271
MAPE:  6.0322491610035796


In [16]:
# Re-fit the demand ~ GDP regression with statsmodels on 2004-2015 (the same
# window the 2020 forecast used) to obtain the full OLS summary.
# Requires report2016's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
                data=report2016.loc[2004:2015]).fit()

# model summary
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     262.2
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           1.67e-08
Time:                        11:10:38   Log-Likelihood:                -180.45
No. Observations:                  12   AIC:                             364.9
Df Residuals:                      10   BIC:                             365.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   2.991e+07   8.84e+05     33.838      0.0

2021 forecast from 2017 report

In [17]:
# October 2017 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2017 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct17.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2017.columns = ['GDP']
# Attach the observed yearly demand for the same index labels.
report2017['Energy'] = data['Total Yearly Demand'].loc[report2017.index]

# Method 2 back-test for 2021, then report the error and persist the result.
inputseries = hourlytimeseries(2021, 2021, 'system')
forecasts = prediction_with_forecasted_gdp(2021, inputseries, report2017)
print('MAPE: ', MAPE(forecasts['Original'], forecasts['Forecast']))
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2021].xlsx')

Intercept: [29475457.39060476]
Coefficient: [[64578.4238463]]
R^2: 0.9453971965229523
R: 0.9723153791455488
MAPE:  6.868543754857469


In [18]:
# Re-fit the demand ~ GDP regression with statsmodels on 2004-2016 (the same
# window the 2021 forecast used) to obtain the full OLS summary.
# Requires report2017's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
                data=report2017.loc[2004:2016]).fit()

# model summary
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.945
Model:                            OLS   Adj. R-squared:                  0.940
Method:                 Least Squares   F-statistic:                     190.5
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           2.73e-08
Time:                        11:10:53   Log-Likelihood:                -198.94
No. Observations:                  13   AIC:                             401.9
Df Residuals:                      11   BIC:                             403.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   2.948e+07   1.11e+06     26.478      0.0

2022 forecast from 2018 report

In [19]:
# October 2018 WEO extract: remove the descriptive columns, discard rows with
# missing values, then transpose so the remaining column labels become the index.
report2018 = (
    pd.read_excel('data/Model Revision/WEO_Data_Oct18.xlsx')
    .drop(columns=['Country', 'Subject Descriptor', 'Units', 'Scale',
                   'Country/Series-specific Notes', 'Estimates Start After'])
    .dropna()
    .transpose()
)
report2018.columns = ['GDP']
# Attach the observed yearly demand; the last index label is skipped because
# 2023 is not in the demand data.
report2018['Energy'] = data['Total Yearly Demand'].loc[report2018.index[:-1]]

# Method 2 back-test for 2022, then report the error and persist the result.
inputseries = hourlytimeseries(2022, 2022, 'system')
forecasts = prediction_with_forecasted_gdp(2022, inputseries, report2018)
print('MAPE: ', MAPE(forecasts['Original'], forecasts['Forecast']))
forecasts.to_excel('data/hourly-revision-data/Forecasts/GDPModel_Method2[Forecast 2022].xlsx')

Intercept: [29382914.51286875]
Coefficient: [[64794.70244437]]
R^2: 0.9611607243909346
R: 0.9803880478621384
MAPE:  4.349607126192993


In [20]:
# Re-fit the demand ~ GDP regression with statsmodels on 2004-2017 (the same
# window the 2022 forecast used) to obtain the full OLS summary.
# Requires report2018's columns to be named 'gdp'/'demand', which happens in
# place inside prediction_with_forecasted_gdp - run that cell first.
model = smf.ols(formula='demand ~ gdp', 
                data=report2018.loc[2004:2017]).fit()

# model summary
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 demand   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.958
Method:                 Least Squares   F-statistic:                     297.0
Date:                Fri, 29 Mar 2024   Prob (F-statistic):           7.88e-10
Time:                        11:11:08   Log-Likelihood:                -212.53
No. Observations:                  14   AIC:                             429.1
Df Residuals:                      12   BIC:                             430.3
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   2.938e+07    9.3e+05     31.583      0.0