In [1]:
import numpy as np
import pandas as pd


In [2]:
# Load the daily consumption series; the first CSV column is parsed as dates
# and used as the index.
# `squeeze=True` was removed in pandas 2.0 and the surrounding pd.DataFrame(...)
# only undid its effect, so read_csv is called directly and always yields a
# DataFrame.
daily_data = pd.read_csv("consumption_daily.csv", header=0, index_col=0, parse_dates=True)


# Adding seasonality features

In [3]:
## Set weekends and holidays to 1, otherwise 0
# Both indicator columns start as float zeros and are filled in with .loc
# below. Chained assignment (df[col][mask] = 1) is avoided because it writes
# to a temporary object under pandas Copy-on-Write and leaves the frame
# unchanged.
daily_data['Atypical_Day'] = 0.0
daily_data["Religious Holidays"] = 0.0

# Weekends: dayofweek 5 is Saturday, 6 is Sunday
is_weekend = daily_data.index.dayofweek.isin([5, 6])
daily_data.loc[is_weekend, 'Atypical_Day'] = 1

# Holidays: fixed month-day dates repeated for every year in 2016-2019
holidays = [
    str(year) + month_day
    for month_day in ["-01-01", "-04-23", "-05-01", "-05-19", "-07-15", "-08-30", "-10-29"]
    for year in range(2016, 2020)
]

# Dates flagged in the "Religious Holidays" column
bayramlar = [
    '2016-07-04',
    '2016-07-05',
    '2016-07-06',
    '2016-07-07',
    '2016-09-11',
    '2016-09-12',
    '2016-09-13',
    '2016-09-14',
    '2016-09-15',
    '2016-12-31',
    '2017-06-24',
    '2017-06-25',
    '2017-06-26',
    '2017-06-27',
    '2017-08-31',
    '2017-09-01',
    '2017-09-02',
    '2017-09-03',
    '2017-09-04',
    '2017-12-31',
    '2018-06-14',
    '2018-06-15',
    '2018-06-16',
    '2018-06-17',
    '2018-08-20',
    '2018-08-21',
    '2018-08-22',
    '2018-08-23',
    '2018-08-24',
    '2018-12-31',
    '2019-06-04',
    '2019-06-05',
    '2019-06-06',
    '2019-06-07',
    '2019-08-10',
    '2019-08-11',
    '2019-08-12',
    '2019-08-13',
    '2019-08-14',
    '2019-12-31',
]

# Compare on the 'YYYY-MM-DD' text of each index entry, which is what the
# date lists above contain; one vectorised membership test per list replaces
# the row-by-row iterrows() loops.
index_dates = daily_data.index.strftime("%Y-%m-%d")
daily_data.loc[index_dates.isin(bayramlar), "Religious Holidays"] = 1
daily_data.loc[index_dates.isin(holidays), "Atypical_Day"] = 1

daily_data.head(5)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-01-01,648689.21,1.0,0.0
2016-01-02,702608.51,1.0,0.0
2016-01-03,685453.07,1.0,0.0
2016-01-04,794904.71,0.0,0.0
2016-01-05,796749.92,0.0,0.0


In [4]:
# Persist the frame with the seasonality flags (index included) so that each
# window-size cell below can reload it from disk.
daily_data.to_csv(path_or_buf="daily_data_seasonality.csv", index=True)

# Creating Window Features (7, 14, 21, and 28)

In [5]:
# Add historic usage to each X vector
# Reload the seasonality frame saved earlier. `squeeze=True` was removed in
# pandas 2.0 and has no effect on a multi-column file, so it is dropped
# together with the redundant pd.DataFrame(...) wrapper.
daily_data = pd.read_csv("daily_data_seasonality.csv", header=0, index_col=0, parse_dates=True)

# Set number of days prediction is in advance
n_days_advance = 1

# Set number of historic days used
n_days_window = 7

# One lag column per historic day: 'Consumption_t-k' holds the consumption
# value from k rows earlier. Series.shift replaces the element-wise nested
# loop, which relied on chained assignment with positional integer keys on a
# date-indexed Series.
for lag in range(n_days_advance, n_days_advance + n_days_window):
    daily_data['Consumption_t-%i' % lag] = daily_data['Consumption'].shift(lag)

# Eliminating the data which has no window size.
# NOTE(review): the largest lag is n_days_advance + n_days_window - 1, so this
# slice drops one row more than strictly necessary; kept unchanged so the
# saved file contains the same rows as before.
daily_data = daily_data.iloc[n_days_advance + n_days_window:]
daily_data.to_csv("daily_data_seasonality_7.csv", index=True)
daily_data.head(3)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays,Consumption_t-1,Consumption_t-2,Consumption_t-3,Consumption_t-4,Consumption_t-5,Consumption_t-6,Consumption_t-7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-09,748705.83,1.0,0.0,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07,702608.51
2016-01-10,675374.82,1.0,0.0,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07
2016-01-11,763341.77,0.0,0.0,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71


### Applying the same steps for the other window sizes suggested by the statistical analysis (14, 21, and 28 days).

In [6]:
# Add historic usage to each X vector
# Reload the seasonality frame saved earlier. `squeeze=True` was removed in
# pandas 2.0 and has no effect on a multi-column file, so it is dropped
# together with the redundant pd.DataFrame(...) wrapper.
daily_data = pd.read_csv("daily_data_seasonality.csv", header=0, index_col=0, parse_dates=True)

# Set number of days prediction is in advance
n_days_advance = 1

# Set number of historic days used
n_days_window = 14

# One lag column per historic day: 'Consumption_t-k' holds the consumption
# value from k rows earlier. Series.shift replaces the element-wise nested
# loop and its chained, positional assignment.
for lag in range(n_days_advance, n_days_advance + n_days_window):
    daily_data['Consumption_t-%i' % lag] = daily_data['Consumption'].shift(lag)

# Drop the leading rows that lack a complete window.
# NOTE(review): this removes one row more than the largest lag requires;
# kept unchanged so the saved file contains the same rows as before.
daily_data = daily_data.iloc[n_days_advance + n_days_window:]
daily_data.to_csv("daily_data_seasonality_14.csv", index=True)
daily_data.head(3)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays,Consumption_t-1,Consumption_t-2,Consumption_t-3,Consumption_t-4,Consumption_t-5,Consumption_t-6,Consumption_t-7,Consumption_t-8,Consumption_t-9,Consumption_t-10,Consumption_t-11,Consumption_t-12,Consumption_t-13,Consumption_t-14
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2016-01-16,746281.42,1.0,0.0,781958.01,784066.96,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07,702608.51
2016-01-17,663657.21,1.0,0.0,746281.42,781958.01,784066.96,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07
2016-01-18,780843.77,0.0,0.0,663657.21,746281.42,781958.01,784066.96,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71


In [7]:
# Add historic usage to each X vector
# Reload the seasonality frame saved earlier. `squeeze=True` was removed in
# pandas 2.0 and has no effect on a multi-column file, so it is dropped
# together with the redundant pd.DataFrame(...) wrapper.
daily_data = pd.read_csv("daily_data_seasonality.csv", header=0, index_col=0, parse_dates=True)

# Set number of days prediction is in advance
n_days_advance = 1

# Set number of historic days used
n_days_window = 21

# One lag column per historic day: 'Consumption_t-k' holds the consumption
# value from k rows earlier. Series.shift replaces the element-wise nested
# loop and its chained, positional assignment.
for lag in range(n_days_advance, n_days_advance + n_days_window):
    daily_data['Consumption_t-%i' % lag] = daily_data['Consumption'].shift(lag)

# Drop the leading rows that lack a complete window.
# NOTE(review): this removes one row more than the largest lag requires;
# kept unchanged so the saved file contains the same rows as before.
daily_data = daily_data.iloc[n_days_advance + n_days_window:]
daily_data.to_csv("daily_data_seasonality_21.csv", index=True)
daily_data.head(3)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays,Consumption_t-1,Consumption_t-2,Consumption_t-3,Consumption_t-4,Consumption_t-5,Consumption_t-6,Consumption_t-7,...,Consumption_t-12,Consumption_t-13,Consumption_t-14,Consumption_t-15,Consumption_t-16,Consumption_t-17,Consumption_t-18,Consumption_t-19,Consumption_t-20,Consumption_t-21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-23,782900.57,1.0,0.0,812696.47,818856.4,819194.49,809196.68,780843.77,663657.21,746281.42,...,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07,702608.51
2016-01-24,696940.47,1.0,0.0,782900.57,812696.47,818856.4,819194.49,809196.68,780843.77,663657.21,...,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07
2016-01-25,813896.19,0.0,0.0,696940.47,782900.57,812696.47,818856.4,819194.49,809196.68,780843.77,...,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71


In [8]:
# Add historic usage to each X vector
# Reload the seasonality frame saved earlier. `squeeze=True` was removed in
# pandas 2.0 and has no effect on a multi-column file, so it is dropped
# together with the redundant pd.DataFrame(...) wrapper.
daily_data = pd.read_csv("daily_data_seasonality.csv", header=0, index_col=0, parse_dates=True)

# Set number of days prediction is in advance
n_days_advance = 1

# Set number of historic days used
n_days_window = 28

# One lag column per historic day: 'Consumption_t-k' holds the consumption
# value from k rows earlier. Series.shift replaces the element-wise nested
# loop and its chained, positional assignment.
for lag in range(n_days_advance, n_days_advance + n_days_window):
    daily_data['Consumption_t-%i' % lag] = daily_data['Consumption'].shift(lag)

# Drop the leading rows that lack a complete window.
# NOTE(review): this removes one row more than the largest lag requires;
# kept unchanged so the saved file contains the same rows as before.
daily_data = daily_data.iloc[n_days_advance + n_days_window:]
daily_data.to_csv("daily_data_seasonality_28.csv", index=True)
daily_data.head(3)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays,Consumption_t-1,Consumption_t-2,Consumption_t-3,Consumption_t-4,Consumption_t-5,Consumption_t-6,Consumption_t-7,...,Consumption_t-19,Consumption_t-20,Consumption_t-21,Consumption_t-22,Consumption_t-23,Consumption_t-24,Consumption_t-25,Consumption_t-26,Consumption_t-27,Consumption_t-28
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-30,758549.68,1.0,0.0,806796.85,829661.57,829632.01,838403.67,813896.19,696940.47,782900.57,...,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07,702608.51
2016-01-31,662382.97,1.0,0.0,758549.68,806796.85,829661.57,829632.01,838403.67,813896.19,696940.47,...,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07
2016-02-01,763070.55,0.0,0.0,662382.97,758549.68,806796.85,829661.57,829632.01,838403.67,813896.19,...,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71


# Four datasets are created.
## 7-day window, 14-day window, 21-day window, and 28-day window.

In [9]:
# Preview the first five rows of the frame currently bound to `daily_data`.
daily_data.head(n=5)

Unnamed: 0_level_0,Consumption,Atypical_Day,Religious Holidays,Consumption_t-1,Consumption_t-2,Consumption_t-3,Consumption_t-4,Consumption_t-5,Consumption_t-6,Consumption_t-7,...,Consumption_t-19,Consumption_t-20,Consumption_t-21,Consumption_t-22,Consumption_t-23,Consumption_t-24,Consumption_t-25,Consumption_t-26,Consumption_t-27,Consumption_t-28
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-30,758549.68,1.0,0.0,806796.85,829661.57,829632.01,838403.67,813896.19,696940.47,782900.57,...,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07,702608.51
2016-01-31,662382.97,1.0,0.0,758549.68,806796.85,829661.57,829632.01,838403.67,813896.19,696940.47,...,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71,685453.07
2016-02-01,763070.55,0.0,0.0,662382.97,758549.68,806796.85,829661.57,829632.01,838403.67,813896.19,...,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92,794904.71
2016-02-02,762948.04,0.0,0.0,763070.55,662382.97,758549.68,806796.85,829661.57,829632.01,838403.67,...,784066.96,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21,796749.92
2016-02-03,758355.09,0.0,0.0,762948.04,763070.55,662382.97,758549.68,806796.85,829661.57,829632.01,...,781958.01,784066.96,774584.07,764872.77,763341.77,675374.82,748705.83,775536.44,780834.48,783228.21
