In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import json
import plotly.graph_objects as go
import plotly.express as px
import warnings

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
start_date = "2016-01-01"
end_date = datetime.date.today()

In [3]:
url="https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate="+f'{start_date}'+"&endDate="+f'{end_date}'

In [4]:
response = requests.get(url, verify=False)
json_data = json.loads(response.text.encode("utf8"))
df = pd.DataFrame(json_data["body"]["hourlyConsumptions"]).iloc[:-1]
df["date"] = pd.to_datetime(df.date.str[:16])

In [5]:
df

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
57616,2022-07-28 16:00:00,49440.12
57617,2022-07-28 17:00:00,48091.66
57618,2022-07-28 18:00:00,45718.99
57619,2022-07-28 19:00:00,41793.88


In [6]:
fh_new = 24*7+1
date = pd.date_range(start=df.date.tail(1).iloc[0],periods=fh_new,freq="H", name = "date")
date = pd.DataFrame(date)
df_fe = pd.merge(df, date, how= "outer")

In [7]:
fh_new

169

In [8]:
date

Unnamed: 0,date
0,2022-07-28 20:00:00
1,2022-07-28 21:00:00
2,2022-07-28 22:00:00
3,2022-07-28 23:00:00
4,2022-07-29 00:00:00
5,2022-07-29 01:00:00
6,2022-07-29 02:00:00
7,2022-07-29 03:00:00
8,2022-07-29 04:00:00
9,2022-07-29 05:00:00


In [9]:
df_fe.tail(169)

Unnamed: 0,date,consumption
57620,2022-07-28 20:00:00,42830.3
57621,2022-07-28 21:00:00,
57622,2022-07-28 22:00:00,
57623,2022-07-28 23:00:00,
57624,2022-07-29 00:00:00,
57625,2022-07-29 01:00:00,
57626,2022-07-29 02:00:00,
57627,2022-07-29 03:00:00,
57628,2022-07-29 04:00:00,
57629,2022-07-29 05:00:00,


In [10]:
def rolling_features(df,fh):
    df_c=df.copy()
    rolling_windows=[fh,fh+3,fh+10,fh+15,fh+20,fh+25]
    lags=[fh,fh+5,fh+10,fh+15,fh+20,fh+30]
    for a in rolling_windows:
        df_c['rolling_mean_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).mean().shift(1)
        df_c['rolling_std_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).std().shift(1)
        df_c['rolling_min_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).min().shift(1)
        df_c['rolling_max_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).max().shift(1)
        df_c['rolling_var_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).var().shift(1)
    for l in lags:
        df_c['consumption_lag_'+str(l)]=df_c['consumption'].shift(l)
    return(df_c)


In [11]:
df_fe = rolling_features(df_fe, fh_new)

In [12]:
df_fe

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57784,2022-08-04 16:00:00,,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47469.272222,3290.601444,41793.88,50883.08,1.082806e+07,45232.583125,5235.342286,35257.64,50883.08,2.740881e+07,43755.770952,5358.226474,35257.64,50883.08,2.871059e+07,44125.601538,4876.102166,35257.64,50883.08,2.377637e+07,44897.481935,4842.842742,35257.64,50883.08,2.345313e+07,50189.41,48062.71,35959.59,41975.95,45520.68,46785.73
57785,2022-08-04 17:00:00,,45574.990000,3283.432268,41793.88,49440.12,1.078093e+07,47346.223750,3495.595519,41793.88,50883.08,1.221919e+07,45850.782667,4776.513756,35257.64,50883.08,2.281508e+07,43844.762000,5481.479190,35257.64,50883.08,3.004661e+07,44069.798400,4968.170415,35257.64,50883.08,2.468272e+07,44705.023000,4803.540949,35257.64,50883.08,2.307401e+07,49440.12,49651.82,35257.64,40097.62,46771.03,48247.74
57786,2022-08-04 18:00:00,,44608.707500,2854.801361,41793.88,48091.66,8.149891e+06,46992.491429,3617.716340,41793.88,50883.08,1.308787e+07,46607.435714,3914.285139,37402.46,50883.08,1.532163e+07,44041.980000,5558.305053,35257.64,50883.08,3.089476e+07,43957.247083,5042.362004,35257.64,50883.08,2.542541e+07,44518.387931,4776.586345,35257.64,50883.08,2.281578e+07,48091.66,48453.66,37402.46,38591.91,46540.75,49230.05
57787,2022-08-04 19:00:00,,43447.723333,2034.092134,41793.88,45718.99,4.137531e+06,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47315.510769,2999.052272,41793.88,50883.08,8.994315e+06,44344.761667,5555.877370,35257.64,50883.08,3.086777e+07,43844.920870,5124.894025,35257.64,50883.08,2.626454e+07,44341.158214,4766.148133,35257.64,50883.08,2.271617e+07,45718.99,49822.35,43485.08,37482.33,45699.87,48142.68


In [13]:
def date_features(df) :
    df_c = df.copy()
    df_c["month"] = df_c["date"].dt.month
    df_c["year"] = df_c["date"].dt.year
    df_c["hour"] = df_c["date"].dt.hour
    df_c["quarter"] = df_c["date"].dt.quarter
    df_c["dayofweek"] = df_c["date"].dt.dayofweek
    df_c["dayofyear"] = df_c["date"].dt.dayofyear
    df_c["dayofmounth"] = df_c["date"].dt.day
    df_c["weekofyear"] = df_c["date"].dt.weekofyear
    return(df_c)


In [14]:
df_fe = date_features(df_fe)

In [15]:
df_fe.head()

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2016,0,1,4,1,1,53
1,2016-01-01 01:00:00,24991.82,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,,,,,,,1,2016,1,1,4,1,1,53
2,2016-01-01 02:00:00,23532.61,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,,,,,,,1,2016,2,1,4,1,1,53
3,2016-01-01 03:00:00,22464.78,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,,,,,,,1,2016,3,1,4,1,1,53
4,2016-01-01 04:00:00,22002.91,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,,,,,,,1,2016,4,1,4,1,1,53


In [16]:
df_fe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57789 entries, 0 to 57788
Data columns (total 46 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   date                 57789 non-null  datetime64[ns]
 1   consumption          57621 non-null  float64       
 2   rolling_mean_169     57788 non-null  float64       
 3   rolling_std_169      57787 non-null  float64       
 4   rolling_min_169      57788 non-null  float64       
 5   rolling_max_169      57788 non-null  float64       
 6   rolling_var_169      57787 non-null  float64       
 7   rolling_mean_172     57788 non-null  float64       
 8   rolling_std_172      57787 non-null  float64       
 9   rolling_min_172      57788 non-null  float64       
 10  rolling_max_172      57788 non-null  float64       
 11  rolling_var_172      57787 non-null  float64       
 12  rolling_mean_179     57788 non-null  float64       
 13  rolling_std_179      57787 non-

In [17]:
fh_new

169

In [18]:
df_fe[fh_new+30:]

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
199,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
200,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
201,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
202,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
203,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57784,2022-08-04 16:00:00,,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47469.272222,3290.601444,41793.88,50883.08,1.082806e+07,45232.583125,5235.342286,35257.64,50883.08,2.740881e+07,43755.770952,5358.226474,35257.64,50883.08,2.871059e+07,44125.601538,4876.102166,35257.64,50883.08,2.377637e+07,44897.481935,4842.842742,35257.64,50883.08,2.345313e+07,50189.41,48062.71,35959.59,41975.95,45520.68,46785.73,8,2022,16,3,3,216,4,31
57785,2022-08-04 17:00:00,,45574.990000,3283.432268,41793.88,49440.12,1.078093e+07,47346.223750,3495.595519,41793.88,50883.08,1.221919e+07,45850.782667,4776.513756,35257.64,50883.08,2.281508e+07,43844.762000,5481.479190,35257.64,50883.08,3.004661e+07,44069.798400,4968.170415,35257.64,50883.08,2.468272e+07,44705.023000,4803.540949,35257.64,50883.08,2.307401e+07,49440.12,49651.82,35257.64,40097.62,46771.03,48247.74,8,2022,17,3,3,216,4,31
57786,2022-08-04 18:00:00,,44608.707500,2854.801361,41793.88,48091.66,8.149891e+06,46992.491429,3617.716340,41793.88,50883.08,1.308787e+07,46607.435714,3914.285139,37402.46,50883.08,1.532163e+07,44041.980000,5558.305053,35257.64,50883.08,3.089476e+07,43957.247083,5042.362004,35257.64,50883.08,2.542541e+07,44518.387931,4776.586345,35257.64,50883.08,2.281578e+07,48091.66,48453.66,37402.46,38591.91,46540.75,49230.05,8,2022,18,3,3,216,4,31
57787,2022-08-04 19:00:00,,43447.723333,2034.092134,41793.88,45718.99,4.137531e+06,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47315.510769,2999.052272,41793.88,50883.08,8.994315e+06,44344.761667,5555.877370,35257.64,50883.08,3.086777e+07,43844.920870,5124.894025,35257.64,50883.08,2.626454e+07,44341.158214,4766.148133,35257.64,50883.08,2.271617e+07,45718.99,49822.35,43485.08,37482.33,45699.87,48142.68,8,2022,19,3,3,216,4,31


In [19]:
df_fe = df_fe[fh_new+30:].reset_index(drop=True)

In [20]:
df_fe.isnull().sum()

date                     0
consumption            168
rolling_mean_169         0
rolling_std_169          0
rolling_min_169          0
rolling_max_169          0
rolling_var_169          0
rolling_mean_172         0
rolling_std_172          0
rolling_min_172          0
rolling_max_172          0
rolling_var_172          0
rolling_mean_179         0
rolling_std_179          0
rolling_min_179          0
rolling_max_179          0
rolling_var_179          0
rolling_mean_184         0
rolling_std_184          0
rolling_min_184          0
rolling_max_184          0
rolling_var_184          0
rolling_mean_189         0
rolling_std_189          0
rolling_min_189          0
rolling_max_189          0
rolling_var_189          0
rolling_mean_194         0
rolling_std_194          0
rolling_min_194          0
rolling_max_194          0
rolling_var_194          0
consumption_lag_169      0
consumption_lag_174      0
consumption_lag_179      0
consumption_lag_184      0
consumption_lag_189      0
c

In [21]:
df_fe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57590 entries, 0 to 57589
Data columns (total 46 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   date                 57590 non-null  datetime64[ns]
 1   consumption          57422 non-null  float64       
 2   rolling_mean_169     57590 non-null  float64       
 3   rolling_std_169      57590 non-null  float64       
 4   rolling_min_169      57590 non-null  float64       
 5   rolling_max_169      57590 non-null  float64       
 6   rolling_var_169      57590 non-null  float64       
 7   rolling_mean_172     57590 non-null  float64       
 8   rolling_std_172      57590 non-null  float64       
 9   rolling_min_172      57590 non-null  float64       
 10  rolling_max_172      57590 non-null  float64       
 11  rolling_var_172      57590 non-null  float64       
 12  rolling_mean_179     57590 non-null  float64       
 13  rolling_std_179      57590 non-

In [22]:
df_fe

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57585,2022-08-04 16:00:00,,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47469.272222,3290.601444,41793.88,50883.08,1.082806e+07,45232.583125,5235.342286,35257.64,50883.08,2.740881e+07,43755.770952,5358.226474,35257.64,50883.08,2.871059e+07,44125.601538,4876.102166,35257.64,50883.08,2.377637e+07,44897.481935,4842.842742,35257.64,50883.08,2.345313e+07,50189.41,48062.71,35959.59,41975.95,45520.68,46785.73,8,2022,16,3,3,216,4,31
57586,2022-08-04 17:00:00,,45574.990000,3283.432268,41793.88,49440.12,1.078093e+07,47346.223750,3495.595519,41793.88,50883.08,1.221919e+07,45850.782667,4776.513756,35257.64,50883.08,2.281508e+07,43844.762000,5481.479190,35257.64,50883.08,3.004661e+07,44069.798400,4968.170415,35257.64,50883.08,2.468272e+07,44705.023000,4803.540949,35257.64,50883.08,2.307401e+07,49440.12,49651.82,35257.64,40097.62,46771.03,48247.74,8,2022,17,3,3,216,4,31
57587,2022-08-04 18:00:00,,44608.707500,2854.801361,41793.88,48091.66,8.149891e+06,46992.491429,3617.716340,41793.88,50883.08,1.308787e+07,46607.435714,3914.285139,37402.46,50883.08,1.532163e+07,44041.980000,5558.305053,35257.64,50883.08,3.089476e+07,43957.247083,5042.362004,35257.64,50883.08,2.542541e+07,44518.387931,4776.586345,35257.64,50883.08,2.281578e+07,48091.66,48453.66,37402.46,38591.91,46540.75,49230.05,8,2022,18,3,3,216,4,31
57588,2022-08-04 19:00:00,,43447.723333,2034.092134,41793.88,45718.99,4.137531e+06,46344.060000,3489.062045,41793.88,50189.41,1.217355e+07,47315.510769,2999.052272,41793.88,50883.08,8.994315e+06,44344.761667,5555.877370,35257.64,50883.08,3.086777e+07,43844.920870,5124.894025,35257.64,50883.08,2.626454e+07,44341.158214,4766.148133,35257.64,50883.08,2.271617e+07,45718.99,49822.35,43485.08,37482.33,45699.87,48142.68,8,2022,19,3,3,216,4,31


In [23]:
split_date = df_fe.date.tail(fh_new).iloc[0]
historical = df_fe.loc[df_fe.date <= split_date]
y = historical[["date", "consumption"]].set_index("date")
X = historical.drop("consumption", axis=1).set_index("date")
forecast_df = df_fe.loc[df_fe.date > split_date].set_index("date").drop("consumption", axis=1)

In [24]:
forecast_df

Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2022-07-28 21:00:00,42067.615266,5228.640426,31035.25,50883.08,27338680.0,42114.701047,5196.942962,31035.25,50883.08,27008220.0,42323.210056,5200.668127,31035.25,50883.08,27046950.0,42221.719511,5217.255607,31035.25,50883.08,27219760.0,42102.272063,5206.2715,31035.25,50883.08,27105260.0,42145.656649,5147.189972,31035.25,50883.08,26493560.0,45010.1,48173.87,46284.45,34597.54,40552.98,48379.74,7,2022,21,3,3,209,28,30
2022-07-28 22:00:00,42050.100476,5239.29682,31035.25,50883.08,27450230.0,42089.770175,5201.879741,31035.25,50883.08,27059550.0,42300.955899,5206.784655,31035.25,50883.08,27110610.0,42263.381694,5200.786926,31035.25,50883.08,27048180.0,42110.512979,5218.937317,31035.25,50883.08,27237310.0,42138.056839,5159.485312,31035.25,50883.08,26620290.0,45086.03,47663.85,47498.88,34117.04,38684.14,47836.32,7,2022,22,3,3,209,28,30
2022-07-28 23:00:00,42031.921257,5249.736968,31035.25,50883.08,27559740.0,42077.239647,5214.65821,31035.25,50883.08,27192660.0,42271.589096,5206.75167,31035.25,50883.08,27110260.0,42308.141813,5179.668121,31035.25,50883.08,26828960.0,42128.835829,5226.881276,31035.25,50883.08,27320290.0,42124.611823,5169.583247,31035.25,50883.08,26724590.0,43904.04,46377.88,46305.11,36586.72,37349.29,47183.49,7,2022,23,3,3,209,28,30
2022-07-29 00:00:00,42020.643434,5263.591702,31035.25,50883.08,27705400.0,42067.615266,5228.640426,31035.25,50883.08,27338680.0,42248.671364,5212.646462,31035.25,50883.08,27171680.0,42339.751878,5176.402961,31035.25,50883.08,26795150.0,42154.532312,5229.131843,31035.25,50883.08,27343820.0,42111.685497,5180.057585,31035.25,50883.08,26833000.0,42530.89,44219.96,47531.63,42347.67,36243.21,46158.74,7,2022,0,3,4,210,29,30
2022-07-29 01:00:00,42017.55103,5279.463592,31035.25,50883.08,27872740.0,42050.100476,5239.29682,31035.25,50883.08,27450230.0,42218.483029,5212.151702,31035.25,50883.08,27166530.0,42339.707889,5190.842015,31035.25,50883.08,26944840.0,42186.485405,5225.082718,31035.25,50883.08,27301490.0,42103.511368,5192.508128,31035.25,50883.08,26962140.0,40683.37,43703.76,48668.23,45292.82,35703.41,44081.07,7,2022,1,3,4,210,29,30
2022-07-29 02:00:00,42025.68628,5294.596031,31035.25,50883.08,28032750.0,42031.921257,5249.736968,31035.25,50883.08,27559740.0,42181.415517,5204.009738,31035.25,50883.08,27081720.0,42323.210056,5200.668127,31035.25,50883.08,27046950.0,42221.719511,5217.255607,31035.25,50883.08,27219760.0,42102.272063,5206.2715,31035.25,50883.08,27105260.0,39223.69,45010.1,48173.87,46284.45,34597.54,43612.42,7,2022,2,3,4,210,29,30
2022-07-29 03:00:00,42042.876442,5306.319568,31035.25,50883.08,28157030.0,42020.643434,5263.591702,31035.25,50883.08,27705400.0,42146.777052,5198.960105,31035.25,50883.08,27029190.0,42300.955899,5206.784655,31035.25,50883.08,27110610.0,42263.381694,5200.786926,31035.25,50883.08,27048180.0,42110.512979,5218.937317,31035.25,50883.08,27237310.0,37739.01,45086.03,47663.85,47498.88,34117.04,44719.5,7,2022,3,3,4,210,29,30
2022-07-29 04:00:00,42069.443519,5311.888006,31035.25,50883.08,28216150.0,42017.55103,5279.463592,31035.25,50883.08,27872740.0,42114.701047,5196.942962,31035.25,50883.08,27008220.0,42271.589096,5206.75167,31035.25,50883.08,27110260.0,42308.141813,5179.668121,31035.25,50883.08,26828960.0,42128.835829,5226.881276,31035.25,50883.08,27320290.0,36225.51,43904.04,46377.88,46305.11,36586.72,44593.54,7,2022,4,3,4,210,29,30
2022-07-29 05:00:00,42105.741242,5308.270165,31035.25,50883.08,28177730.0,42025.68628,5294.596031,31035.25,50883.08,28032750.0,42089.770175,5201.879741,31035.25,50883.08,27059550.0,42248.671364,5212.646462,31035.25,50883.08,27171680.0,42339.751878,5176.402961,31035.25,50883.08,26795150.0,42154.532312,5229.131843,31035.25,50883.08,27343820.0,35731.49,42530.89,44219.96,47531.63,42347.67,43664.77,7,2022,5,3,4,210,29,30
2022-07-29 06:00:00,42145.580312,5300.736937,31035.25,50883.08,28097810.0,42042.876442,5306.319568,31035.25,50883.08,28157030.0,42077.239647,5214.65821,31035.25,50883.08,27192660.0,42218.483029,5212.151702,31035.25,50883.08,27166530.0,42339.707889,5190.842015,31035.25,50883.08,26944840.0,42186.485405,5225.082718,31035.25,50883.08,27301490.0,34459.18,40683.37,43703.76,48668.23,45292.82,42337.74,7,2022,6,3,4,210,29,30


In [25]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from catboost import CatBoostRegressor

In [26]:
tscv = TimeSeriesSplit(n_splits=3, test_size=fh_new*20)
score_list=[]
fold=1
unseen_preds=[]
importance=[]

for train_index, test_index in tscv.split(X,y):
    X_train,X_val = X.iloc[train_index],X.iloc[test_index]
    y_train,y_val = y.iloc[train_index],y.iloc[test_index]
    print(X_train.shape,X_val.shape)

    cat = CatBoostRegressor(iterations=1000, eval_metric="MAE", allow_writing_files=False)
    cat.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=150, verbose=50)

    forecast_predict = cat.predict(forecast_df)
    unseen_preds.append(forecast_predict)
    score = mean_absolute_error(y_val, cat.predict(X_val))
    print(f"MAE FOLD--{fold}:{score}")
    score_list.append(score)
    importance.append(cat.get_feature_importance())
    fold+=1

print("cv mean score:", np.mean(score_list))


(47282, 44) (3380, 44)
Learning rate set to 0.093287
0:	learn: 3888.6760498	test: 6571.3726175	best: 6571.3726175 (0)	total: 140ms	remaining: 2m 19s
50:	learn: 1184.6427972	test: 2614.1504299	best: 2611.4750438 (45)	total: 558ms	remaining: 10.4s
100:	learn: 992.9487338	test: 2392.7930576	best: 2392.7930576 (100)	total: 934ms	remaining: 8.31s
150:	learn: 873.4903469	test: 2389.3766177	best: 2343.4700202 (121)	total: 1.33s	remaining: 7.47s
200:	learn: 791.9224246	test: 2412.9272787	best: 2343.4700202 (121)	total: 1.72s	remaining: 6.84s
250:	learn: 733.0522206	test: 2441.4678888	best: 2343.4700202 (121)	total: 2.1s	remaining: 6.27s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 2343.47002
bestIteration = 121

Shrink model to first 122 iterations.
MAE FOLD--1:2343.4700039171635
(50662, 44) (3380, 44)
Learning rate set to 0.094304
0:	learn: 4068.1567559	test: 5253.1272474	best: 5253.1272474 (0)	total: 9.53ms	remaining: 9.52s
50:	learn: 1225.7182150	test: 1359.9086816	bes

In [27]:
forecasted=pd.DataFrame(unseen_preds[2],columns=["forecasting"]).set_index(forecast_df.index)

In [28]:
forecasted

Unnamed: 0_level_0,forecasting
date,Unnamed: 1_level_1
2022-07-28 21:00:00,46127.645095
2022-07-28 22:00:00,45224.810968
2022-07-28 23:00:00,43639.29218
2022-07-29 00:00:00,41236.07087
2022-07-29 01:00:00,39854.216027
2022-07-29 02:00:00,38891.441119
2022-07-29 03:00:00,37248.415031
2022-07-29 04:00:00,36880.636628
2022-07-29 05:00:00,35918.864575
2022-07-29 06:00:00,35392.611641


In [29]:
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=df_fe.date.iloc[-fh_new*5:],y=df_fe.consumption.iloc[-fh_new*5:],name='Tarihsel Veri',mode='lines'))
fig1.add_trace(go.Scatter(x=forecasted.index,y=forecasted['forecasting'],name='Öngörü',mode='lines'))

In [30]:
f_importance = pd.concat([pd.Series(X.columns.to_list(),name='Feature'),pd.Series(importance[2],name="Importance")],axis=1).sort_values(by='Importance',ascending=True)


In [31]:
import plotly.express as px
fig2 = px.bar(f_importance.tail(20),x='Importance',y='Feature')
fig2.show()