In [1]:
import pandas as pd
import numpy as np
import datetime
import requests
import json
import plotly.graph_objects as go
import plotly.express as px
import warnings

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
start_date = "2016-01-01"
end_date = datetime.date.today()

In [3]:
url="https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate="+f'{start_date}'+"&endDate="+f'{end_date}'

In [4]:
response = requests.get(url, verify=False)
json_data = json.loads(response.text.encode("utf8"))
df = pd.DataFrame(json_data["body"]["hourlyConsumptions"]).iloc[:-1]
df["date"] = pd.to_datetime(df.date.str[:16])

In [5]:
df

Unnamed: 0,date,consumption
0,2016-01-01 00:00:00,26277.24
1,2016-01-01 01:00:00,24991.82
2,2016-01-01 02:00:00,23532.61
3,2016-01-01 03:00:00,22464.78
4,2016-01-01 04:00:00,22002.91
...,...,...
57620,2022-07-28 20:00:00,47063.07
57621,2022-07-28 21:00:00,46714.17
57622,2022-07-28 22:00:00,45874.93
57623,2022-07-28 23:00:00,43876.79


In [6]:
fh_new = 24*7+1
date = pd.date_range(start=df.date.tail(1).iloc[0],periods=fh_new,freq="H", name = "date")
date = pd.DataFrame(date)
df_fe = pd.merge(df, date, how= "outer")

In [7]:
fh_new

169

In [8]:
date.head()

Unnamed: 0,date
0,2022-07-29 00:00:00
1,2022-07-29 01:00:00
2,2022-07-29 02:00:00
3,2022-07-29 03:00:00
4,2022-07-29 04:00:00


In [9]:
df_fe.tail(169)

Unnamed: 0,date,consumption
57624,2022-07-29 00:00:00,36422.9
57625,2022-07-29 01:00:00,
57626,2022-07-29 02:00:00,
57627,2022-07-29 03:00:00,
57628,2022-07-29 04:00:00,
57629,2022-07-29 05:00:00,
57630,2022-07-29 06:00:00,
57631,2022-07-29 07:00:00,
57632,2022-07-29 08:00:00,
57633,2022-07-29 09:00:00,


In [10]:
def rolling_features(df,fh):
    df_c=df.copy()
    rolling_windows=[fh,fh+3,fh+10,fh+15,fh+20,fh+25]
    lags=[fh,fh+5,fh+10,fh+15,fh+20,fh+30]
    for a in rolling_windows:
        df_c['rolling_mean_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).mean().shift(1)
        df_c['rolling_std_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).std().shift(1)
        df_c['rolling_min_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).min().shift(1)
        df_c['rolling_max_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).max().shift(1)
        df_c['rolling_var_'+str(a)]=df_c['consumption'].rolling(a,min_periods=1).var().shift(1)
    for l in lags:
        df_c['consumption_lag_'+str(l)]=df_c['consumption'].shift(l)
    return(df_c)


In [11]:
df_fe = rolling_features(df_fe, fh_new)

In [12]:
df_fe

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2016-01-01 01:00:00,24991.82,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,26277.240000,,26277.24,26277.24,,,,,,,
2,2016-01-01 02:00:00,23532.61,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,25634.530000,908.929199,24991.82,26277.24,8.261523e+05,,,,,,
3,2016-01-01 03:00:00,22464.78,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,24933.890000,1373.231726,23532.61,26277.24,1.885765e+06,,,,,,
4,2016-01-01 04:00:00,22002.91,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,24316.612500,1667.723887,22464.78,26277.24,2.781303e+06,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57788,2022-08-04 20:00:00,,44201.633333,3975.690664,36422.90,47063.07,1.580612e+07,45479.891111,3772.488160,36422.90,49568.34,1.423167e+07,47088.155000,3462.205297,36422.90,50949.57,1.198687e+07,44883.661905,5242.972974,35257.31,50949.57,2.748877e+07,44018.105385,5129.019598,35257.31,50949.57,2.630684e+07,44360.428387,4752.883706,35257.31,50949.57,2.258990e+07,45257.94,50949.57,46720.22,37001.16,43867.43,49381.88
57789,2022-08-04 21:00:00,,43990.372000,4407.143087,36422.90,47063.07,1.942291e+07,44968.835000,3684.882130,36422.90,48242.95,1.357836e+07,47112.684000,3582.283605,36422.90,50949.57,1.283276e+07,45277.787000,5049.933736,35257.31,50949.57,2.550183e+07,44024.132400,5234.689738,35257.31,50949.57,2.740198e+07,44300.936667,4822.381689,35257.31,50949.57,2.325537e+07,47063.07,50260.35,48097.48,35958.79,41877.29,50675.15
57790,2022-08-04 22:00:00,,43222.197500,4686.498319,36422.90,46714.17,2.196327e+07,44501.104286,3714.773681,36422.90,47063.07,1.379954e+07,47042.341429,3706.745258,36422.90,50949.57,1.373996e+07,45768.260526,4673.330703,35257.31,50949.57,2.184002e+07,44113.584167,5327.722691,35257.31,50949.57,2.838463e+07,44258.624828,4902.069664,35257.31,50949.57,2.403029e+07,46714.17,49568.34,49692.52,35257.31,40096.11,50121.34
57791,2022-08-04 23:00:00,,42058.206667,4981.531068,36422.90,45874.93,2.481565e+07,44201.633333,3975.690664,36422.90,47063.07,1.580612e+07,46838.481538,3775.532785,36422.90,50949.57,1.425465e+07,46352.202222,4032.970692,36422.90,50949.57,1.626485e+07,44288.256957,5376.737041,35257.31,50949.57,2.890930e+07,44168.640357,4967.573017,35257.31,50949.57,2.467678e+07,45874.93,48242.95,48492.19,37406.37,38591.42,49485.83


In [13]:
def date_features(df) :
    df_c = df.copy()
    df_c["month"] = df_c["date"].dt.month
    df_c["year"] = df_c["date"].dt.year
    df_c["hour"] = df_c["date"].dt.hour
    df_c["quarter"] = df_c["date"].dt.quarter
    df_c["dayofweek"] = df_c["date"].dt.dayofweek
    df_c["dayofyear"] = df_c["date"].dt.dayofyear
    df_c["dayofmounth"] = df_c["date"].dt.day
    df_c["weekofyear"] = df_c["date"].dt.weekofyear
    return(df_c)


In [14]:
df_fe = date_features(df_fe)

In [15]:
df_fe.head()

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
0,2016-01-01 00:00:00,26277.24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2016,0,1,4,1,1,53
1,2016-01-01 01:00:00,24991.82,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,26277.24,,26277.24,26277.24,,,,,,,,1,2016,1,1,4,1,1,53
2,2016-01-01 02:00:00,23532.61,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,25634.53,908.929199,24991.82,26277.24,826152.3,,,,,,,1,2016,2,1,4,1,1,53
3,2016-01-01 03:00:00,22464.78,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,24933.89,1373.231726,23532.61,26277.24,1885765.0,,,,,,,1,2016,3,1,4,1,1,53
4,2016-01-01 04:00:00,22002.91,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,24316.6125,1667.723887,22464.78,26277.24,2781303.0,,,,,,,1,2016,4,1,4,1,1,53


In [16]:
df_fe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 57793 entries, 0 to 57792
Data columns (total 46 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   date                 57793 non-null  datetime64[ns]
 1   consumption          57625 non-null  float64       
 2   rolling_mean_169     57792 non-null  float64       
 3   rolling_std_169      57791 non-null  float64       
 4   rolling_min_169      57792 non-null  float64       
 5   rolling_max_169      57792 non-null  float64       
 6   rolling_var_169      57791 non-null  float64       
 7   rolling_mean_172     57792 non-null  float64       
 8   rolling_std_172      57791 non-null  float64       
 9   rolling_min_172      57792 non-null  float64       
 10  rolling_max_172      57792 non-null  float64       
 11  rolling_var_172      57791 non-null  float64       
 12  rolling_mean_179     57792 non-null  float64       
 13  rolling_std_179      57791 non-

In [17]:
fh_new

169

In [18]:
df_fe[fh_new+30:]

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
199,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,2.256508e+07,31590.084535,4845.146793,22870.89,39760.12,2.347545e+07,31425.355531,4840.827773,22870.89,39760.12,2.343361e+07,31410.212500,4777.354494,22870.89,39760.12,2.282312e+07,31340.303545,4734.432316,22870.89,39760.12,2.241485e+07,31121.748196,4867.195894,21844.16,39760.12,2.368960e+07,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
200,2016-01-09 08:00:00,30627.32,31761.095680,4723.648507,23405.11,39760.12,2.231286e+07,31612.945756,4813.176124,22870.89,39760.12,2.316666e+07,31406.848715,4851.988398,22870.89,39760.12,2.354179e+07,31396.421902,4786.868377,22870.89,39760.12,2.291411e+07,31338.337725,4736.226964,22870.89,39760.12,2.243185e+07,31146.987062,4831.993928,21844.16,39760.12,2.334817e+07,24022.70,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
201,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,2.196253e+07,31658.041279,4766.904516,23325.63,39760.12,2.272338e+07,31413.362905,4850.144040,22870.89,39760.12,2.352390e+07,31395.836848,4786.956240,22870.89,39760.12,2.291495e+07,31347.434444,4733.184446,22870.89,39760.12,2.240303e+07,31190.408608,4788.866050,21844.16,39760.12,2.293324e+07,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
202,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,2.183643e+07,31717.010000,4725.783101,23405.11,39760.12,2.233303e+07,31436.968603,4849.803255,22870.89,39760.12,2.352059e+07,31403.555489,4789.158093,22870.89,39760.12,2.293604e+07,31372.191058,4731.969302,22870.89,39760.12,2.239153e+07,31250.326598,4743.828632,22870.89,39760.12,2.250391e+07,30043.60,22870.89,28069.09,31438.11,29367.70,22464.78,1,2016,10,1,5,9,9,1
203,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,2.186839e+07,31782.055698,4690.338090,23405.11,39760.12,2.199927e+07,31474.531453,4849.610207,22870.89,39760.12,2.351872e+07,31421.787717,4795.671625,22870.89,39760.12,2.299847e+07,31400.895503,4736.197990,22870.89,39760.12,2.243157e+07,31310.626134,4713.878145,22870.89,39760.12,2.222065e+07,32102.38,23325.63,26224.60,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57788,2022-08-04 20:00:00,,44201.633333,3975.690664,36422.90,47063.07,1.580612e+07,45479.891111,3772.488160,36422.90,49568.34,1.423167e+07,47088.155000,3462.205297,36422.90,50949.57,1.198687e+07,44883.661905,5242.972974,35257.31,50949.57,2.748877e+07,44018.105385,5129.019598,35257.31,50949.57,2.630684e+07,44360.428387,4752.883706,35257.31,50949.57,2.258990e+07,45257.94,50949.57,46720.22,37001.16,43867.43,49381.88,8,2022,20,3,3,216,4,31
57789,2022-08-04 21:00:00,,43990.372000,4407.143087,36422.90,47063.07,1.942291e+07,44968.835000,3684.882130,36422.90,48242.95,1.357836e+07,47112.684000,3582.283605,36422.90,50949.57,1.283276e+07,45277.787000,5049.933736,35257.31,50949.57,2.550183e+07,44024.132400,5234.689738,35257.31,50949.57,2.740198e+07,44300.936667,4822.381689,35257.31,50949.57,2.325537e+07,47063.07,50260.35,48097.48,35958.79,41877.29,50675.15,8,2022,21,3,3,216,4,31
57790,2022-08-04 22:00:00,,43222.197500,4686.498319,36422.90,46714.17,2.196327e+07,44501.104286,3714.773681,36422.90,47063.07,1.379954e+07,47042.341429,3706.745258,36422.90,50949.57,1.373996e+07,45768.260526,4673.330703,35257.31,50949.57,2.184002e+07,44113.584167,5327.722691,35257.31,50949.57,2.838463e+07,44258.624828,4902.069664,35257.31,50949.57,2.403029e+07,46714.17,49568.34,49692.52,35257.31,40096.11,50121.34,8,2022,22,3,3,216,4,31
57791,2022-08-04 23:00:00,,42058.206667,4981.531068,36422.90,45874.93,2.481565e+07,44201.633333,3975.690664,36422.90,47063.07,1.580612e+07,46838.481538,3775.532785,36422.90,50949.57,1.425465e+07,46352.202222,4032.970692,36422.90,50949.57,1.626485e+07,44288.256957,5376.737041,35257.31,50949.57,2.890930e+07,44168.640357,4967.573017,35257.31,50949.57,2.467678e+07,45874.93,48242.95,48492.19,37406.37,38591.42,49485.83,8,2022,23,3,3,216,4,31


In [19]:
df_fe = df_fe[fh_new+30:].reset_index(drop=True)

In [20]:
df_fe.isnull().sum()

date                     0
consumption            168
rolling_mean_169         0
rolling_std_169          0
rolling_min_169          0
rolling_max_169          0
rolling_var_169          0
rolling_mean_172         0
rolling_std_172          0
rolling_min_172          0
rolling_max_172          0
rolling_var_172          0
rolling_mean_179         0
rolling_std_179          0
rolling_min_179          0
rolling_max_179          0
rolling_var_179          0
rolling_mean_184         0
rolling_std_184          0
rolling_min_184          0
rolling_max_184          0
rolling_var_184          0
rolling_mean_189         0
rolling_std_189          0
rolling_min_189          0
rolling_max_189          0
rolling_var_189          0
rolling_mean_194         0
rolling_std_194          0
rolling_min_194          0
rolling_max_194          0
rolling_var_194          0
consumption_lag_169      0
consumption_lag_174      0
consumption_lag_179      0
consumption_lag_184      0
consumption_lag_189      0
c

In [21]:
df_fe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57594 entries, 0 to 57593
Data columns (total 46 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   date                 57594 non-null  datetime64[ns]
 1   consumption          57426 non-null  float64       
 2   rolling_mean_169     57594 non-null  float64       
 3   rolling_std_169      57594 non-null  float64       
 4   rolling_min_169      57594 non-null  float64       
 5   rolling_max_169      57594 non-null  float64       
 6   rolling_var_169      57594 non-null  float64       
 7   rolling_mean_172     57594 non-null  float64       
 8   rolling_std_172      57594 non-null  float64       
 9   rolling_min_172      57594 non-null  float64       
 10  rolling_max_172      57594 non-null  float64       
 11  rolling_var_172      57594 non-null  float64       
 12  rolling_mean_179     57594 non-null  float64       
 13  rolling_std_179      57594 non-

In [22]:
df_fe.head()

Unnamed: 0,date,consumption,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
0,2016-01-09 07:00:00,26853.42,31741.874142,4750.272034,23405.11,39760.12,22565080.0,31590.084535,4845.146793,22870.89,39760.12,23475450.0,31425.355531,4840.827773,22870.89,39760.12,23433610.0,31410.2125,4777.354494,22870.89,39760.12,22823120.0,31340.303545,4734.432316,22870.89,39760.12,22414850.0,31121.748196,4867.195894,21844.16,39760.12,23689600.0,23604.98,24708.58,30166.14,29390.89,27224.96,26277.24,1,2016,7,1,5,9,9,1
1,2016-01-09 08:00:00,30627.32,31761.09568,4723.648507,23405.11,39760.12,22312860.0,31612.945756,4813.176124,22870.89,39760.12,23166660.0,31406.848715,4851.988398,22870.89,39760.12,23541790.0,31396.421902,4786.868377,22870.89,39760.12,22914110.0,31338.337725,4736.226964,22870.89,39760.12,22431850.0,31146.987062,4831.993928,21844.16,39760.12,23348170.0,24022.7,23771.58,29461.28,30734.97,28908.04,24991.82,1,2016,8,1,5,9,9,1
2,2016-01-09 09:00:00,33468.25,31800.176272,4686.419236,23405.11,39760.12,21962530.0,31658.041279,4766.904516,23325.63,39760.12,22723380.0,31413.362905,4850.14404,22870.89,39760.12,23523900.0,31395.836848,4786.95624,22870.89,39760.12,22914950.0,31347.434444,4733.184446,22870.89,39760.12,22403030.0,31190.408608,4788.86605,21844.16,39760.12,22933240.0,26930.48,22921.29,29242.83,32048.02,28789.25,23532.61,1,2016,9,1,5,9,9,1
3,2016-01-09 10:00:00,34792.84,31838.861302,4672.946364,23405.11,39760.12,21836430.0,31717.01,4725.783101,23405.11,39760.12,22333030.0,31436.968603,4849.803255,22870.89,39760.12,23520590.0,31403.555489,4789.158093,22870.89,39760.12,22936040.0,31372.191058,4731.969302,22870.89,39760.12,22391530.0,31250.326598,4743.828632,22870.89,39760.12,22503910.0,30043.6,22870.89,28069.09,31438.11,29367.7,22464.78,1,2016,10,1,5,9,9,1
4,2016-01-09 11:00:00,35382.85,31866.963314,4676.364978,23405.11,39760.12,21868390.0,31782.055698,4690.33809,23405.11,39760.12,21999270.0,31474.531453,4849.610207,22870.89,39760.12,23518720.0,31421.787717,4795.671625,22870.89,39760.12,22998470.0,31400.895503,4736.19799,22870.89,39760.12,22431570.0,31310.626134,4713.878145,22870.89,39760.12,22220650.0,32102.38,23325.63,26224.6,30728.47,29548.32,22002.91,1,2016,11,1,5,9,9,1


In [23]:
split_date = df_fe.date.tail(fh_new).iloc[0]
historical = df_fe.loc[df_fe.date <= split_date]
y = historical[["date", "consumption"]].set_index("date")
X = historical.drop("consumption", axis=1).set_index("date")
forecast_df = df_fe.loc[df_fe.date > split_date].set_index("date").drop("consumption", axis=1)

In [24]:
forecast_df.head()

Unnamed: 0_level_0,rolling_mean_169,rolling_std_169,rolling_min_169,rolling_max_169,rolling_var_169,rolling_mean_172,rolling_std_172,rolling_min_172,rolling_max_172,rolling_var_172,rolling_mean_179,rolling_std_179,rolling_min_179,rolling_max_179,rolling_var_179,rolling_mean_184,rolling_std_184,rolling_min_184,rolling_max_184,rolling_var_184,rolling_mean_189,rolling_std_189,rolling_min_189,rolling_max_189,rolling_var_189,rolling_mean_194,rolling_std_194,rolling_min_194,rolling_max_194,rolling_var_194,consumption_lag_169,consumption_lag_174,consumption_lag_179,consumption_lag_184,consumption_lag_189,consumption_lag_199,month,year,hour,quarter,dayofweek,dayofyear,dayofmounth,weekofyear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2022-07-29 01:00:00,42099.393491,5284.168044,31035.25,50949.57,27922430.0,42129.758488,5244.419428,31035.25,50949.57,27503940.0,42291.263184,5215.363165,31035.25,50949.57,27200010.0,42407.875,5192.863315,31035.25,50949.57,26965830.0,42256.091958,5228.283953,31035.25,50949.57,27334950.0,42173.034742,5197.413877,31035.25,50949.57,27013110.0,40683.37,43703.76,48668.23,45292.82,35703.41,44081.07,7,2022,1,3,4,210,29,30
2022-07-29 02:00:00,42107.822202,5298.825711,31035.25,50949.57,28077550.0,42112.470351,5254.903737,31035.25,50949.57,27614010.0,42255.437528,5207.940557,31035.25,50949.57,27122640.0,42392.110273,5202.692875,31035.25,50949.57,27068010.0,42290.946649,5220.181502,31035.25,50949.57,27250290.0,42172.181347,5210.917603,31035.25,50949.57,27153660.0,39223.69,45010.1,48173.87,46284.45,34597.54,43612.42,7,2022,2,3,4,210,29,30
2022-07-29 03:00:00,42125.092455,5310.017514,31035.25,50949.57,28196290.0,42101.931706,5268.615156,31035.25,50949.57,27758310.0,42222.000056,5203.518476,31035.25,50949.57,27076600.0,42370.723791,5208.972731,31035.25,50949.57,27133400.0,42332.087861,5203.545176,31035.25,50949.57,27076880.0,42180.614687,5223.22025,31035.25,50949.57,27282030.0,37739.01,45086.03,47663.85,47498.88,34117.04,44719.5,7,2022,3,3,4,210,29,30
2022-07-29 04:00:00,42151.514639,5315.061428,31035.25,50949.57,28249880.0,42099.393491,5284.168044,31035.25,50949.57,27922430.0,42191.080455,5202.032794,31035.25,50949.57,27061150.0,42342.391436,5209.340736,31035.25,50949.57,27137230.0,42376.254785,5182.324576,31035.25,50949.57,26856490.0,42198.920838,5230.768458,31035.25,50949.57,27360940.0,36225.51,43904.04,46377.88,46305.11,36586.72,44593.54,7,2022,4,3,4,210,29,30
2022-07-29 05:00:00,42187.429818,5310.998426,31035.25,50949.57,28206700.0,42107.822202,5298.825711,31035.25,50949.57,28077550.0,42167.155886,5207.240179,31035.25,50949.57,27115350.0,42320.376333,5215.421473,31035.25,50949.57,27200620.0,42407.549568,5178.73494,31035.25,50949.57,26819300.0,42224.445211,5232.648647,31035.25,50949.57,27380610.0,35731.49,42530.89,44219.96,47531.63,42347.67,43664.77,7,2022,5,3,4,210,29,30


In [25]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from catboost import CatBoostRegressor

In [26]:
tscv = TimeSeriesSplit(n_splits=3, test_size=fh_new*20)
score_list=[]
fold=1
unseen_preds=[]
importance=[]

for train_index, test_index in tscv.split(X,y):
    X_train,X_val = X.iloc[train_index],X.iloc[test_index]
    y_train,y_val = y.iloc[train_index],y.iloc[test_index]
    print(X_train.shape,X_val.shape)

    cat = CatBoostRegressor(iterations=1000, eval_metric="MAE", allow_writing_files=False)
    cat.fit(X_train, y_train, eval_set=[(X_val, y_val)], early_stopping_rounds=150, verbose=50)

    forecast_predict = cat.predict(forecast_df)
    unseen_preds.append(forecast_predict)
    score = mean_absolute_error(y_val, cat.predict(X_val))
    print(f"MAE FOLD--{fold}:{score}")
    score_list.append(score)
    importance.append(cat.get_feature_importance())
    fold+=1

print("cv mean score:", np.mean(score_list))


(47286, 44) (3380, 44)
Learning rate set to 0.093289
0:	learn: 3889.9571321	test: 6562.1577373	best: 6562.1577373 (0)	total: 165ms	remaining: 2m 44s
50:	learn: 1181.6559065	test: 2625.8738045	best: 2613.7968298 (49)	total: 1.04s	remaining: 19.3s
100:	learn: 984.3480120	test: 2439.2301342	best: 2435.5624397 (94)	total: 1.82s	remaining: 16.2s
150:	learn: 868.1567726	test: 2436.7045765	best: 2407.6326543 (131)	total: 2.66s	remaining: 14.9s
200:	learn: 791.2483387	test: 2490.4754372	best: 2407.6326543 (131)	total: 3.56s	remaining: 14.2s
250:	learn: 733.8948296	test: 2527.1379236	best: 2407.6326543 (131)	total: 4.36s	remaining: 13s
Stopped by overfitting detector  (150 iterations wait)

bestTest = 2407.632654
bestIteration = 131

Shrink model to first 132 iterations.
MAE FOLD--1:2407.63264744427
(50666, 44) (3380, 44)
Learning rate set to 0.094305
0:	learn: 4068.0798779	test: 5251.6608022	best: 5251.6608022 (0)	total: 30.5ms	remaining: 30.5s
50:	learn: 1227.7543155	test: 1325.5546404	best: 

In [27]:
forecasted=pd.DataFrame(unseen_preds[2],columns=["forecasting"]).set_index(forecast_df.index)

In [28]:
forecasted.head()

Unnamed: 0_level_0,forecasting
date,Unnamed: 1_level_1
2022-07-29 01:00:00,39923.157904
2022-07-29 02:00:00,38617.507242
2022-07-29 03:00:00,37016.168934
2022-07-29 04:00:00,37129.52875
2022-07-29 05:00:00,36200.26578


In [29]:
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=df_fe.date.iloc[-fh_new*5:],y=df_fe.consumption.iloc[-fh_new*5:],name='Tarihsel Veri',mode='lines'))
fig1.add_trace(go.Scatter(x=forecasted.index,y=forecasted['forecasting'],name='Öngörü',mode='lines'))

In [30]:
f_importance = pd.concat([pd.Series(X.columns.to_list(),name='Feature'),pd.Series(importance[2],name="Importance")],axis=1).sort_values(by='Importance',ascending=True)


In [31]:
import plotly.express as px
fig2 = px.bar(f_importance.tail(20),x='Importance',y='Feature')
fig2.show()