# Jobathon Nov 2022 Time series Model

## Table of Contents
### 1. [Read Train and Test ](#read)
### 2. [Feature Generation](#feature)
### 3. [Train and Validation Split](#split)
### 4. [Model Evaluation using Facebook Prophet](#model_eval_fbprophet)
### 4. [Model Evaluation using Thyme Boost](#model_eval_thyme)
### 4. [Model Evaluation using Unobserved Components](#model_eval)
### 5. [Model Finalization for Test Prediction](#model_final)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
pd.options.display.max_columns=500
pd.options.display.max_rows=500

In [3]:
from pandas.tseries.holiday import *

In [4]:
# !pip install pmdarima

In [5]:
!pip install ThymeBoost



In [6]:
KAGGLE=False

In [7]:
if KAGGLE:
    path = '/kaggle/input/jobathon-nov-2022/'
else:
    path = 'input/'

<a id='read'></a>
## Read Train and Test Data

In [8]:
train=pd.read_csv(path+'train.csv')
print(train.shape)

(94992, 3)


In [9]:
test=pd.read_csv(path+'test.csv')
print(test.shape)

(26304, 2)


In [10]:
train.head()

Unnamed: 0,row_id,datetime,energy
0,1,2008-03-01 00:00:00,1259.985563
1,2,2008-03-01 01:00:00,1095.5415
2,3,2008-03-01 02:00:00,1056.2475
3,4,2008-03-01 03:00:00,1034.742
4,5,2008-03-01 04:00:00,1026.3345


In [11]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94992 entries, 0 to 94991
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   row_id    94992 non-null  int64  
 1   datetime  94992 non-null  object 
 2   energy    93092 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.2+ MB


In [12]:
train['energy'].describe()
targetcol='energy'

In [13]:
train['datetime']=pd.to_datetime(train['datetime'],infer_datetime_format=True)
test['datetime']=pd.to_datetime(test['datetime'],infer_datetime_format=True)

In [14]:
train['datetime'].min(),train['datetime'].max()

(Timestamp('2008-03-01 00:00:00'), Timestamp('2018-12-31 23:00:00'))

<a id='feature'></a>
## Feature Generation

In [15]:
#create hour map based on business, non - business , sleeping hours etc.
hour_map={0:0,1:0,2:0,3:0,4:0,5:0,
         6:1,7:1,8:1,
         9:2,10:2,11:2,
         12:3,13:3,14:3,15:3,
         16:4,17:4,
         18:5,19:5,20:5,
         21:6,22:6,23:6}
 
#Monday and Sunday as group 1, Saturday as group 2, Otherdays as group 3
dayofweek_map = {0:1,6:1,
                5:2,
                1:3,2:3,3:3,4:3}

create basic date related features

In [16]:
def gen_datefeats(data):
    data['year']=data['datetime'].dt.year
    data['month']=data['datetime'].dt.month
    data['day']=data['datetime'].dt.day
    data['hour']=data['datetime'].dt.hour
    data['weekofyear']=data['datetime'].dt.isocalendar().week
    data['dayofweek']=data['datetime'].dt.dayofweek
    data['dayofweek_grp']=data['dayofweek'].replace(dayofweek_map)
    data['quarter']=data['datetime'].dt.quarter
    data['is_weekend']=data['datetime'].dt.dayofweek > 4
    data['day_part']=data['hour'].replace(hour_map)

In [17]:
gen_datefeats(train)
gen_datefeats(test)

create holiday features with special holiday denoting christmas long holidays

In [18]:
#generate holidays feature
def gen_holiday_feat(data,start,end):
    cal = USFederalHolidayCalendar()
    holiday_dates = cal.holidays(start=start, end=end)
    data['is_holiday'] = False
    mask = data['datetime'].dt.date.astype('datetime64').isin(holiday_dates)
    data.loc[mask,'is_holiday']=True   
    
    data['special_holiday']=False
    mask= ((data['datetime'].dt.month==12) & (data['datetime'].dt.day>=24))  \
           | ((data['datetime'].dt.month==1) & (data['datetime'].dt.day<3)) 
    data.loc[mask,'special_holiday']=True   
        
    return holiday_dates

In [19]:
holidays= gen_holiday_feat(train,train['datetime'].dt.date.min(),train['datetime'].dt.date.max())
print(holidays)
print(train['special_holiday'].value_counts())
train['is_holiday'].value_counts()

DatetimeIndex(['2008-05-26', '2008-07-04', '2008-09-01', '2008-10-13',
               '2008-11-11', '2008-11-27', '2008-12-25', '2009-01-01',
               '2009-01-19', '2009-02-16',
               ...
               '2018-01-01', '2018-01-15', '2018-02-19', '2018-05-28',
               '2018-07-04', '2018-09-03', '2018-10-08', '2018-11-12',
               '2018-11-22', '2018-12-25'],
              dtype='datetime64[ns]', length=107, freq=None)
False    92400
True      2592
Name: special_holiday, dtype: int64


False    92424
True      2568
Name: is_holiday, dtype: int64

In [20]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False


generate hour aggregate features month-wise, quarter-wise, week of year-wise and week day group wise

In [21]:
def gen_datetime_comb_feats(data):
    data['dayofweek_hr']=data['dayofweek'].astype('str') + '_'+data['hour'].astype('str')
    data['weekofyear_hr']=data['weekofyear'].astype('str') + '_'+data['hour'].astype('str')
    data['month_hr']=data['month'].astype('str') + '_'+data['hour'].astype('str')
    data['quarter_hr']=data['quarter'].astype('str') + '_'+data['hour'].astype('str')


def gen_mean_feats(train,test,cols,newcolname):
    grouped=train.groupby(cols)[targetcol].mean().reset_index()
    grouped.columns=cols+[newcolname]
    train=train.merge(grouped,on=cols)    
    test=test.merge(grouped,on=cols)   
    return train,test
    
def gen_mean_feats_all(train,test):
    train,test=gen_mean_feats(train,test,['month','hour'],'month_hour_mean')
    train,test=gen_mean_feats(train,test,['quarter','hour'],'quarter_hour_mean')
    train,test=gen_mean_feats(train,test,['weekofyear','hour'],'weekofyear_mean')
    train,test=gen_mean_feats(train,test,['dayofweek_grp','hour'],'dayofweek_grp_mean')  
    
    train.sort_values('datetime',inplace=True)
    train.reset_index(drop=True,inplace=True)
    test.sort_values('datetime',inplace=True)
    test.reset_index(drop=True,inplace=True)
    return train,test

In [22]:
train,test=gen_mean_feats_all(train,test)

In [23]:
gen_datetime_comb_feats(train)
gen_datetime_comb_feats(test)

In [24]:
train.head(10)

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4
5,6,2008-03-01 05:00:00,1033.7685,2008,3,1,5,9,5,2,1,True,0,False,False,1275.482187,1305.534563,1230.498917,1493.890931,5_5,9_5,3_5,1_5
6,7,2008-03-01 06:00:00,1086.78,2008,3,1,6,9,5,2,1,True,1,False,False,1337.872697,1358.662339,1295.098037,1566.537865,5_6,9_6,3_6,1_6
7,8,2008-03-01 07:00:00,1211.742,2008,3,1,7,9,5,2,1,True,1,False,False,1464.527791,1470.300187,1412.6619,1697.378282,5_7,9_7,3_7,1_7
8,9,2008-03-01 08:00:00,1293.693,2008,3,1,8,9,5,2,1,True,1,False,False,1556.391201,1543.597804,1493.338281,1781.379368,5_8,9_8,3_8,1_8
9,10,2008-03-01 09:00:00,1318.9155,2008,3,1,9,9,5,2,1,True,2,False,False,1577.528044,1589.814926,1517.786396,1804.679326,5_9,9_9,3_9,1_9


In [25]:
test.head()

Unnamed: 0,row_id,datetime,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,94993,2019-01-01 00:00:00,2019,1,1,0,1,1,3,1,False,0,1661.002682,1504.829986,1711.103516,1625.815769,1_0,1_0,1_0,1_0
1,94994,2019-01-01 01:00:00,2019,1,1,1,1,1,3,1,False,0,1543.596544,1412.376876,1592.733254,1543.548695,1_1,1_1,1_1,1_1
2,94995,2019-01-01 02:00:00,2019,1,1,2,1,1,3,1,False,0,1462.390875,1352.371501,1506.964956,1493.577402,1_2,1_2,1_2,1_2
3,94996,2019-01-01 03:00:00,2019,1,1,3,1,1,3,1,False,0,1410.076914,1315.962924,1439.720612,1465.82546,1_3,1_3,1_3,1_3
4,94997,2019-01-01 04:00:00,2019,1,1,4,1,1,3,1,False,0,1378.289974,1298.825422,1415.453093,1459.997669,1_4,1_4,1_4,1_4


In [26]:
train['year'].unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018])

In [27]:
test['datetime'].min(),test['datetime'].max()

(Timestamp('2019-01-01 00:00:00'), Timestamp('2021-12-31 23:00:00'))

In [28]:
holidays= gen_holiday_feat(test,test['datetime'].dt.date.min(),test['datetime'].dt.date.max())
print(len(holidays))
print(holidays)
print(test['special_holiday'].value_counts())
test['is_holiday'].value_counts()

31
DatetimeIndex(['2019-01-01', '2019-01-21', '2019-02-18', '2019-05-27',
               '2019-07-04', '2019-09-02', '2019-10-14', '2019-11-11',
               '2019-11-28', '2019-12-25', '2020-01-01', '2020-01-20',
               '2020-02-17', '2020-05-25', '2020-07-03', '2020-09-07',
               '2020-10-12', '2020-11-11', '2020-11-26', '2020-12-25',
               '2021-01-01', '2021-01-18', '2021-02-15', '2021-05-31',
               '2021-07-05', '2021-09-06', '2021-10-11', '2021-11-11',
               '2021-11-25', '2021-12-24', '2021-12-31'],
              dtype='datetime64[ns]', freq=None)
False    25584
True       720
Name: special_holiday, dtype: int64


False    25560
True       744
Name: is_holiday, dtype: int64

In [29]:
targetcol = 'energy'

In [30]:
# train[targetcol].fillna(train[targetcol].mean(),inplace=True)
train[targetcol].fillna(method='ffill',inplace=True)

Create Lag Features

In [31]:
def create_lag(data,lagno_list):
    res = pd.DataFrame()
    for i in lagno_list:
        shifted = data.shift(i)
        res=pd.concat([res,shifted],axis=1)

#     res=pd.concat([data.shift(i) for i in lagno_list],axis=1)
    res.columns=[f'lag_{i}' for i in lagno_list]
    return res

In [32]:
test.shape

(26304, 22)

In [33]:
train['istrain']=1
test['istrain']=0
combined = pd.concat([train,test],axis=0) 
#lag 1 year, 3 year, quarter, month,week
lag_df = create_lag(combined[targetcol],[24,168,720,2160,8760,26304])
combined=pd.concat([combined,lag_df],axis=1)
train=combined[combined['istrain']==1]
test=combined[combined['istrain']==0]

del combined,train['istrain'],test['istrain'],test[targetcol]
print(train.shape,test.shape)

(94992, 29) (26304, 28)


In [34]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0,,,,,,
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1,,,,,,
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2,,,,,,
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3,,,,,,
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4,,,,,,


In [35]:
train.columns

Index(['row_id', 'datetime', 'energy', 'year', 'month', 'day', 'hour',
       'weekofyear', 'dayofweek', 'dayofweek_grp', 'quarter', 'is_weekend',
       'day_part', 'is_holiday', 'special_holiday', 'month_hour_mean',
       'quarter_hour_mean', 'weekofyear_mean', 'dayofweek_grp_mean',
       'dayofweek_hr', 'weekofyear_hr', 'month_hr', 'quarter_hr', 'lag_24',
       'lag_168', 'lag_720', 'lag_2160', 'lag_8760', 'lag_26304'],
      dtype='object')

Fill null values in train data using previous hour values

In [36]:
cols = [col for col in train.columns if col.startswith('lag_')]
target_mean = train[targetcol].mean()
for col in cols:
    train[col].fillna(0,inplace=True)

<a id='split'></a>
## Train and Validation Split

Validation Set from 2016 to 2018 <br>
Train Set from 2008 to 2015

In [37]:
import datetime 
train_start = datetime.datetime(year=2008,month=1,day=1,hour=0)
val_start = datetime.datetime(year=2016,month=1,day=1,hour=0)
val_end = datetime.datetime(year=2018,month=12,day=31,hour=23)

X_val= train[(train['datetime']>=val_start) & (train['datetime']<=val_end)].copy()
X_train= train[(train['datetime']>=train_start) & (train['datetime']<val_start)].copy()
print(X_train.shape)
print(X_val.shape)
X_val.head()           

(68688, 29)
(26304, 29)


Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
68688,68689,2016-01-01 00:00:00,2002.247,2016,1,1,0,53,4,3,1,False,0,True,True,1661.002682,1504.829986,1874.91005,1625.815769,4_0,53_0,1_0,1_0,2045.017,2274.753,1983.0616,1878.0918,2287.2788,1479.1658
68689,68690,2016-01-01 01:00:00,1843.387,2016,1,1,1,53,4,3,1,False,0,True,True,1543.596544,1412.376876,1734.62185,1543.548695,4_1,53_1,1_1,1_1,1881.1468,2118.4592,1827.3788,1878.0918,2120.5124,1375.2627
68690,68691,2016-01-01 02:00:00,1735.1178,2016,1,1,2,53,4,3,1,False,0,True,True,1462.390875,1352.371501,1633.81205,1493.577402,4_2,53_2,1_2,1_2,1781.7982,2020.4548,1719.965,1627.704,2015.9704,1307.8261
68691,68692,2016-01-01 03:00:00,1666.197,2016,1,1,3,53,4,3,1,False,0,True,True,1410.076914,1315.962924,1578.1656,1465.82546,4_3,53_3,1_3,1_3,1719.5984,1936.5034,1648.2336,1564.5266,1939.974,1261.5295
68692,68693,2016-01-01 04:00:00,1630.3924,2016,1,1,4,53,4,3,1,False,0,True,True,1378.289974,1298.825422,1542.4391,1459.997669,4_4,53_4,1_4,1_4,1657.1542,1895.8108,1603.264,1541.553,1885.3868,1228.8682


In [38]:
val_target = X_val[targetcol]
print(X_val[targetcol].isnull().sum())

0


Create simple validation prediction baseline using train mean value 

In [39]:
from sklearn.metrics import mean_squared_error

In [40]:
#compute baseline error by predicting train energy mean as the energy for all time
val_preds_baseline = np.full(len(X_val),train[targetcol].mean())

In [41]:
#compute error score on baseline predictions
val_score = mean_squared_error(val_target,val_preds_baseline,squared=False)
print('valid score:',val_score)

valid score: 431.7300102305795


<a id='model_eval_fbprophet'></a>
## Model Evaluation using Facebook Prophet

In [42]:
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
from fbprophet.plot import plot_plotly
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [43]:
cols = ['hour','dayofweek','quarter','quarter_hr','month_hr']
exog_train = pd.get_dummies(train.set_index('datetime')[cols],columns=cols,prefix=cols)

y_train = train.set_index('datetime')[targetcol].copy()

In [44]:
train_df = train.rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
# train_df['y'] = np.log1p(train_df['y'])
train_df['cap'] = 4000
train_df['floor'] = 1000

# train_df = train_df[['ds','y']]

In [63]:
train_df.iloc[10000]

row_id                              10001
ds                    2009-04-21 16:00:00
y                               1431.2214
year                                 2009
month                                   4
day                                    21
hour                                   16
weekofyear                             17
dayofweek                               1
dayofweek_grp                           3
quarter                                 2
is_weekend                          False
day_part                                4
is_holiday                          False
special_holiday                     False
month_hour_mean               1586.411166
quarter_hour_mean             1739.058247
weekofyear_mean               1585.157356
dayofweek_grp_mean            1868.988464
dayofweek_hr                         1_16
weekofyear_hr                       17_16
month_hr                             4_16
quarter_hr                           2_16
lag_24                          11

In [64]:
%%time
# Defining the model
model_for_cv = Prophet(
                        uncertainty_samples=0,
                        growth='logistic',
                        yearly_seasonality=True,weekly_seasonality=True,daily_seasonality=True,
#                         changepoint_range=0.5,
#                        interval_width=0.95
                      )
model_for_cv.fit(train_df[:10000])

Initial log joint probability = -208.891
CPU times: user 1.68 s, sys: 47.5 ms, total: 1.72 s
Wall time: 1.71 s


<fbprophet.forecaster.Prophet at 0x7efc69f80190>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       17633.6   0.000793343       92.4568      0.5393      0.5393      126   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     147       17634.4   0.000499842       119.498   4.502e-06       0.001      217  LS failed, Hessian reset 
     199       17634.6   1.07049e-06       76.4158      0.8151      0.8151      285   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     220       17634.6   9.83134e-05       83.0171   1.065e-06       0.001      346  LS failed, Hessian reset 
     242       17634.6   9.69201e-07       90.1972           1           1      376   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


In [51]:
test_df = test.rename(columns={'datetime': 'ds'})[['ds']]
test_df['cap'] = 4000
test_df['floor'] = 1000

In [52]:
%%time
# Predicting the range
forecast = model_for_cv.predict(test_df)
forecast.tail()
# forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

CPU times: user 60.5 ms, sys: 30.8 ms, total: 91.3 ms
Wall time: 55.9 ms


Unnamed: 0,ds,trend,cap,floor,additive_terms,daily,weekly,yearly,multiplicative_terms,yhat
26299,2021-12-31 19:00:00,3644.972754,4000,1000,358.532401,88.818624,32.116984,237.596792,0.0,4003.505154
26300,2021-12-31 20:00:00,3644.983326,4000,1000,380.999331,119.011343,32.068574,229.919414,0.0,4025.982657
26301,2021-12-31 21:00:00,3644.993897,4000,1000,367.975659,113.703577,32.042661,222.229421,0.0,4012.969557
26302,2021-12-31 22:00:00,3645.004469,4000,1000,316.697142,70.136654,32.033376,214.527112,0.0,3961.701611
26303,2021-12-31 23:00:00,3645.01504,4000,1000,242.798077,3.952142,32.03315,206.812785,0.0,3887.813116


In [71]:
# preds = np.expm1(forecast['yhat'])
# pd.Series(preds).describe()

In [68]:
%%time
cutoffs = [
        pd.Timestamp('2008-12-15 08:00:00'),
        pd.Timestamp('2009-01-25 08:00:00'),
#             pd.Timestamp('2016-01-01 00:00:00'),
        ]
print(cutoffs)
df_cv = cross_validation(model_for_cv,
                         horizon="2400 hours", #26304
                         cutoffs=cutoffs,parallel="processes")

INFO:fbprophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7efc90ca90a0>


[Timestamp('2008-12-15 08:00:00'), Timestamp('2009-01-25 08:00:00')]
Initial log joint probability = -255.393
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -251.456
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       12216.1     0.0600529        740.57      0.1604      0.4406      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       12241.1    0.00389637       927.011           1           1      238   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      82       13927.6   5.12413e-05       101.513    4.85e-07       0.001      154  LS failed, Hessian reset 
      99       13927.6   4.06303e-07       101.102      0.4077           1      177   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     102       13

In [69]:
df_cv.tail()

Unnamed: 0,ds,yhat,y,cutoff
4466,2009-04-21 11:00:00,1318.62286,1502.7516,2009-01-25 08:00:00
4467,2009-04-21 12:00:00,1328.177559,1478.8494,2009-01-25 08:00:00
4468,2009-04-21 13:00:00,1335.842079,1466.0604,2009-01-25 08:00:00
4469,2009-04-21 14:00:00,1341.48504,1450.008,2009-01-25 08:00:00
4470,2009-04-21 15:00:00,1343.061195,1429.3692,2009-01-25 08:00:00


In [70]:
#calculating root mean squared error
# preds = np.expm1(df_cv['yhat'])
# actuals = np.expm1(df_cv['y'])

preds = df_cv['yhat']
actuals = df_cv['y']

rmse_score = mean_squared_error(actuals,preds,squared=False)
print('valid score:',rmse_score)

valid score: 5067.370978493918


In [58]:
print(pd.Series(preds).describe())
print(X_val[targetcol].describe())

count     239.000000
mean     1188.400679
std       129.577484
min       845.870099
25%      1102.059132
50%      1195.139294
75%      1296.118975
max      1429.682700
Name: yhat, dtype: float64
count    26304.000000
mean      1972.675229
std        337.270208
min       1234.220000
25%       1727.597800
50%       1937.152800
75%       2189.560800
max       3374.399600
Name: energy, dtype: float64


In [88]:
from multiprocessing import cpu_count
from joblib import Parallel, delayed
from multiprocessing import cpu_count

In [129]:
from sklearn.model_selection import ParameterGrid
params_grid = {
            'changepoint_prior_scale':[0.1,0.2,0.3,0.4,0.5,0.6],
              'changepoint_range' : [0.5,0.6,0.7,0.8,0.9],
              'n_changepoints' : [50,100]
              }
grid = ParameterGrid(params_grid)
cnt = 0
for p in grid:
    cnt = cnt+1

print('Total Possible Models',cnt)

Total Possible Models 60


In [130]:
grid[0]

{'n_changepoints': 50,
 'changepoint_range': 0.5,
 'changepoint_prior_scale': 0.1}

In [131]:
train.iloc[68688]

row_id                              68689
datetime              2016-01-01 00:00:00
energy                           2002.247
year                                 2016
month                                   1
day                                     1
hour                                    0
weekofyear                             53
dayofweek                               4
dayofweek_grp                           3
quarter                                 1
is_weekend                          False
day_part                                0
is_holiday                           True
special_holiday                      True
month_hour_mean               1661.002682
quarter_hour_mean             1504.829986
weekofyear_mean                1874.91005
dayofweek_grp_mean            1625.815769
dayofweek_hr                          4_0
weekofyear_hr                        53_0
month_hr                              1_0
quarter_hr                            1_0
lag_24                           2

In [133]:
def tune_prophet_params(param):
    val_start   = 68688
    val_period  = 26304
    print(param)
    np.random.seed(0)
    train_model =Prophet(uncertainty_samples=0,
                        changepoint_prior_scale = param['changepoint_prior_scale'],
#                              n_changepoints = param['n_changepoints'],
                         changepoint_range = param['changepoint_range'],
                         weekly_seasonality=True,
                         daily_seasonality = True,
                         yearly_seasonality = True,
                         interval_width=0.95)
    train_model.fit(train_df[:val_start])
    future_df = train_model.make_future_dataframe(periods=val_period, freq='H',include_history = False)
#     future_df['cap']=4000
#     future_df['floor']=1000
    train_forecast = train_model.predict(future_df)
    val=train_forecast[['ds','yhat']]
    Actual = train_df[val_start:val_start+val_period]
    RMSE = mean_squared_error(Actual['y'],abs(val['yhat']),squared=False)
    print('RMSE------------------------------------',RMSE)
    model_param = {'RMSE':RMSE}
    model_param.update(param)

    return model_param

In [123]:
%%time
# param={'changepoint_range':0.8, 'changepoint_prior_scale': 0.05}

model_param = tune_prophet_params(grid[0])
model_param

{'changepoint_range': 0.5, 'changepoint_prior_scale': 0.1}
Initial log joint probability = -667.226
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        137698    0.00368729       1481.13           1           1      117   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        137827    0.00183986       727.152           1           1      227   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        137872     0.0376305       1338.71      0.4878           1      338   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        137907     0.0245073        1071.1           1           1      444   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        137934   0.000700129       121.873           1           1      552   
    I

{'RMSE': 357.5171105067675,
 'changepoint_range': 0.5,
 'changepoint_prior_scale': 0.1}

In [134]:
%%time
print("Number of jobs: ",int(cpu_count()))

# Use multiple CPUs (Multi Processing)
model_params = Parallel(n_jobs=int(cpu_count()), prefer='processes')(
    delayed(tune_prophet_params)(param=param) 
    for param in grid)

Number of jobs:  8
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter 

     599        166086    0.00690569       8464.77           1           1      692   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        165357      0.014517       1415.48           1           1      691   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        166086    0.00690569       8464.77           1           1      692   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        165357      0.014517       1415.48           1           1      691   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        166009    0.00619642       9345.42      0.2503     0.02503      702   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        170297     0.0038262       4894.78           1           1      696   
    Iter      log 

    1099        166108    0.00242692       453.923           1           1     1274   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        166108    0.00242692       453.923           1           1     1274   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        165634     0.0087123       859.648           1           1     1344   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        166312     0.0439622       13418.3           1           1     1353   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        165634     0.0087123       859.648           1           1     1344   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        166312     0.0439622       13418.3           1           1     1353   
    Iter      log 

    1699        166180   0.000549122       251.561           1           1     1954   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        171312    0.00215656       779.353           1           1     1951   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        171312    0.00215656       779.353           1           1     1951   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        166180   0.000549122       251.561           1           1     1954   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1799        165744    0.00719322       1981.49           1           1     2029   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1799        166546   0.000732837       1191.32      0.8911      0.8911     2037   
    Iter      log 

    2299        165762     0.0022818       430.878           1           1     2594   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166580    0.00233154       169.502           1           1     2600   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        171348    0.00210531        594.34           1           1     2638   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166211   0.000808182       1265.88      0.2154      0.2154     2639   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        171348    0.00210531        594.34           1           1     2638   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166211   0.000808182       1265.88      0.2154      0.2154     2639   
    Iter      log 

      99        165270    0.00356304       7260.93           1           1      125   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        168946    0.00812042       3870.71           1           1      358   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        168946    0.00812042       3870.71           1           1      358   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2799        166221   0.000524766       189.429           1           1     3216   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2799        166221   0.000524766       189.429           1           1     3216   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        165188    0.00371512       3980.56           1           1      356   
    Iter      log 

     799        165522     0.0090565       1232.73       1.647      0.1647      916   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     799        165522     0.0090565       1232.73       1.647      0.1647      916   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     699        166209     0.0415249       6538.26           1           1      820   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     699        166209     0.0415249       6538.26           1           1      820   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        165959    0.00144529       1147.83           1           1      588   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        169871    0.00131248       3492.99           1           1     1039   
    Iter      log 

    1399        170046    0.00952699        4330.7      0.3028           1     1610   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        170046    0.00952699        4330.7      0.3028           1     1610   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        165680    0.00114684       1460.91           1           1     1602   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        165680    0.00114684       1460.91           1           1     1602   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1299        166553   0.000438278       942.786           1           1     1510   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        166212   0.000588791       650.931      0.8375      0.8375     1255   
    Iter      log 

    1799        166647    0.00274262       1077.68           1           1     2080   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        170145    0.00417813       1399.52           1           1     2280   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        166305   0.000438873       656.499           1           1     1814   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        170145    0.00417813       1399.52           1           1     2280   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        165917     0.0134019         10466      0.2846           1     2269   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        165917     0.0134019         10466      0.2846           1     2269   
    Iter      log 

    2599        170168     0.0038236       1084.33           1           1     2945   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        166355     0.0520781       1650.39           1           1     2503   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2399        166806     0.0185059       2463.92           1           1     2768   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2399        166806     0.0185059       2463.92           1           1     2768   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        170168     0.0038236       1084.33           1           1     2945   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        166355     0.0520781       1650.39           1           1     2503   
    Iter      log 

    2899        166851   0.000117618       360.977       0.743       0.743     3342   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2899        166851   0.000117618       360.977       0.743       0.743     3342   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3099        166020    0.00673876       953.571           1           1     3509   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        169030    0.00375657       3291.87           1           1      349   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3099        166020    0.00673876       953.571           1           1     3509   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2799        166381    0.00018754       62.6846       0.794       0.794     3185   
    Iter      log 

     299        169047    0.00976188       11600.3           1           1      350   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     799        170897     0.0053168       1877.96           1           1      916   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3481        166884   0.000227557       71.9647           1           1     3997   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    3481        166884   0.000227557       71.9647           1           1     3997   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
      99        165006    0.00285199       3486.69           1           1      126   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||

     499        166155    0.00754104       1389.69           1           1      583   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        170115     0.0161642       1434.87      0.7161     0.07161     1037   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        165469   0.000877138       521.239           1           1      701   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        170115     0.0161642       1434.87      0.7161     0.07161     1037   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        171445   0.000486437       684.799           1           1     1584   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        166232    0.00464208        2063.1           1           1      701   
    Iter      log 

    1099        166596      0.123303       3482.04           1           1     1276   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        170294    0.00437589       905.314           1           1     1710   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        166596      0.123303       3482.04           1           1     1276   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        170294    0.00437589       905.314           1           1     1710   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        165881    0.00675644       6968.83      0.1022           1     1402   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        165881    0.00675644       6968.83      0.1022           1     1402   
    Iter      log 

    1699        166034    0.00165032       978.194      0.3954           1     1979   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        171620   0.000458171       940.888      0.6722      0.6722     2930   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2099        170362    0.00477771        927.36           1           1     2387   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        166924    0.00172441       2019.43      0.5196      0.5196     1977   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        166924    0.00172441       2019.43      0.5196      0.5196     1977   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        171620   0.000458171       940.888      0.6722      0.6722     2930   
    Iter      log 

    3099        171641    0.00153665       438.488           1           1     3517   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        170378    0.00912103       1667.66           1           1     2953   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166090    0.00130066       933.851      0.4015      0.4015     2642   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        167039   0.000472567        196.42           1           1     2670   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2699        170380     0.0040178       237.024           1           1     3063   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2399        166093   0.000443283       222.553       0.452           1     2756   
    Iter      log 

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        165810    0.00142677       1762.26           1           1      359   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2880        166117   0.000242763       75.9157           1           1     3310   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    2799        167049   0.000106521       139.981           1           1     3259   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3187        170385   2.30469e-05       137.947      0.8085      0.8085     3602   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Initial log joint probability =

     399        165275   0.000197504       808.625           1           1      461   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        170437     0.0021236       2422.79           1           1      569   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        169625      0.083564       14877.6           1           1      583   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        169625      0.083564       14877.6           1           1      583   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        166228    0.00949168       761.946           1           1     1043   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        170569     0.0380345       9935.75       3.015      0.3015      681   
    Iter      log 

    1399        166287   0.000998373       570.105           1           1     1620   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     999        165650   0.000523396       2625.41      0.7606      0.7606     1147   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     999        165650   0.000523396       2625.41      0.7606      0.7606     1147   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        170245   0.000649782        777.37           1           1     1264   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        171331    0.00348428       2067.11      0.7055      0.7055     1262   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        170245   0.000649782        777.37           1           1     1264   
    Iter      log 

    1999        166432     0.0131263       269.816           1           1     2308   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        171694     0.0509795        5249.3      0.3588           1     1943   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        165827     0.0205842       3282.83           1           1     1829   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        166432     0.0131263       269.816           1           1     2308   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        170342    0.00116004        1434.6           1           1     1950   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        165827     0.0205842       3282.83           1           1     1829   
    Iter      log 

    2499        166451    0.00042588       598.331     0.09171           1     2871   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2518        166452    0.00014249       147.427      0.2277           1     2895   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    2099        165987   0.000424874       183.133           1           1     2408   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        170403    0.00156784        211.77           1           1     2503   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        171731   0.000996602       416.239           1           1     2511   
    Iter      log prob        ||dx||      ||grad||

    2699        166062    0.00761213       2835.55       0.705       0.705     3093   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        166032    0.00722166       6027.49           1           1      588   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2799        170435    0.00219001       986.456           1           1     3174   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        165780     0.0529689       7981.48       5.182      0.5182      245   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        165344    0.00291999       6574.46      0.3765           1      132   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     599        166274    0.00609026       3776.13           1           1      710   
    Iter      log 

    1099        166494    0.00774663       2857.83           1           1     1268   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3299        166152     0.0266481       1268.42           1           1     3766   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3299        170459     0.0029113       1345.54      0.3788           1     3739   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3391        170460   0.000279293       74.7922           1           1     3844   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    1099        166494    0.00774663       2857.83           1           1     1268   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||

    1199        166304   0.000647696       683.174           1           1     1402   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1299        166332   0.000854327       1861.22        0.45        0.45     1517   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3799        166183    0.00200138       834.504      0.3024           1     4345   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3873        166184     0.0008238       42.2982      0.9658      0.9658     4432   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    1699        166772    0.00114754       5429.95       0.227           1     1943   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        166772    0.00114754       5429.95       0.227           1     1943   
    

    1099        171439    0.00247023       2679.97      0.3046           1     1265   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        169664    0.00564007       2110.18      0.1757           1      607   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     999        171281     0.0883911       32149.3      0.4138           1     1157   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1799        166395    0.00021652       882.571      0.4701      0.4701     2080   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1899        166398   0.000525481       598.808           1           1     2198   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166905   0.000668386       1108.75           1           1     2635   
    Iter      log 

    2799        167005    0.00882742       4685.54           1           1     3215   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        171652    0.00592113       2246.28           1           1     1954   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     999        170141    0.00247008       1481.88           1           1     1181   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        171621    0.00587726       3044.27           1           1     1839   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2399        166438    0.00205503       2146.25           1           1     2762   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        170160   0.000479953        284.46      0.1028      0.8903     1302   
    Iter      log 

    1599        170396    0.00239204       957.036           1           1     1858   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3399        167056   0.000247873       175.126           1           1     3890   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        171737   0.000466183       250.326           1           1     2634   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        165300    0.00139918       2662.23           1           1      461   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3399        167056   0.000247873       175.126           1           1     3890   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        171732   0.000775969       692.984           1           1     2521   
    Iter      log 

    2099        170465   0.000200363       304.914      0.3296      0.3296     2421   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        165880     0.0106867       4899.93           1           1     1046   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3964        167098   5.75295e-05       79.9522      0.9722      0.9722     4530   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    2199        170466   0.000323941       853.897      0.6106      0.6106     2540   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3964        167098   5.75295e-05       79.9522      0.9722      0.9722     4530   
Optimization terminated normally: 
  Convergence d

     499        166069    0.00741701       1128.49           1           1      582   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        165973    0.00348125       486.874           1           1     1638   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        166069    0.00741701       1128.49           1           1      582   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3299        171812    0.00158535       867.944           1           1     3787   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3399        171816    0.00374486       782.763           1           1     3902   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        165994     0.0517113       8720.31           1           1      466   
    Iter      log 

     299        168769    0.00575707       11765.8           1           1      350   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        166099   0.000837175       2985.22      0.8388      0.8388     2324   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        166264   0.000371005       761.558      0.5196           1     1033   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        166522     0.0037282       842.023           1           1     1275   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        166099   0.000837175       2985.22      0.8388      0.8388     2324   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        168769    0.00575707       11765.8           1           1      350   
    Iter      log 

    2527        166145   9.47813e-05        106.92           1           1     2919   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
     799        170856     0.0216747       16114.7           1           1      910   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        166903    0.00358882       1134.36           1           1     1828   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2527        166145   9.47813e-05        106.92           1           1     2919   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # eval

    1999        166446      0.002682       1421.02      0.2532      0.2532     2265   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2099        166458   0.000974641       740.214       0.994       0.994     2377   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        171557    0.00278716       877.783           1           1     1568   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        167095     0.0014497       940.993      0.4992     0.04992     2522   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        171583    0.00285361       2036.48           1           1     1681   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        167095     0.0014497       940.993      0.4992     0.04992     2522   
    Iter      log 

     299        165216    0.00111321       930.682       2.107      0.2107      352   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        170311    0.00336186       2351.38      0.3536      0.3536     1267   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        171756    0.00191228       1158.89           1           1     2243   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        170311    0.00336186       2351.38      0.3536      0.3536     1267   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2566        166473   0.000208535       74.1932       0.275      0.9201     2920   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    1999        171756    0.00191228       1158.89           1           1     2243   
    

     499        166089   0.000684374       1433.92           1           1      581   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        165650    0.00147698        5777.3      0.3754      0.3754     1021   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        170445    0.00798897       2365.05           1           1     1940   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        165650    0.00147698        5777.3      0.3754      0.3754     1021   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        171837   0.000450663        895.05      0.1869      0.1869     2922   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        171837   0.000450663        895.05      0.1869      0.1869     2922   
    Iter      log 

    2199        170489    0.00114194       323.468           1           1     2502   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        166466     0.0133848       3480.19           1           1     1391   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        166441   9.96114e-05       685.093      0.6692      0.6692     1280   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        165868     0.0394011         15153           1           1     1704   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        170491    0.00112478       578.014           1           1     2620   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        165868     0.0394011         15153           1           1     1704   
    Iter      log 

    1999        166029     0.0145598       721.479           1           1     2278   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3699        171898    0.00955695       1096.79           1           1     4171   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1999        166029     0.0145598       721.479           1           1     2278   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        165865   0.000695533       968.186           1           1      463   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1699        166617    0.00257092       3936.65           1           1     1960   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1799        166638    0.00718569       2230.76           1           1     2071   
    Iter      log 

     899        166207    0.00093449       479.209           1           1     1037   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        166111    0.00431328       287.882           1           1     2968   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    4299        171913   0.000261768       1184.88      0.3949      0.3949     4857   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166803    0.00873281       3223.95      0.2721      0.2721     2644   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        166111    0.00431328       287.882           1           1     2968   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2399        166866    0.00603281       3570.86           1           1     2753   
    Iter      log 

     299        168932     0.0106642       7131.61           1           1      344   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        167747     0.0237017       16933.1           1           1      123   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1499        166370    0.00172988       7762.71      0.2091      0.2091     1716   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3099        166140    0.00257699       381.301           1           1     3538   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        169780    0.00467979       37621.5       0.228       0.228      458   
    Iter      log prob        ||dx||      ||grad||       alpha  

    2199        166427    0.00565538       881.425       1.084     0.01084     2503   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        171027     0.0115268        2986.3           1           1     1035   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        171027     0.0115268        2986.3           1           1     1035   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     699        169936     0.0969558       12988.7           1           1      792   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2199        166427    0.00565538       881.425       1.084     0.01084     2503   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        166441    0.00847213        795.34           1           1     2615   
    Iter      log 

    3199        166518    0.00136199       309.821           1           1     3654   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3099        166507      0.014888       3094.22           1           1     3535   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1199        170158    0.00186284       2190.63           1           1     1350   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1599        171658    0.00642671       1866.02           1           1     1838   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    3299        166521   0.000255549       560.767           1           1     3774   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1399        170260    0.00500579       3546.43           1           1     1568   
    Iter      log 

    2499        171843    0.00412621       1589.15      0.7224      0.7224     2837   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2299        170518   0.000197581        311.83      0.5606      0.5606     2594   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2315        170518   0.000725229       78.6921           1           1     2612   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
    2199        170516    0.00131122       282.129           1           1     2482   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2699        171875   0.000333884       258.508           1           1     3057   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    2599        171870    0.00846776        2028.7      0.5181           1     2945   
    

{'changepoint_prior_scale': 0.1, 'changepoint_range': 0.5, 'n_changepoints': 50}
RMSE------------------------------------ 301.7337201310919
{'changepoint_prior_scale': 0.2, 'changepoint_range': 0.6, 'n_changepoints': 50}
RMSE------------------------------------ 354.5152706430968
{'changepoint_prior_scale': 0.3, 'changepoint_range': 0.6, 'n_changepoints': 100}
RMSE------------------------------------ 355.0159876807711
{'changepoint_prior_scale': 0.4, 'changepoint_range': 0.5, 'n_changepoints': 100}
RMSE------------------------------------ 300.5432000403724
{'changepoint_prior_scale': 0.4, 'changepoint_range': 0.9, 'n_changepoints': 100}
RMSE------------------------------------ 250.15700909796746
{'changepoint_prior_scale': 0.5, 'changepoint_range': 0.7, 'n_changepoints': 100}
RMSE------------------------------------ 344.5270530736484
{'changepoint_prior_scale': 0.6, 'changepoint_range': 0.6, 'n_changepoints': 100}
RMSE------------------------------------ 354.40097173148325
{'changepoint

In [135]:
pd.DataFrame(model_params)

Unnamed: 0,RMSE,changepoint_prior_scale,changepoint_range,n_changepoints
0,301.73372,0.1,0.5,50
1,301.73372,0.1,0.5,100
2,354.379266,0.1,0.6,50
3,354.379266,0.1,0.6,100
4,342.595585,0.1,0.7,50
5,342.595585,0.1,0.7,100
6,376.32,0.1,0.8,50
7,376.32,0.1,0.8,100
8,241.361836,0.1,0.9,50
9,241.361836,0.1,0.9,100


In [139]:
parameters = pd.DataFrame(model_params).sort_values(by=['RMSE'])
parameters = parameters.reset_index(drop=True)
parameters

Unnamed: 0,RMSE,changepoint_prior_scale,changepoint_range,n_changepoints
0,241.361836,0.1,0.9,50
1,241.361836,0.1,0.9,100
2,250.157009,0.4,0.9,100
3,250.157009,0.4,0.9,50
4,254.466901,0.5,0.9,100
5,254.466901,0.5,0.9,50
6,266.23123,0.6,0.9,100
7,266.23123,0.6,0.9,50
8,266.552385,0.2,0.9,100
9,266.552385,0.2,0.9,50


In [137]:
parameters.to_csv('tuned_parameters.csv',index=False)

Evaluation of Best Tuned Model with additional regressors

In [43]:
def add_regressors(data_prophet,data_orig,regressors):
    df_with_reg = pd.concat([data_prophet.reset_index(drop=True),
                             data_orig[regressors].head(len(data_prophet)).reset_index(drop=True)],axis=1)
    return df_with_reg

In [44]:
# cols = ['hour','dayofweek','weekofyear','quarter_hr','dayofweek_hr','month_hr']
cols = ['hour','dayofweek','quarter','quarter_hr','month_hr']
exog_train = pd.get_dummies(X_train.set_index('datetime')[cols],columns=cols,prefix=cols)
exog_test = pd.get_dummies(X_val.set_index('datetime')[cols],columns=cols,prefix=cols)

y_train = X_train.set_index('datetime')[targetcol].copy()
y_test = X_val.set_index('datetime')[targetcol].copy()

In [47]:
regressors = list(exog_train.columns)
# regressors = [col for col in exog_train.columns if col.startswith('hour_')]
X_train_df = pd.concat([exog_train,y_train],axis=1)
X_train_df = X_train_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_train_df.shape)
X_train_df.head()

(68688, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,month_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2008-03-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1259.985563
1,2008-03-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1095.5415
2,2008-03-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1056.2475
3,2008-03-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1034.742
4,2008-03-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1026.3345


In [48]:
X_val_df = pd.concat([exog_test,y_test],axis=1)
X_val_df = X_val_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_val_df.shape)
X_val_df.head()

(26304, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,month_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2016-01-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2002.247
1,2016-01-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1843.387
2,2016-01-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1735.1178
3,2016-01-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1666.197
4,2016-01-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1630.3924


In [50]:
%%time
val_start   = len(X_train_df)#10000
val_period  = len(X_val_df)#48
# val_start   = 68688
# val_period  = 26304
np.random.seed(0)
train_model =Prophet(uncertainty_samples=0,
                    changepoint_prior_scale =0.1,
                    n_changepoints = 50,
                     changepoint_range = 0.9,
                     weekly_seasonality=True,
                     daily_seasonality = True,
                     yearly_seasonality = True,
                     interval_width=0.95)
# train_w_reg = add_regressors(train_df[:val_start],X_train,regressors)

CPU times: user 3.1 ms, sys: 41.7 ms, total: 44.8 ms
Wall time: 43.3 ms


In [51]:
for col in regressors:
    train_model.add_regressor(col,prior_scale=15,standardize=False)

In [76]:
train_model.holidays_prior_scale

10.0

In [52]:
%%time
train_model.fit(X_train_df[:val_start])

Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99        178605    0.00660813       36397.1           1           1      141   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        179649     0.0045011        9403.2           1           1      250   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299        180632     0.0399178       11536.5           1           1      361   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     399        181230    0.00712607       18906.8           1           1      472   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499        181689     0.0105748       4372.66           1           1      585   
    Iter      log prob        ||dx||      ||grad||       alpha  

<fbprophet.forecaster.Prophet at 0x7ff133ca6dc0>

   181781    0.00413133       8472.83      0.3169           1      703   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     699        182115     0.0737011         13245           1           1      818   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     799        182396     0.0807707       14255.8           1           1      928   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     899        182597     0.0012427       6170.22      0.8315      0.8315     1038   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     999        182760    0.00199204       14050.6      0.7699      0.7699     1153   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    1099        182863    0.00418666       23012.7           1           1     1269   
    Iter      log prob        |

In [53]:
# future_df = train_model.make_future_dataframe(periods=val_period, freq='H',include_history = False)
# X_val_df['cap']=4000
# X_val_df['floor']=1000
# future_w_reg = add_regressors(future_df,X_val,regressors)
train_forecast = train_model.predict(X_val_df[:val_period])
RMSE = mean_squared_error(X_val_df[:val_period]['y'],abs(train_forecast['yhat']),squared=False)
# RMSE = mean_squared_error(X_val_df['y'],abs(train_forecast['yhat']),squared=False)
print('RMSE------------------------------------',RMSE)

  df['trend'] = self.predict_trend(df)


RMSE------------------------------------ 261.0355500978341


In [56]:
from fbprophet.serialize import model_to_json, model_from_json

with open('fbprophet_bestchangepoint.json', 'w') as fout:
    fout.write(model_to_json(train_model))  # Save model

with open('fbprophet_bestchangepoint.json', 'r') as fin:
    temp_model = model_from_json(fin.read())  # Load model

In [58]:
train_forecast['yhat'].describe()

count    26304.000000
mean      2119.232022
std        245.472627
min       1394.838126
25%       1944.646416
50%       2110.707326
75%       2285.002080
max       2895.013859
Name: yhat, dtype: float64

In [159]:
train_w_reg.shape

(1200, 36)

In [161]:
future_w_reg

Unnamed: 0,ds,hour,dayofweek,quarter,quarter_hr,month_hr
0,2008-04-20 00:00:00,0,4,1,1_0,1_0
1,2008-04-20 01:00:00,1,4,1,1_1,1_1
2,2008-04-20 02:00:00,2,4,1,1_2,1_2
3,2008-04-20 03:00:00,3,4,1,1_3,1_3
4,2008-04-20 04:00:00,4,4,1,1_4,1_4
5,2008-04-20 05:00:00,5,4,1,1_5,1_5
6,2008-04-20 06:00:00,6,4,1,1_6,1_6
7,2008-04-20 07:00:00,7,4,1,1_7,1_7
8,2008-04-20 08:00:00,8,4,1,1_8,1_8
9,2008-04-20 09:00:00,9,4,1,1_9,1_9


In [140]:
# preds_lower = np.expm1(df_cv['yhat_lower'])
# preds_upper = np.expm1(df_cv['yhat_upper'])
# print(pd.Series(preds_lower).describe())
# print(pd.Series(preds_upper).describe())

In [None]:
overall = performance_metrics(df_cv,metrics=['rmse','coverage'],rolling_window=1)
print(overall)

df_p = performance_metrics(df_cv,metrics=['rmse','coverage'],rolling_window=-1)
df_p_y= pd.concat([df_p,df_cv[['yhat','y']]],axis=1)
df_p_y.head()

In [None]:
df_p_y.tail(100)

In [None]:
# Defining the model
fb_model = Prophet(interval_width=0.95)
fb_model.fit(df)

In [None]:
# Having future dates
future_dates_healthcare = fb_model.make_future_dataframe(periods=12, freq='M')
print(future_dates_healthcare.shape)
future_dates_healthcare.tail()

<a id='model_eval_thyme'></a>
## Model Evaluation using Thyme Boost

In [59]:
from ThymeBoost import ThymeBoost as tb
import matplotlib.pyplot as plt

In [None]:
# exog_train = X_train.set_index('datetime')[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')
# exog_test = X_val.set_index('datetime')[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')

# y_train = X_train.set_index('datetime')[targetcol].copy()
# y_test = X_val.set_index('datetime')[targetcol].copy()

In [None]:
X_train['dayofweek_hr']=X_train['dayofweek'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['dayofweek_hr']=X_val['dayofweek'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['weekofyear_hr']=X_train['weekofyear'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['weekofyear_hr']=X_val['weekofyear'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['month_hr']=X_train['month'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['month_hr']=X_val['month'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['quarter_hr']=X_train['quarter'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['quarter_hr']=X_val['quarter'].astype('str') + '_'+X_val['hour'].astype('str')

In [60]:
# cols = ['hour','dayofweek','weekofyear','quarter_hr','dayofweek_hr','month_hr']
cols = ['hour','dayofweek','quarter','quarter_hr','month_hr']
exog_train = pd.get_dummies(X_train.set_index('datetime')[cols],columns=cols,prefix=cols)
exog_test = pd.get_dummies(X_val.set_index('datetime')[cols],columns=cols,prefix=cols)

y_train = X_train.set_index('datetime')[targetcol].copy()
y_test = X_val.set_index('datetime')[targetcol].copy()

In [61]:
print(len(exog_train.columns))
exog_train.columns

419


Index(['hour_0', 'hour_1', 'hour_2', 'hour_3', 'hour_4', 'hour_5', 'hour_6',
       'hour_7', 'hour_8', 'hour_9',
       ...
       'month_hr_9_21', 'month_hr_9_22', 'month_hr_9_23', 'month_hr_9_3',
       'month_hr_9_4', 'month_hr_9_5', 'month_hr_9_6', 'month_hr_9_7',
       'month_hr_9_8', 'month_hr_9_9'],
      dtype='object', length=419)

In [62]:
# train_dt_idx= train.set_index('datetime',drop=False)
# mask = (train_dt_idx['year'] >=2011) &  \
#         (train_dt_idx['datetime'].dt.date <=datetime.date(2014, 5, 1)) 
# no_change_points = list(y_train[mask].index)

In [63]:
# #visual checking of data. Plotting by Pandas method, drawing axes by Matplotlib
# f, ax = plt.subplots(figsize=(18,6),dpi=200);
# #upto may 2014
# mask = (train['year'] ==2009)  | (train['year'] <=2011) 
# # mask = (train['year'] >=2011) &  \
# #         (train['datetime'].dt.date <=datetime.date(2014, 5, 1)) 
# train_filt = train[mask].set_index('datetime')
# plt.suptitle('Renewable Energy consumption', fontsize=24);
# train_filt['energy'].plot(ax=ax,rot=90,ylabel='Energy');
# # xcoords = ['2011-01-01', '2012-01-01', '2013-01-01', '2014-01-01']
# # for xc in xcoords:
# #     plt.axvline(x=xc, color='black', linestyle='--')

In [64]:
# len(no_change_points)

In [65]:
boosted_model = tb.ThymeBoost(verbose=1,
                              # n_split_proposals = 1000,
                              # exclude_splits =no_change_points
                             )

In [66]:
# exog_train_2.head()

In [67]:
output = boosted_model.fit(y_train,
                           init_trend='median',
                           # ransac_trials=1000,ransac_min_samples=100,
                           trend_estimator='Linear',
                           seasonal_estimator='fourier',
                           # seasonal_period=[24,168,8766],
                           seasonal_period=[168,8766,24],
                           split_cost='mse',
                           global_cost='maicc',
                           fit_type='local',
                           exogenous_estimator='ols',
                          exogenous=exog_train
                          )

********** Round 1 **********
Using Split: None
Fitting initial trend globally with trend model:
median()
seasonal model:
fourier(10, False)
exogenous model:
ols model
cost: 755124.358768365
********** Round 2 **********
Using Split: 2012-03-20 15:00:00
Fitting local with trend model:
linear((1, None))
seasonal model:
fourier(10, False)
exogenous model:
ols model
cost: 691393.9100593573
********** Round 3 **********
Using Split: 2014-10-29 12:00:00
Fitting local with trend model:
linear((1, None))
seasonal model:
fourier(10, False)
exogenous model:
ols model
cost: 690377.9956746168
Boosting Terminated 
Using round 3


In [68]:
predicted_output = boosted_model.predict(output,
                                         forecast_horizon=26304,
                                         future_exogenous=exog_test
                                        )

In [69]:
print(predicted_output.shape)
predicted_output.head()

(26304, 6)


Unnamed: 0,predictions,predicted_trend,predicted_seasonality,predicted_exogenous,predicted_upper,predicted_lower
2016-01-01 00:00:00,2019.36868,1885.700335,17.65272,116.015625,2437.757699,1600.979661
2016-01-01 01:00:00,1906.163788,1885.707756,-19.372581,39.828613,2324.57108,1487.756495
2016-01-01 02:00:00,1826.385089,1885.715177,-46.613535,-12.716553,2244.810656,1407.959522
2016-01-01 03:00:00,1776.61678,1885.722597,-62.956403,-46.149414,2195.060621,1358.172939
2016-01-01 04:00:00,1748.848461,1885.730018,-66.942714,-69.938843,2167.310577,1330.386346


In [70]:
#calculating root mean squared error
RMSE_UC1 = mean_squared_error(y_test,predicted_output['predictions'],squared=False)
print('valid score:',RMSE_UC1)

valid score: 205.88305357086432


In [None]:
output.head()

In [None]:
#visual checking of data. Plotting by Pandas method, drawing axes by Matplotlib
f, ax = plt.subplots(figsize=(18,6),dpi=200);
plt.suptitle('Renewable Energy consumption', fontsize=24);
output['trend'].plot(ax=ax,rot=90,ylabel='Energy');
xcoords = ['2008-01-01','2009-01-01','2010-01-01', '2011-01-01', '2012-01-01', '2013-01-01', '2014-01-01',
           '2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01']
for xc in xcoords:
    plt.axvline(x=xc, color='black', linestyle='--')

In [None]:
boosted_model.plot_components(output)

In [None]:
boosted_model.plot_results(output,predicted_output)

In [None]:
residuals_val = y_test - predicted_output['predictions']
plt.plot(residuals_val)
plt.plot(y_test)
plt.plot(predicted_output['predictions'])
plt.xticks(rotation = 45) 
plt.show()

In [None]:
residuals_train = y_train - output['yhat']
plt.plot(residuals_train)
plt.plot(y_train)
plt.plot(output['yhat'])
plt.xticks(rotation = 45) 
plt.show()

In [None]:
X_train_w_errors = X_train.copy().reset_index()
X_train_w_errors['residual']=residuals_train.reset_index(drop=True)
X_train_w_errors['prediction']=output['yhat'].reset_index(drop=True)
cols = ['datetime','energy','prediction','residual']
X_train_w_errors[cols].to_csv('X_train_w_errors.csv',index=False)

X_train_w_errors.head()

In [None]:
pd.Series(residuals_train).describe()

In [None]:
mask = X_train_w_errors['residual']>300
print(X_train_w_errors[mask]['month'].value_counts()) # 6, 12
print(X_train_w_errors[mask]['weekofyear'].value_counts()) #24,51
print(X_train_w_errors[mask]['quarter'].value_counts()) 

In [None]:
mask = X_val_w_errors['residual']>300
print(X_val_w_errors[mask]['month'].value_counts()) # 6, 7
print(X_val_w_errors[mask]['weekofyear'].value_counts()) #29,23
print(X_val_w_errors[mask]['quarter'].value_counts()) 

In [None]:
actual.head()

In [None]:
preds.index

In [None]:
mask = X_train.reset_index(drop=True)['year']>=2014
actual = y_train.reset_index()[mask].set_index('datetime')
preds = output.reset_index()[mask].set_index('datetime')['yhat']
residuals_train = actual - preds
plt.plot(residuals_train)
plt.plot(actual)
plt.plot(preds)
plt.xticks(rotation = 45) 
plt.show()

In [None]:
residuals_val[0:5]

In [None]:
X_val_w_errors = X_val.copy().reset_index()
X_val_w_errors['residual']=residuals_val.reset_index(drop=True)
X_val_w_errors['prediction']=predicted_output['predictions'].reset_index(drop=True)

X_val_w_errors.head()

In [None]:
cols = ['datetime','energy','prediction','residual']
X_val_w_errors[cols].to_csv('Xval_w_errors.csv',index=False)

In [None]:
# import matplotlib.pyplot as plt
# plt.plot(trend, label='actual trend')
# plt.plot(output['trend'], label='fitted trend')
# plt.legend()
# plt.show()

All seasonal <br>
_________________ <br>
fit local with Weekly, Yearly, Daily order: 196.9 <br>
fit type Global :  207.1 <br>
fit type local :  204.1 <br>
RANSAC trend  with Weekly, Yearly, Daily order and fit type global: 201.9 <br>

Two seasonal <br>
_________________ <br>
Weekly, Yearly :  207.2 <br>
Daily, Weekly :  241.6 <br>
Daily, Yearly :  235.6 <br>

Single seasonal <br>
_________________ <br>
Daily :  241.6 <br>
Weekly : 201.77 <br>
Yearly:  1306 <br>
None: 1238 <br>

In [None]:
linear_preds_val = predicted_output['predictions']
linear_preds_train =output['yhat']

## Blend of fbprophet best predictions and ThymeBoost

In [75]:
preds_thyme = predicted_output['predictions'].values
preds_fb = train_forecast['yhat'].values

w =[0.99,0.01]
preds_comb = w[0] * preds_thyme + w[1]*preds_fb

RMSE_UC1 = mean_squared_error(y_test,preds_comb,squared=False)
print('valid score:',RMSE_UC1)

valid score: 206.20568993965966


## Residuals and dummy exogenous using Catboost|

In [None]:
from catboost import CatBoostRegressor

In [None]:
catbst_model=CatBoostRegressor(loss_function='RMSE',n_estimators=2000, verbose = 0)

In [None]:
X_train['dayofweek_hr']=X_train['dayofweek'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['dayofweek_hr']=X_val['dayofweek'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['weekofyear_hr']=X_train['weekofyear'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['weekofyear_hr']=X_val['weekofyear'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['month_hr']=X_train['month'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['month_hr']=X_val['month'].astype('str') + '_'+X_val['hour'].astype('str')

X_train['quarter_hr']=X_train['quarter'].astype('str') + '_'+X_train['hour'].astype('str')
X_val['quarter_hr']=X_val['quarter'].astype('str') + '_'+X_val['hour'].astype('str')

In [None]:
cols = ['hour','dayofweek','weekofyear','quarter_hr','dayofweek_hr','month_hr']
exog_train_2 = pd.get_dummies(X_train.set_index('datetime')[cols],columns=cols,prefix=cols)
exog_test_2 = pd.get_dummies(X_val.set_index('datetime')[cols],columns=cols,prefix=cols)

In [None]:
print(len(exog_train_2.columns))
# list(exog_train_2.columns)

In [None]:
y_train_2 = y_train - linear_preds_train
y_test_2 = y_test - linear_preds_val

In [None]:
catbst_model.fit(exog_train_2,y_train_2)
print("Fit Complete")
catbst_pred_train= catbst_model.predict(exog_train_2)+linear_preds_train
catbst_pred_test=catbst_model.predict(exog_test_2)+linear_preds_val

In [None]:
#calculating root mean squared error
RMSE_UC1 = mean_squared_error(y_test,catbst_pred_test,squared=False)
print('valid score:',RMSE_UC1)

<a id='model_eval'></a>
## Model Evaluation using Unobserved Components

In [None]:
X_val.shape
X_train.shape

In [None]:
import statsmodels.api as sm

In [None]:
def get_exog(data):
    exog_data =data[[
#                     'is_weekend',
                     'hour',
                     'month_hour_mean','quarter_hour_mean','weekofyear_mean',
                      'dayofweek_grp_mean',  
                    ]]
#     exog_data =data[['is_weekend','month','hour','is_holiday','special_holiday']]
    cols = ['hour']
    exog_data = pd.get_dummies(exog_data,prefix=cols,columns=cols)
    return exog_data.astype('int')

In [None]:
X_val.shape

In [None]:
X_train.index

In [None]:
# exog_train = get_exog(X_train)
# exog_test = get_exog(X_val)
# print(exog_train.columns)

# y_train = X_train[targetcol].copy()
# y_test = X_val[targetcol].copy()

In [None]:
exog_train = X_train[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')
exog_test = X_val[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')

y_train = X_train[targetcol].copy()
y_test = X_val[targetcol].copy()


Create UCM model with 3 seasonalities and set other parameters as determined during EDA

In [None]:
%%time
#splitting time series to train and test subsets

#Unobserved Components model definition
model_UC1 = sm.tsa.UnobservedComponents(y_train,
                                        autoregressive=2,
                                        level='lldtrend',
                                        exog=exog_train,
                                        cycle=False,
                                        irregular=False,
                                        stochastic_level = False,
                                        stochastic_trend = False,
                                        stochastic_freq_seasonal = [False,False,True],
                                        freq_seasonal=[{'period': 24, 'harmonics': 1},
                                                       {'period': 168, 'harmonics': 1},
                                                       {'period': 8766, 'harmonics': 2}
                                                      ],
                                        mle_regression=False
                                       )
#fitting model to train data
model_UC1res = model_UC1.fit( 
                            method='powell',
                             # cov_type='robust'
        )

#printing statsmodels summary for model
print(model_UC1res.summary())

print("")
#calculating mean absolute error and root mean squared error for in-sample prediction of model
print(f"In-sample mean absolute error (MAE): {'%.0f' % model_UC1res.mae}, In-sample root mean squared error (RMSE): {'%.0f' % np.sqrt(model_UC1res.mse)}")

#model forecast

Predict validation data set using the above trained model

In [None]:
forecast_UC1 = model_UC1res.forecast(steps=26304,exog=exog_test.reset_index(drop=True),
#                                      signal_only='abcd'
                                    )
#calculating root mean squared error
RMSE_UC1 = mean_squared_error(y_test,forecast_UC1,squared=False)
print('valid score:',RMSE_UC1)

In [None]:
model_UC1res.freq_seasonal[:10]

valid score: 
193.3 using method='powell'

In [None]:
import joblib

Save the model file and validation set and validation predictions

In [None]:
np.save('val_forecast_UC.npy',forecast_UC1)
joblib.dump(model_UC1res,'UC_model_val.pkl',compress=True)
X_val.to_csv('X_val.csv',index=False)

In [None]:
pd.Series(forecast_UC1.describe())

In [None]:
import matplotlib.pyplot as plt

<a id='model_final'></a>
## Model Finalization for Test Prediction

Train the model using complete data and perform test prediction using this model

In [None]:
exog_train_full = train[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')
exog_test_full = test[['month_hour_mean','quarter_hour_mean','weekofyear_mean','dayofweek_grp_mean']].astype('int')


Use the same model configuration as that of model evaluation phase, but fit the model on full train data

In [None]:
%%time
#splitting time series to train and test subsets
y_train_full = train[targetcol].copy()

#Unobserved Components model definition
model_UC_full = sm.tsa.UnobservedComponents(y_train_full,
                                        autoregressive=2,
                                        level='lldtrend',
                                        exog=exog_train_full,
                                        cycle=False,
                                        irregular=False,
                                        stochastic_level = False,
                                        stochastic_trend = False,
                                        stochastic_freq_seasonal = [False,False,True],
                                        freq_seasonal=[{'period': 24, 'harmonics': 1},
                                                       {'period': 168, 'harmonics': 1},
                                                       {'period': 8766, 'harmonics': 2}
                                                      ]
                                       )
#fitting model to train data
model_UC_full_res = model_UC_full.fit()

#printing statsmodels summary for model
print(model_UC_full_res.summary())

print("")
#calculating mean absolute error and root mean squared error for in-sample prediction of model
print(f"In-sample mean absolute error (MAE): {'%.0f' % model_UC_full_res.mae}, In-sample root mean squared error (RMSE): {'%.0f' % np.sqrt(model_UC_full_res.mse)}")

In [None]:
#Predict Test Set
forecast_UC_test = model_UC_full_res.forecast(steps=26304,exog=exog_test_full)

In [None]:
joblib.dump(model_UC_full_res,'UC_model_full.pkl',compress=True)

In [None]:
print(len(test),len(forecast_UC_test))

In [None]:
forecast_UC_test.index

In [None]:
pd.Series(forecast_UC_test).describe()

In [None]:
subm = pd.DataFrame()
subm['row_id'] = test['row_id']
subm[targetcol]=forecast_UC_test.values
subm.to_csv('submission.csv',index=False)

In [None]:
#check saved submission data
pd.read_csv('submission.csv').head()