# Jobathon Nov 2022 Time series Model

## Table of Contents
### 1. [Read Train and Test ](#read)
### 2. [Feature Generation](#feature)
### 3. [Train and Validation Split](#split)
### 4. [Model Evaluation using Facebook Prophet](#model_eval_fbprophet)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
pd.options.display.max_columns=500
pd.options.display.max_rows=500

In [3]:
from pandas.tseries.holiday import *

In [4]:
# !pip install pmdarima

In [5]:
# !pip install ThymeBoost

In [6]:
KAGGLE=False

In [7]:
if KAGGLE:
    path = '/kaggle/input/jobathon-nov-2022/'
else:
    path = 'input/'

<a id='read'></a>
## Read Train and Test Data

In [8]:
train=pd.read_csv(path+'train.csv')
print(train.shape)

(94992, 3)


In [9]:
test=pd.read_csv(path+'test.csv')
print(test.shape)

(26304, 2)


In [10]:
train.head()

Unnamed: 0,row_id,datetime,energy
0,1,2008-03-01 00:00:00,1259.985563
1,2,2008-03-01 01:00:00,1095.5415
2,3,2008-03-01 02:00:00,1056.2475
3,4,2008-03-01 03:00:00,1034.742
4,5,2008-03-01 04:00:00,1026.3345


In [11]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94992 entries, 0 to 94991
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   row_id    94992 non-null  int64  
 1   datetime  94992 non-null  object 
 2   energy    93092 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.2+ MB


In [12]:
train['energy'].describe()
targetcol='energy'

In [13]:
# Parse the timestamp strings into datetime64 values so the `.dt` accessor can be used below.
# `infer_datetime_format` is intentionally not passed: it is deprecated since pandas 2.0
# (format inference is the default there) and the parsed result does not depend on it.
train['datetime'] = pd.to_datetime(train['datetime'])
test['datetime'] = pd.to_datetime(test['datetime'])

In [14]:
train['datetime'].min(),train['datetime'].max()

(Timestamp('2008-03-01 00:00:00'), Timestamp('2018-12-31 23:00:00'))

<a id='feature'></a>
## Feature Generation

In [15]:
# Map each hour of the day (0-23) to a coarse day-part code; the original note describes
# the buckets as business, non-business, sleeping hours etc.
#   0-5 -> 0, 6-8 -> 1, 9-11 -> 2, 12-15 -> 3, 16-17 -> 4, 18-20 -> 5, 21-23 -> 6
_day_part_hours = (
    (0, range(0, 6)),
    (1, range(6, 9)),
    (2, range(9, 12)),
    (3, range(12, 16)),
    (4, range(16, 18)),
    (5, range(18, 21)),
    (6, range(21, 24)),
)
hour_map = {hr: part for part, hours in _day_part_hours for hr in hours}

# Group weekdays (pandas numbering: Monday=0 ... Sunday=6):
# Monday and Sunday -> 1, Saturday -> 2, Tuesday to Friday -> 3.
_weekday_groups = (
    (1, (0, 6)),
    (2, (5,)),
    (3, (1, 2, 3, 4)),
)
dayofweek_map = {day: grp for grp, days in _weekday_groups for day in days}

Create basic date-related features.

In [16]:
def gen_datefeats(data):
    """Add calendar-derived feature columns to ``data`` in place.

    ``data`` must have a datetime-typed ``datetime`` column. The columns
    written are, in order: ``year``, ``month``, ``day``, ``hour``,
    ``weekofyear`` (ISO week number), ``dayofweek``, ``dayofweek_grp``
    (``dayofweek`` recoded through the module-level ``dayofweek_map``),
    ``quarter``, ``is_weekend`` and ``day_part`` (``hour`` recoded through the
    module-level ``hour_map``).

    Nothing is returned; the frame passed in is modified.
    """
    stamp = data['datetime'].dt

    # Plain calendar components copied straight off the timestamp.
    for part in ('year', 'month', 'day', 'hour'):
        data[part] = getattr(stamp, part)

    data['weekofyear'] = stamp.isocalendar().week

    weekday = stamp.dayofweek
    data['dayofweek'] = weekday
    data['dayofweek_grp'] = data['dayofweek'].replace(dayofweek_map)

    data['quarter'] = stamp.quarter
    # pandas numbers Monday as 0, so values above 4 are Saturday and Sunday.
    data['is_weekend'] = weekday > 4
    data['day_part'] = data['hour'].replace(hour_map)

In [17]:
gen_datefeats(train)
gen_datefeats(test)

Create holiday features, with a `special_holiday` flag marking the long Christmas / New Year holiday period.

In [18]:
#generate holidays feature
def gen_holiday_feat(data,start,end):
    """Flag US federal holidays and the year-end holiday stretch, in place.

    Two boolean columns are written to ``data``:

    * ``is_holiday`` - True when the calendar day of ``datetime`` is one of
      the ``USFederalHolidayCalendar`` holidays between ``start`` and ``end``.
    * ``special_holiday`` - True from 24 December to 31 December and on
      1-2 January, in any year.

    Parameters
    ----------
    data : DataFrame with a datetime-typed ``datetime`` column (modified in place).
    start, end : bounds of the holiday range, passed to ``cal.holidays``.

    Returns
    -------
    The ``DatetimeIndex`` of holiday dates produced by the calendar.
    """
    cal = USFederalHolidayCalendar()
    holiday_dates = cal.holidays(start=start, end=end)

    # Compare on midnight-floored timestamps. The previous
    # `.dt.date.astype('datetime64')` round trip is rejected by pandas >= 2,
    # which no longer accepts a cast to a unit-less datetime64 dtype.
    data['is_holiday'] = data['datetime'].dt.normalize().isin(holiday_dates)

    month = data['datetime'].dt.month
    day = data['datetime'].dt.day
    data['special_holiday'] = ((month == 12) & (day >= 24)) | ((month == 1) & (day < 3))

    return holiday_dates

In [19]:
holidays= gen_holiday_feat(train,train['datetime'].dt.date.min(),train['datetime'].dt.date.max())
print(holidays)
print(train['special_holiday'].value_counts())
train['is_holiday'].value_counts()

DatetimeIndex(['2008-05-26', '2008-07-04', '2008-09-01', '2008-10-13',
               '2008-11-11', '2008-11-27', '2008-12-25', '2009-01-01',
               '2009-01-19', '2009-02-16',
               ...
               '2018-01-01', '2018-01-15', '2018-02-19', '2018-05-28',
               '2018-07-04', '2018-09-03', '2018-10-08', '2018-11-12',
               '2018-11-22', '2018-12-25'],
              dtype='datetime64[ns]', length=107, freq=None)
False    92400
True      2592
Name: special_holiday, dtype: int64


False    92424
True      2568
Name: is_holiday, dtype: int64

In [20]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False


Generate hourly aggregate features (mean energy per hour) month-wise, quarter-wise, week-of-year-wise and weekday-group-wise.

In [21]:
def gen_datetime_comb_feats(data):
    """Add ``<unit>_hr`` string keys that pair a calendar unit with the hour.

    For each of ``dayofweek``, ``weekofyear``, ``month`` and ``quarter`` a new
    column ``<unit>_hr`` is written to ``data`` in place, holding
    ``"<unit value>_<hour value>"`` (for example ``"5_0"``). Returns nothing.
    """
    hour_txt = data['hour'].astype('str')
    for unit in ('dayofweek', 'weekofyear', 'month', 'quarter'):
        data[f'{unit}_hr'] = data[unit].astype('str') + '_' + hour_txt


def gen_mean_feats(train,test,cols,newcolname,target=None):
    """Attach the per-group mean of the target, computed on ``train``, to both frames.

    Parameters
    ----------
    train, test : DataFrames that both contain the columns in ``cols``;
        ``train`` must also contain the target column.
    cols : column names to group by.
    newcolname : name of the mean column that is added.
    target : name of the target column; when omitted, the module-level
        ``targetcol`` is used.

    Returns
    -------
    ``(train, test)`` - new frames with ``newcolname`` added. Every input row
    is kept: a row whose key combination does not occur in ``train`` gets NaN
    instead of being dropped, so the row counts never change.
    """
    if target is None:
        target = targetcol
    cols = list(cols)

    grouped = train.groupby(cols)[target].mean().rename(newcolname).reset_index()

    # Left joins: an inner join would silently discard rows (notably test rows)
    # whose key combination was never seen in train.
    train = train.merge(grouped, on=cols, how='left')
    test = test.merge(grouped, on=cols, how='left')
    return train,test
    
def gen_mean_feats_all(train,test):
    """Add all hour-level mean-target features, then restore time order.

    Calls ``gen_mean_feats`` once per (group keys, new column) pair listed
    below, then sorts both frames by ``datetime`` and gives them a fresh
    0..n-1 index. Returns the resulting ``(train, test)`` pair.
    """
    feature_specs = (
        (['month', 'hour'], 'month_hour_mean'),
        (['quarter', 'hour'], 'quarter_hour_mean'),
        (['weekofyear', 'hour'], 'weekofyear_mean'),
        (['dayofweek_grp', 'hour'], 'dayofweek_grp_mean'),
    )
    for keys, colname in feature_specs:
        train, test = gen_mean_feats(train, test, keys, colname)

    # The merges do not guarantee chronological order, so re-sort explicitly.
    train = train.sort_values('datetime').reset_index(drop=True)
    test = test.sort_values('datetime').reset_index(drop=True)
    return train, test

In [22]:
train,test=gen_mean_feats_all(train,test)

In [23]:
gen_datetime_comb_feats(train)
gen_datetime_comb_feats(test)

In [24]:
train.head(10)

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4
5,6,2008-03-01 05:00:00,1033.7685,2008,3,1,5,9,5,2,1,True,0,False,False,1275.482187,1305.534563,1230.498917,1493.890931,5_5,9_5,3_5,1_5
6,7,2008-03-01 06:00:00,1086.78,2008,3,1,6,9,5,2,1,True,1,False,False,1337.872697,1358.662339,1295.098037,1566.537865,5_6,9_6,3_6,1_6
7,8,2008-03-01 07:00:00,1211.742,2008,3,1,7,9,5,2,1,True,1,False,False,1464.527791,1470.300187,1412.6619,1697.378282,5_7,9_7,3_7,1_7
8,9,2008-03-01 08:00:00,1293.693,2008,3,1,8,9,5,2,1,True,1,False,False,1556.391201,1543.597804,1493.338281,1781.379368,5_8,9_8,3_8,1_8
9,10,2008-03-01 09:00:00,1318.9155,2008,3,1,9,9,5,2,1,True,2,False,False,1577.528044,1589.814926,1517.786396,1804.679326,5_9,9_9,3_9,1_9


In [25]:
test.head()

Unnamed: 0,row_id,datetime,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,94993,2019-01-01 00:00:00,2019,1,1,0,1,1,3,1,False,0,1661.002682,1504.829986,1711.103516,1625.815769,1_0,1_0,1_0,1_0
1,94994,2019-01-01 01:00:00,2019,1,1,1,1,1,3,1,False,0,1543.596544,1412.376876,1592.733254,1543.548695,1_1,1_1,1_1,1_1
2,94995,2019-01-01 02:00:00,2019,1,1,2,1,1,3,1,False,0,1462.390875,1352.371501,1506.964956,1493.577402,1_2,1_2,1_2,1_2
3,94996,2019-01-01 03:00:00,2019,1,1,3,1,1,3,1,False,0,1410.076914,1315.962924,1439.720612,1465.82546,1_3,1_3,1_3,1_3
4,94997,2019-01-01 04:00:00,2019,1,1,4,1,1,3,1,False,0,1378.289974,1298.825422,1415.453093,1459.997669,1_4,1_4,1_4,1_4


In [26]:
train['year'].unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018])

In [27]:
test['datetime'].min(),test['datetime'].max()

(Timestamp('2019-01-01 00:00:00'), Timestamp('2021-12-31 23:00:00'))

In [28]:
holidays= gen_holiday_feat(test,test['datetime'].dt.date.min(),test['datetime'].dt.date.max())
print(len(holidays))
print(holidays)
print(test['special_holiday'].value_counts())
test['is_holiday'].value_counts()

31
DatetimeIndex(['2019-01-01', '2019-01-21', '2019-02-18', '2019-05-27',
               '2019-07-04', '2019-09-02', '2019-10-14', '2019-11-11',
               '2019-11-28', '2019-12-25', '2020-01-01', '2020-01-20',
               '2020-02-17', '2020-05-25', '2020-07-03', '2020-09-07',
               '2020-10-12', '2020-11-11', '2020-11-26', '2020-12-25',
               '2021-01-01', '2021-01-18', '2021-02-15', '2021-05-31',
               '2021-07-05', '2021-09-06', '2021-10-11', '2021-11-11',
               '2021-11-25', '2021-12-24', '2021-12-31'],
              dtype='datetime64[ns]', freq=None)
False    25584
True       720
Name: special_holiday, dtype: int64


False    25560
True       744
Name: is_holiday, dtype: int64

In [29]:
targetcol = 'energy'

In [30]:
# Alternative that was considered: train[targetcol].fillna(train[targetcol].mean())
# Forward-fill missing target values from the previous row. The result is assigned back
# instead of using `fillna(method='ffill', inplace=True)` on the column selection: the
# `method` argument is deprecated, and an in-place call on a selected column does not
# update `train` when pandas copy-on-write is active.
train[targetcol] = train[targetcol].ffill()

Create Lag Features

In [31]:
def create_lag(data,lagno_list):
    """Build a frame of lagged copies of a series.

    Parameters
    ----------
    data : Series to shift; shifting is by row position (``Series.shift``).
    lagno_list : iterable of integer shifts, in the order the columns
        should appear.

    Returns
    -------
    DataFrame with one column ``lag_<i>`` per entry of ``lagno_list``, indexed
    like ``data``. An empty ``lagno_list`` gives an empty DataFrame.
    """
    # Materialise once so generators work and the order is used consistently.
    lags = list(lagno_list)
    if not lags:
        return pd.DataFrame()

    # One concat over all shifted copies instead of growing a frame inside a loop.
    res = pd.concat([data.shift(i) for i in lags], axis=1)
    res.columns = [f'lag_{i}' for i in lags]
    return res

In [32]:
test.shape

(26304, 22)

In [33]:
# Stack train on top of test so lag features can look back across the train/test boundary;
# `istrain` marks which rows came from which frame so they can be separated again.
train['istrain']=1
test['istrain']=0
combined = pd.concat([train,test],axis=0) 
# Lags are row offsets: 24, 168, 720, 2160, 8760, 26304.
# With hourly rows that is 1 day, 1 week, 30 days, 90 days, 365 days and 1096 days (~3 years).
# NOTE(review): test rows carry no target values, so for them any lag shorter than the test
# span looks back into NaN once past the first `lag` test rows -- confirm this is intended.
lag_df = create_lag(combined[targetcol],[24,168,720,2160,8760,26304])
combined=pd.concat([combined,lag_df],axis=1)
train=combined[combined['istrain']==1]
test=combined[combined['istrain']==0]

# Drop the helper flag from both frames and the target column from test.
del combined,train['istrain'],test['istrain'],test[targetcol]
print(train.shape,test.shape)

(94992, 29) (26304, 28)


In [34]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0,,,,,,
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1,,,,,,
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2,,,,,,
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3,,,,,,
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4,,,,,,


In [35]:
train.columns

Index(['row_id', 'datetime', 'energy', 'year', 'month', 'day', 'hour',
       'weekofyear', 'dayofweek', 'dayofweek_grp', 'quarter', 'is_weekend',
       'day_part', 'is_holiday', 'special_holiday', 'month_hour_mean',
       'quarter_hour_mean', 'weekofyear_mean', 'dayofweek_grp_mean',
       'dayofweek_hr', 'weekofyear_hr', 'month_hr', 'quarter_hr', 'lag_24',
       'lag_168', 'lag_720', 'lag_2160', 'lag_8760', 'lag_26304'],
      dtype='object')

Fill null values in the lag features of the train data with 0

In [36]:
# Lag columns are NaN wherever there is no earlier row to look back to
# (e.g. the first `lag` rows of the series); encode those gaps as 0.
cols = [col for col in train.columns if col.startswith('lag_')]
target_mean = train[targetcol].mean()  # computed but not used by the fill below
# Assign the filled columns back rather than calling fillna(..., inplace=True) on each
# selected column, which does not update `train` when pandas copy-on-write is active.
train[cols] = train[cols].fillna(0)

<a id='split'></a>
## Train and Validation Split

Validation Set from 2016 to 2018 <br>
Train Set from 2008 to 2015

In [37]:
import datetime 
# Boundaries of the time-based split, compared against train['datetime'] below.
train_start = datetime.datetime(year=2008,month=1,day=1,hour=0)
val_start = datetime.datetime(year=2016,month=1,day=1,hour=0)
val_end = datetime.datetime(year=2018,month=12,day=31,hour=23)

# Validation rows: val_start <= datetime <= val_end (both ends inclusive).
X_val= train[(train['datetime']>=val_start) & (train['datetime']<=val_end)].copy()
# Training rows: train_start <= datetime < val_start, so the two sets do not overlap.
X_train= train[(train['datetime']>=train_start) & (train['datetime']<val_start)].copy()
print(X_train.shape)
print(X_val.shape)
X_val.head()           

(68688, 29)
(26304, 29)


Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
68688,68689,2016-01-01 00:00:00,2002.247,2016,1,1,0,53,4,3,1,False,0,True,True,1661.002682,1504.829986,1874.91005,1625.815769,4_0,53_0,1_0,1_0,2045.017,2274.753,1983.0616,1878.0918,2287.2788,1479.1658
68689,68690,2016-01-01 01:00:00,1843.387,2016,1,1,1,53,4,3,1,False,0,True,True,1543.596544,1412.376876,1734.62185,1543.548695,4_1,53_1,1_1,1_1,1881.1468,2118.4592,1827.3788,1878.0918,2120.5124,1375.2627
68690,68691,2016-01-01 02:00:00,1735.1178,2016,1,1,2,53,4,3,1,False,0,True,True,1462.390875,1352.371501,1633.81205,1493.577402,4_2,53_2,1_2,1_2,1781.7982,2020.4548,1719.965,1627.704,2015.9704,1307.8261
68691,68692,2016-01-01 03:00:00,1666.197,2016,1,1,3,53,4,3,1,False,0,True,True,1410.076914,1315.962924,1578.1656,1465.82546,4_3,53_3,1_3,1_3,1719.5984,1936.5034,1648.2336,1564.5266,1939.974,1261.5295
68692,68693,2016-01-01 04:00:00,1630.3924,2016,1,1,4,53,4,3,1,False,0,True,True,1378.289974,1298.825422,1542.4391,1459.997669,4_4,53_4,1_4,1_4,1657.1542,1895.8108,1603.264,1541.553,1885.3868,1228.8682


In [38]:
val_target = X_val[targetcol]
print(X_val[targetcol].isnull().sum())

0


Create simple validation prediction baseline using train mean value 

In [39]:
from sklearn.metrics import mean_squared_error

In [40]:
# Baseline: predict a single constant -- the mean of train[targetcol] -- for every validation row.
# NOTE(review): `train` is the full frame that X_val was sliced from, so this mean also covers
# the validation period; X_train[targetcol].mean() may be what was intended -- confirm.
val_preds_baseline = np.full(len(X_val),train[targetcol].mean())

In [41]:
#compute error score on baseline predictions
val_score = mean_squared_error(val_target,val_preds_baseline,squared=False)
print('valid score:',val_score)

valid score: 431.7300102305795


<a id='model_eval_fbprophet'></a>
## Model Evaluation using Facebook Prophet

In [42]:
# !pip install pystan==2.19.1.1

In [43]:
# !pip install fbprophet

In [44]:
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
from fbprophet.plot import plot_plotly
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [45]:
from multiprocessing import cpu_count
from joblib import Parallel, delayed
from multiprocessing import cpu_count

In [46]:
from sklearn.model_selection import ParameterGrid

In [47]:
def tune_prophet_params(param, val_start=68688, val_period=26304):
    """Fit Prophet with one hyper-parameter setting and score it on a hold-out span.

    The model is fitted on the first ``val_start`` rows of the module-level
    ``train_df`` and asked to forecast the next ``val_period`` hourly steps;
    the forecast is compared with ``train_df['y']`` over those rows.
    NOTE(review): ``train_df`` is not defined in the visible part of this
    notebook; it is presumably a Prophet-format frame with ``ds``/``y``
    columns in time order -- confirm before running.

    Parameters
    ----------
    param : mapping with keys ``changepoint_prior_scale`` and
        ``changepoint_range`` (forwarded to ``Prophet``).
    val_start : number of leading rows used for fitting. The default equals
        the training row count printed by the split cell.
    val_period : number of rows forecast and scored. The default equals the
        validation row count printed by the split cell.

    Returns
    -------
    dict with key ``'RMSE'`` plus every entry of ``param``.
    """
    print(param)
    np.random.seed(0)
    # n_changepoints is left at Prophet's default; only the two changepoint
    # settings below are tuned.
    train_model = Prophet(uncertainty_samples=0,
                          changepoint_prior_scale=param['changepoint_prior_scale'],
                          changepoint_range=param['changepoint_range'],
                          weekly_seasonality=True,
                          daily_seasonality=True,
                          yearly_seasonality=True,
                          interval_width=0.95)
    train_model.fit(train_df[:val_start])
    future_df = train_model.make_future_dataframe(periods=val_period, freq='H', include_history=False)
    train_forecast = train_model.predict(future_df)
    val = train_forecast[['ds','yhat']]
    Actual = train_df[val_start:val_start+val_period]
    # Forecasts are scored on their absolute value, as in the original code.
    # The root is taken explicitly because the `squared=False` keyword of
    # mean_squared_error is deprecated and removed in recent scikit-learn releases.
    RMSE = np.sqrt(mean_squared_error(Actual['y'], abs(val['yhat'])))
    print('RMSE------------------------------------',RMSE)
    model_param = {'RMSE':RMSE}
    model_param.update(param)

    return model_param

Evaluation of Best Tuned Model with additional regressors

In [48]:
def add_regressors(data_prophet,data_orig,regressors):
    """Append regressor columns to a Prophet frame by row position.

    Takes the ``regressors`` columns from the first ``len(data_prophet)`` rows
    of ``data_orig`` and places them to the right of ``data_prophet``. Both
    sides get a fresh 0..n-1 index first, so rows are paired by position and
    not by their original index labels.
    """
    n_rows = len(data_prophet)
    left = data_prophet.reset_index(drop=True)
    right = data_orig[regressors].head(n_rows).reset_index(drop=True)
    return pd.concat([left, right], axis=1)

In [49]:
# Earlier column choice, kept for reference:
# cols = ['hour','dayofweek','weekofyear','quarter_hr','dayofweek_hr','month_hr']
# Categorical columns to one-hot encode as candidate exogenous regressors.
cols = ['hour','dayofweek','quarter','quarter_hr','month_hr']
# NOTE(review): train and validation are encoded separately, so the dummy columns only line up
# when both splits contain the same set of category values -- verify if the split changes.
exog_train = pd.get_dummies(X_train.set_index('datetime')[cols],columns=cols,prefix=cols)
exog_test = pd.get_dummies(X_val.set_index('datetime')[cols],columns=cols,prefix=cols)

# Target series for each split, indexed by timestamp like the dummy frames above.
y_train = X_train.set_index('datetime')[targetcol].copy()
y_test = X_val.set_index('datetime')[targetcol].copy()

In [50]:
# regressors = list(exog_train.columns)
regressors = [col for col in exog_train.columns if col.startswith('hour_')]
X_train_df = pd.concat([exog_train,y_train],axis=1)
X_train_df = X_train_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_train_df.shape)
X_train_df.head()

(68688, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_
hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,mont
h_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2008-03-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1259.985563
1,2008-03-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1095.5415
2,2008-03-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1056.2475
3,2008-03-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1034.742
4,2008-03-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1026.3345


In [51]:
X_val_df = pd.concat([exog_test,y_test],axis=1)
X_val_df = X_val_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_val_df.shape)
X_val_df.head()

(26304, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_
hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,mont
h_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2016-01-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2002.247
1,2016-01-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1843.387
2,2016-01-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1735.1178
3,2016-01-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1666.197
4,2016-01-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1630.3924


In [52]:
import random
# Seed handed to np.random.seed() before each model fit in the cells below.
seed = 100

In [95]:
%%time
# Fit a single Prophet model on the complete training frame.

# Reseed NumPy's global RNG before building the model.
np.random.seed(seed)

# Split sizes. By default the whole train / validation frames are used.
# To work on a different subset of train and validation, slice
# X_train_df[:val_start] and X_val_df[:val_period] with other values, e.g.
#   val_start  = 68688
#   val_period = 26304
val_start = len(X_train_df)   # alternative noted in the original cell: 10000
val_period = len(X_val_df)    # alternative noted in the original cell: 48

train_model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
    changepoint_prior_scale=0.01,
    # NOTE(review): a range of 0.0 triggers Prophet's "n_changepoints greater
    # than number of observations. Using -1." log message on fit.
    changepoint_range=0.0,
    uncertainty_samples=0,
    # NOTE(review): presumably without effect while uncertainty_samples=0 - confirm.
    interval_width=0.95,
)

# Optional extra regressors (currently disabled):
# for col in regressors:
#     train_model.add_regressor(col,
#                               prior_scale=10,#100
#                               standardize=False
#                              )

train_model.fit(X_train_df)

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -366.669
CPU times: user 5.15 s, sys: 78.2 ms, total: 5.23 s
Wall time: 5.18 s


<fbprophet.forecaster.Prophet at 0x7fe4a6906160>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      30        161107   0.000189219        1359.2   3.416e-08       0.001       95  LS failed, Hessian reset 
      47        161109   5.98541e-05        443.65   1.029e-07       0.001      159  LS failed, Hessian reset 
      60        161109   5.39922e-05       125.347           1           1      180   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance


In [96]:
# Predictions: score the full-data model on the validation and train frames.
#
# RMSE is computed as sqrt(MSE) rather than via `squared=False`: that keyword
# was deprecated and later removed from scikit-learn's mean_squared_error,
# while sqrt(MSE) yields the same value on every version.
#
# NOTE(review): the metric pairs rows by position, so this presumably relies on
# X_val_df / X_train_df being in the same row order as the returned forecast
# frames - confirm.

# Predict validation set
val_forecast_orig = train_model.predict(X_val_df)
RMSE = np.sqrt(mean_squared_error(X_val_df['y'], val_forecast_orig['yhat']))
print('Validatioin RMSE: ',RMSE)

# Predict train set (RMSE is rebound: after this cell it holds the train score)
train_forecast_orig = train_model.predict(X_train_df)
RMSE = np.sqrt(mean_squared_error(X_train_df['y'], train_forecast_orig['yhat']))
print('Train RMSE: ',RMSE)

Validatioin RMSE:  224.36625209548467
Train RMSE:  178.93350408024284


In [97]:
# Per-hour RMSE of the full-data model on the training set.
# Rows belonging to hour i are picked through the one-hot `hour_<i>` column.
rmses=[]
for i in range(24):
    # Positional (NumPy) mask instead of a boolean Series: the mask is built
    # from X_train_df but also applied to train_forecast_orig, and a boolean
    # Series would be aligned on index labels, which the forecast frame may
    # not share with X_train_df.
    mask = (X_train_df[f'hour_{i}']==1).to_numpy()
    # sqrt(MSE) == RMSE; avoids the `squared=False` keyword that newer
    # scikit-learn releases no longer accept.
    RMSE = np.sqrt(mean_squared_error(X_train_df.loc[mask, 'y'],
                                      train_forecast_orig.loc[mask, 'yhat']))
#     print(f'hour:{i}  RMSE:{RMSE}')
    rmses.append(RMSE)

# One row per hour of day: columns `hour` (0-23) and `rmse`.
hour_rmses_train = pd.DataFrame({'hour': range(24), 'rmse': rmses})
hour_rmses_train

Unnamed: 0,hour,rmse
0,0,149.837896
1,1,149.478337
2,2,153.141254
3,3,159.593474
4,4,164.934605
5,5,170.859367
6,6,179.520053
7,7,203.977777
8,8,209.812724
9,9,184.84388


In [98]:
# Per-hour RMSE of the full-data model on the validation set.
# Rows belonging to hour i are picked through the one-hot `hour_<i>` column.
rmses=[]
for i in range(24):
    # Positional (NumPy) mask instead of a boolean Series: the mask is built
    # from X_val_df but also applied to val_forecast_orig, and a boolean
    # Series would be aligned on index labels, which the forecast frame may
    # not share with X_val_df.
    mask = (X_val_df[f'hour_{i}']==1).to_numpy()
    # sqrt(MSE) == RMSE; avoids the `squared=False` keyword that newer
    # scikit-learn releases no longer accept.
    RMSE = np.sqrt(mean_squared_error(X_val_df.loc[mask, 'y'],
                                      val_forecast_orig.loc[mask, 'yhat']))
#     print(f'hour:{i}  RMSE:{RMSE}')
    rmses.append(RMSE)

# One row per hour of day: columns `hour` (0-23) and `rmse`.
hour_rmses_val = pd.DataFrame({'hour': range(24), 'rmse': rmses})
hour_rmses_val

Unnamed: 0,hour,rmse
0,0,199.293125
1,1,212.937914
2,2,222.226774
3,3,231.289022
4,4,242.293174
5,5,255.752924
6,6,263.742334
7,7,282.285694
8,8,286.326049
9,9,254.876139


In [None]:
# train_model.add_seasonality(name='daily', period=24, fourier_order=15, prior_scale=0.1)
# train_model.add_seasonality(name='weekly', period=168, fourier_order=3, prior_scale=0.1)
# train_model.add_seasonality(name='yearly', period=8760, fourier_order=3, prior_scale=0.01)

# RMSE: 683 SCALE: 50,15,2 ORDER: 15,3,3
# RMSE: 659 SCALE: 15,10,2 ORDER: 15,3,3
# RMSE: 445 SCALE: 1,1,0.1 ORDER: 15,3,3
# RMSE: 440 SCALE: 0.1,0.1,0.01 ORDER: 15,3,3

# RMSE: 250 mcmc_samples=50, adapt_delta: 0.85
# RMSE: 245 mcmc_samples=50, adapt_delta: 0.99
# RMSE: 227 mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 4 min)
# RMSE: 225 mcmc_samples=50, adapt_delta: 0.99 , chains=1 with hour regressors (time: 4 min)
# RMSE: 225 changepoint_prior_scale=0.1, mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 1 min)
# RMSE: 222 changepoint_prior_scale=0.5, mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 1 min)


# RMSE: 284 mcmc_samples=100, adapt_delta: 0.99 , chains=2 (time: 30 min)

# train_w_reg = add_regressors(train_df[:val_start],X_train,regressors)

In [100]:
%%time

# ************** IMPORTANT NOTE *************************
# changepoint_prior_scale should be set to 0.1, otherwise the code hangs for hour=8

# Train one Prophet model per hour of day (24 models). Each model is fitted on
# the training rows whose one-hot `hour_<i>` column equals 1 and is scored on
# the matching validation and training rows.
#
# Results left in the notebook namespace:
#   hour_models              - the 24 fitted models, index == hour
#   preds_val / preds_train  - predictions written back at the rows' positions
#   rmses_val / rmses_train  - per-hour RMSE lists
#
# NOTE: the loop rebinds the global `train_model`; once it finishes, that name
# refers to the hour-23 model rather than a model fitted on all hours.

val_start   = len(X_train_df)#10000
val_period  = len(X_val_df)#48
np.random.seed(seed)

preds_val = np.zeros(len(X_val_df))
preds_train = np.zeros(len(X_train_df))

rmses_train=[]
rmses_val=[]
hour_models=[]

for i in range(24):

    print()
    print(f'*********** HOUR {i} *******************')

    # Daily seasonality is off: each model only ever sees one hour of the day.
    train_model = Prophet(
        uncertainty_samples=0,
        changepoint_prior_scale=0.1,
        changepoint_range=0.0,
        weekly_seasonality=True,
        daily_seasonality=False,
        yearly_seasonality=True,
        interval_width=0.95,
    )

    # for col in regressors:
    #     train_model.add_regressor(col,
    #                               prior_scale=10,#100
    #                               standardize=False
    #                              )

    train_mask = X_train_df[f'hour_{i}']==1
    val_mask = X_val_df[f'hour_{i}']==1

    # Slice each hourly subset once and reuse it for fit / predict / scoring.
    hour_train_df = X_train_df[train_mask]
    hour_val_df = X_val_df[val_mask]

    train_model.fit(hour_train_df)
    print('Fit completed')
    hour_models.append(train_model)

    # Prediction of validation set.
    # sqrt(MSE) == RMSE; avoids the `squared=False` keyword that newer
    # scikit-learn releases no longer accept.
    val_forecast = train_model.predict(hour_val_df)
    RMSE = np.sqrt(mean_squared_error(hour_val_df['y'], val_forecast['yhat']))
    print(f'validation hour:{i}  RMSE:{RMSE}')

    # Write back by position: plain NumPy mask and values, no index alignment.
    preds_val[val_mask.to_numpy()] = val_forecast['yhat'].to_numpy()
    rmses_val.append(RMSE)

    # Prediction of train set.
    train_forecast = train_model.predict(hour_train_df)
    RMSE = np.sqrt(mean_squared_error(hour_train_df['y'], train_forecast['yhat']))
    print(f'train hour:{i}  RMSE:{RMSE}')

    preds_train[train_mask.to_numpy()] = train_forecast['yhat'].to_numpy()
    rmses_train.append(RMSE)

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.



*********** HOUR 0 *******************




Initial log joint probability = -16.0677
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      22       6761.81   0.000230969       143.551   1.327e-06       0.001       60  LS failed, Hessian reset 
      40       6761.97   0.000292458        69.776    7.21e-06       0.001      121  LS failed, Hessian reset 
      55       6761.99   2.18367e-05       10.6126   1.902e-06       0.001      178  LS failed, Hessian reset 
      58       6761.99   3.57465e-06       8.90171   7.032e-08       0.001      237  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed

Initial log joint probability = -2733.5
Iteration  1. Log joint probability =    1360.75. Improved by 4094.26.
Iteration  2. Log joint probability =     5752.2. Improved by 4391.44.
Iteration  3. Log joint probability =     6262.3. Improved by 510.098.
Iteration  4. Log joint probability =    6275.19. Improved by 12.8909.
Iteration  5. Log joint probability =    6382.75. Improved by 107.563.
Iteration  6. Log joint probability =    6704.72. Improved by 321.971.
Iteration  7. Log joint probability =    6713.19. Improved by 8.46583.
Iteration  8. Log joint probability =    6725.26. Improved by 12.0718.
Iteration  9. Log joint probability =    6728.02. Improved by 2.75894.
Iteration 10. Log joint probability =    6728.92. Improved by 0.900863.
Iteration 11. Log joint probability =    6728.93. Improved by 0.0166224.
Iteration 12. Log joint probability =    6729.07. Improved by 0.140848.
Iteration 13. Log joint probability =    6761.77. Improved by 32.6947.
Iteration 14. Log 

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -13.7982
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      25       6695.64   0.000195763       111.343    1.09e-06       0.001       67  LS failed, Hessian reset 
      40       6695.86   7.78755e-05       39.5819   1.005e-06       0.001      127  LS failed, Hessian reset 
      53       6695.87   3.59886e-07       12.4758   3.374e-08       0.001      189  LS failed, Hessian reset 
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:1  RMSE:186.36408212419002
train hour:1  RMSE:140.76102518638493

*********** HOUR 2 *******************
Initial log joint probability = -13.4415
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      23       6706.68    0.00019579       107.414   1.103e-06       0.001       79  LS failed, Hessian reset 
      35       6706.85   2.85528e-0

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -13.4154


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      28       6718.92   0.000821109       98.8301   1.016e-05       0.001       84  LS failed, Hessian reset 
      43       6719.05   3.89791e-05       22.1057   9.797e-07       0.001      143  LS failed, Hessian reset 
      54       6719.05   4.68177e-06       9.32677   3.973e-08       0.001      216  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Initial log joint probability = -2730.85
Iteration  1. Log joint probability =    3681.85. Improved by 6412.7.
Iteration  2. Log joint probability =    4243.43. Improved by 561.584.
Iteration  3. Log joint probability =    4804.56. Improved by 561.129.
Iteration  4. Log joint probability =    6034.34. Improved by 1229.78.
Iteration  5. Log joint probability =    6521.49. Improved by 487.148.
Iteration  6. Log joint probability =    653

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      20       6724.78   0.000270721       133.275   1.244e-06       0.001       72  LS failed, Hessian reset 
      36       6724.86   5.16144e-05       21.7249   8.619e-07       0.001      140  LS failed, Hessian reset 
      41       6724.86   7.85333e-06       8.99892   7.707e-07       0.001      187  LS failed, Hessian reset 
      42       6724.86   3.23559e-06       13.7351   3.596e-07       0.001      225  LS failed, Hessian reset 
      43       6724.86   2.76173e-07       8.92988      0.2297           1      227   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
validation hour:4  RMSE:189.29724789649123
train hour:4  RMSE:137.54575155631846

*********** HOUR 5 *******************


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -14.0382
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       6731.77   0.000124465        57.324   9.189e-07       0.001       76  LS failed, Hessian reset 
      35       6731.84   5.36413e-05       19.5111   2.306e-06       0.001      132  LS failed, Hessian reset 
      53       6731.84    2.2442e-05       10.2681           1           1      159   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
validation hour:5  RMSE:191.8950150525971
train hour:5  RMSE:138.90884219196002

*********** HOUR 6 *******************
Initial log joint probability = -14.5821


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      27       6776.56   0.000127492       75.7523   1.263e-06       0.001       80  LS failed, Hessian reset 
      31       6776.57   1.62925e-05       7.75459   1.946e-06       0.001      124  LS failed, Hessian reset 
      34       6776.57   5.92149e-06       13.8231           1           1      132   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:6  RMSE:194.2760298404512
train hour:6  RMSE:142.8020186404706

*********** HOUR 7 *******************




Initial log joint probability = -15.9881
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      28       6814.35   0.000149289       70.9002   9.384e-07       0.001       88  LS failed, Hessian reset 
      49       6814.38    2.2459e-05       9.81734   3.514e-08       0.001      168  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Fit completed


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -2733.42
Iteration  1. Log joint probability =    2741.52. Improved by 5474.94.
Iteration  2. Log joint probability =    5716.39. Improved by 2974.87.
Iteration  3. Log joint probability =    5924.71. Improved by 208.317.
Iteration  4. Log joint probability =    6562.66. Improved by 637.954.
Iteration  5. Log joint probability =    6706.11. Improved by 143.446.
Iteration  6. Log joint probability =    6721.42. Improved by 15.3131.
Iteration  7. Log joint probability =    6722.17. Improved by 0.742983.
Iteration  8. Log joint probability =     6722.2. Improved by 0.034537.
Iteration  9. Log joint probability =    6722.92. Improved by 0.721996.
Iteration 10. Log joint probability =    6722.97. Improved by 0.0484642.
Iteration 11. Log joint probability =    6723.67. Improved by 0.701109.
Iteration 12. Log joint probability =    6723.73. Improved by 0.0623619.
Iteration 13. Log joint probability =    6724.41. Improved by 0.680323.
Iteration 14. Log joint pro

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -16.0436
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      22       6862.04   0.000115818       60.9982   8.508e-07       0.001       72  LS failed, Hessian reset 
      41        6862.1   6.94157e-05       31.8958   8.118e-07       0.001      133  LS failed, Hessian reset 
      59        6862.1   1.25583e-05       12.2785       0.834       0.834      156   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:8  RMSE:198.4458088942921
train hour:8  RMSE:152.82880155932128

*********** HOUR 9 *******************


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -13.7477
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      15       6878.69   7.81571e-05       44.2319   2.052e-06       0.001       58  LS failed, Hessian reset 
      25       6878.69   1.28129e-05       10.8786           1           1       73   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:9  RMSE:193.80073037420237
train hour:9  RMSE:151.27958200876972

*********** HOUR 10 *******************
Initial log joint probability = -13.0597


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      24       6852.35   0.000187041       95.4548   8.139e-07       0.001       77  LS failed, Hessian reset 
      40       6852.43   9.89024e-05       35.9593   2.451e-06       0.001      136  LS failed, Hessian reset 
      45       6852.43   5.87445e-05       19.6313   5.629e-06       0.001      180  LS failed, Hessian reset 
      51       6852.43   6.18167e-06       10.6646       1.907      0.5921      189   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:10  RMSE:186.9913783520317
train hour:10  RMSE:148.98116060649104

*********** HOUR 11 *******************




Initial log joint probability = -13.7705
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      24       6952.23   0.000677159       170.221   2.355e-06       0.001       68  LS failed, Hessian reset 
      44       6952.33   6.79155e-05       38.2254   7.718e-07       0.001      129  LS failed, Hessian reset 
      52       6952.35   9.27777e-05       10.8506   2.107e-06       0.001      177  LS failed, Hessian reset 
      56       6952.35   1.08194e-05       10.7482    9.35e-07       0.001      225  LS failed, Hessian reset 
      58       6952.35   1.70708e-06       9.61359   2.242e-09       0.001      286  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made
Fit completed



INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -2731.21
Iteration  1. Log joint probability =     3483.6. Improved by 6214.81.
Iteration  2. Log joint probability =    5554.23. Improved by 2070.63.
Iteration  3. Log joint probability =     6680.7. Improved by 1126.46.
Iteration  4. Log joint probability =    6703.15. Improved by 22.4559.
Iteration  5. Log joint probability =    6732.53. Improved by 29.3781.
Iteration  6. Log joint probability =    6746.94. Improved by 14.4086.
Iteration  7. Log joint probability =    6769.48. Improved by 22.5402.
Iteration  8. Log joint probability =    6778.48. Improved by 8.99327.
Iteration  9. Log joint probability =    6796.29. Improved by 17.8136.
Iteration 10. Log joint probability =    6801.53. Improved by 5.24512.
Iteration 11. Log joint probability =       6816. Improved by 14.4618.
Iteration 12. Log joint probability =    6818.59. Improved by 2.59742.
Iteration 13. Log joint probability =    6830.63. Improved by 12.0382.
Iteration 14. Log joint probability 



Initial log joint probability = -16.7708
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      27       7007.39   0.000492817        257.16   1.383e-06       0.001       74  LS failed, Hessian reset 
      41       7007.59   2.81563e-05       10.8493   2.274e-06       0.001      131  LS failed, Hessian reset 
      44       7007.59   4.20138e-06       8.74817   8.599e-09       0.001      203  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Initial log joint probability = -2734.21


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Iteration  1. Log joint probability =    2992.52. Improved by 5726.73.
Iteration  2. Log joint probability =    4810.77. Improved by 1818.25.
Iteration  3. Log joint probability =    5827.42. Improved by 1016.64.
Iteration  4. Log joint probability =    5949.32. Improved by 121.908.
Iteration  5. Log joint probability =    6165.46. Improved by 216.134.
Iteration  6. Log joint probability =    6963.64. Improved by 798.179.
Iteration  7. Log joint probability =    6969.66. Improved by 6.0271.
Iteration  8. Log joint probability =    7002.04. Improved by 32.377.
Iteration  9. Log joint probability =    7003.18. Improved by 1.13578.
Iteration 10. Log joint probability =    7004.38. Improved by 1.20488.
Iteration 11. Log joint probability =    7004.94. Improved by 0.553851.
Iteration 12. Log joint probability =    7007.54. Improved by 2.60562.
Iteration 13. Log joint probability =    7007.56. Improved by 0.0202112.
Iteration 14. Log joint probability =    7007.58. Improved by 

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -19.7605
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      43       6972.37   3.40565e-05       24.9831   1.233e-06       0.001      100  LS failed, Hessian reset 
      48       6972.37   4.94993e-05       20.6305   1.645e-06       0.001      147  LS failed, Hessian reset 
      51       6972.38   2.44666e-06       15.3519   2.937e-07       0.001      192  LS failed, Hessian reset 
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:13  RMSE:194.42886577812996
train hour:13  RMSE:158.49329134459495

*********** HOUR 14 *******************
Initial log joint probability = -24.3032


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      29       6927.82   0.000766775       423.214   2.099e-06       0.001       79  LS failed, Hessian reset 
      45       6928.12   5.50412e-05       33.9625   1.457e-06       0.001      140  LS failed, Hessian reset 
      58       6928.13   1.11748e-05       11.2937           1           1      167   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
validation hour:14  RMSE:200.60885429793035
train hour:14  RMSE:164.41749281611808

*********** HOUR 15 *******************
Initial log joint probability = -28.5135


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      26       6883.75   0.000370117       168.296    7.83e-07       0.001       82  LS failed, Hessian reset 
      44       6884.07   0.000152716       78.1518   1.195e-06       0.001      146  LS failed, Hessian reset 
      55       6884.11   9.68355e-06       17.3422   9.472e-07       0.001      203  LS failed, Hessian reset 
      64       6884.11   2.53473e-07       11.3755    0.006406           1      218   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
validation hour:15  RMSE:204.98862485688605
train hour:15  RMSE:167.25333854665482

*********** HOUR 16 *******************
Initial log joint probability = -31.5492




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      30       6843.11   0.000398667       217.525   8.574e-07       0.001       73  LS failed, Hessian reset 
      50       6843.39    0.00017768       116.777   1.597e-06       0.001      139  LS failed, Hessian reset 
      66       6843.44   2.39982e-06       9.55118   2.224e-07       0.001      201  LS failed, Hessian reset 
      67       6843.44   2.39982e-06       9.55118   4.484e-08       0.001      267  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Initial log joint probability = -2748.98
Iteration  1. Log joint probability =     2953.5. Improved by 5702.49.
Iteration  2. Log joint probability =    4107.26. Improved by 1153.75.
Iteration  3. Log joint probability =    4476.56. Improved by 369.301.
Iteration  4. Log joint probability =    4917.98. Improved by 441.417.
Iteration  5. Log

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


ion 15. Log joint probability =    6683.54. Improved by 7.81872.
Iteration 16. Log joint probability =     6693.5. Improved by 9.95587.
Iteration 17. Log joint probability =    6699.26. Improved by 5.76768.
Iteration 18. Log joint probability =    6707.97. Improved by 8.70262.
Iteration 19. Log joint probability =    6712.02. Improved by 4.05597.
Iteration 20. Log joint probability =    6719.71. Improved by 7.68905.
Iteration 21. Log joint probability =    6722.34. Improved by 2.6311.
Iteration 22. Log joint probability =    6729.22. Improved by 6.87483.
Iteration 23. Log joint probability =    6730.66. Improved by 1.44775.
Iteration 24. Log joint probability =    6736.89. Improved by 6.22537.
Iteration 25. Log joint probability =    6737.36. Improved by 0.466953.
Iteration 26. Log joint probability =    6743.07. Improved by 5.71127.
Iteration 27. Log joint probability =    6819.43. Improved by 76.3615.
Iteration 28. Log joint probability =     6820.1. Improved by 0.671175.
Iteration 2



    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      32       6826.36   0.000363875       174.953    8.08e-07       0.001       88  LS failed, Hessian reset 
      53       6826.71   0.000104791       51.5082    1.67e-06       0.001      159  LS failed, Hessian reset 
      67       6826.76    7.6843e-05       7.81927   2.666e-07       0.001      239  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed

Initial log joint probability = -2748.73
Iteration  1. Log joint probability =    2976.72. Improved by 5725.44.
Iteration  2. Log joint probability =    4095.76. Improved by 1119.04.
Iteration  3. Log joint probability =    6122.05. Improved by 2026.29.
Iteration  4. Log joint probability =    6204.85. Improved by 82.8011.
Iteration  5. Log joint probability =    6457.55. Improved by 252.697.
Iteration  6. Log joint probability =     6711.4. Improved by 253.852.
Iteration  7. Log joint probability =    6750.93. Improved by 39.5366.
Iteration  8. Log joint probability =     6756.4. Improved by 5.46874.
Iteration  9. Log joint probability =    6772.47. Improved by 16.0632.
Iteration 10. Log joint probability =    6773.91. Improved by 1.43999.
Iteration 11. Log joint probability =    6774.35. Improved by 0.440371.
Iteration 12. Log joint probability =    6774.38. Improved by 0.0312541.
Iteration 13. Log joint probability =    6774.43. Improved by 0.0514306.
Iteration 14. Lo



    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      23       6761.25   0.000681407        156.14   7.626e-07       0.001       76  LS failed, Hessian reset 
      40       6761.32   6.24441e-05       8.74005   5.847e-07       0.001      149  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Initial log joint probability = -2746.25
Iteration  1. Log joint probability =    2897.41. Improved by 5643.66.
Iteration  2. Log joint probability =    3518.26. Improved by 620.849.
Iteration  3. Log joint probability =    5271.41. Improved by 1753.15.
Iteration  4. Log joint probability =    6657.39. Improved by 1385.98.
Iteration  5. Log joint probability =    6666.36. Improved by 8.97399.
Iteration  6. Log joint probability =    6679.62. Improved by 13.2628.
Iteration  7. Log joint probability =    6681.78. Improved by 2.15576.
Iteration  8. Log joint pr

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


 =    6761.32. Improved by 1.58943e-08.
Iteration 29. Log joint probability =    6761.32. Improved by 7.62429e-09.
validation hour:18  RMSE:211.38271205212374
train hour:18  RMSE:173.34083369158017

*********** HOUR 19 *******************


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -23.757
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      22       6757.85   0.000335708       67.1638   7.649e-07       0.001       87  LS failed, Hessian reset 
      44       6758.29   0.000239883        150.07   1.252e-06       0.001      149  LS failed, Hessian reset 
      54       6758.37     0.0001011       45.3135   1.361e-06       0.001      202  LS failed, Hessian reset 
      59       6758.37   2.04569e-05       11.5819   1.137e-06       0.001      256  LS failed, Hessian reset 
      67       6758.37   6.99046e-07       9.14822   6.062e-08       0.001      310  LS failed, Hessian reset 
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
validation hour:19  RMSE:211.65161099312056
train hour:19  RMSE:172.58158224054804

*********** HOUR 20 *******************


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -19.8631
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      31       6784.08    0.00118261       314.263   5.817e-06       0.001       82  LS failed, Hessian reset 
      51       6784.26   0.000121419       66.1594   1.424e-06       0.001      146  LS failed, Hessian reset 
      72       6784.29   2.11803e-06       13.9346   2.318e-07       0.001      212  LS failed, Hessian reset 
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
validation hour:20  RMSE:205.64869209605158
train hour:20  RMSE:168.69880617308237

*********** HOUR 21 *******************
Initial log joint probability = -16.1695




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6810.45   0.000429154       167.501   8.193e-07       0.001       71  LS failed, Hessian reset 
      35       6810.64    0.00028744       69.9764   2.105e-06       0.001      131  LS failed, Hessian reset 
      52       6810.66   1.61581e-05       8.61281   6.105e-08       0.001      209  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed

Initial log joint probability = -2733.6
Iteration  1. Log joint probability =    3112.41. Improved by 5846.02.
Iteration  2. Log joint probability =    5593.71. Improved by 2481.3.
Iteration  3. Log joint probability =     6361.3. Improved by 767.59.
Iteration  4. Log joint probability =    6675.62. Improved by 314.319.
Iteration  5. Log joint probability =    6681.32. Improved by 5.69377.
Iteration  6. Log joint probability =    6688.61. Improved by 7.29134.
Iteration  7. Log joint probability =    6693.62. Improved by 5.01095.
Iteration  8. Log joint probability =    6699.67. Improved by 6.05168.
Iteration  9. Log joint probability =    6704.15. Improved by 4.47814.
Iteration 10. Log joint probability =    6709.13. Improved by 4.98085.
Iteration 11. Log joint probability =    6713.19. Improved by 4.06371.
Iteration 12. Log joint probability =    6717.24. Improved by 4.04939.
Iteration 13. Log joint probability =    6720.99. Improved by 3.74341.
Iteration 14. Log joint 



    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      28       6891.05   0.000426436       173.379   7.692e-07       0.001       82  LS failed, Hessian reset 
      46       6891.29    0.00021719       118.512   1.422e-06       0.001      142  LS failed, Hessian reset 
      63       6891.32   7.45439e-06        8.5124   1.138e-07       0.001      222  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made



INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -2732.8
Iteration  1. Log joint probability =    2224.46. Improved by 4957.26.
Iteration  2. Log joint probability =    5713.63. Improved by 3489.18.
Iteration  3. Log joint probability =     6470.5. Improved by 756.863.
Iteration  4. Log joint probability =    6775.67. Improved by 305.174.
Iteration  5. Log joint probability =    6781.73. Improved by 6.06201.
Iteration  6. Log joint probability =    6786.09. Improved by 4.35128.
Iteration  7. Log joint probability =    6791.53. Improved by 5.4499.
Iteration  8. Log joint probability =    6794.93. Improved by 3.39348.
Iteration  9. Log joint probability =     6799.9. Improved by 4.96933.
Iteration 10. Log joint probability =    6802.47. Improved by 2.57292.
Iteration 11. Log joint probability =    6807.06. Improved by 4.59176.
Iteration 12. Log joint probability =    6808.93. Improved by 1.86424.
Iteration 13. Log joint probability =    6813.22. Improved by 4.29562.
Iteration 14. Log joint 



    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      26       6857.59   0.000347668       213.718    9.85e-07       0.001       75  LS failed, Hessian reset 
      42       6857.87   4.50424e-05       24.4441   9.216e-07       0.001      135  LS failed, Hessian reset 
      57       6857.87   1.87783e-05       8.94946   5.988e-08       0.001      212  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made
Fit completed

Initial log joint probability = -2731.82
Iteration  1. Log joint probability =    3531.45. Improved by 6263.27.
Iteration  2. Log joint probability =    5240.65. Improved by 1709.21.
Iteration  3. Log joint probability =    5799.46. Improved by 558.805.
Iteration  4. Log joint probability =    6562.82. Improved by 763.36.
Iteration  5. Log joint probability =    6568.28. Improved by 5.45488.
Iteration  6. Log joint probability =    662

In [101]:
# Tabulate the values in `rmses_val` against hour of day (0-23) and display
# the table. `rmses_val` is presumably the per-hour validation RMSEs printed
# by the training loop -- confirm against the cell that fills it.
hour_rmses_pred_val = pd.DataFrame().assign(
    hour=range(24),
    rmse=rmses_val,
)
hour_rmses_pred_val

Unnamed: 0,hour,rmse
0,0,187.288632
1,1,186.364082
2,2,187.472083
3,3,188.145401
4,4,189.297248
5,5,191.895015
6,6,194.27603
7,7,199.528927
8,8,198.445809
9,9,193.80073


In [102]:
# Tabulate the values in `rmses_train` against hour of day (0-23) and display
# the table. `rmses_train` is presumably the per-hour train RMSEs printed by
# the training loop -- confirm against the cell that fills it.
hour_rmses_pred_train = pd.DataFrame().assign(
    hour=range(24),
    rmse=rmses_train,
)
hour_rmses_pred_train

Unnamed: 0,hour,rmse
0,0,145.347784
1,1,140.761025
2,2,138.377627
3,3,137.738264
4,4,137.545752
5,5,138.908842
6,6,142.802019
7,7,150.483674
8,8,152.828802
9,9,151.279582


In [103]:
# val_mask_8 = X_val_df['hour_8']==1
# preds[val_mask_8] = train_forecast_orig[val_mask_8]['yhat']

In [104]:
# Summary statistics of the validation predictions, as a quick sanity check
# on their range before scoring.
pd.Series(data=preds_val).describe()

count    26304.000000
mean      2021.803862
std        256.990237
min       1370.213967
25%       1836.925619
50%       2024.884824
75%       2204.027094
max       2730.205304
dtype: float64

In [105]:
# RMSE of the validation predictions against X_val_df['y'].
# The square root is taken explicitly: the `squared=False` argument of
# `mean_squared_error` is deprecated since scikit-learn 1.4 and removed in
# later releases, whereas sqrt(MSE) works on every version and is the same
# value for a single-output target.
RMSE = np.sqrt(mean_squared_error(X_val_df['y'], preds_val))
print('Validation RMSE:',RMSE)

Validation RMSE: 196.3774674833253


In [106]:
# RMSE of the train predictions against X_train_df['y'].
# The square root is taken explicitly: the `squared=False` argument of
# `mean_squared_error` is deprecated since scikit-learn 1.4 and removed in
# later releases, whereas sqrt(MSE) works on every version and is the same
# value for a single-output target.
RMSE = np.sqrt(mean_squared_error(X_train_df['y'], preds_train))
print('Train RMSE:',RMSE)

Train RMSE: 154.86419227601507


In [99]:
# train_model.params

In [107]:
from fbprophet.serialize import model_to_json, model_from_json

In [108]:
import os

# Persist every model in `hour_models` as JSON, one file per list position:
# output/fbprophet_hourwise_<i>.json
# open(..., 'w') does not create missing directories, so make sure the
# target directory exists before writing.
os.makedirs('output', exist_ok=True)
for i, hour_model in enumerate(hour_models):
    with open(f'output/fbprophet_hourwise_{i}.json', 'w', encoding='utf-8') as fout:
        fout.write(model_to_json(hour_model))  # Save model

# To load one of the saved models back (here: list position 0):
# with open('output/fbprophet_hourwise_0.json', 'r', encoding='utf-8') as fin:
#     temp_model = model_from_json(fin.read())  # Load model

In [109]:
# Wrap the validation predictions in a DataFrame whose only column is
# labelled 'preds', then write it to CSV without the index.
preds_df_val = pd.DataFrame(preds_val).set_axis(['preds'], axis=1)
preds_df_val.to_csv('output/preds_prophet_hourwise_val.csv', index=False)

In [110]:
# Wrap the train predictions in a DataFrame whose only column is labelled
# 'preds', then write it to CSV without the index.
preds_df_train = pd.DataFrame(preds_train).set_axis(['preds'], axis=1)
preds_df_train.to_csv('output/preds_prophet_hourwise_train.csv', index=False)