# Jobathon Nov 2022 Time series Model

## Table of Contents
### 1. [Read Train and Test ](#read)
### 2. [Feature Generation](#feature)
### 3. [Train and Validation Split](#split)
### 4. [Model Evaluation using Facebook Prophet](#model_eval_fbprophet)
### 4. [Model Evaluation using Thyme Boost](#model_eval_thyme)
### 4. [Model Evaluation using Unobserved Components](#model_eval)
### 5. [Model Finalization for Test Prediction](#model_final)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [2]:
pd.options.display.max_columns=500
pd.options.display.max_rows=500

In [3]:
from pandas.tseries.holiday import *

In [4]:
# !pip install pmdarima

In [5]:
# !pip install ThymeBoost

In [6]:
KAGGLE=False

In [7]:
if KAGGLE:
    path = '/kaggle/input/jobathon-nov-2022/'
else:
    path = 'input/'

<a id='read'></a>
## Read Train and Test Data

In [8]:
train=pd.read_csv(path+'train.csv')
print(train.shape)

(94992, 3)


In [9]:
test=pd.read_csv(path+'test.csv')
print(test.shape)

(26304, 2)


In [10]:
train.head()

Unnamed: 0,row_id,datetime,energy
0,1,2008-03-01 00:00:00,1259.985563
1,2,2008-03-01 01:00:00,1095.5415
2,3,2008-03-01 02:00:00,1056.2475
3,4,2008-03-01 03:00:00,1034.742
4,5,2008-03-01 04:00:00,1026.3345


In [11]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94992 entries, 0 to 94991
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   row_id    94992 non-null  int64  
 1   datetime  94992 non-null  object 
 2   energy    93092 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.2+ MB


In [12]:
train['energy'].describe()
targetcol='energy'

In [13]:
train['datetime']=pd.to_datetime(train['datetime'],infer_datetime_format=True)
test['datetime']=pd.to_datetime(test['datetime'],infer_datetime_format=True)

In [14]:
train['datetime'].min(),train['datetime'].max()

(Timestamp('2008-03-01 00:00:00'), Timestamp('2018-12-31 23:00:00'))

<a id='feature'></a>
## Feature Generation

In [15]:
#create hour map based on business, non - business , sleeping hours etc.
hour_map={0:0,1:0,2:0,3:0,4:0,5:0,
         6:1,7:1,8:1,
         9:2,10:2,11:2,
         12:3,13:3,14:3,15:3,
         16:4,17:4,
         18:5,19:5,20:5,
         21:6,22:6,23:6}
 
#Monday and Sunday as group 1, Saturday as group 2, Otherdays as group 3
dayofweek_map = {0:1,6:1,
                5:2,
                1:3,2:3,3:3,4:3}

create basic date related features

In [16]:
def gen_datefeats(data):
    data['year']=data['datetime'].dt.year
    data['month']=data['datetime'].dt.month
    data['day']=data['datetime'].dt.day
    data['hour']=data['datetime'].dt.hour
    data['weekofyear']=data['datetime'].dt.isocalendar().week
    data['dayofweek']=data['datetime'].dt.dayofweek
    data['dayofweek_grp']=data['dayofweek'].replace(dayofweek_map)
    data['quarter']=data['datetime'].dt.quarter
    data['is_weekend']=data['datetime'].dt.dayofweek > 4
    data['day_part']=data['hour'].replace(hour_map)

In [17]:
gen_datefeats(train)
gen_datefeats(test)

create holiday features with special holiday denoting christmas long holidays

In [18]:
#generate holidays feature
def gen_holiday_feat(data,start,end):
    cal = USFederalHolidayCalendar()
    holiday_dates = cal.holidays(start=start, end=end)
    data['is_holiday'] = False
    mask = data['datetime'].dt.date.astype('datetime64').isin(holiday_dates)
    data.loc[mask,'is_holiday']=True   
    
    data['special_holiday']=False
    mask= ((data['datetime'].dt.month==12) & (data['datetime'].dt.day>=24))  \
           | ((data['datetime'].dt.month==1) & (data['datetime'].dt.day<3)) 
    data.loc[mask,'special_holiday']=True   
        
    return holiday_dates

In [19]:
holidays= gen_holiday_feat(train,train['datetime'].dt.date.min(),train['datetime'].dt.date.max())
print(holidays)
print(train['special_holiday'].value_counts())
train['is_holiday'].value_counts()

DatetimeIndex(['2008-05-26', '2008-07-04', '2008-09-01', '2008-10-13',
               '2008-11-11', '2008-11-27', '2008-12-25', '2009-01-01',
               '2009-01-19', '2009-02-16',
               ...
               '2018-01-01', '2018-01-15', '2018-02-19', '2018-05-28',
               '2018-07-04', '2018-09-03', '2018-10-08', '2018-11-12',
               '2018-11-22', '2018-12-25'],
              dtype='datetime64[ns]', length=107, freq=None)
False    92400
True      2592
Name: special_holiday, dtype: int64


False    92424
True      2568
Name: is_holiday, dtype: int64

In [20]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False


generate hour aggregate features month-wise, quarter-wise, week of year-wise and week day group wise

In [21]:
def gen_datetime_comb_feats(data):
    data['dayofweek_hr']=data['dayofweek'].astype('str') + '_'+data['hour'].astype('str')
    data['weekofyear_hr']=data['weekofyear'].astype('str') + '_'+data['hour'].astype('str')
    data['month_hr']=data['month'].astype('str') + '_'+data['hour'].astype('str')
    data['quarter_hr']=data['quarter'].astype('str') + '_'+data['hour'].astype('str')


def gen_mean_feats(train,test,cols,newcolname):
    grouped=train.groupby(cols)[targetcol].mean().reset_index()
    grouped.columns=cols+[newcolname]
    train=train.merge(grouped,on=cols)    
    test=test.merge(grouped,on=cols)   
    return train,test
    
def gen_mean_feats_all(train,test):
    train,test=gen_mean_feats(train,test,['month','hour'],'month_hour_mean')
    train,test=gen_mean_feats(train,test,['quarter','hour'],'quarter_hour_mean')
    train,test=gen_mean_feats(train,test,['weekofyear','hour'],'weekofyear_mean')
    train,test=gen_mean_feats(train,test,['dayofweek_grp','hour'],'dayofweek_grp_mean')  
    
    train.sort_values('datetime',inplace=True)
    train.reset_index(drop=True,inplace=True)
    test.sort_values('datetime',inplace=True)
    test.reset_index(drop=True,inplace=True)
    return train,test

In [22]:
train,test=gen_mean_feats_all(train,test)

In [23]:
gen_datetime_comb_feats(train)
gen_datetime_comb_feats(test)

In [24]:
train.head(10)

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4
5,6,2008-03-01 05:00:00,1033.7685,2008,3,1,5,9,5,2,1,True,0,False,False,1275.482187,1305.534563,1230.498917,1493.890931,5_5,9_5,3_5,1_5
6,7,2008-03-01 06:00:00,1086.78,2008,3,1,6,9,5,2,1,True,1,False,False,1337.872697,1358.662339,1295.098037,1566.537865,5_6,9_6,3_6,1_6
7,8,2008-03-01 07:00:00,1211.742,2008,3,1,7,9,5,2,1,True,1,False,False,1464.527791,1470.300187,1412.6619,1697.378282,5_7,9_7,3_7,1_7
8,9,2008-03-01 08:00:00,1293.693,2008,3,1,8,9,5,2,1,True,1,False,False,1556.391201,1543.597804,1493.338281,1781.379368,5_8,9_8,3_8,1_8
9,10,2008-03-01 09:00:00,1318.9155,2008,3,1,9,9,5,2,1,True,2,False,False,1577.528044,1589.814926,1517.786396,1804.679326,5_9,9_9,3_9,1_9


In [25]:
test.head()

Unnamed: 0,row_id,datetime,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr
0,94993,2019-01-01 00:00:00,2019,1,1,0,1,1,3,1,False,0,1661.002682,1504.829986,1711.103516,1625.815769,1_0,1_0,1_0,1_0
1,94994,2019-01-01 01:00:00,2019,1,1,1,1,1,3,1,False,0,1543.596544,1412.376876,1592.733254,1543.548695,1_1,1_1,1_1,1_1
2,94995,2019-01-01 02:00:00,2019,1,1,2,1,1,3,1,False,0,1462.390875,1352.371501,1506.964956,1493.577402,1_2,1_2,1_2,1_2
3,94996,2019-01-01 03:00:00,2019,1,1,3,1,1,3,1,False,0,1410.076914,1315.962924,1439.720612,1465.82546,1_3,1_3,1_3,1_3
4,94997,2019-01-01 04:00:00,2019,1,1,4,1,1,3,1,False,0,1378.289974,1298.825422,1415.453093,1459.997669,1_4,1_4,1_4,1_4


In [26]:
train['year'].unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018])

In [27]:
test['datetime'].min(),test['datetime'].max()

(Timestamp('2019-01-01 00:00:00'), Timestamp('2021-12-31 23:00:00'))

In [28]:
holidays= gen_holiday_feat(test,test['datetime'].dt.date.min(),test['datetime'].dt.date.max())
print(len(holidays))
print(holidays)
print(test['special_holiday'].value_counts())
test['is_holiday'].value_counts()

31
DatetimeIndex(['2019-01-01', '2019-01-21', '2019-02-18', '2019-05-27',
               '2019-07-04', '2019-09-02', '2019-10-14', '2019-11-11',
               '2019-11-28', '2019-12-25', '2020-01-01', '2020-01-20',
               '2020-02-17', '2020-05-25', '2020-07-03', '2020-09-07',
               '2020-10-12', '2020-11-11', '2020-11-26', '2020-12-25',
               '2021-01-01', '2021-01-18', '2021-02-15', '2021-05-31',
               '2021-07-05', '2021-09-06', '2021-10-11', '2021-11-11',
               '2021-11-25', '2021-12-24', '2021-12-31'],
              dtype='datetime64[ns]', freq=None)
False    25584
True       720
Name: special_holiday, dtype: int64


False    25560
True       744
Name: is_holiday, dtype: int64

In [29]:
targetcol = 'energy'

In [30]:
# train[targetcol].fillna(train[targetcol].mean(),inplace=True)
train[targetcol].fillna(method='ffill',inplace=True)

Create Lag Features

In [31]:
def create_lag(data,lagno_list):
    res = pd.DataFrame()
    for i in lagno_list:
        shifted = data.shift(i)
        res=pd.concat([res,shifted],axis=1)

#     res=pd.concat([data.shift(i) for i in lagno_list],axis=1)
    res.columns=[f'lag_{i}' for i in lagno_list]
    return res

In [32]:
test.shape

(26304, 22)

In [33]:
train['istrain']=1
test['istrain']=0
combined = pd.concat([train,test],axis=0) 
#lag 1 year, 3 year, quarter, month,week
lag_df = create_lag(combined[targetcol],[24,168,720,2160,8760,26304])
combined=pd.concat([combined,lag_df],axis=1)
train=combined[combined['istrain']==1]
test=combined[combined['istrain']==0]

del combined,train['istrain'],test['istrain'],test[targetcol]
print(train.shape,test.shape)

(94992, 29) (26304, 28)


In [34]:
train.head()

Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
0,1,2008-03-01 00:00:00,1259.985563,2008,3,1,0,9,5,2,1,True,0,False,False,1397.227508,1504.829986,1392.384333,1654.027936,5_0,9_0,3_0,1_0,,,,,,
1,2,2008-03-01 01:00:00,1095.5415,2008,3,1,1,9,5,2,1,True,0,False,False,1326.027668,1412.376876,1306.621744,1566.045064,5_1,9_1,3_1,1_1,,,,,,
2,3,2008-03-01 02:00:00,1056.2475,2008,3,1,2,9,5,2,1,True,0,False,False,1287.670058,1352.371501,1259.495448,1511.553768,5_2,9_2,3_2,1_2,,,,,,
3,4,2008-03-01 03:00:00,1034.742,2008,3,1,3,9,5,2,1,True,0,False,False,1262.746667,1315.962924,1230.416947,1478.16859,5_3,9_3,3_3,1_3,,,,,,
4,5,2008-03-01 04:00:00,1026.3345,2008,3,1,4,9,5,2,1,True,0,False,False,1260.754616,1298.825422,1221.394201,1473.652068,5_4,9_4,3_4,1_4,,,,,,


In [35]:
train.columns

Index(['row_id', 'datetime', 'energy', 'year', 'month', 'day', 'hour',
       'weekofyear', 'dayofweek', 'dayofweek_grp', 'quarter', 'is_weekend',
       'day_part', 'is_holiday', 'special_holiday', 'month_hour_mean',
       'quarter_hour_mean', 'weekofyear_mean', 'dayofweek_grp_mean',
       'dayofweek_hr', 'weekofyear_hr', 'month_hr', 'quarter_hr', 'lag_24',
       'lag_168', 'lag_720', 'lag_2160', 'lag_8760', 'lag_26304'],
      dtype='object')

Fill null values in train data using previous hour values

In [36]:
cols = [col for col in train.columns if col.startswith('lag_')]
target_mean = train[targetcol].mean()
for col in cols:
    train[col].fillna(0,inplace=True)

<a id='split'></a>
## Train and Validation Split

Validation Set from 2016 to 2018 <br>
Train Set from 2008 to 2015

In [37]:
import datetime 
train_start = datetime.datetime(year=2008,month=1,day=1,hour=0)
val_start = datetime.datetime(year=2016,month=1,day=1,hour=0)
val_end = datetime.datetime(year=2018,month=12,day=31,hour=23)

X_val= train[(train['datetime']>=val_start) & (train['datetime']<=val_end)].copy()
X_train= train[(train['datetime']>=train_start) & (train['datetime']<val_start)].copy()
print(X_train.shape)
print(X_val.shape)
X_val.head()           

(68688, 29)
(26304, 29)


Unnamed: 0,row_id,datetime,energy,year,month,day,hour,weekofyear,dayofweek,dayofweek_grp,quarter,is_weekend,day_part,is_holiday,special_holiday,month_hour_mean,quarter_hour_mean,weekofyear_mean,dayofweek_grp_mean,dayofweek_hr,weekofyear_hr,month_hr,quarter_hr,lag_24,lag_168,lag_720,lag_2160,lag_8760,lag_26304
68688,68689,2016-01-01 00:00:00,2002.247,2016,1,1,0,53,4,3,1,False,0,True,True,1661.002682,1504.829986,1874.91005,1625.815769,4_0,53_0,1_0,1_0,2045.017,2274.753,1983.0616,1878.0918,2287.2788,1479.1658
68689,68690,2016-01-01 01:00:00,1843.387,2016,1,1,1,53,4,3,1,False,0,True,True,1543.596544,1412.376876,1734.62185,1543.548695,4_1,53_1,1_1,1_1,1881.1468,2118.4592,1827.3788,1878.0918,2120.5124,1375.2627
68690,68691,2016-01-01 02:00:00,1735.1178,2016,1,1,2,53,4,3,1,False,0,True,True,1462.390875,1352.371501,1633.81205,1493.577402,4_2,53_2,1_2,1_2,1781.7982,2020.4548,1719.965,1627.704,2015.9704,1307.8261
68691,68692,2016-01-01 03:00:00,1666.197,2016,1,1,3,53,4,3,1,False,0,True,True,1410.076914,1315.962924,1578.1656,1465.82546,4_3,53_3,1_3,1_3,1719.5984,1936.5034,1648.2336,1564.5266,1939.974,1261.5295
68692,68693,2016-01-01 04:00:00,1630.3924,2016,1,1,4,53,4,3,1,False,0,True,True,1378.289974,1298.825422,1542.4391,1459.997669,4_4,53_4,1_4,1_4,1657.1542,1895.8108,1603.264,1541.553,1885.3868,1228.8682


In [38]:
val_target = X_val[targetcol]
print(X_val[targetcol].isnull().sum())

0


Create simple validation prediction baseline using train mean value 

In [39]:
from sklearn.metrics import mean_squared_error

In [40]:
#compute baseline error by predicting train energy mean as the energy for all time
val_preds_baseline = np.full(len(X_val),train[targetcol].mean())

In [41]:
#compute error score on baseline predictions
val_score = mean_squared_error(val_target,val_preds_baseline,squared=False)
print('valid score:',val_score)

valid score: 431.7300102305795


<a id='model_eval_fbprophet'></a>
## Model Evaluation using Facebook Prophet

In [42]:
# !pip install pystan==2.19.1.1

In [43]:
# !pip install fbprophet

In [44]:
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
from fbprophet.plot import plot_plotly
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [45]:
from multiprocessing import cpu_count
from joblib import Parallel, delayed
from multiprocessing import cpu_count

In [46]:
from sklearn.model_selection import ParameterGrid

In [47]:
def tune_prophet_params(param):
    val_start   = 68688
    val_period  = 26304
    print(param)
    np.random.seed(0)
    train_model =Prophet(uncertainty_samples=0,
                        changepoint_prior_scale = param['changepoint_prior_scale'],
#                              n_changepoints = param['n_changepoints'],
                         changepoint_range = param['changepoint_range'],
                         weekly_seasonality=True,
                         daily_seasonality = True,
                         yearly_seasonality = True,
                         interval_width=0.95)
    train_model.fit(train_df[:val_start])
    future_df = train_model.make_future_dataframe(periods=val_period, freq='H',include_history = False)
#     future_df['cap']=4000
#     future_df['floor']=1000
    train_forecast = train_model.predict(future_df)
    val=train_forecast[['ds','yhat']]
    Actual = train_df[val_start:val_start+val_period]
    RMSE = mean_squared_error(Actual['y'],abs(val['yhat']),squared=False)
    print('RMSE------------------------------------',RMSE)
    model_param = {'RMSE':RMSE}
    model_param.update(param)

    return model_param

Evaluation of Best Tuned Model with additional regressors

In [48]:
def add_regressors(data_prophet,data_orig,regressors):
    df_with_reg = pd.concat([data_prophet.reset_index(drop=True),
                             data_orig[regressors].head(len(data_prophet)).reset_index(drop=True)],axis=1)
    return df_with_reg

In [49]:
# cols = ['hour','dayofweek','weekofyear','quarter_hr','dayofweek_hr','month_hr']
cols = ['hour','dayofweek','quarter','quarter_hr','month_hr']
exog_train = pd.get_dummies(X_train.set_index('datetime')[cols],columns=cols,prefix=cols)
exog_test = pd.get_dummies(X_val.set_index('datetime')[cols],columns=cols,prefix=cols)

y_train = X_train.set_index('datetime')[targetcol].copy()
y_test = X_val.set_index('datetime')[targetcol].copy()

In [50]:
# regressors = list(exog_train.columns)
regressors = [col for col in exog_train.columns if col.startswith('hour_')]
X_train_df = pd.concat([exog_train,y_train],axis=1)
X_train_df = X_train_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_train_df.shape)
X_train_df.head()

(68688, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,month_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2008-03-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1259.985563
1,2008-03-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1095.5415
2,2008-03-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1056.2475
3,2008-03-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1034.742
4,2008-03-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1026.3345


In [51]:
X_val_df = pd.concat([exog_test,y_test],axis=1)
X_val_df = X_val_df.reset_index().rename(columns={'datetime': 'ds', 
                        targetcol: 'y'})
print(X_val_df.shape)
X_val_df.head()

(26304, 421)


Unnamed: 0,ds,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,hour_10,hour_11,hour_12,hour_13,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23,dayofweek_0,dayofweek_1,dayofweek_2,dayofweek_3,dayofweek_4,dayofweek_5,dayofweek_6,quarter_1,quarter_2,quarter_3,quarter_4,quarter_hr_1_0,quarter_hr_1_1,quarter_hr_1_10,quarter_hr_1_11,quarter_hr_1_12,quarter_hr_1_13,quarter_hr_1_14,quarter_hr_1_15,quarter_hr_1_16,quarter_hr_1_17,quarter_hr_1_18,quarter_hr_1_19,quarter_hr_1_2,quarter_hr_1_20,quarter_hr_1_21,quarter_hr_1_22,quarter_hr_1_23,quarter_hr_1_3,quarter_hr_1_4,quarter_hr_1_5,quarter_hr_1_6,quarter_hr_1_7,quarter_hr_1_8,quarter_hr_1_9,quarter_hr_2_0,quarter_hr_2_1,quarter_hr_2_10,quarter_hr_2_11,quarter_hr_2_12,quarter_hr_2_13,quarter_hr_2_14,quarter_hr_2_15,quarter_hr_2_16,quarter_hr_2_17,quarter_hr_2_18,quarter_hr_2_19,quarter_hr_2_2,quarter_hr_2_20,quarter_hr_2_21,quarter_hr_2_22,quarter_hr_2_23,quarter_hr_2_3,quarter_hr_2_4,quarter_hr_2_5,quarter_hr_2_6,quarter_hr_2_7,quarter_hr_2_8,quarter_hr_2_9,quarter_hr_3_0,quarter_hr_3_1,quarter_hr_3_10,quarter_hr_3_11,quarter_hr_3_12,quarter_hr_3_13,quarter_hr_3_14,quarter_hr_3_15,quarter_hr_3_16,quarter_hr_3_17,quarter_hr_3_18,quarter_hr_3_19,quarter_hr_3_2,quarter_hr_3_20,quarter_hr_3_21,quarter_hr_3_22,quarter_hr_3_23,quarter_hr_3_3,quarter_hr_3_4,quarter_hr_3_5,quarter_hr_3_6,quarter_hr_3_7,quarter_hr_3_8,quarter_hr_3_9,quarter_hr_4_0,quarter_hr_4_1,quarter_hr_4_10,quarter_hr_4_11,quarter_hr_4_12,quarter_hr_4_13,quarter_hr_4_14,quarter_hr_4_15,quarter_hr_4_16,quarter_hr_4_17,quarter_hr_4_18,quarter_hr_4_19,quarter_hr_4_2,quarter_hr_4_20,quarter_hr_4_21,quarter_hr_4_22,quarter_hr_4_23,quarter_hr_4_3,quarter_hr_4_4,quarter_hr_4_5,quarter_hr_4_6,quarter_hr_4_7,quarter_hr_4_8,quarter_hr_4_9,month_hr_10_0,month_hr_10_1,month_hr_10_10,month_hr_10_11,month_hr_10_12,month_hr_10_13,month_hr_10_14,month_hr_10_15,month_hr_10_16,month_hr_10_17,month_hr_10_18,month_hr_10_19,month_hr_10_2,month_hr_10_20,month_hr_10_21,month_hr_10_22,month_hr_10_23,month_hr_10_3,month_hr_10_4,month_hr_10_5,month_hr_10_6,month_hr_10_7,month_hr_10_8,month_hr_10_9,month_hr_11_0,month_hr_11_1,month_hr_11_10,month_hr_11_11,month_hr_11_12,month_hr_11_13,month_hr_11_14,month_hr_11_15,month_hr_11_16,month_hr_11_17,month_hr_11_18,month_hr_11_19,month_hr_11_2,month_hr_11_20,month_hr_11_21,month_hr_11_22,month_hr_11_23,month_hr_11_3,month_hr_11_4,month_hr_11_5,month_hr_11_6,month_hr_11_7,month_hr_11_8,month_hr_11_9,month_hr_12_0,month_hr_12_1,month_hr_12_10,month_hr_12_11,month_hr_12_12,month_hr_12_13,month_hr_12_14,month_hr_12_15,month_hr_12_16,month_hr_12_17,month_hr_12_18,month_hr_12_19,month_hr_12_2,month_hr_12_20,month_hr_12_21,month_hr_12_22,month_hr_12_23,month_hr_12_3,month_hr_12_4,month_hr_12_5,month_hr_12_6,month_hr_12_7,month_hr_12_8,month_hr_12_9,month_hr_1_0,month_hr_1_1,month_hr_1_10,month_hr_1_11,month_hr_1_12,month_hr_1_13,month_hr_1_14,month_hr_1_15,month_hr_1_16,month_hr_1_17,month_hr_1_18,month_hr_1_19,month_hr_1_2,month_hr_1_20,month_hr_1_21,month_hr_1_22,month_hr_1_23,month_hr_1_3,month_hr_1_4,month_hr_1_5,month_hr_1_6,month_hr_1_7,month_hr_1_8,month_hr_1_9,month_hr_2_0,month_hr_2_1,month_hr_2_10,month_hr_2_11,month_hr_2_12,month_hr_2_13,month_hr_2_14,month_hr_2_15,month_hr_2_16,month_hr_2_17,month_hr_2_18,month_hr_2_19,month_hr_2_2,month_hr_2_20,month_hr_2_21,month_hr_2_22,month_hr_2_23,month_hr_2_3,month_hr_2_4,month_hr_2_5,month_hr_2_6,month_hr_2_7,month_hr_2_8,month_hr_2_9,month_hr_3_0,month_hr_3_1,month_hr_3_10,month_hr_3_11,month_hr_3_12,month_hr_3_13,month_hr_3_14,month_hr_3_15,month_hr_3_16,month_hr_3_17,month_hr_3_18,month_hr_3_19,month_hr_3_2,month_hr_3_20,month_hr_3_21,month_hr_3_22,month_hr_3_23,month_hr_3_3,month_hr_3_4,month_hr_3_5,month_hr_3_6,month_hr_3_7,month_hr_3_8,month_hr_3_9,month_hr_4_0,month_hr_4_1,month_hr_4_10,month_hr_4_11,month_hr_4_12,month_hr_4_13,month_hr_4_14,month_hr_4_15,month_hr_4_16,month_hr_4_17,month_hr_4_18,month_hr_4_19,month_hr_4_2,month_hr_4_20,month_hr_4_21,month_hr_4_22,month_hr_4_23,month_hr_4_3,month_hr_4_4,month_hr_4_5,month_hr_4_6,month_hr_4_7,month_hr_4_8,month_hr_4_9,month_hr_5_0,month_hr_5_1,month_hr_5_10,month_hr_5_11,month_hr_5_12,month_hr_5_13,month_hr_5_14,month_hr_5_15,month_hr_5_16,month_hr_5_17,month_hr_5_18,month_hr_5_19,month_hr_5_2,month_hr_5_20,month_hr_5_21,month_hr_5_22,month_hr_5_23,month_hr_5_3,month_hr_5_4,month_hr_5_5,month_hr_5_6,month_hr_5_7,month_hr_5_8,month_hr_5_9,month_hr_6_0,month_hr_6_1,month_hr_6_10,month_hr_6_11,month_hr_6_12,month_hr_6_13,month_hr_6_14,month_hr_6_15,month_hr_6_16,month_hr_6_17,month_hr_6_18,month_hr_6_19,month_hr_6_2,month_hr_6_20,month_hr_6_21,month_hr_6_22,month_hr_6_23,month_hr_6_3,month_hr_6_4,month_hr_6_5,month_hr_6_6,month_hr_6_7,month_hr_6_8,month_hr_6_9,month_hr_7_0,month_hr_7_1,month_hr_7_10,month_hr_7_11,month_hr_7_12,month_hr_7_13,month_hr_7_14,month_hr_7_15,month_hr_7_16,month_hr_7_17,month_hr_7_18,month_hr_7_19,month_hr_7_2,month_hr_7_20,month_hr_7_21,month_hr_7_22,month_hr_7_23,month_hr_7_3,month_hr_7_4,month_hr_7_5,month_hr_7_6,month_hr_7_7,month_hr_7_8,month_hr_7_9,month_hr_8_0,month_hr_8_1,month_hr_8_10,month_hr_8_11,month_hr_8_12,month_hr_8_13,month_hr_8_14,month_hr_8_15,month_hr_8_16,month_hr_8_17,month_hr_8_18,month_hr_8_19,month_hr_8_2,month_hr_8_20,month_hr_8_21,month_hr_8_22,month_hr_8_23,month_hr_8_3,month_hr_8_4,month_hr_8_5,month_hr_8_6,month_hr_8_7,month_hr_8_8,month_hr_8_9,month_hr_9_0,month_hr_9_1,month_hr_9_10,month_hr_9_11,month_hr_9_12,month_hr_9_13,month_hr_9_14,month_hr_9_15,month_hr_9_16,month_hr_9_17,month_hr_9_18,month_hr_9_19,month_hr_9_2,month_hr_9_20,month_hr_9_21,month_hr_9_22,month_hr_9_23,month_hr_9_3,month_hr_9_4,month_hr_9_5,month_hr_9_6,month_hr_9_7,month_hr_9_8,month_hr_9_9,y
0,2016-01-01 00:00:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2002.247
1,2016-01-01 01:00:00,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1843.387
2,2016-01-01 02:00:00,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1735.1178
3,2016-01-01 03:00:00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1666.197
4,2016-01-01 04:00:00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1630.3924


In [52]:
import random
seed=100

In [53]:
# X_train_df['cap']=4000
# X_train_df['floor']=1000
# X_val_df['cap']=4000
# X_val_df['floor']=1000

In [63]:
%%time
val_start   = len(X_train_df)#10000
val_period  = len(X_val_df)#48
# val_start   = 68688
# val_period  = 26304
np.random.seed(seed)

train_model =Prophet(
                    uncertainty_samples=0,
#                      mcmc_samples = 100,
                    changepoint_prior_scale =0.01,
#                     n_changepoints = 50,
                     changepoint_range = 0.0,
                     weekly_seasonality=True,
                     daily_seasonality = True,
                     yearly_seasonality =True,
                     interval_width=0.95)

# train_model.add_regressor('hour_1',
#                               prior_scale=10,#100
#                               standardize=False
#                              )

# for col in regressors:
#     train_model.add_regressor(col,
#                               prior_scale=10,#100
#                               standardize=False
#                              )


# train_mask = X_train_df[f'hour_{i}']==1
# val_mask = X_val_df[f'hour_{i}']==1
train_model.fit(X_train_df[:val_start],
#                 seed=seed
               )
print('Fit completed')

train_forecast_orig = train_model.predict(X_val_df[:val_period])
# RMSE = mean_squared_error(X_val_df[:val_period]['y'],abs(train_forecast['yhat']),squared=False)
RMSE = mean_squared_error(X_val_df[:val_period]['y'],train_forecast_orig['yhat'],squared=False)
# RMSE = mean_squared_error(X_val_df['y'],abs(train_forecast['yhat']),squared=False)
print('RMSE------------------------------------',RMSE)

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -366.669
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      30        161107   0.000189219        1359.2   3.416e-08       0.001       95  LS failed, Hessian reset 
      47        161109   5.98541e-05        443.65   1.029e-07       0.001      159  LS failed, Hessian reset 
      60        161109   5.39922e-05       125.347           1           1      180   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
RMSE------------------------------------ 224.36625209548467
CPU times: user 5.32 s, sys: 131 ms, total: 5.45 s
Wall time: 5.37 s


In [None]:
# train_model.fit(X_train_df[train_mask][:val_start],
# #                 seed=seed
#                )
# print('Fit completed')

# train_forecast = train_model.predict(X_val_df[val_mask][:val_period])
# # RMSE = mean_squared_error(X_val_df[:val_period]['y'],abs(train_forecast['yhat']),squared=False)
# RMSE = mean_squared_error(X_val_df[val_mask][:val_period]['y'],train_forecast['yhat'],squared=False)
# # RMSE = mean_squared_error(X_val_df['y'],abs(train_forecast['yhat']),squared=False)
# print('RMSE------------------------------------',RMSE)

In [111]:
#default   hour8    non hour8
# 224.36   286.32   221.27

# 208.65   224.99
# 212.93   224.84

# hour =1  186.346 (orig: 208.6)


In [62]:
rmses=[]
for i in range(24):
    mask = X_val_df[f'hour_{i}']==1
    RMSE = mean_squared_error(X_val_df[mask][:val_period]['y'],train_forecast[mask]['yhat'],squared=False)
#     print(f'hour:{i}  RMSE:{RMSE}')
    rmses.append(RMSE)

hour_rmses = pd.DataFrame()    
hour_rmses['hour'] = range(24)
hour_rmses['rmse'] = rmses
hour_rmses

Unnamed: 0,hour,rmse
0,0,199.293125
1,1,212.937914
2,2,222.226774
3,3,231.289022
4,4,242.293174
5,5,255.752924
6,6,263.742334
7,7,282.285694
8,8,286.326049
9,9,254.876139


In [115]:
mask = X_val_df['hour_1']==1
RMSE = mean_squared_error(X_val_df[mask][:val_period]['y'],train_forecast[mask]['yhat'],squared=False)
print('RMSE------------------------------------',RMSE)

RMSE------------------------------------ 208.65959940591793


In [116]:
mask = X_val_df['hour_1']==0
RMSE = mean_squared_error(X_val_df[mask][:val_period]['y'],train_forecast[mask]['yhat'],squared=False)
print('RMSE------------------------------------',RMSE)

RMSE------------------------------------ 224.99774235969718


In [89]:
y_mean = (train_forecast['yhat_upper'] + train_forecast['yhat_lower'] ) / 2
ydiff =  y_mean - train_forecast['yhat']
print(ydiff.describe())
RMSE = mean_squared_error(X_val_df[:val_period]['y'],y_mean,squared=False)
print('RMSE------------------------------------',RMSE)

count    26304.000000
mean        16.460702
std         23.956343
min       -140.593648
25%          1.289172
50%         14.975380
75%         31.454408
max        158.742064
dtype: float64
RMSE------------------------------------ 247.37817175441538


In [None]:
# 2579.996822662037

In [110]:
# train_model.add_seasonality(name='daily', period=24, fourier_order=15, prior_scale=0.1)
# train_model.add_seasonality(name='weekly', period=168, fourier_order=3, prior_scale=0.1)
# train_model.add_seasonality(name='yearly', period=8760, fourier_order=3, prior_scale=0.01)

# RMSE: 683 SCALE: 50,15,2 ORDEr: 15,3,3
# RMSE: 659 SCALE: 15,10,2 ORDEr: 15,3,3
# RMSE: 445 SCALE: 1,1,0.1 ORDER: 15,3,3
# RMSE: 440 SCALE: 0.1,0.1,0.01 ORDER: 15,3,3

# RMSE: 250 mcmc_samples=50, adapt_delta: 0.85
# RMSE: 245 mcmc_samples=50, adapt_delta: 0.99
# RMSE: 227 mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 4 min)
# RMSE: 225 mcmc_samples=50, adapt_delta: 0.99 , chains=1 with hour regressors (time: 4 min)
# RMSE: 225 changepoint_prior_scale=0.1, mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 1 min)
# RMSE: 222 changepoint_prior_scale=0.5, mcmc_samples=50, adapt_delta: 0.99 , chains=1 (time: 1 min)


# RMSE: 284 mcmc_samples=100, adapt_delta: 0.99 , chains=2 (time: 30 min)

# train_w_reg = add_regressors(train_df[:val_start],X_train,regressors)

In [59]:
%%time
val_start   = len(X_train_df)#10000
val_period  = len(X_val_df)#48
np.random.seed(seed)

preds = np.zeros(len(X_val_df))

rmses=[]

for i in range(24):
    
    print()
    print(f'*********** HOUR {i} *******************')
    
    if i==8:
        rmses.append(0)
        continue

    train_model =Prophet(
                        uncertainty_samples=0,
                        changepoint_prior_scale =0.01,
                         changepoint_range = 0.0,
                         weekly_seasonality=True,
                         daily_seasonality = False,
                         yearly_seasonality =True,
                         interval_width=0.95)


    # for col in regressors:
    #     train_model.add_regressor(col,
    #                               prior_scale=10,#100
    #                               standardize=False
    #                              )

    train_mask = X_train_df[f'hour_{i}']==1
    val_mask = X_val_df[f'hour_{i}']==1
    train_model.fit(X_train_df[train_mask][:val_start],
    #                 seed=seed
                   )
    print('Fit completed')

    train_forecast = train_model.predict(X_val_df[val_mask][:val_period])
    RMSE = mean_squared_error(X_val_df[val_mask][:val_period]['y'],train_forecast['yhat'],squared=False)
    print(f'hour:{i}  RMSE:{RMSE}')
    # print('RMSE------------------------------------',RMSE)
    
    preds[val_mask]=train_forecast['yhat']

    rmses.append(RMSE)

hour_rmses_pred = pd.DataFrame()    
hour_rmses_pred['hour'] = range(24)
hour_rmses_pred['rmse'] = rmses
hour_rmses_pred

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.



*********** HOUR 0 *******************
Initial log joint probability = -16.0677
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      17       6760.97    0.00127688       596.946   1.344e-06       0.001       67  LS failed, Hessian reset 
      37       6761.99   7.08623e-06       95.6094   6.587e-08       0.001      127  LS failed, Hessian reset 
      38       6761.99   7.08623e-06       95.6094   4.632e-10       0.001      181  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.



Fit completed
Initial log joint probability = -2731.2
Iteration  1. Log joint probability =    1361.57. Improved by 4092.77.
Iteration  2. Log joint probability =    5695.79. Improved by 4334.22.
Iteration  3. Log joint probability =    5696.69. Improved by 0.89864.
Iteration  4. Log joint probability =    5696.83. Improved by 0.137849.
Iteration  5. Log joint probability =    5697.21. Improved by 0.381983.
Iteration  6. Log joint probability =    6699.91. Improved by 1002.7.
Iteration  7. Log joint probability =     6758.9. Improved by 58.9899.
Iteration  8. Log joint probability =    6759.92. Improved by 1.01264.
Iteration  9. Log joint probability =    6761.77. Improved by 1.85041.
Iteration 10. Log joint probability =    6761.81. Improved by 0.0477557.
Iteration 11. Log joint probability =    6761.83. Improved by 0.0150669.
Iteration 12. Log joint probability =    6761.97. Improved by 0.137913.
Iteration 13. Log joint probability =    6761.97. Improved by 0.00271146.
Iteration 14.

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      19       6693.87   0.000631549       354.723   1.284e-06       0.001       63  LS failed, Hessian reset 
      42       6695.87   9.28078e-05       101.483           1           1       98   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance

hour:1  RMSE:186.34676505206286

*********** HOUR 2 *******************


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -13.4415
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      17       6705.32   0.000751654       370.411   1.403e-06       0.001       61  LS failed, Hessian reset 
      33       6706.85   2.32222e-08       91.2326   2.079e-10       0.001      136  LS failed, Hessian reset 
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
Fit completed
hour:2  RMSE:187.3975799841232

*********** HOUR 3 *******************
Initial log joint probability = -13.4154




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      15       6718.73    0.00148557       383.593   2.404e-06       0.001       62  LS failed, Hessian reset 
      34       6719.05   3.34378e-05       98.8048   2.572e-09       0.001      142  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made
Fit completed



INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -2728.55
Iteration  1. Log joint probability =    3681.85. Improved by 6410.39.
Iteration  2. Log joint probability =    4275.43. Improved by 593.581.
Iteration  3. Log joint probability =    4275.66. Improved by 0.229874.
Iteration  4. Log joint probability =    4275.93. Improved by 0.271389.
Iteration  5. Log joint probability =     6247.8. Improved by 1971.87.
Iteration  6. Log joint probability =     6659.4. Improved by 411.599.
Iteration  7. Log joint probability =    6698.84. Improved by 39.4413.
Iteration  8. Log joint probability =    6718.41. Improved by 19.5767.
Iteration  9. Log joint probability =    6718.48. Improved by 0.0630949.
Iteration 10. Log joint probability =    6718.87. Improved by 0.391493.
Iteration 11. Log joint probability =       6719. Improved by 0.127663.
Iteration 12. Log joint probability =    6719.02. Improved by 0.0188317.
Iteration 13. Log joint probability =    6719.04. Improved by 0.0224963.
Iteration 14. Log joint pr

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      14       6724.25    0.00107017       498.027   1.416e-06       0.001       59  LS failed, Hessian reset 
      20       6724.69   0.000262925       152.728   1.914e-06       0.001      102  LS failed, Hessian reset 
      47       6724.86   4.88296e-06       100.239      0.2528     0.02528      149   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
hour:4  RMSE:189.29168765678352

*********** HOUR 5 *******************
Initial log joint probability = -14.0382
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      25       6731.84   7.21922e-09       111.961    0.000817     0.04937       38   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
hour:5  RMSE:191.8341467590629

*********** HOUR 6 *****************

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -14.5821
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      19       6776.34   0.000101191       88.0074    9.26e-07       0.001       69  LS failed, Hessian reset 
      33       6776.57   4.84955e-05       86.7094   5.068e-07       0.001      124  LS failed, Hessian reset 
      34       6776.57   4.84955e-05       86.7094   2.923e-09       0.001      181  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Fit completed
Initial log joint probability = -2729.71
Iteration  1. Log joint probability =    1586.55. Improved by 4316.27.
Iteration  2. Log joint probability =    2848.12. Improved by 1261.57.
Iteration  3. Log joint probability =    2848.21. Improved by 0.0842385.
Iteration  4. Log joint probability =    6742.88. Improved by 3894.67.
Iteration  5. Log joint probability =    6743.35. Improved by 0.47209.


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


6.20704e-07.
Iteration 28. Log joint probability =    6776.57. Improved by 6.38985e-07.
Iteration 29. Log joint probability =    6776.57. Improved by 3.02121e-07.
Iteration 30. Log joint probability =    6776.57. Improved by 1.30058e-10.
hour:6  RMSE:194.2643019179783

*********** HOUR 7 *******************
Initial log joint probability = -15.9881




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      16        6814.2   0.000294126       163.776   8.721e-07       0.001       59  LS failed, Hessian reset 
      19        6814.3   0.000109822       122.725    1.02e-06       0.001       97  LS failed, Hessian reset 
      38       6814.38   0.000113454       95.4946   6.185e-08       0.001      188  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.



Initial log joint probability = -2731.12
Fit completed
Iteration  1. Log joint probability =    2741.54. Improved by 5472.66.
Iteration  2. Log joint probability =    5714.98. Improved by 2973.44.
Iteration  3. Log joint probability =       5715. Improved by 0.022302.
Iteration  4. Log joint probability =    5715.22. Improved by 0.217404.
Iteration  5. Log joint probability =    6559.07. Improved by 843.85.
Iteration  6. Log joint probability =    6780.71. Improved by 221.643.
Iteration  7. Log joint probability =    6813.47. Improved by 32.7548.
Iteration  8. Log joint probability =    6813.49. Improved by 0.0231627.
Iteration  9. Log joint probability =    6813.92. Improved by 0.431748.
Iteration 10. Log joint probability =    6814.31. Improved by 0.3853.
Iteration 11. Log joint probability =    6814.37. Improved by 0.0604549.
Iteration 12. Log joint probability =    6814.37. Improved by 0.00246074.
Iteration 13. Log joint probability =    6814.38. Improved by 0.00776272.
Iteration 



Initial log joint probability = -13.7477
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      10       6878.66   0.000391308       143.249   3.113e-06       0.001       57  LS failed, Hessian reset 
      13       6878.68   4.85693e-05       89.7547   1.086e-07       0.001      113  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -2728.88
Iteration  1. Log joint probability =    3181.37. Improved by 5910.25.
Iteration  2. Log joint probability =    4966.63. Improved by 1785.26.
Iteration  3. Log joint probability =    6600.11. Improved by 1633.48.
Iteration  4. Log joint probability =     6789.3. Improved by 189.189.
Iteration  5. Log joint probability =    6856.42. Improved by 67.1204.
Iteration  6. Log joint probability =    6878.54. Improved by 22.12.
Iteration  7. Log joint probability =    6878.62. Improved by 0.079739.
Iteration  8. Log joint probability =    6878.68. Improved by 0.0588025.
Iteration  9. Log joint probability =    6878.68. Improved by 0.00291555.
Iteration 10. Log joint probability =    6878.69. Improved by 0.00730653.
Iteration 11. Log joint probability =    6878.69. Improved by 0.000355255.
Iteration 12. Log joint probability =    6878.69. Improved by 0.000922478.
Iteration 13. Log joint probability =    6878.69. Improved by 3.52417e-05.
Ite

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      17       6851.68   0.000490816       269.361   8.747e-07       0.001       80  LS failed, Hessian reset 
      41       6852.43   5.18864e-06       98.2821      0.2884      0.2884      116   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
hour:10  RMSE:187.00238205597404

*********** HOUR 11 *******************




Initial log joint probability = -13.7705


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      17       6950.42   0.000971102       568.078   9.388e-07       0.001       69  LS failed, Hessian reset 
      26       6952.15   0.000911724       439.026   1.671e-06       0.001      116  LS failed, Hessian reset 
      36       6952.35   0.000135191       102.091   8.182e-08       0.001      188  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made
Fit completed

Initial log joint probability = -2728.9
Iteration  1. Log joint probability =    3483.68. Improved by 6212.59.
Iteration  2. Log joint probability =    5557.11. Improved by 2073.43.
Iteration  3. Log joint probability =    5557.34. Improved by 0.224752.
Iteration  4. Log joint probability =    5557.98. Improved by 0.63709.
Iteration  5. Log joint probability =    5558.21. Improved by 0.229813.
Iteration  6. Log joint probability =    5

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       7005.23   0.000820467       528.177   1.236e-06       0.001       70  LS failed, Hessian reset 
      34       7007.57   0.000324417       185.702   1.903e-06       0.001      133  LS failed, Hessian reset 
      40       7007.58   7.97686e-08       90.3988     0.04141      0.6132      145   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance

hour:12  RMSE:188.44885620689382

*********** HOUR 13 *******************
Initial log joint probability = -19.7605




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6969.42    0.00121833       608.737   7.763e-07       0.001       78  LS failed, Hessian reset 
      44       6972.38   0.000139502       98.2955   4.697e-08       0.001      179  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed

Initial log joint probability = -2734.89
Iteration  1. Log joint probability =    3298.66. Improved by 6033.56.
Iteration  2. Log joint probability =    3824.69. Improved by 526.022.
Iteration  3. Log joint probability =    3825.26. Improved by 0.573853.
Iteration  4. Log joint probability =    6426.59. Improved by 2601.33.
Iteration  5. Log joint probability =     6427.4. Improved by 0.809795.
Iteration  6. Log joint probability =    6428.46. Improved by 1.06735.
Iteration  7. Log joint probability =    6428.78. Improved by 0.320381.
Iteration  8. Log joint probability =    6712.06. Improved by 283.273.
Iteration  9. Log joint probability =    6932.99. Improved by 220.932.
Iteration 10. Log joint probability =    6970.27. Improved by 37.2823.
Iteration 11. Log joint probability =    6971.03. Improved by 0.758556.
Iteration 12. Log joint probability =    6971.18. Improved by 0.150651.
Iteration 13. Log joint probability =    6971.48. Improved by 0.304265.
Iteration 14. L

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -24.3032
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       6924.42   0.000586112       349.975   8.154e-07       0.001       75  LS failed, Hessian reset 
      52       6928.13   3.89982e-05           100           1           1      120   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
hour:14  RMSE:200.6182647797532

*********** HOUR 15 *******************
Initial log joint probability = -28.5135




    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6880.76    0.00103344       493.939   7.344e-07       0.001       68  LS failed, Hessian reset 
      31       6884.08   0.000431671       82.0927   8.116e-07       0.001      142  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Fit completed
Initial log joint probability = -2743.65
Iteration  1. Log joint probability =    2963.66. Improved by 5707.3.
Iteration  2. Log joint probability =    4763.56. Improved by 1799.9.
Iteration  3. Log joint probability =    4763.95. Improved by 0.386202.
Iteration  4. Log joint probability =    4764.06. Improved by 0.118245.
Iteration  5. Log joint probability =    6375.98. Improved by 1611.92.
Iteration  6. Log joint probability =    6376.83. Improved by 0.849338.
Iteration  7. Log joint probability =    6849.47. Improved by 472.64.
Iteration  8

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


ty =    6871.79. Improved by 2.11676.
Iteration 15. Log joint probability =    6872.65. Improved by 0.859745.
Iteration 16. Log joint probability =    6874.45. Improved by 1.79525.
Iteration 17. Log joint probability =     6875.4. Improved by 0.953218.
Iteration 18. Log joint probability =    6875.52. Improved by 0.112807.
Iteration 19. Log joint probability =    6876.31. Improved by 0.797174.
Iteration 20. Log joint probability =    6877.56. Improved by 1.25079.
Iteration 21. Log joint probability =    6883.39. Improved by 5.82201.
Iteration 22. Log joint probability =    6883.76. Improved by 0.371275.
Iteration 23. Log joint probability =    6883.84. Improved by 0.0836318.
Iteration 24. Log joint probability =    6883.92. Improved by 0.0822013.
Iteration 25. Log joint probability =    6883.97. Improved by 0.0464835.
Iteration 26. Log joint probability =    6884.07. Improved by 0.102628.
Iteration 27. Log joint probability =    6884.07. Improved by 0.00138575.
Iteration 28. Log joint 

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Initial log joint probability = -31.5492
Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      23       6839.21    0.00143781       316.633    3.58e-06       0.001       68  LS failed, Hessian reset 
      34       6843.38   0.000265681       76.9604   7.184e-07       0.001      122  LS failed, Hessian reset 
      36       6843.43   0.000106046       73.3561   1.326e-06       0.001      163  LS failed, Hessian reset 
      37       6843.43   7.93972e-07       162.005   1.082e-08       0.001      197  LS failed, Hessian reset 
      46       6843.44   5.19761e-05       101.597      0.2257           1      213   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
hour:16  RMSE:207.80861051382516

*********** HOUR 17 *******************
Initial log joint probability = -31.2903


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       6823.05    0.00111246       584.715   1.026e-06       0.001       76  LS failed, Hessian reset 
      35       6826.72   0.000147504       95.4372   1.123e-06       0.001      126  LS failed, Hessian reset 
      44       6826.75   1.19197e-05        95.282   3.936e-08       0.001      196  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made
Fit completed

Initial log joint probability = -2746.42
Iteration  1. Log joint probability =    2976.66. Improved by 5723.08.
Iteration  2. Log joint probability =    4764.97. Improved by 1788.3.
Iteration  3. Log joint probability =    6349.54. Improved by 1584.58.
Iteration  4. Log joint probability =     6810.7. Improved by 461.162.
Iteration  5. Log joint probability =    6826.63. Improved by 15.9272.
Iteration  6. Log joint probability =    682



Initial log joint probability = -28.8141
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6758.06    0.00247581        876.18   1.729e-06       0.001       72  LS failed, Hessian reset 
      31       6761.25   6.17785e-05       83.6245   5.632e-07       0.001      128  LS failed, Hessian reset 
      32       6761.25   6.17785e-05       83.6245   2.988e-08       0.001      185  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made



INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -2743.95
Iteration  1. Log joint probability =    2899.66. Improved by 5643.6.
Iteration  2. Log joint probability =       4997. Improved by 2097.35.
Iteration  3. Log joint probability =    4997.68. Improved by 0.673885.
Iteration  4. Log joint probability =     6117.4. Improved by 1119.72.
Iteration  5. Log joint probability =    6117.43. Improved by 0.0299159.
Iteration  6. Log joint probability =    6117.64. Improved by 0.206262.
Iteration  7. Log joint probability =    6672.64. Improved by 555.
Iteration  8. Log joint probability =    6736.32. Improved by 63.6818.
Iteration  9. Log joint probability =    6760.31. Improved by 23.9869.
Iteration 10. Log joint probability =    6760.77. Improved by 0.464825.
Iteration 11. Log joint probability =    6760.99. Improved by 0.217145.
Iteration 12. Log joint probability =    6761.24. Improved by 0.254556.
Iteration 13. Log joint probability =    6761.32. Improved by 0.0726413.
Iteration 14. Log 



    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6754.51    0.00114461       443.468   9.168e-07       0.001       81  LS failed, Hessian reset 
      29       6758.24   0.000480541       126.721   2.899e-06       0.001      127  LS failed, Hessian reset 
      32       6758.33   0.000149917        98.205    1.28e-06       0.001      175  LS failed, Hessian reset 
      40       6758.37    4.7653e-05       97.3988   1.121e-08       0.001      245  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Fit completed
Initial log joint probability = -2738.89
Iteration  1. Log joint probability =     2872.3. Improved by 5611.19.
Iteration  2. Log joint probability =    4825.58. Improved by 1953.28.
Iteration  3. Log joint probability =    5550.23. Improved by 724.647.
Iteration  4. Log joint probability =    6536.71. Improved by 986.486.
Ite

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


bability =    6758.37. Improved by 0.0102157.
Iteration 15. Log joint probability =    6758.37. Improved by 0.00510784.
Iteration 16. Log joint probability =    6758.37. Improved by 7.97258e-07.
Iteration 17. Log joint probability =    6758.37. Improved by 1.37935e-07.
Iteration 18. Log joint probability =    6758.37. Improved by 1.81444e-08.
Iteration 19. Log joint probability =    6758.37. Improved by 5.99393e-08.
Iteration 20. Log joint probability =    6758.37. Improved by 8.21819e-09.
hour:19  RMSE:211.6445457064888

*********** HOUR 20 *******************
Initial log joint probability = -19.8631
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      23       6781.23    0.00111292       627.161   8.997e-07       0.001       76  LS failed, Hessian reset 
      35       6784.28   0.000108029       120.472   8.587e-07       0.001      129  LS failed, Hessian reset 
      37       6784.28    5.8372e-05       92.5646   6.322e-08       0.001   

INFO:fbprophet:n_changepoints greater than number of observations. Using -1.



Fit completed
Initial log joint probability = -2735
Iteration  1. Log joint probability =    2976.95. Improved by 5711.95.
Iteration  2. Log joint probability =    5395.34. Improved by 2418.39.
Iteration  3. Log joint probability =    5395.84. Improved by 0.500481.
Iteration  4. Log joint probability =    5396.48. Improved by 0.639123.
Iteration  5. Log joint probability =    5567.23. Improved by 170.748.
Iteration  6. Log joint probability =    5567.75. Improved by 0.525685.
Iteration  7. Log joint probability =    5568.18. Improved by 0.422687.
Iteration  8. Log joint probability =    5568.23. Improved by 0.0516151.
Iteration  9. Log joint probability =    5568.41. Improved by 0.185475.
Iteration 10. Log joint probability =    5931.35. Improved by 362.932.
Iteration 11. Log joint probability =    6751.31. Improved by 819.965.
Iteration 12. Log joint probability =    6783.89. Improved by 32.5818.
Iteration 13. Log joint probability =    6784.07. Improved by 0.18076.
Iteration 14. Log



Initial log joint probability = -16.1695
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      18       6808.96    0.00183442       931.602   1.493e-06       0.001       84  LS failed, Hessian reset 
      27       6810.48   5.15334e-05       150.927   5.851e-07       0.001      128  LS failed, Hessian reset 
      40       6810.66   6.34621e-05       95.5731   4.853e-08       0.001      203  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made



INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Initial log joint probability = -2731.3
Iteration  1. Log joint probability =    3113.01. Improved by 5844.32.
Iteration  2. Log joint probability =    5566.86. Improved by 2453.84.
Iteration  3. Log joint probability =    5567.42. Improved by 0.562193.
Iteration  4. Log joint probability =    5567.91. Improved by 0.493342.
Iteration  5. Log joint probability =    5567.95. Improved by 0.0360923.
Iteration  6. Log joint probability =    5567.96. Improved by 0.0127019.
Iteration  7. Log joint probability =       5568. Improved by 0.0369324.
Iteration  8. Log joint probability =    5568.01. Improved by 0.0118531.
Iteration  9. Log joint probability =    5568.05. Improved by 0.0377724.
Iteration 10. Log joint probability =    5568.06. Improved by 0.0110044.
Iteration 11. Log joint probability =     5568.1. Improved by 0.0386122.
Iteration 12. Log joint probability =    5568.11. Improved by 0.0101559.
Iteration 13. Log joint probability =    5568.15. Improved by 0.0394519.
Ite



Initial log joint probability = -15.3657
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       6887.85   0.000544991       352.024   2.152e-06       0.001       81  LS failed, Hessian reset 
      32       6891.25   0.000338558       253.857   1.161e-06       0.001      139  LS failed, Hessian reset 
      47       6891.32   1.34597e-05       100.416   5.834e-09       0.001      216  LS failed, Hessian reset 
Optimization terminated with error: 
  Line search failed to achieve a sufficient decrease, no more progress can be made

Initial log joint probability = -2730.5


INFO:fbprophet:n_changepoints greater than number of observations. Using -1.


Fit completed
Iteration  1. Log joint probability =    2225.98. Improved by 4956.48.
Iteration  2. Log joint probability =    5618.63. Improved by 3392.65.
Iteration  3. Log joint probability =    5618.77. Improved by 0.146361.
Iteration  4. Log joint probability =    5619.79. Improved by 1.02305.
Iteration  5. Log joint probability =    5619.96. Improved by 0.168559.
Iteration  6. Log joint probability =    5620.95. Improved by 0.987063.
Iteration  7. Log joint probability =    5621.14. Improved by 0.190642.
Iteration  8. Log joint probability =    5622.09. Improved by 0.951648.
Iteration  9. Log joint probability =    5622.31. Improved by 0.212609.
Iteration 10. Log joint probability =    5623.22. Improved by 0.916778.
Iteration 11. Log joint probability =    5623.46. Improved by 0.234458.
Iteration 12. Log joint probability =    5624.34. Improved by 0.882431.
Iteration 13. Log joint probability =    5624.59. Improved by 0.25619.
Iteration 14. Log joint probability =    5625.44. Impr

Fit completed
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      21       6855.38   0.000848523        591.61    1.24e-06       0.001       77  LS failed, Hessian reset 
      45       6857.87   8.73161e-07       98.4634       0.187      0.8674      114   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
hour:23  RMSE:190.4998978679353
CPU times: user 1min 7s, sys: 12.6 s, total: 1min 19s
Wall time: 1min 5s


Unnamed: 0,hour,rmse
0,0,187.332357
1,1,186.346765
2,2,187.39758
3,3,188.146769
4,4,189.291688
5,5,191.834147
6,6,194.264302
7,7,199.528167
8,8,0.0
9,9,193.796049


In [65]:
val_mask_8 = X_val_df['hour_8']==1
preds[val_mask_8] = train_forecast_orig[val_mask_8]['yhat']

In [66]:
pd.Series(preds[val_mask]).describe()

count    1096.000000
mean     1917.530697
std       160.263600
min      1547.674327
25%      1793.481494
50%      1911.356213
75%      2041.403277
max      2296.481672
dtype: float64

In [67]:
train_forecast = train_model.predict(X_val_df[:val_period])
RMSE = mean_squared_error(X_val_df[:val_period]['y'],preds,squared=False)
print('RMSE------------------------------------',RMSE)

RMSE------------------------------------ 200.83675875238967


In [111]:
# for col in regressors:
#     train_model.add_regressor(col,prior_scale=2,standardize=False)

In [112]:
# %%time
# np.random.seed(100)
# train_model.fit(X_train_df[:val_start],
#                 seed=100,
#                 chains=4,
#                 control={'adapt_delta': 0.99,
# #                          'max_treedepth': 20
#                         })

In [113]:
# future_df = train_model.make_future_dataframe(periods=val_period, freq='H',include_history = False)
# X_val_df['cap']=4000
# X_val_df['floor']=1000
# future_w_reg = add_regressors(future_df,X_val,regressors)
# np.random.seed(seed)
train_forecast = train_model.predict(X_val_df[:val_period])
RMSE = mean_squared_error(X_val_df[:val_period]['y'],abs(train_forecast['yhat']),squared=False)
# RMSE = mean_squared_error(X_val_df['y'],abs(train_forecast['yhat']),squared=False)
print('RMSE------------------------------------',RMSE)

RMSE------------------------------------ 2579.996822662037


In [114]:
train_model.params

{'k': array([[-1.35045066]]),
 'm': array([[0.77294952]]),
 'delta': array([[ 3.08316624e-01, -1.96373907e-01,  2.79175986e-01,
          9.80756973e-01,  3.86184085e-01,  2.36364539e-07,
         -4.64470011e-02,  3.33694229e-01,  7.20085442e-01,
         -2.50540117e+00,  1.85133023e+00, -1.95431932e-01,
         -1.38176840e+00,  9.94796137e-01, -1.50604983e+00,
         -1.55864295e-05,  1.40514804e-01,  1.46892722e+00,
          1.91720877e-05, -2.22334981e-06,  1.85894336e-06,
          3.46391598e-01,  3.74932751e-01,  2.32170925e-06,
         -5.16741408e-01]]),
 'sigma_obs': array([[0.05933308]]),
 'beta': array([[ 0.01589306,  0.02475566, -0.01742891, -0.00941122,  0.00258842,
         -0.00388551, -0.06623547, -0.03588465, -0.03214661,  0.0065722 ,
         -0.00404668,  0.00178886,  0.00111381, -0.00349183]]),
 'trend': array([[0.77294952, 0.77279534, 0.77264117, ..., 0.75784442, 0.75789996,
         0.7579555 ]])}

In [138]:
train_model.params

{'k': array([[-0.88514886]]),
 'm': array([[0.66600261]]),
 'delta': array([[ 9.14155528e-08,  1.61763169e-01,  1.05112958e+00,
          3.29164144e-01,  8.05161734e-02,  3.18748759e-06,
         -3.03761673e-03, -2.24862480e+00,  5.47116294e-07,
          2.52836591e+00,  4.14730457e-07, -3.44690511e-01,
          2.51529226e-01,  4.49843052e-03, -1.68443161e+00,
          1.64315982e-07,  4.37472304e-08,  2.31372416e+00,
          3.83829565e-02, -2.31199367e+00,  3.39078956e-01,
          6.19868496e-01, -2.60692226e-01,  4.50571166e-05,
         -4.71186399e-07]]),
 'sigma_obs': array([[0.04805574]]),
 'beta': array([[ 0.01601887,  0.03336807, -0.02051907, -0.01746586, -0.00013766,
         -0.00313971, -0.12490724, -0.04320853, -0.01491489,  0.01153598,
         -0.00398572,  0.00218769, -0.00420669,  0.00025067]]),
 'trend': array([[0.66600261, 0.66560154, 0.66520048, ..., 0.74404938, 0.74404007,
         0.74403076]])}

In [None]:
from fbprophet.serialize import model_to_json, model_from_json

with open('fbprophet_mcmc100_w_reg.json', 'w') as fout:
    fout.write(model_to_json(train_model))  # Save model

with open('fbprophet_mcmc100_w_reg.json', 'r') as fin:
    temp_model = model_from_json(fin.read())  # Load model

In [None]:
train_forecast[['yhat']].to_csv('preds.csv',index=False)

In [None]:
train_forecast['yhat']