In [1]:
import pandas as pd
import numpy as np
import datetime
import itertools

from pandas.tseries.holiday import USFederalHolidayCalendar
import matplotlib.pyplot as plt

from prophet import Prophet
from prophet.diagnostics import cross_validation
#pull in the performance metrics
from prophet.diagnostics import performance_metrics

import warnings
warnings.filterwarnings('ignore')

import wrangle

##### Prep Regressors

In [2]:
def get_prophet_df_w_meantemp():
    '''
    Retrieves a cleaned dataframe and formats it for input into
    the FB Prophet model.
    
    NOTE: Prophet does not support timezone - need it in UTC, then make tz naive
    '''
    #Acquire combined dataframe
    df = wrangle.get_combined_df(get_central = False)
    #Calculate mean_temp column
    df['mean_temp'] = (df.hs_temp + df.gv_temp + df.pl_temp + df.vc_temp)/4

    #Pull index/load/temp data into new dataframe
    df2 = pd.DataFrame(df[['ercot_load','mean_temp']])
    
    #Move index out
    df2.reset_index(drop=False, inplace=True)
    #Rename columns
    df2.rename(columns = {'datetime':'ds','ercot_load':'y'},inplace=True)
    #Make TZ naive
    df2.ds = df2.ds.dt.tz_localize(None)
    
    return df2

In [3]:
#get new df with the meantemp column
dfr = get_prophet_df_w_meantemp()

trainr = dfr[dfr.ds < '2018-01-01 06:00:00']

In [4]:
trainr

Unnamed: 0,ds,y,mean_temp
0,2010-01-01 06:00:00,7931.241900,49.025
1,2010-01-01 07:00:00,7775.456846,48.025
2,2010-01-01 08:00:00,7704.815982,46.800
3,2010-01-01 09:00:00,7650.575724,45.775
4,2010-01-01 10:00:00,7666.708317,44.925
...,...,...,...
70123,2018-01-01 01:00:00,12061.549401,44.200
70124,2018-01-01 02:00:00,12015.663549,41.925
70125,2018-01-01 03:00:00,11883.114122,40.275
70126,2018-01-01 04:00:00,11754.250889,39.700


##### Prep Holidays

holidays: pd.DataFrame with columns holiday (string) and ds (date type) and optionally columns lower_window and upper_window which specify arange of days around the date to be included as holidays. lower_window=-2 will include 2 days prior to the date as holidays. Also optionally can have a column prior_scale specifying the prior scale for that holiday.

In [5]:
#create calendar object
cal = USFederalHolidayCalendar()
#get as list of dates
train_holidays = cal.holidays(start=trainr.ds.min(),end=trainr.ds.max())

# Transition to dataframe with holiday, ds columns
holiday_df = pd.DataFrame(trainr.ds)
holiday_df['holiday'] = holiday_df.ds.dt.date.astype(str).isin(train_holidays.astype(str)).astype(int)

In [6]:
holiday_df

Unnamed: 0,ds,holiday
0,2010-01-01 06:00:00,0
1,2010-01-01 07:00:00,0
2,2010-01-01 08:00:00,0
3,2010-01-01 09:00:00,0
4,2010-01-01 10:00:00,0
...,...,...
70123,2018-01-01 01:00:00,1
70124,2018-01-01 02:00:00,1
70125,2018-01-01 03:00:00,1
70126,2018-01-01 04:00:00,1


In [7]:
only_holidays = holiday_df[holiday_df.holiday==1]

In [8]:
only_holidays.holiday = only_holidays.holiday.astype(str)
only_holidays

Unnamed: 0,ds,holiday
402,2010-01-18 00:00:00,1
403,2010-01-18 01:00:00,1
404,2010-01-18 02:00:00,1
405,2010-01-18 03:00:00,1
406,2010-01-18 04:00:00,1
...,...,...
70123,2018-01-01 01:00:00,1
70124,2018-01-01 02:00:00,1
70125,2018-01-01 03:00:00,1
70126,2018-01-01 04:00:00,1


In [9]:
only_holidays.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1902 entries, 402 to 70127
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   ds       1902 non-null   datetime64[ns]
 1   holiday  1902 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 44.6+ KB


##### Put our hyperparameters together

In [10]:
d_opts = [10,20,30]
w_opts = [10,20]
reg_scale = [10, 30]
# y_opts = [1] #change to 1 & 10
d_fo = range(2,4)
w_fo = range(2,4)
# y_fo = range(3,5)

param_list = list(itertools.product(d_opts,w_opts,d_fo,d_fo,reg_scale))

In [11]:
param_list[0:4]

[(10, 10, 2, 2, 10),
 (10, 10, 2, 2, 30),
 (10, 10, 2, 3, 10),
 (10, 10, 2, 3, 30)]

In [12]:
#initialize model performance dataframe
model_perf = pd.DataFrame()

In [13]:
#Set metrics to evaluate
metrics = ['rmse','mape']

In [14]:
#Loop over the parameter combinations
for params in param_list[25:35]:
    #GENERATE MODEL
    m = Prophet(yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
                holidays=only_holidays)
    #ADD SEASONALITY
    m = m.add_seasonality(name='daily', 
                          period=1, 
                          fourier_order=params[2],
                          prior_scale=params[0]
                         )
    m = m.add_seasonality(name='weekly', 
                          period=7, 
                          fourier_order=params[3],
                          prior_scale=params[1]
                         )
#     m = m.add_seasonality(name='yearly', 
#                           period=365.25, 
#                           fourier_order=params[5],
#                           prior_scale=params[2]
#                          )
    #ADD REGRESSOR
    m = m.add_regressor('mean_temp', prior_scale = params[4])
    
    #FIT MODEL
    m.fit(trainr)
    
    #CREATE CROSS VALIDATION DF
    df_cv_loop = cross_validation(m, initial='1461 days', period='36 days', horizon = '3 days') 
    #get 1 day/3day metrics
    df_cv_loop_1d = performance_metrics(df_cv_loop, rolling_window=.33, metrics=metrics)
    df_cv_loop_3d = performance_metrics(df_cv_loop, rolling_window=1, metrics=metrics)
    
    #STORE MODEL PARAMETERS AND PERFORMANCE
    dict = {
        'daily_order': params[2],
        'weekly_order': params[3],
        'daily_scale': params[0],
        'weekly_scale': params[1],
        'regressor_scale': params[4],
        '1d_rmse': df_cv_loop_1d.loc[0,'rmse'],
        '1d_mape': df_cv_loop_1d.loc[0,'mape'],
        '3d_rmse': df_cv_loop_3d.loc[0,'rmse'],
        '3d_mape': df_cv_loop_3d.loc[0,'mape']  
    }
    model_perf = model_perf.append(dict,ignore_index=True)


17:04:27 - cmdstanpy - INFO - Chain [1] start processing
17:04:51 - cmdstanpy - INFO - Chain [1] done processing


  0%|          | 0/41 [00:00<?, ?it/s]

17:04:54 - cmdstanpy - INFO - Chain [1] start processing
17:05:08 - cmdstanpy - INFO - Chain [1] done processing
17:05:09 - cmdstanpy - INFO - Chain [1] start processing
17:05:22 - cmdstanpy - INFO - Chain [1] done processing
17:05:23 - cmdstanpy - INFO - Chain [1] start processing
17:05:38 - cmdstanpy - INFO - Chain [1] done processing
17:05:40 - cmdstanpy - INFO - Chain [1] start processing
17:05:54 - cmdstanpy - INFO - Chain [1] done processing
17:05:55 - cmdstanpy - INFO - Chain [1] start processing
17:06:07 - cmdstanpy - INFO - Chain [1] done processing
17:06:08 - cmdstanpy - INFO - Chain [1] start processing
17:06:20 - cmdstanpy - INFO - Chain [1] done processing
17:06:21 - cmdstanpy - INFO - Chain [1] start processing
17:06:37 - cmdstanpy - INFO - Chain [1] done processing
17:06:38 - cmdstanpy - INFO - Chain [1] start processing
17:06:52 - cmdstanpy - INFO - Chain [1] done processing
17:06:53 - cmdstanpy - INFO - Chain [1] start processing
17:07:08 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

17:20:02 - cmdstanpy - INFO - Chain [1] start processing
17:20:14 - cmdstanpy - INFO - Chain [1] done processing
17:20:15 - cmdstanpy - INFO - Chain [1] start processing
17:20:30 - cmdstanpy - INFO - Chain [1] done processing
17:20:31 - cmdstanpy - INFO - Chain [1] start processing
17:20:43 - cmdstanpy - INFO - Chain [1] done processing
17:20:45 - cmdstanpy - INFO - Chain [1] start processing
17:20:56 - cmdstanpy - INFO - Chain [1] done processing
17:20:58 - cmdstanpy - INFO - Chain [1] start processing
17:21:11 - cmdstanpy - INFO - Chain [1] done processing
17:21:12 - cmdstanpy - INFO - Chain [1] start processing
17:21:25 - cmdstanpy - INFO - Chain [1] done processing
17:21:27 - cmdstanpy - INFO - Chain [1] start processing
17:21:43 - cmdstanpy - INFO - Chain [1] done processing
17:21:44 - cmdstanpy - INFO - Chain [1] start processing
17:21:57 - cmdstanpy - INFO - Chain [1] done processing
17:21:58 - cmdstanpy - INFO - Chain [1] start processing
17:22:12 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

17:34:55 - cmdstanpy - INFO - Chain [1] start processing
17:35:07 - cmdstanpy - INFO - Chain [1] done processing
17:35:09 - cmdstanpy - INFO - Chain [1] start processing
17:35:22 - cmdstanpy - INFO - Chain [1] done processing
17:35:24 - cmdstanpy - INFO - Chain [1] start processing
17:35:38 - cmdstanpy - INFO - Chain [1] done processing
17:35:39 - cmdstanpy - INFO - Chain [1] start processing
17:35:52 - cmdstanpy - INFO - Chain [1] done processing
17:35:53 - cmdstanpy - INFO - Chain [1] start processing
17:36:05 - cmdstanpy - INFO - Chain [1] done processing
17:36:06 - cmdstanpy - INFO - Chain [1] start processing
17:36:22 - cmdstanpy - INFO - Chain [1] done processing
17:36:23 - cmdstanpy - INFO - Chain [1] start processing
17:36:38 - cmdstanpy - INFO - Chain [1] done processing
17:36:40 - cmdstanpy - INFO - Chain [1] start processing
17:36:52 - cmdstanpy - INFO - Chain [1] done processing
17:36:54 - cmdstanpy - INFO - Chain [1] start processing
17:37:06 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

17:49:38 - cmdstanpy - INFO - Chain [1] start processing
17:49:54 - cmdstanpy - INFO - Chain [1] done processing
17:49:55 - cmdstanpy - INFO - Chain [1] start processing
17:50:08 - cmdstanpy - INFO - Chain [1] done processing
17:50:09 - cmdstanpy - INFO - Chain [1] start processing
17:50:22 - cmdstanpy - INFO - Chain [1] done processing
17:50:24 - cmdstanpy - INFO - Chain [1] start processing
17:50:39 - cmdstanpy - INFO - Chain [1] done processing
17:50:40 - cmdstanpy - INFO - Chain [1] start processing
17:50:52 - cmdstanpy - INFO - Chain [1] done processing
17:50:53 - cmdstanpy - INFO - Chain [1] start processing
17:51:07 - cmdstanpy - INFO - Chain [1] done processing
17:51:08 - cmdstanpy - INFO - Chain [1] start processing
17:51:22 - cmdstanpy - INFO - Chain [1] done processing
17:51:24 - cmdstanpy - INFO - Chain [1] start processing
17:51:37 - cmdstanpy - INFO - Chain [1] done processing
17:51:38 - cmdstanpy - INFO - Chain [1] start processing
17:51:54 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

18:04:17 - cmdstanpy - INFO - Chain [1] start processing
18:04:28 - cmdstanpy - INFO - Chain [1] done processing
18:04:29 - cmdstanpy - INFO - Chain [1] start processing
18:04:45 - cmdstanpy - INFO - Chain [1] done processing
18:04:47 - cmdstanpy - INFO - Chain [1] start processing
18:04:59 - cmdstanpy - INFO - Chain [1] done processing
18:05:01 - cmdstanpy - INFO - Chain [1] start processing
18:05:16 - cmdstanpy - INFO - Chain [1] done processing
18:05:18 - cmdstanpy - INFO - Chain [1] start processing
18:05:30 - cmdstanpy - INFO - Chain [1] done processing
18:05:32 - cmdstanpy - INFO - Chain [1] start processing
18:05:45 - cmdstanpy - INFO - Chain [1] done processing
18:05:46 - cmdstanpy - INFO - Chain [1] start processing
18:06:02 - cmdstanpy - INFO - Chain [1] done processing
18:06:03 - cmdstanpy - INFO - Chain [1] start processing
18:06:20 - cmdstanpy - INFO - Chain [1] done processing
18:06:22 - cmdstanpy - INFO - Chain [1] start processing
18:06:33 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

18:18:25 - cmdstanpy - INFO - Chain [1] start processing
18:18:37 - cmdstanpy - INFO - Chain [1] done processing
18:18:39 - cmdstanpy - INFO - Chain [1] start processing
18:18:52 - cmdstanpy - INFO - Chain [1] done processing
18:18:54 - cmdstanpy - INFO - Chain [1] start processing
18:19:08 - cmdstanpy - INFO - Chain [1] done processing
18:19:09 - cmdstanpy - INFO - Chain [1] start processing
18:19:25 - cmdstanpy - INFO - Chain [1] done processing
18:19:26 - cmdstanpy - INFO - Chain [1] start processing
18:19:41 - cmdstanpy - INFO - Chain [1] done processing
18:19:42 - cmdstanpy - INFO - Chain [1] start processing
18:19:54 - cmdstanpy - INFO - Chain [1] done processing
18:19:55 - cmdstanpy - INFO - Chain [1] start processing
18:20:11 - cmdstanpy - INFO - Chain [1] done processing
18:20:13 - cmdstanpy - INFO - Chain [1] start processing
18:20:24 - cmdstanpy - INFO - Chain [1] done processing
18:20:25 - cmdstanpy - INFO - Chain [1] start processing
18:20:40 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

18:34:05 - cmdstanpy - INFO - Chain [1] start processing
18:34:18 - cmdstanpy - INFO - Chain [1] done processing
18:34:20 - cmdstanpy - INFO - Chain [1] start processing
18:34:35 - cmdstanpy - INFO - Chain [1] done processing
18:34:37 - cmdstanpy - INFO - Chain [1] start processing
18:34:51 - cmdstanpy - INFO - Chain [1] done processing
18:34:52 - cmdstanpy - INFO - Chain [1] start processing
18:35:12 - cmdstanpy - INFO - Chain [1] done processing
18:35:13 - cmdstanpy - INFO - Chain [1] start processing
18:35:29 - cmdstanpy - INFO - Chain [1] done processing
18:35:31 - cmdstanpy - INFO - Chain [1] start processing
18:35:50 - cmdstanpy - INFO - Chain [1] done processing
18:35:52 - cmdstanpy - INFO - Chain [1] start processing
18:36:14 - cmdstanpy - INFO - Chain [1] done processing
18:36:15 - cmdstanpy - INFO - Chain [1] start processing
18:36:30 - cmdstanpy - INFO - Chain [1] done processing
18:36:31 - cmdstanpy - INFO - Chain [1] start processing
18:36:49 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

18:50:39 - cmdstanpy - INFO - Chain [1] start processing
18:50:50 - cmdstanpy - INFO - Chain [1] done processing
18:50:51 - cmdstanpy - INFO - Chain [1] start processing
18:51:06 - cmdstanpy - INFO - Chain [1] done processing
18:51:07 - cmdstanpy - INFO - Chain [1] start processing
18:51:18 - cmdstanpy - INFO - Chain [1] done processing
18:51:19 - cmdstanpy - INFO - Chain [1] start processing
18:51:32 - cmdstanpy - INFO - Chain [1] done processing
18:51:34 - cmdstanpy - INFO - Chain [1] start processing
18:51:48 - cmdstanpy - INFO - Chain [1] done processing
18:51:49 - cmdstanpy - INFO - Chain [1] start processing
18:52:03 - cmdstanpy - INFO - Chain [1] done processing
18:52:05 - cmdstanpy - INFO - Chain [1] start processing
18:52:20 - cmdstanpy - INFO - Chain [1] done processing
18:52:21 - cmdstanpy - INFO - Chain [1] start processing
18:52:35 - cmdstanpy - INFO - Chain [1] done processing
18:52:37 - cmdstanpy - INFO - Chain [1] start processing
18:52:53 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

19:06:14 - cmdstanpy - INFO - Chain [1] start processing
19:06:24 - cmdstanpy - INFO - Chain [1] done processing
19:06:25 - cmdstanpy - INFO - Chain [1] start processing
19:06:38 - cmdstanpy - INFO - Chain [1] done processing
19:06:40 - cmdstanpy - INFO - Chain [1] start processing
19:06:57 - cmdstanpy - INFO - Chain [1] done processing
19:06:59 - cmdstanpy - INFO - Chain [1] start processing
19:07:12 - cmdstanpy - INFO - Chain [1] done processing
19:07:13 - cmdstanpy - INFO - Chain [1] start processing
19:07:26 - cmdstanpy - INFO - Chain [1] done processing
19:07:27 - cmdstanpy - INFO - Chain [1] start processing
19:07:39 - cmdstanpy - INFO - Chain [1] done processing
19:07:40 - cmdstanpy - INFO - Chain [1] start processing
19:07:58 - cmdstanpy - INFO - Chain [1] done processing
19:07:59 - cmdstanpy - INFO - Chain [1] start processing
19:08:10 - cmdstanpy - INFO - Chain [1] done processing
19:08:11 - cmdstanpy - INFO - Chain [1] start processing
19:08:27 - cmdstanpy - INFO - Chain [1]

  0%|          | 0/41 [00:00<?, ?it/s]

19:21:20 - cmdstanpy - INFO - Chain [1] start processing
19:21:30 - cmdstanpy - INFO - Chain [1] done processing
19:21:32 - cmdstanpy - INFO - Chain [1] start processing
19:21:44 - cmdstanpy - INFO - Chain [1] done processing
19:21:45 - cmdstanpy - INFO - Chain [1] start processing
19:22:00 - cmdstanpy - INFO - Chain [1] done processing
19:22:01 - cmdstanpy - INFO - Chain [1] start processing
19:22:14 - cmdstanpy - INFO - Chain [1] done processing
19:22:15 - cmdstanpy - INFO - Chain [1] start processing
19:22:26 - cmdstanpy - INFO - Chain [1] done processing
19:22:28 - cmdstanpy - INFO - Chain [1] start processing
19:22:41 - cmdstanpy - INFO - Chain [1] done processing
19:22:43 - cmdstanpy - INFO - Chain [1] start processing
19:23:00 - cmdstanpy - INFO - Chain [1] done processing
19:23:01 - cmdstanpy - INFO - Chain [1] start processing
19:23:14 - cmdstanpy - INFO - Chain [1] done processing
19:23:16 - cmdstanpy - INFO - Chain [1] start processing
19:23:31 - cmdstanpy - INFO - Chain [1]

In [15]:
model_perf

Unnamed: 0,daily_order,weekly_order,daily_scale,weekly_scale,regressor_scale,1d_rmse,1d_mape,3d_rmse,3d_mape
0,2.0,2.0,20.0,20.0,30.0,1951.049408,0.138458,1871.907046,0.132427
1,2.0,3.0,20.0,20.0,10.0,1948.811357,0.138311,1869.253744,0.132127
2,2.0,3.0,20.0,20.0,30.0,1948.987554,0.13824,1869.32891,0.132078
3,3.0,2.0,20.0,20.0,10.0,1947.618786,0.138273,1868.062006,0.132153
4,3.0,2.0,20.0,20.0,30.0,1946.571001,0.138214,1867.004396,0.13211
5,3.0,3.0,20.0,20.0,10.0,1943.942713,0.138001,1864.215799,0.131748
6,3.0,3.0,20.0,20.0,30.0,1943.076638,0.138041,1863.454956,0.131769
7,2.0,2.0,30.0,10.0,10.0,1951.285463,0.138506,1871.91763,0.132446
8,2.0,2.0,30.0,10.0,30.0,1950.526751,0.1384,1871.495137,0.132386
9,2.0,3.0,30.0,10.0,10.0,1948.816169,0.138263,1869.418926,0.132099


In [17]:
model_perf.to_csv('perf.csv')