In [14]:
import pandas as pd
import numpy as np
import datetime
import itertools

from pandas.tseries.holiday import USFederalHolidayCalendar
import matplotlib.pyplot as plt

from prophet import Prophet
from prophet.diagnostics import cross_validation
#pull in the performance metrics
from prophet.diagnostics import performance_metrics

import wrangle

##### Prep Regressors

In [1]:
def get_prophet_df_w_meantemp():
    '''
    Retrieves a cleaned dataframe and formats it for input into
    the FB Prophet model.
    
    NOTE: Prophet does not support timezone - need it in UTC, then make tz naive
    '''
    #Acquire combined dataframe
    df = wrangle.get_combined_df(get_central = False)
    #Calculate mean_temp column
    df['mean_temp'] = (df.hs_temp + df.gv_temp + df.pl_temp + df.vc_temp)/4

    #Pull index/load/temp data into new dataframe
    df2 = pd.DataFrame(df[['ercot_load','mean_temp']])
    
    #Move index out
    df2.reset_index(drop=False, inplace=True)
    #Rename columns
    df2.rename(columns = {'datetime':'ds','ercot_load':'y'},inplace=True)
    #Make TZ naive
    df2.ds = df2.ds.dt.tz_localize(None)
    
    return df2

In [3]:
#get new df with the meantemp column
dfr = get_prophet_df_w_meantemp()

trainr = dfr[dfr.ds < '2018-01-01 06:00:00']

In [4]:
trainr

Unnamed: 0,ds,y,mean_temp
0,2010-01-01 06:00:00,7931.241900,49.025
1,2010-01-01 07:00:00,7775.456846,48.025
2,2010-01-01 08:00:00,7704.815982,46.800
3,2010-01-01 09:00:00,7650.575724,45.775
4,2010-01-01 10:00:00,7666.708317,44.925
...,...,...,...
70123,2018-01-01 01:00:00,12061.549401,44.200
70124,2018-01-01 02:00:00,12015.663549,41.925
70125,2018-01-01 03:00:00,11883.114122,40.275
70126,2018-01-01 04:00:00,11754.250889,39.700


##### Prep Holidays

holidays: pd.DataFrame with columns holiday (string) and ds (date type) and optionally columns lower_window and upper_window which specify arange of days around the date to be included as holidays. lower_window=-2 will include 2 days prior to the date as holidays. Also optionally can have a column prior_scale specifying the prior scale for that holiday.

In [6]:
#create calendar object
cal = USFederalHolidayCalendar()
#get as list of dates
train_holidays = cal.holidays(start=trainr.ds.min(),end=trainr.ds.max())

# Transition to dataframe with holiday, ds columns
holiday_df = pd.DataFrame(trainr.ds)
holiday_df['holiday'] = holiday_df.ds.dt.date.astype(str).isin(train_holidays.astype(str)).astype(int)

In [7]:
holiday_df

Unnamed: 0,ds,holiday
0,2010-01-01 06:00:00,0
1,2010-01-01 07:00:00,0
2,2010-01-01 08:00:00,0
3,2010-01-01 09:00:00,0
4,2010-01-01 10:00:00,0
...,...,...
70123,2018-01-01 01:00:00,1
70124,2018-01-01 02:00:00,1
70125,2018-01-01 03:00:00,1
70126,2018-01-01 04:00:00,1


In [8]:
only_holidays = holiday_df[holiday_df.holiday==1]

In [9]:
only_holidays.holiday = only_holidays.holiday.astype(str)
only_holidays

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


Unnamed: 0,ds,holiday
402,2010-01-18 00:00:00,1
403,2010-01-18 01:00:00,1
404,2010-01-18 02:00:00,1
405,2010-01-18 03:00:00,1
406,2010-01-18 04:00:00,1
...,...,...
70123,2018-01-01 01:00:00,1
70124,2018-01-01 02:00:00,1
70125,2018-01-01 03:00:00,1
70126,2018-01-01 04:00:00,1


In [10]:
only_holidays.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1902 entries, 402 to 70127
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   ds       1902 non-null   datetime64[ns]
 1   holiday  1902 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 44.6+ KB


##### Put our hyperparameters together

In [11]:
d_opts = [10,20,30]
w_opts = [10,20,30]
# y_opts = [1] #change to 1 & 10
d_fo = range(2,4)
w_fo = range(2,4)
# y_fo = range(3,5)

param_list = list(itertools.product(d_opts,w_opts,d_fo,d_fo))

In [12]:
param_list[0:3]

[(10, 10, 2, 2), (10, 10, 2, 3), (10, 10, 3, 2)]

In [13]:
#initialize model performance dataframe
model_perf = pd.DataFrame()

In [None]:
#Loop over the parameter combinations
for params in param_list[10:]:
    #GENERATE MODEL
    m = Prophet(yearly_seasonality=False,
                weekly_seasonality=False,
                daily_seasonality=False,
                holidays=only_holidays)
    #ADD SEASONALITY
    m = m.add_seasonality(name='daily', 
                          period=1, 
                          fourier_order=params[2],
                          prior_scale=params[0]
                         )
    m = m.add_seasonality(name='weekly', 
                          period=7, 
                          fourier_order=params[3],
                          prior_scale=params[1]
                         )
#     m = m.add_seasonality(name='yearly', 
#                           period=365.25, 
#                           fourier_order=params[5],
#                           prior_scale=params[2]
#                          )
    #ADD REGRESSOR
    m = m.add_regressor('mean_temp')
    
    #FIT MODEL
    m.fit(trainr)
    
    #CREATE CROSS VALIDATION DF
    df_cv_loop = cross_validation(m, initial='1461 days', period='36 days', horizon = '3 days') 
    #get 1 day/3day metrics
    df_cv_loop_1d = performance_metrics(df_cv_loop, rolling_window=.33, metrics=metrics)
    df_cv_loop_3d = performance_metrics(df_cv_loop, rolling_window=1, metrics=metrics)
    
    #STORE MODEL PARAMETERS AND PERFORMANCE
    dict = {
        'daily_order': params[2],
        'weekly_order': params[3],
        'daily_scale': params[0],
        'weekly_scale': params[1],
        '1d_rmse': df_cv_loop_1d.loc[0,'rmse'],
        '1d_mape': df_cv_loop_1d.loc[0,'mape'],
        '3d_rmse': df_cv_loop_3d.loc[0,'rmse'],
        '3d_mape': df_cv_loop_3d.loc[0,'mape']  
    }
    model_perf = model_perf.append(dict,ignore_index=True)
