# Seasonal ARIMA Modeling 
This script contains code for running Seasonal ARIMA Models. There is code for rolling forecasts, dynamic forecasting, and predicting future values. Initial work for a grid search was written as well. 

# Import packages and load functions

In [None]:
import pandas as pd #for data analysis/manipulation
import numpy as np
#from azureml import Workspace # connect to the Azure environment 
import pyodbc # connect to the database
import matplotlib.pyplot as plt # plotting package 
import time
import pytz
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error, mean_absolute_error 
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.api import arma_order_select_ic
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import datetime
from datetime import timedelta
from statsmodels.tsa.statespace.sarimax import SARIMAX


In [None]:
def time_fmt(time_entry, original_tz = 'US/Pacific', new_tz = 'US/Eastern'):
    '''Interpret a timestamp as wall-clock time in one zone and convert it to another.

    Parameters
    ----------
    time_entry : datetime-like object supporting ``replace(tzinfo=...)``.
        Any tzinfo already attached is discarded (as the previous
        ``replace(tzinfo=...)`` implementation also did).
    original_tz : name of the zone the wall-clock reading belongs to.
    new_tz : name of the zone to convert to.

    Returns
    -------
    The same instant expressed in ``new_tz``.
    '''
    source_zone = pytz.timezone(original_tz)
    # pytz zones must be attached with localize(): handing one directly to
    # datetime.replace(tzinfo=...) uses the zone's earliest (LMT) offset,
    # which shifts the result by several minutes.
    localized = source_zone.localize(time_entry.replace(tzinfo=None))
    return localized.astimezone(pytz.timezone(new_tz))


def mape_calc(actual, predicted):
    '''Return the mean absolute percentage error (in percent) of predicted vs actual.

    Each error is divided by the corresponding actual value, so a zero in
    `actual` yields inf/nan in the result.
    '''
    observed = np.array(actual)
    forecast = np.array(predicted)
    abs_pct_errors = np.abs((observed - forecast)/observed)*100
    return np.mean(abs_pct_errors)

def accuracy_metrics(actual, predicted, print_values = True):
    '''Compute MSE, RMSE, MAE and MAPE for a set of predictions.

    Returns the four values as a list in the order [mse, rmse, mae, mape];
    when print_values is True they are also printed.
    '''
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    mape = mape_calc(actual, predicted)

    if print_values == True:
        print('Accuracy Metrics')
        for label, value in (('MSE', mse), ('RMSE', rmse), ('MAE', mae), ('MAPE', mape)):
            print('{}: {}'.format(label, value))

    return [mse, rmse, mae, mape]

def inverse_difference(data, yhat, interval = 1):
    '''Undo a differencing step: add the value `interval` positions from the end of `data` to `yhat`.'''
    baseline = data[-interval]
    return yhat + baseline

def predictions_plot(test, predictions, days= 14, col = 'target col'):
    '''Plot predictions against actuals for the first `days` rows of `test`.

    Parameters
    ----------
    test : DataFrame holding the actual values in column `col` and the dates
        either in a 'Date' column or in an index named 'Date'.
    predictions : values assigned to the first `days` rows (a list of matching
        length, or a Series aligned on the index of `test`).
    days : number of leading rows of `test` to plot.
    col : name of the column holding the actual values.

    Returns
    -------
    DataFrame with the plotted rows plus 'Predictions' and 'Day' columns.
    '''
    # Work on a copy: head() returns a slice of the caller's frame, and adding
    # columns to it triggers SettingWithCopyWarning.
    test_sub = test.head(days).copy()
    test_sub['Predictions'] = predictions
    test_sub = test_sub.reset_index()
    # Timestamp.weekday_name no longer exists in pandas; day_name() is the
    # replacement and is what the rest of this file uses.
    test_sub['Day'] = test_sub['Date'].apply(lambda x: x.day_name())

    plt.plot(test_sub['Date'], test_sub['Predictions'], color = 'red', label = 'Predictions')
    plt.plot(test_sub['Date'], test_sub[col].values, label = 'Actual')
    plt.title('Forecast Two Weeks Out')
    plt.xticks(rotation = 45)
    plt.legend(loc = 'best')
    plt.show()

    return test_sub

def sarima_model_run(data, col, order, seasonal_order, split_date = '2019-05-01', forecast_days = 14):
    '''Fit one SARIMA model on the training period and evaluate a multi-step forecast.

    Parameters
    ----------
    data : DataFrame with a 'Date' column and the target column `col`.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    split_date : rows before this date are used for training, the rest for testing.
    forecast_days : number of steps to forecast and compare with the test set.

    Prints the model summary and accuracy metrics and shows the forecast plot.
    Returns None.
    '''
    data = data[['Date', col]]
    train = data[data['Date'] < split_date].set_index('Date')
    test = data[data['Date'] >= split_date]

    sarima_fit = SARIMAX(train[col], order=order, seasonal_order = seasonal_order).fit()
    print(sarima_fit.summary())

    actual = test[col][:forecast_days]

    # forecast, clipping negative predictions to zero
    forecast = sarima_fit.forecast(forecast_days)
    forecast_adj = [0 if x < 0 else x for x in forecast]
    # the test set may hold fewer rows than the horizon; only the overlapping
    # part can be scored and plotted
    forecast_adj = forecast_adj[:len(actual)]

    accuracy_metrics(actual, forecast_adj)

    predictions_plot(test, forecast_adj, days = forecast_days, col = col)
    
    
def walk_forward_validation(data, col, order, seasonal_order, holidays = False, split_date = '2019-05-01', 
                            forecast_days = 14, forecast_window = 1, exog_vars = ['Holiday Flag'], print_values = True):
    '''Walk-forward validation: refit the model at every step of the test period.

    At step i the model is fitted on all observations seen so far, a
    `forecast_window`-step forecast is made and its last value is kept as the
    prediction. The actual observation for step i is then added to the
    history before the next refit.

    Parameters
    ----------
    data : DataFrame with a 'Date' column, the target column `col` and, when
        `holidays` is set, the columns named in `exog_vars`.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    holidays : when False no exogenous regressors are used; otherwise the
        `exog_vars` columns are supplied as exog.
    split_date : rows before this date form the initial training set.
    forecast_days : number of scored days.
    forecast_window : forecast horizon in steps.
    exog_vars : names of the exogenous columns (never modified).
    print_values : print the per-iteration prediction, actual and percent error.

    Returns
    -------
    The DataFrame returned by predictions_plot for the scored rows.
    '''
    import warnings

    lead = forecast_window - 1
    use_exog = not (holidays == False)
    # Copy so that neither the caller's list nor the shared default argument
    # is changed (the list used to gain a 'Date' entry on every call).
    exog_cols = list(exog_vars)

    if not use_exog:
        data = data[['Date', col]]
    train = data[data['Date'] < split_date]
    test_full = data[data['Date'] >= split_date]
    test_df = test_full[:forecast_days + forecast_window].reset_index(drop = True)

    history = list(train[col].values)
    actuals = test_full[col].values
    if use_exog:
        train_exog = train[exog_cols].values.astype(float)
        test_exog = test_full[exog_cols + ['Date']].set_index('Date').astype(float)

    predictions = []
    failed_rows = []   # row positions (after shifting) whose model fit failed

    # step over each time-step in the test set
    for i in range(len(test_df)):
        if use_exog:
            try:
                sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order, exog = train_exog)
                sarima_model_fit = sarima_model.fit(disp=False)
                yhat = sarima_model_fit.forecast(forecast_window, exog = test_exog[i:i+forecast_window])
                step_pred = np.asarray(yhat)[lead]
            except Exception as exc:
                # a failed fit/forecast skips this step instead of aborting the run
                warnings.warn('iteration {}: model fit/forecast failed: {}'.format(i, exc))
                step_pred = None
            # the exog history grows in step with the target history
            train_exog = np.vstack([train_exog, test_exog[i:i+1].values])
        else:
            sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order)
            sarima_model_fit = sarima_model.fit(disp=False)
            step_pred = np.asarray(sarima_model_fit.forecast(forecast_window))[lead]

        if step_pred is None:
            predictions.append(np.nan)
            failed_rows.append(i + lead)
        else:
            predictions.append(step_pred)
            if print_values == True:
                # the last value of a forecast_window-step forecast made at
                # step i targets observation i + forecast_window - 1
                target = i + lead
                actual = actuals[target] if target < len(actuals) else np.nan
                pct_error = (actual - step_pred)/actual*100
                print('iteration {}: {} actual: {} pct_error: {} '.format(i, step_pred, actual, pct_error))

        # add actual observation to history for the next loop
        history.append(actuals[i])

    test_df['predictions'] = predictions
    # line each prediction up with the row it targets
    test_df['predictions'] = test_df['predictions'].shift(lead)
    test_df = test_df.drop(index = [row for row in failed_rows if row < len(test_df)])

    accuracy_metrics(test_df[col][forecast_window:], test_df['predictions'][forecast_window:])

    pred_df = predictions_plot(test_df[forecast_window:], test_df['predictions'][forecast_window:], col = col, days= forecast_days)

    return pred_df 

def two_day_ahead(data, col, order, seasonal_order, holidays = False, split_date = '2019-05-01'):
    '''Roll ahead forecasting with step size of 2 time periods.

    The model is refitted for each of the first three test rows; every refit
    yields a one-step and a two-step forecast. Negative forecasts are clipped
    to zero.

    Parameters
    ----------
    data : DataFrame with a 'Date' column and the target column `col`.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    holidays : accepted for signature parity with the other helpers; unused.
    split_date : rows before this date form the initial training set.

    Returns
    -------
    DataFrame of the three test rows with 'one_day' and 'two_day' columns
    ('two_day' shifted so that it sits on the row it targets).
    '''
    predictions_one_day = []
    predictions_two_day = []
    # split dataset
    data = data[['Date', col]]
    train = data[data['Date'] < split_date]
    test_df = data[data['Date']>= split_date]

    test_df = test_df[:3].reset_index(drop = True)

    history = list(train[col].values)
    test = test_df[col].values

    # step over each time-step in the test set
    for i, actual in enumerate(test):
        # fit model, make forecast 
        sarima_model_fit = SARIMAX(history, order=order, seasonal_order = seasonal_order).fit(disp=False)
        yhat = sarima_model_fit.forecast(2)
        predictions_one_day.append(yhat[0])
        predictions_two_day.append(yhat[1])
        print('iteration {}: {} actual: {}'.format(i, yhat[0], actual))
        # add actual observation to history for the next loop
        history.append(actual)

    forecast_adj_one_day = [0 if x < 0 else x for x in predictions_one_day]
    forecast_adj_two_day = [0 if x < 0 else x for x in predictions_two_day]

    test_df['one_day'] = forecast_adj_one_day
    test_df['two_day'] = forecast_adj_two_day
    # the two-step forecast made at row i targets row i + 1
    test_df['two_day'] = test_df['two_day'].shift()

    print(test_df[2:])
    accuracy_metrics(test, forecast_adj_one_day)
    accuracy_metrics(test[2:], test_df['two_day'][2:])

    # plot against the requested column (this used to be hard-coded to
    # 'target col', which failed for any other target)
    predictions_plot(test_df[2:], forecast_adj_one_day[2:], col = col, days= 2)

    return test_df

def dynamic_forecasting(data, col, order, seasonal_order, holidays = False, split_date = '2019-05-01', 
                            forecast_days = 14, forecast_window = 1, exog_vars = ['Holiday Flag'], print_values = True):
    '''Rolling forecast that feeds its own predictions back into the history.

    With exogenous regressors (`holidays` set) each refit is followed by
    appending the model's one-step prediction to the history, so later
    forecasts build on earlier forecasts; if a fit fails, the actual value is
    appended instead.

    NOTE(review): without exogenous regressors the actual observation is
    appended to the history, exactly as in walk_forward_validation. That
    looks unintended for a "dynamic" forecast; confirm before relying on it.

    Parameters
    ----------
    data : DataFrame with a 'Date' column, the target column `col` and, when
        `holidays` is set, the columns named in `exog_vars`.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    holidays : when False no exogenous regressors are used.
    split_date : rows before this date form the initial training set.
    forecast_days : number of scored days.
    forecast_window : forecast horizon in steps.
    exog_vars : names of the exogenous columns (never modified).
    print_values : print per-iteration diagnostics.

    Returns
    -------
    The test rows with a 'predictions' column (rows whose fit failed removed).
    '''
    import warnings

    lead = forecast_window - 1
    use_exog = not (holidays == False)
    # Copy so that neither the caller's list nor the shared default argument
    # is changed (the list used to gain a 'Date' entry on every call).
    exog_cols = list(exog_vars)

    if not use_exog:
        data = data[['Date', col]]
    train = data[data['Date'] < split_date]
    test_full = data[data['Date'] >= split_date]
    test_df = test_full[:forecast_days + forecast_window].reset_index(drop = True)

    if use_exog:
        train_exog = train[exog_cols].values.astype(float)
        test_exog = test_full[exog_cols + ['Date']].set_index('Date').astype(float)
        if print_values == True:
            # show the end of the training period (uses `col`, not a fixed name)
            print(train[['Date', col]].tail())

    history = list(train[col].values)
    actuals = test_full[col].values

    predictions = []
    failed_rows = []   # row positions (after shifting) whose model fit failed

    # step over each time-step in the test set
    for i in range(len(test_df)):
        if use_exog:
            try:
                sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order, exog = train_exog)
                sarima_model_fit = sarima_model.fit(disp=False)
                yhat = np.asarray(sarima_model_fit.forecast(forecast_window, exog = test_exog[i:i+forecast_window]))
                step_pred = yhat[lead]
                # dynamic step: the one-step prediction extends the history
                next_value = yhat[0]
            except Exception as exc:
                warnings.warn('iteration {}: model fit/forecast failed: {}'.format(i, exc))
                step_pred = None
                # no prediction available, fall back to the actual value
                next_value = actuals[i]
            train_exog = np.vstack([train_exog, test_exog[i:i+1].values])
        else:
            sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order)
            sarima_model_fit = sarima_model.fit(disp=False)
            step_pred = np.asarray(sarima_model_fit.forecast(forecast_window))[lead]
            next_value = actuals[i]

        if step_pred is None:
            predictions.append(np.nan)
            failed_rows.append(i + lead)
        else:
            predictions.append(step_pred)
            if print_values == True:
                # the last value of a forecast_window-step forecast made at
                # step i targets observation i + forecast_window - 1
                target = i + lead
                actual = actuals[target] if target < len(actuals) else np.nan
                pct_error = (actual - step_pred)/actual*100
                print('iteration {}: {} actual: {} pct_error: {} '.format(i, step_pred, actual, pct_error))

        history.append(next_value)

    if use_exog and print_values == True:
        print(len(predictions))
        print(len(test_df))

    test_df['predictions'] = predictions
    # line each prediction up with the row it targets
    test_df['predictions'] = test_df['predictions'].shift(lead)
    test_df = test_df.drop(index = [row for row in failed_rows if row < len(test_df)])

    accuracy_metrics(test_df[col][forecast_window:], test_df['predictions'][forecast_window:])

    predictions_plot(test_df[forecast_window:], test_df['predictions'][forecast_window:], col = col, days= forecast_days)

    return test_df

## FORECASTING FUTURE VALUES ##
def create_exog_data(data, vars_list, start):
    '''Build exogenous regressors for training and for the forecast window.

    Parameters
    ----------
    data : DataFrame holding the historical exogenous columns.
    vars_list : names of the exogenous columns to use (never modified).
    start : first day of the forecast window; any time-of-day part is ignored.

    Returns
    -------
    train_exog : float ndarray of data[vars_list].
    pred_exog : DataFrame indexed by 'Date' with the columns in `vars_list`,
        one row per weekday in the 20 calendar days starting at `start`.
        Columns available: 'Holiday Flag', 'DayAfterHoliday',
        'DayBeforeHoliday', 'IsMonthStart', 'FirstMonday' (plus 'Weekday'
        and 'Holiday Name').
    '''
    # Work on a copy: appending 'Date' to the caller's list made every later
    # call select the date column as a regressor.
    exog_cols = list(vars_list)
    print(exog_cols)
    train_exog = data[exog_cols].values.astype(float)

    # Drop the time of day so a holiday falling on the start date itself
    # (stamped at midnight) is still inside the holiday query range.
    window_start = pd.Timestamp(start).normalize()
    window_end = window_start + timedelta(days=19) # if running on friday have to remove 2 sets of weekends

    # create the exog variables for the prediction window
    two_week_df = pd.DataFrame({'Date': pd.date_range(window_start, window_end)})
    two_week_df['Weekday'] = two_week_df['Date'].dt.day_name()

    # remove the weekends
    two_week_df = two_week_df[~two_week_df['Weekday'].isin(['Saturday', 'Sunday'])]

    holidays_list = calendar().holidays(start = window_start, end = window_end, return_name= True)
    holidays = holidays_list.to_frame(name= 'Holiday Name').rename_axis('Date').reset_index()
    before_holiday = set(holidays['Date'] + timedelta(days=-1))
    after_holiday = set(holidays['Date'] + timedelta(days=1))
    # holiday name column to the data frame
    two_week_df = pd.merge(two_week_df, holidays, how ='left', on='Date')

    # Holiday Flags
    two_week_df['Holiday Flag'] = two_week_df['Holiday Name'].notnull().astype(int)
    two_week_df['DayAfterHoliday'] = two_week_df['Date'].apply(lambda x: 1 if x in after_holiday else 0)
    two_week_df['DayBeforeHoliday'] = two_week_df['Date'].apply(lambda x: 1 if x in before_holiday else 0)
    # add first of the month flag
    two_week_df['IsMonthStart'] = two_week_df['Date'].apply(lambda x: x.is_month_start)
    # flag the first monday of the month (a Monday within the first 7 days)
    two_week_df['FirstMonday'] = two_week_df['Date'].apply(lambda x: 1 if (x.day <= 7) and (x.dayofweek == 0) else 0)

    pred_exog = two_week_df[exog_cols + ['Date']].set_index('Date')

    return train_exog, pred_exog
    
def run_sarima(data, col, order, seasonal_order, forecast_days, exog_data = True, exog_vars = ['Holiday Flag'], start = None):
    '''Fit a SARIMA model on all data before `start` and forecast future values.

    Parameters
    ----------
    data : DataFrame with a 'Date' column, the target column `col` and, when
        `exog_data` is set, the columns named in `exog_vars`.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    forecast_days : number of steps (weekdays) to forecast.
    exog_data : when True, fit and forecast with the `exog_vars` regressors.
    exog_vars : names of the exogenous columns (never modified).
    start : first day of the forecast window; defaults to the current
        date/time at call time.

    Returns
    -------
    DataFrame with a 'Date' column, the exogenous columns (if used) and a
    '<col> Forecast' column.

    Raises
    ------
    ValueError : if fewer than `forecast_days` rows of exogenous data can be
        generated for the forecast window.
    '''
    # Resolve the default here: a datetime.today() default in the signature
    # is evaluated once, when the function is defined, and then goes stale.
    if start is None:
        start = datetime.datetime.today()

    data = data[data['Date'] < start]

    if exog_data == True:
        exog = list(exog_vars)
        print('Exog_list: {}'.format(exog))
        # hand over a copy so the list is never modified downstream
        train_exog, pred_exog = create_exog_data(data, list(exog), start)
        if len(pred_exog) < forecast_days:
            raise ValueError('only {} rows of exogenous data available for a {}-step forecast'.format(
                len(pred_exog), forecast_days))
        # the generated window can hold more weekdays than requested; the
        # forecast needs exactly one exog row per step
        pred_exog = pred_exog.iloc[:forecast_days]

        sarima_model = SARIMAX(data[col], order=order, seasonal_order = seasonal_order, exog = train_exog)
        sarima_model_fit = sarima_model.fit(disp=False)
        yhat = sarima_model_fit.forecast(forecast_days, exog = pred_exog.astype(float))
        forecast_df = pred_exog.reset_index()
    else:
        sarima_model = SARIMAX(data[col], order=order, seasonal_order = seasonal_order)
        sarima_model_fit = sarima_model.fit(disp=False)
        yhat = sarima_model_fit.forecast(forecast_days)
        # weekdays only, matching the weekend removal of the exog path
        forecast_dates = pd.bdate_range(pd.Timestamp(start).normalize(), periods = forecast_days)
        forecast_df = pd.DataFrame({'Date': forecast_dates})

    # assign by position; the name/index of the forecast result is not relied on
    forecast_df['{} Forecast'.format(col)] = np.asarray(yhat)

    return forecast_df


## SARIMA 

In [None]:
# Plot the autocorrelation (top) and partial autocorrelation (bottom) of the
# target series for lags up to 25.
# NOTE(review): `la` is not defined in this notebook section — presumably a
# DataFrame loaded elsewhere; confirm it is the intended source (other cells use `df`).
fig, ax = plt.subplots(2, figsize=(8,8))
x = la['target col']
ax[0] = plot_acf(x, ax=ax[0], lags = 25)
ax[1] = plot_pacf(x, ax=ax[1], lags = 25)
#ax[2].plot(x)

In [None]:
# Split `df` at 2019-05-01: earlier rows train, later rows test.
train = df[df['Date'] < '2019-05-01']
test = df[df['Date']>= '2019-05-01']
print(train.head(10))
# index the training rows by date (test keeps 'Date' as a column)
train = train.set_index('Date')

# sizes of the two partitions
print(len(train))
print(len(test))

In [None]:
# Fit and evaluate a SARIMA (5,0,5)x(0,1,2,5) model on 'target col'.
sarima_model_run(df, 'target col', order=(5,0,5), seasonal_order = (0,1,2,5))

## Add Holidays as exogenous variables

In [None]:
# Re-split `df` at 2019-05-01 (train is NOT date-indexed this time).
train = df[df['Date'] < '2019-05-01']
test = df[df['Date']>= '2019-05-01']

# Holiday flag as the exogenous regressor: an array for training and a
# date-indexed frame covering the first 14 test rows for forecasting.
train_holidays = train['Holiday Flag'].values
test_holidays14 = test[['Date','Holiday Flag']][:14].set_index('Date')

In [None]:
# Fit SARIMA (5,0,5)x(0,1,2,7) with the holiday flag as exogenous regressor.
sarima_model = SARIMAX(train['target col'], order=(5,0,5), seasonal_order = (0,1,2,7), exog = train_holidays)
sarima_fit = sarima_model.fit()
print(sarima_fit.summary())

results = sarima_fit.fittedvalues.to_frame()

# first 14 actual test values (the column name was misspelled 'taret col',
# which raised a KeyError)
test14 = test['target col'][:14]

# forecast and accuracy metrics; negative forecasts are clipped to zero
forecast14 = sarima_fit.forecast(14, exog = test_holidays14)
forecast_adj = [0 if x < 0 else x for x in forecast14 ]

accuracy_metrics(test14, forecast_adj)
predictions_plot(test, forecast_adj)

# Day Ahead Forecasts 

In [None]:
%time
walk_forward_validation(df, 'target col', (5,0,5),(0,1,2,7))

In [None]:
act, pred = walk_forward_validation(df, 'target col', (5,0,5),(0,1,2,7), holidays = True)

In [None]:
act['predicted'] = pred 
act =act[['Date', 'target col', 'predicted']]
act

In [None]:
# One- and two-step-ahead rolling forecasts starting at 2019-04-01.
two_day_ahead(df, 'target col', (5,0,5), (0,1,2,7), split_date = '2019-04-01')

## Cross Validation on 90 Days

In [None]:
# Rolling 2-step forecasts without exogenous regressors from 2019-03-01.
# `pct_e` receives the DataFrame returned by walk_forward_validation.
# NOTE(review): forecast_days is 60 here although the section heading says 90 days — confirm.
pct_e = walk_forward_validation(df, 'target col', (5,0,5), (1,1,0,5), split_date = '2019-03-01', holidays = False, forecast_days = 60, forecast_window = 2)

### Rolling 2 day forecasts with Holidays 

In [None]:
# Same rolling 2-step validation with the default exogenous column
# ('Holiday Flag') over 90 forecast days.
pct_e = walk_forward_validation(df, 'target col', (5,0,5), (1,1,0,5), split_date = '2019-03-01', holidays = True, forecast_days = 90, forecast_window = 2)

# SARIMA Parameters Grid Search 

In [None]:
def sarima_cv(data, col, order, seasonal_order, holidays = False, split_date = '2019-05-01', 
                            forecast_days = 14, forecast_window = 1):
    '''Score one SARIMA configuration with walk-forward validation.

    The model is refitted at every step of the test period; the last value of
    each `forecast_window`-step forecast is kept and compared with the actual
    value it targets. Nothing is printed or plotted, so the function can be
    called repeatedly from a grid search.

    Parameters
    ----------
    data : DataFrame with a 'Date' column, the target column `col` and, when
        `holidays` is set, a 'Holiday Flag' column.
    col : name of the target column.
    order, seasonal_order : passed straight to SARIMAX.
    holidays : when False no exogenous regressor is used; otherwise
        'Holiday Flag' is supplied as exog.
    split_date : rows before this date form the initial training set.
    forecast_days : number of scored days.
    forecast_window : forecast horizon in steps.

    Returns
    -------
    [mse, rmse, mae, mape] as returned by accuracy_metrics.
    '''
    lead = forecast_window - 1
    use_exog = not (holidays == False)

    if not use_exog:
        data = data[['Date', col]]
    train = data[data['Date'] < split_date]
    test = data[data['Date'] >= split_date]
    test_df = test[:forecast_days + forecast_window].reset_index(drop = True)

    if use_exog:
        train_holidays = train['Holiday Flag'].values
        test_holidays_by_date = test[['Date','Holiday Flag']].set_index('Date')
        test_holidays = test['Holiday Flag'].values

    history = list(train[col].values)
    actuals = test[col].values

    predictions = []
    # step over each time-step in the test set
    for i in range(len(test_df)):
        # fit model and make forecast
        if use_exog:
            sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order, exog = train_holidays)
            sarima_model_fit = sarima_model.fit(disp=False)
            # one exog row per forecast step (the slice used to be fixed at
            # two rows, which only fits forecast_window == 2)
            yhat = sarima_model_fit.forecast(forecast_window, exog = test_holidays_by_date[i:i+forecast_window])
            train_holidays = np.append(train_holidays, test_holidays[i])
        else:
            sarima_model = SARIMAX(history, order=order, seasonal_order = seasonal_order)
            sarima_model_fit = sarima_model.fit(disp=False)
            yhat = sarima_model_fit.forecast(forecast_window)
        predictions.append(np.asarray(yhat)[lead])
        # add actual observation to history for the next loop
        history.append(actuals[i])

    test_df['predictions'] = predictions
    # line each prediction up with the row it targets
    test_df['predictions'] = test_df['predictions'].shift(lead)

    accuracy = accuracy_metrics(test_df[col][forecast_window:], test_df['predictions'][forecast_window:], print_values = False)

    return accuracy

In [None]:
# working on CV loop for the SARIMA parameters 
def sarima_cv_run(data, col, order, holidays = False, split_date= '2019-05-01', forecast_days =14, forecast_window = 1,
                  seasonal_period = 5):
    '''Grid-search the seasonal (P, D, Q) orders with walk-forward validation.

    Every combination of P in {0,1,2}, D in {0,1}, Q in {0,1,2} is scored
    with sarima_cv. A combination whose fit fails is reported and skipped.

    Parameters
    ----------
    data, col, order, holidays, split_date, forecast_days, forecast_window :
        forwarded to sarima_cv.
    seasonal_period : length of the seasonal cycle used in every
        seasonal_order tuple.

    Returns
    -------
    resultsdf : DataFrame with columns 'Parameters', 'MSE', 'RMSE', 'MAE'.
    best_params_rmse : 'Parameters' entries with the lowest RMSE.
    best_params_mae : 'Parameters' entries with the lowest MAE.
    '''
    import itertools

    P_list = [0,1,2]
    D_list = [0,1]
    Q_list = [0,1,2]

    rows = []
    for P, D, Q in itertools.product(P_list, D_list, Q_list):
        label = '({})({},{},{},{})'.format(order, P, D, Q, seasonal_period)
        try:
            acc_metrics = sarima_cv(data, col, order, (P,D,Q, seasonal_period), holidays = holidays, split_date = split_date, forecast_days = forecast_days, forecast_window = forecast_window)
        except Exception as exc:
            # some orders cannot be estimated; report and move on instead of
            # silently dropping them (and without trapping KeyboardInterrupt)
            print('Model: {}  skipped: {}'.format(label, exc))
            continue
        rows.append((label, acc_metrics[0], acc_metrics[1], acc_metrics[2]))
        print('Model: {}  RMSE: {}  MAE:{}'.format(label, acc_metrics[1], acc_metrics[2]))

    resultsdf = pd.DataFrame(rows, columns = ['Parameters', 'MSE', 'RMSE', 'MAE'])

    min_rmse = resultsdf['RMSE'].min()
    min_mae = resultsdf['MAE'].min()

    best_params_rmse = resultsdf[resultsdf['RMSE'] == min_rmse]['Parameters']
    best_params_mae = resultsdf[resultsdf['MAE'] == min_mae]['Parameters']

    return resultsdf, best_params_rmse, best_params_mae
            



In [None]:
# Grid-search the seasonal orders with the holiday flag, 2-step horizon, 60 scored days.
results, best_rmse, best_mae = sarima_cv_run(df, 'target col', (5,0,5), holidays = True, split_date= '2019-03-01', forecast_days = 60, forecast_window = 2)

In [None]:
# first rows of the grid-search results table
results.head()

In [None]:
# parameter label(s) with the lowest RMSE
best_rmse

## Multiple Exogenous Variables 

In [None]:
# Split `df` at 2019-05-01.
train = df[df['Date'] < '2019-05-01']
test = df[df['Date']>= '2019-05-01']

# single-regressor inputs (holiday flag only)
train_holidays = train['Holiday Flag'].values
test_holidays14 = test[['Date','Holiday Flag']][:14].set_index('Date')

# four-regressor inputs: array for training, date-indexed frame of the first
# 14 test rows for forecasting
train_exog = train[['Holiday Flag','DayBeforeHoliday','DayAfterHoliday', 'IsMonthStart']].values
test_exog = test[['Date','Holiday Flag','DayBeforeHoliday','DayAfterHoliday', 'IsMonthStart']][:14].set_index('Date')

In [None]:
# inspect the container type of the training regressors
type(train_exog)
#type(test_exog)

In [None]:
# display the training regressor values
train_exog

In [None]:
# Fit SARIMA (5,0,5)x(1,1,1,5) with the four exogenous regressors (cast to float).
sarima_model = SARIMAX(train['target col'], order=(5,0,5), seasonal_order = (1,1,1,5), exog = train_exog.astype(float))
sarima_fit = sarima_model.fit()
print(sarima_fit.summary())

results = sarima_fit.fittedvalues.to_frame()

# first 14 actual test values
test14 = test['target col'][:14]

# forecast and accuracy metrics
forecast14 = sarima_fit.forecast(14, exog = test_exog.astype(float))
# NOTE(review): `pred` is not used in this cell — confirm whether it is needed.
pred = sarima_fit.get_prediction(14, )
# clip negative forecasts to zero
forecast_adj = [0 if x < 0 else x for x in forecast14 ]

accuracy_metrics(test14, forecast_adj)
predictions_plot(test, forecast_adj)

In [None]:
# Rolling 2-step validation from 2019-03-01 over 90 forecast days with five
# exogenous regressors.
pred_df = walk_forward_validation(df, 'target col', (5,0,5), (1,1,1,5), split_date = '2019-03-01', holidays = True, 
                                  forecast_days = 90, forecast_window = 2, 
                                  exog_vars = ['Holiday Flag', 'IsMonthStart', 'DayBeforeHoliday', 'DayAfterHoliday', 'FirstMonday'])

# Forecasting Future Values

In [None]:
# forecast origin: the current local date and time
start = datetime.datetime.today()
start

In [None]:
# Build the weekday-only date window that follows `start`.
# NOTE(review): this window spans 18 days after `start`, while create_exog_data uses 19 — confirm which is intended.
dt = pd.date_range(start, start +timedelta(days=18)) # if running on friday have to remove 2 sets of weekends
dt_df = pd.DataFrame(pd.to_datetime(dt))
# calendar date (time of day dropped) and weekday name for every timestamp
dt_df['Date'] = pd.DataFrame(dt_df[0].dt.date)
dt_df['Weekday'] = dt_df[0].dt.day_name()
dt_df['Date'] = pd.to_datetime(dt_df['Date'], format='%Y-%m-%d')
# remove the weekends 
dt_df = dt_df[(dt_df['Weekday'] != 'Saturday') & (dt_df['Weekday'] != 'Sunday')]
# number of weekdays left and the column dtypes
print(len(dt_df))
print(dt_df.dtypes)
dt_df

In [None]:
# Forecast future values of 'target col' with SARIMA (5,0,5)x(1,1,0,5) and the
# default exogenous column ('Holiday Flag').
run_sarima(df, 'target col', (5,0,5), (1,1,0,5), 14, exog_data = True)