In [None]:
# TimeSeriesMultiReg

In [111]:
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  6 14:44:31 2018
@author: ahellman
"""
import itertools
import logging

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
import statsmodels.api as sm
from fbprophet import Prophet
#from fbprophet.diagnostics import cross_validation
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
#from statsmodels.tsa.holtwinters import SimpleExpSmoothing


class TimeSeriesMultiReg():
    
    def __init__(self):
        pass
    
    
    def _Run_Regressions(
        self,
        df,
        forecastDays,
        region="None",
        forecasts={"holtwinters", "arima", "sarima", "prophet"}
    ):
        """This is the main working function to:
            1. break out the arrays from the dataset
            2. Call the other functions
            3. Run the MAPE on each of the functions"""
        
        df = self.format_df(df)
        

        actualsDict = self.dimensionDictionary(df)
        
        actualsDictKey = []
        for key in actualsDict:
            actualsDictKey.append(key)  
        
        # Fill dictionary with unique intersections of the actuals data:
        for i in range(0, len(actualsDictKey)):
            dfFiltered = df.iloc[0:,1].isin(actualsDictKey[i])
            for j in range(1, len(df.columns)-1):
                dfFiltered &= df.iloc[0:,j].isin(actualsDictKey[i])
            
            actualsDict[actualsDictKey[i]] = df[dfFiltered]
        # Fill dictionary with unique uintersections of the actuals data:

        # Dict mapping subdimensions to (model, forecast) tuples.
        # model, forecast = forecast_results[subdimension] <- how to call.
        forecast_results = {}

        for subDimension in actualsDictKey:
            # Initialize the list of forecast results for the subdimension.
            forecast_results[subDimension] = []
            if "prophet" in forecasts:
                try:
                    if region == "None":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnNone(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                    elif region == "NA":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnNA(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                    elif region == "UK":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnUK(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                    elif region == "FR":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnUK(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                    elif region == "CE":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnCE(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                    elif region == "SoEu":
                        forecast_results[subDimension].append((
                            "Prophet",
                            self.ProphetFxnSoEu(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        ))
                except Exception:
                    pass  # If we can't run this without generating exceptions, skip.
            
            if "holtwinters" in forecasts:
                try:
                    forecast_results[subDimension].append(
                        (
                            "HW",
                            self.HWFxn(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        )
                    )
                except Exception:
                    pass  # If we can't run this without generating exceptions, skip.
            
            if "sarima" in forecasts:
                try:
                    forecast_results[subDimension].append(
                        (
                            "Sarima",
                            self.SarimaFxn(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        )
                    )
                except Exception:
                    pass  # If we can't run this without generating exceptions, skip.
            
            if "arima" in forecasts:
                try:
                    forecast_results[subDimension].append(
                        (
                            "Arima",
                            self.ArimaFxn(
                                actualsDict[subDimension],
                                forecastDays
                            )
                        )
                    )
                except Exception:
                    pass  # If we can't run this without generating exceptions, skip.
        
        # Bail if no forecasts were executed.
        if not forecast_results:
            raise ValueError("Unable to execute any forecasts.")
            
        
        # A dictionary mapping the subdimensions to a list of (model, MAPE) tuples.
        # model, mape = mape_results[subdimension] <- how to call.
        mape_results = {}
        for subDimension in actualsDictKey:
            # Initialize to an empty list for the specified subdimension.
            mape_results[subDimension] = []
            for model, forecast in forecast_results[subDimension]:
                mape_results[subDimension].append((
                    model, 
                    self.mapeCalc(
                        forecast[:len(actualsDict[subDimension])],
                        actualsDict[subDimension]['y']
                    )
                ))
        
        # A dictionary mapping the subdimension to a tuple of (model, mape, forecast)
        # model, mape, forecast = bestMapes[dimension] <- Usage.
        bestModels = {}
        for subDimension in actualsDictKey:
            bestModel = None
            for model_forecast, model_mape in zip(forecast_results[subDimension], mape_results[subDimension]):
                model, forecast = model_forecast
                model, mape = model_mape

                if (bestModel is None) or ((bestModel[1] < mape).all()):
                    bestModel = (model, mape, forecast)
            bestModels[subDimension] = bestModel
            #bestMapeForecastModel[subDimension] = (mape, forecast, model)
            lenforecast = len(actualsDict[subDimension])
            lenactualsDict = len(actualsDict[subDimension]['y'])
            dfff = (forecast[:len(actualsDict[subDimension])], actualsDict[subDimension]['y'])

        return bestModels
        #return bestMapeForecastModel
        #return lenactualsDict
        #return dfff
    


    def format_df(self, df):
        if 'ds' not in df.columns:
            try:
                df = df.rename(columns={'Date': 'ds'})
            except:
                df = df.rename(columns={'date': 'ds'})
        
        if 'y' not in df.columns:
            try:
                df = df.rename(columns={df.columns[len(df.columns)-1:][0]: 'y'})
            except:
                pass
        
        return df

    
    def mapeCalc(self, pf, xf):

        dff = pd.concat([pf,xf],axis=1)
        dff.columns = ['predicted', 'real']
        dff['abspctdiff'] = abs(dff['predicted']-dff['real'])/dff['real']
        mape = np.mean(dff['abspctdiff'])
        
        return mape
    

    def dimensionDictionary(self, df):
        
        # Build new arrays to store the individual forecasts - all of this will be stored in a dictionary
        actualsDict = {} #Dictionary to hold all the dataframes of the forecasts
        actualsDictKey = [] #Arry to hold all the dictionary keys
    
        uniqueList = [] #Find and store all the uniqe dimensions of the data
        for i in range(0,len(df.columns)):
            if df.columns[i] != 'ds' and df.columns[i] != 'y':
                uniqueList.append(pd.unique(df.iloc[0:,i].values))
        
        #Combine the arrays in the uniqueList dic into one array of all combinations:
        iterList = list(itertools.product(*uniqueList))
        for i in iterList:
            actualsDictKey.append(i)
        
        #Combine the arrays in the uniqueList dic into one array of all combinations:
        #for i in range(0, len(uniqueList[0])-1):
        #    actualsDictKey.append(uniqueList[0][i])
        
        #Add an empty array into all the intersectons of the unique values
        for i in range(0, len(actualsDictKey)):
            actualsDict[actualsDictKey[i]] = []
            
        return actualsDict
       
        
    def ProphetFxnNA(self, df, daysForecast):
        
        easter = pd.DataFrame({
          'holiday': 'easterSunday',
          'ds': pd.to_datetime(['2010-04-04','2011-04-24','2012-04-08','2013-03-31','2014-04-20',
                                '2015-04-05','2016-03-27','2017-04-16','2018-04-01','2019-04-21',
                                '2020-04-12','2021-04-04','2022-04-17','2023-04-09','2024-03-31']),
          'lower_window': -2,
          'upper_window': 0,
        })
    
        memorial = pd.DataFrame({
          'holiday': 'memorialMonday',
          'ds': pd.to_datetime(['2010-05-31','2011-05-30','2012-05-28','2013-05-27','2014-05-26',
                                '2015-05-25','2016-05-30','2017-05-29','2018-05-28','2019-05-27',
                                '2020-05-25','2021-05-31','2022-05-30','2023-05-29','2024-05-27']),
          'lower_window': -2,
          'upper_window': 0,
        })
    
        laborday = pd.DataFrame({
          'holiday': 'laborMonday',
          'ds': pd.to_datetime(['2010-09-6','2011-09-5','2012-09-3','2013-09-2','2014-09-1',
                                '2015-09-7','2016-09-5','2017-09-4','2018-09-3','2019-09-2',
                                '2020-09-7','2021-09-6','2022-09-5','2023-09-4','2024-09-2']),
          'lower_window': -2,
          'upper_window': 0,
        })
        
        thxgiving = pd.DataFrame({
          'holiday': 'thanksgiving',
          'ds': pd.to_datetime(['2010-11-25','2011-11-24','2012-11-22','2013-11-28','2014-11-27',
                                '2015-11-26','2016-11-24','2017-11-23','2018-11-22','2019-11-28',
                                '2020-11-26','2021-11-25','2022-11-24','2023-11-23','2024-11-28',]),
          'lower_window': 0,
          'upper_window': 1,
        })
    
        holidays = pd.concat((easter, memorial, laborday, thxgiving))
        
        m = Prophet(holidays=holidays, daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']

    
    def ProphetFxnCE(self, df, daysForecast):
    
        easter = pd.DataFrame({
              'holiday': 'easterSunday',
              'ds': pd.to_datetime(['2010-04-04','2011-04-24','2012-04-08','2013-03-31','2014-04-20',
                                    '2015-04-05','2016-03-27','2017-04-16','2018-04-01','2019-04-21',
                                    '2020-04-12','2021-04-04','2022-04-17','2023-04-09','2024-03-31']),
              'lower_window': -2,
              'upper_window': 1,
            })
        
        ascensionDay = pd.DataFrame({
              'holiday': 'ascensionDay',
              'ds': pd.to_datetime(['2014-05-29','2015-05-14','2016-05-05','2017-05-25','2018-05-10',
                                    '2019-05-30','2020-05-21','2021-05-13','2022-05-26','2023-05-18','2024-05-09',]),
              'lower_window': 0,
              'upper_window': 2,
            })
        
        whit = pd.DataFrame({
              'holiday': 'whitMonday',
              'ds': pd.to_datetime(['2014-06-09','2015-05-25','2016-05-16','2017-06-05','2018-05-21',
                                    '2019-06-10','2020-06-01','2021-05-24','2022-06-06','2023-05-29','2024-05-20',]),
              'lower_window': -1,
              'upper_window': 0,
            })
        
        holidays = pd.concat((easter, ascensionDay, whit))
    
        m = Prophet(holidays=holidays, daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']
    

    def ProphetFxnFR(self, df, daysForecast):
        
        easter = pd.DataFrame({
              'holiday': 'easterSunday',
              'ds': pd.to_datetime(['2010-04-04','2011-04-24','2012-04-08','2013-03-31','2014-04-20',
                                    '2015-04-05','2016-03-27','2017-04-16','2018-04-01','2019-04-21',
                                    '2020-04-12','2021-04-04','2022-04-17','2023-04-09','2024-03-31']),
              'lower_window': -2,
              'upper_window': 1,
            })
        
        ascensionDay = pd.DataFrame({
              'holiday': 'ascensionDay',
              'ds': pd.to_datetime(['2014-05-29','2015-05-14','2016-05-05','2017-05-25','2018-05-10',
                                    '2019-05-30','2020-05-21','2021-05-13','2022-05-26','2023-05-18','2024-05-09',]),
              'lower_window': 0,
              'upper_window': 2,
            })
        
        whit = pd.DataFrame({
              'holiday': 'whitMonday',
              'ds': pd.to_datetime(['2014-06-09','2015-05-25','2016-05-16','2017-06-05','2018-05-21',
                                    '2019-06-10','2020-06-01','2021-05-24','2022-06-06','2023-05-29','2024-05-20',]),
              'lower_window': -1,
              'upper_window': 0,
            })
        
        holidays = pd.concat((easter, ascensionDay, whit))
        
        m = Prophet(holidays=holidays, daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']
    

    def ProphetFxnNone(self, df, daysForecast):
    
        m = Prophet(daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']
    

    def ProphetFxnSoEu(self, df, daysForecast):
        
        easter = pd.DataFrame({
              'holiday': 'easterSunday',
              'ds': pd.to_datetime(['2010-04-04','2011-04-24','2012-04-08','2013-03-31','2014-04-20',
                                    '2015-04-05','2016-03-27','2017-04-16','2018-04-01','2019-04-21',
                                    '2020-04-12','2021-04-04','2022-04-17','2023-04-09','2024-03-31']),
              'lower_window': -2,
              'upper_window': 1,
            })
        
        holidays = pd.concat((easter))
        
        m = Prophet(holidays=holidays, daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']
    
    
    def ProphetFxnUK(self, df, daysForecast):
    
        easter = pd.DataFrame({
              'holiday': 'easterSunday',
              'ds': pd.to_datetime(['2015-04-05','2016-03-27','2017-04-16','2018-04-01','2019-04-21',
                                    '2020-04-12','2021-04-04','2022-04-17','2023-04-09','2024-03-31','2025-04-20']),
            
              'lower_window': -2,
              'upper_window': 1,
            })
        
        #First monday in may
        mayDay = pd.DataFrame({
              'holiday': 'mayday',
              'ds': pd.to_datetime(['2015-05-04','2016-05-02','2017-05-01','2018-05-07','2019-05-06',
                                    '2020-05-04','2021-05-03','2022-05-02','2023-05-01','2024-05-06','2025-05-05']),
              'lower_window': -1,
              'upper_window': 0,
            })
        
        #Last monday in may
        springBank = pd.DataFrame({
              'holiday': 'springbank',
              'ds': pd.to_datetime(['2015-05-25','2016-05-30','2017-05-29','2018-05-28','2019-05-27',
                                    '2020-05-25','2021-05-31','2022-05-30','2023-05-29','2024-05-27','2025-05-26']),
              'lower_window': -1,
              'upper_window': 0,
            })
        
        summerbank = pd.DataFrame({
              'holiday': 'springbank',
              'ds': pd.to_datetime(['2015-08-31','2016-08-29','2017-08-28','2018-08-27','2019-08-26','2020-08-31',
                                    '2021-08-30','2022-08-29','2023-08-28','2024-08-26','2025-08-25']),
              'lower_window': -1,
              'upper_window': 0,
        })
        holidays = pd.concat((easter, mayDay, springBank, summerbank))
        
        m = Prophet(holidays=holidays, daily_seasonality = False, yearly_seasonality = True, weekly_seasonality = True,
                   seasonality_mode='multiplicative')
        m.fit(df)
        future = m.make_future_dataframe(periods=daysForecast) #Make a new dataframe that predicts the next number of days    
        forecast = m.predict(future)
        return forecast['yhat']
    
    
    def SarimaFxn(self, df, fcstdays):

        series = df['y']
    
        # evaluate parameters
        p_values = range(0, 3)
        d_values = range(0, 2)
        q_values = range(0, 3)
        P_values = range(0, 3)
        D_values = range(0, 2)
        Q_values = range(0, 3)
        m = 7 #weekly seasonality
    
        fcstdays = 365
    
        best_score, best_params, best_seasonal_params = float("inf"), None, None
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    for P in P_values:
                        for D in D_values:
                            for Q in Q_values:
                                order = (p,d,q)
                                sorder = (P,D,Q,m)
                                try:
                                    mse = self.evaluate_sarima_model(series, order, sorder, fcstdays)
                                    if mse < best_score:
                                        best_score, best_params, best_seasonal_params = mse, order, sorder
                                except:
                                    # TODO: Logging.
                                    pass
        #print('Best SARIMA %s %s MSE=%.3f' % (best_params, best_seasonal_params, best_score))
        #print('Best Params: ', best_params)
    
    
        # apply model
        model = sm.tsa.statespace.SARIMAX(series,
                                          order=(best_params[0],best_params[1],best_params[2]),
                                          seasonal_order=(best_seasonal_params[0],best_seasonal_params[1],best_seasonal_params[2],m),
                                          enforce_stationarity=True,
                                          enforce_invertibility=True)
        model_fit = model.fit(transparams=True, maxiter=50, method='newton')
        print(model_fit.summary())
    
        pred_fcst = model_fit.predict(start=0, end=len(df['y'])-1, dynamic=False)
        sarima_forecast = model_fit.forecast(fcstdays)
        sarima_series = pred_fcst.append(sarima_forecast)
    
        return sarima_series

    
    def evaluate_sarima_model(self, X, order, sorder, fcstdays):
        # make predictions
        predictions = list()
        model = sm.tsa.statespace.SARIMAX(X,order=order,
                                          seasonal_order=sorder,
                                          enforce_stationarity=True,
                                          enforce_invertibility=True)
        model_fit = model.fit(transparams=True, maxiter=50, method='newton')
        predictions = model_fit.predict(start=0, end=len(X)-1, dynamic=False)
        #yhat = model_fit.forecast(fcstdays)    
        # calculate out of sample error
        error = mean_squared_error(X, predictions)
        print('error = ', error)
        return error
    
    
#    def HoltWinters(self, df, daysForecast):
#
#        seasonalP = 365
#        m_mul = ExponentialSmoothing(df['y'], seasonal='mul', seasonal_periods=seasonalP).fit()
#        hwForecast = m_mul.forecast(daysForecast)
#        HWfull = m_mul.fittedvalues.append(hwForecast)
#
#        return HWfull
    
    def HWFxn(self, df, fcstdays):

        series = df['y']
    
        # evaluate parameters
        t_params = ['add', 'mul', None]
        d_params = [True, False]
        s_params = ['add', 'mul', None]
        p = 365
        b_params = [True, False]
        r_params = [True, False]
           
        #t_params = ['add']
        #d_params = [True]
        #s_params = ['add']
        #p = 365
        #b_params = [True]
        #r_params = [True]

        best_score, best_params = float("inf"), None
        series = series.astype('double')
        for t in t_params:
            for d in d_params:
                for s in s_params:
                    for b in b_params:
                        for r in r_params:
                            params = (t,d,s,p,b,r)
                            try:
                                mse = self.evaluate_hw_model(series, params, fcstdays)
                                if mse < best_score:
                                    best_score, best_params = mse, params
                                #print('HW MSE=%.3f' % (mse))
                                print('HW params', params)
                            except:
                                pass  # TODO: Add logging.
        # apply model
        model = ExponentialSmoothing(series, trend=best_params[0], damped=best_params[1], seasonal=best_params[2], seasonal_periods=best_params[3])
        model_fit = model.fit(optimized=True, use_boxcox=best_params[4], remove_bias=best_params[5])
        print(model_fit.summary())
    
        pred_fcst = model_fit.fittedvalues
        hw_forecast = model_fit.forecast(fcstdays)
        hw_series = pred_fcst.append(hw_forecast)
    
        return hw_series

    
    def evaluate_hw_model(self, X, hw_params, fcstdays):
        (t,d,s,p,b,r) = hw_params
        # make predictions
        predictions = list()
        model = ExponentialSmoothing(X, trend=t, damped=d, seasonal=s, seasonal_periods=365)
        model_fit = model.fit(optimized=True, use_boxcox=b, remove_bias=r)
        predictions = model_fit.fittedvalues
        #yhat = model_fit.forecast(fcstdays)
        # calculate out of sample error
        error = mean_squared_error(X, predictions)
        print('error = ', error)
        print('hw_params = ', hw_params)
        return error
    
    def ArimaFxn(self, df, fcstdays):

       series = df['y']

       # evaluate parameters
       p_values = range(0, 6)
       d_values = range(0, 2)
       q_values = range(0, 6)

       fcstdays = 365

       best_score, best_params = float("inf"), None
       for p in p_values:
           for d in d_values:
               for q in q_values:
                    order = (p,d,q)
                    try:
                        mse = self.evaluate_arima_model(series, order, fcstdays)
                        if mse < best_score:
                            best_score, best_params = mse, order
                            print('ARIMA %s %s MSE=%.3f' % (order, mse))
                            print('Best ARIMA %s MSE=%.3f' % (best_params, best_score))
                            print('Best Params: ', best_params)
                    except:
                        pass  # TODO: Add logging.

       # apply model
       model = ARIMA(series, order=(best_params[0],best_params[1],best_params[2]))
       model_fit = model.fit(transparams=True)
       print(model_fit.summary())

       pred_fcst = model_fit.predict(start=1, end=len(df['y'])-1, dynamic=False)
       arima_forecast = model_fit.forecast(fcstdays)[0]
       af = pd.Series(arima_forecast)
       arima_series = pred_fcst.append(af)

       return arima_series


    def evaluate_arima_model(self, X, order, fcstdays):
       # make predictions
       predictions = list()
       model = ARIMA(X,order=order)
       model_fit = model.fit(transparams=True)
       predictions = model_fit.predict(start=1, end=len(X), dynamic=False)
       #yhat = model_fit.forecast(fcstdays)
       # calculate out of sample error
       error = mean_squared_error(X, predictions)
       print('error = ', error)
       return error

In [112]:
# Load the actuals from a CSV in the working directory and show (rows, columns).
# NOTE(review): the path is relative, so the notebook must be run from the
# folder that contains the file.
import pandas as pd
data = pd.read_csv('NA_NB_1_20181126.csv')
data.shape

(7414, 3)

In [113]:
# The class is defined in the cell above; the import below is only needed if
# it is moved into its own module.
# from time_series_multireg import TimeSeriesMultiReg
model = TimeSeriesMultiReg()

In [114]:
# Run the ARIMA-only model selection on the actuals.
# NOTE(review): the load and instantiation below repeat the two previous
# cells; they keep this cell runnable on its own.
import pandas as pd
data = pd.read_csv('NA_NB_1_20181126.csv')
data.shape

model = TimeSeriesMultiReg()

# Silences ALL warnings for the rest of the session, including convergence
# warnings from the model fits.
import warnings
warnings.filterwarnings("ignore")
x_arima = model._Run_Regressions(data, 365, forecasts={"arima"})
# Alternative runs. Later cells read x_prophet and x_hw, which are only
# assigned if the matching line here is uncommented and executed.
#x_all = model._Run_Regressions(data, 365)
#x_hw_arima_prophet = model._Run_Regressions(data, 365, forecasts={"holtwinters","arima","prophet"})
#x_hw = model._Run_Regressions(data, 365, forecasts={"holtwinters"})
#x_sarima = model._Run_Regressions(data, 365, forecasts={"sarima"})
#x_prophet = model._Run_Regressions(data, 365, forecasts={"prophet"})


error =  156829.66863864052
error =  118251.6869925596
error =  106167.16919156243
error =  98565.10839785644
error =  97476.20377309962
error =  2852739.6788911414
error =  2758718.5033208686
error =  77813.58636071102
error =  150752.30345150686
error =  110260.89038637289
error =  107033.44485952008
error =  2514181.6629254925
error =  2751774.1313081784
error =  2751358.8621354257
error =  2750815.7056953562
error =  2750555.645498087
error =  2750121.2827913393
error =  85387.53780911754
error =  107513.96416849835
error =  107513.76986752645
error =  107207.63110718154
error =  108178.45089503893
error =  2587732.1647843886
error =  2751310.7755381027
error =  2750418.329787104
error =  2826229.787822118
error =  2764140.9785928973
error =  2762357.728912969
error =  90252.61405205808
error =  107626.17263878159
error =  107865.49949815987
error =  146529.3725586483
error =  2630379.4367515077
error =  2750490.1548901214
error =  2749809.407721363
error =  2761659.2150330488
erro

error =  154198.55171871404
error =  154273.28271410646
error =  154272.1761229918
error =  147848921.30461967
error =  154474.49587252768
                              ARMA Model Results                              
Dep. Variable:                      y   No. Observations:                 1059
Model:                     ARMA(1, 0)   Log Likelihood               -5660.530
Method:                       css-mle   S.D. of innovations             50.647
Date:                Thu, 29 Nov 2018   AIC                          11327.060
Time:                        13:28:10   BIC                          11341.955
Sample:                             0   HQIC                         11332.705
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        316.2751     51.364      6.158      0.000     215.604     

error =  416605.2344335437
error =  196487.5165366922
error =  6538704.979213394
error =  6610568.366968148
error =  6601889.807494861
error =  6624628.299088012
error =  1429.23658251081
error =  1767.904426798399
error =  17284.48285400101
error =  13981.886350725848
error =  26268.067268237024
error =  28210.453344723934
error =  6530405.615827411
error =  6600539.90765843
error =  6603829.879668294
error =  6601882.910133086
error =  6610679.892451039
error =  6626523.487233165
error =  1471.9036161681015
error =  11155.758688494294
error =  14657.37865908167
error =  14004.683371694362
error =  23393.52336527325
error =  32298.887369171585
error =  6565924.819552689
error =  6608180.099391213
error =  6600233.636172778
error =  6623621.8842770215
error =  6596881.4435362015
error =  6617235.086647408
error =  9178.017137512086
error =  17407.859986966192
error =  23894.54198855317
error =  26431.305921184416
error =  14115.469376195715
error =  28357.236136219522
error =  6574980.

In [115]:
# Write the selected forecast of every sub-dimension in long format.
# Passing the result dict straight to writerow() would only write its keys
# (the sub-dimension tuples), never the yhat values.
import csv
with open('arima_all_yhat.csv', 'w', newline='') as f:  # newline='' as the csv docs require
    writer = csv.writer(f)
    writer.writerow(['subdimension', 'model', 'mape', 'step', 'yhat'])
    for sub_dimension, best in x_arima.items():
        if best is None:
            continue  # no model could be fitted for this sub-dimension
        model_name, mape, forecast = best
        label = '|'.join(str(part) for part in sub_dimension)
        for step, yhat in enumerate(forecast):
            writer.writerow([label, model_name, mape, step, yhat])

In [110]:
# NOTE(review): x_prophet is only assigned by a commented-out line in the run
# cell; on a fresh kernel this cell raises NameError.
x_prophet

1059

In [71]:
import csv

# Write x_prophet as a single CSV row.
# newline='' is what the csv module expects of the file object; without it
# the writer's own line terminators can be translated a second time.
# NOTE(review): the recorded run of this cell failed with
# "iterable expected, not float"; writerow() needs an iterable, so confirm
# x_prophet is the forecast sequence (not a scalar) before running.
with open('prophet_all_yhat.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(x_prophet)

Error: iterable expected, not float

In [2]:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-64-e2abd6c24e24> in <module>()
      2 warnings.filterwarnings("ignore")
      3 #x_arima = model._Run_Regressions(data, 365, forecasts={"arima"})
----> 4 x_all = model._Run_Regressions(data, 365)

<ipython-input-60-85726345dc46> in _Run_Regressions(self, df, forecastDays, region, forecasts)
    184                 model, mape = model_mape
    185 
--> 186                 if (bestModel is None) or ((bestModel[1] < mape).all()):
    187                     bestModel = (model, mape, forecast)
    188             bestModels[subDimension] = bestModel

AttributeError: 'bool' object has no attribute 'all'
    
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-65-1d6684c7257f> in <module>()
      3 #x_arima = model._Run_Regressions(data, 365, forecasts={"arima"})
      4 #x_all = model._Run_Regressions(data, 365)
----> 5 x_hw_arima_prophet = model._Run_Regressions(data, 365, forecasts={"holtwinters", "arima", "prophet"})

<ipython-input-60-85726345dc46> in _Run_Regressions(self, df, forecastDays, region, forecasts)
    184                 model, mape = model_mape
    185 
--> 186                 if (bestModel is None) or ((bestModel[1] < mape).all()):
    187                     bestModel = (model, mape, forecast)
    188             bestModels[subDimension] = bestModel

AttributeError: 'bool' object has no attribute 'all'



In [68]:
x_hw

{('Undefined',): ('HW', 0.42797985288540363, 0      1202.714761
  10     1224.839341
  20     1106.829419
  30     1171.257707
  40     1138.548503
  50     1203.069846
  60     1294.933963
  70     2061.375680
  80     1420.165528
  90     1367.114489
  100    1364.043389
  110    1385.250749
  120    1393.763467
  130    1467.999357
  140    1381.839951
  150    1200.939782
  160    1259.194835
  170    1370.732963
  180    1450.827681
  190    1450.547146
  200    1627.002621
  210    1311.806158
  220    1292.215946
  230    1268.699843
  240    1308.771147
  250    1421.896688
  260    1445.886388
  270    1482.907127
  280    1296.439311
  290    1224.147622
            ...     
  519     336.572606
  520     335.803416
  521     335.036568
  522     334.272051
  523     333.509859
  524     332.749982
  525     331.992411
  526     331.237140
  527     330.484158
  528     329.733458
  529     328.985032
  530     328.238870
  531     327.494966
  532     326.753310
  533     32

In [69]:
x_arima

{('Undefined',): ('Arima', 0.36358295101970234, 10     1172.335955
  20     1073.114499
  30     1149.484953
  40     1108.593687
  50     1168.727902
  60     1230.666145
  70     1910.182783
  80     1184.964141
  90     1242.692988
  100    1251.111778
  110    1269.152043
  120    1271.557411
  130    1331.090285
  140    1245.098356
  150    1118.816503
  160    1201.200379
  170    1281.178886
  180    1322.070153
  190    1304.029888
  200    1451.960059
  210    1163.917165
  220    1204.207090
  230    1189.173536
  240    1227.058092
  250    1312.448678
  260    1306.435256
  270    1331.691627
  280    1175.342666
  290    1152.491664
  300    1171.133271
            ...     
  335    1092.896629
  336    1092.896629
  337    1092.896629
  338    1092.896629
  339    1092.896629
  340    1092.896629
  341    1092.896629
  342    1092.896629
  343    1092.896629
  344    1092.896629
  345    1092.896629
  346    1092.896629
  347    1092.896629
  348    1092.896629
  349    

In [71]:
x_sarima

{('Undefined',): ('Sarima', 0.4395981075052672, 0         0.000000
  10     1173.644595
  20     1086.236987
  30     1150.099747
  40     1123.011468
  50     1177.022268
  60     1247.753423
  70     1915.735129
  80     1385.780862
  90     1343.640156
  100    1338.174204
  110    1361.374529
  120    1371.963986
  130    1475.530133
  140    1327.153314
  150    1239.485466
  160    1272.899783
  170    1362.206091
  180    1405.650009
  190    1401.617311
  200    1446.027011
  210    1352.535640
  220    1304.075279
  230    1293.079406
  240    1315.225839
  250    1395.518663
  260    1412.750504
  270    1454.662720
  280    1302.623529
  290    1246.163600
            ...     
  519     868.592771
  520     884.119258
  521     863.475104
  522     872.653835
  523     882.207215
  524     832.785137
  525     857.981689
  526     867.638516
  527     856.945388
  528     876.166934
  529     877.423587
  530     859.017681
  531     897.412803
  532     846.824600
  533    

In [64]:
x_prophet

{('Other',): ('Prophet', 0.31765195008575847, 0       1099.905994
  1       1043.518023
  2       1154.811333
  3       1308.227687
  4       1422.462570
  5       1486.236205
  6       1495.929152
  7       1428.149159
  8       1364.664121
  9       1464.269592
  10      1602.238414
  11      1697.935095
  12      1740.445233
  13      1726.531659
  14      1633.337921
  15      1542.653755
  16      1612.919251
  17      1720.567238
  18      1785.900545
  19      1798.574070
  20      1755.867296
  21      1635.521910
  22      1519.339025
  23      1565.320955
  24      1650.909018
  25      1697.127946
  26      1693.926358
  27      1638.772726
  28      1509.635359
  29      1387.920486
             ...     
  1395    1301.178534
  1396    1465.235660
  1397    1567.930687
  1398    1592.516626
  1399    1534.041902
  1400    1356.918270
  1401    1188.437864
  1402    1276.091007
  1403    1427.810113
  1404    1519.125947
  1405    1533.515379
  1406    1466.250615
  1407    

In [7]:
# Dump the printed form of each forecast result to its own text file.
# x_sarima is deliberately not written here (its dump was commented out
# in this cell).
for out_path, result in (
    ('x_prophet_mult_all.txt', x_prophet),
    ('x_hw_all.txt', x_hw),
    ('x_arima_wider_all.txt', x_arima),
):
    with open(out_path, 'w') as f:
        print(result, file=f)

In [99]:
# Save the printed form of x_sarima, followed by a newline.
with open('x_sarima50_nm.txt', 'w') as f:
    f.write(str(x_sarima) + '\n')

In [104]:
# Save the printed form of x_sarima, followed by a newline.
with open('x_sarima50_ncg.txt', 'w') as f:
    f.write(str(x_sarima) + '\n')

In [None]:
# Save the printed form of x_sarima, followed by a newline.
with open('x_sarima50_newton.txt', 'w') as f:
    f.write(str(x_sarima) + '\n')

In [90]:
x_prophet

(0.26790048226276586, 0       2361.854101
 1       2367.968985
 2       2911.961912
 3       3155.609276
 4       3242.407233
 5       3307.796281
 6       3279.616225
 7       3157.220028
 8       3134.894235
 9       3639.036321
 10      3837.708684
 11      3874.181424
 12      3883.834998
 13      3795.954424
 14      3610.929065
 15      3522.807390
 16      3956.190965
 17      4085.934698
 18      4055.440637
 19      4000.231378
 20      3851.079923
 21      3609.295664
 22      3468.417538
 23      3850.832418
 24      3937.632820
 25      3871.676120
 26      3787.845846
 27      3617.536696
 28      3362.047994
 29      3213.437811
            ...     
 1394    1440.573422
 1395    1829.109206
 1396    1942.823565
 1397    1914.400635
 1398    1869.424527
 1399    1742.334439
 1400    1533.340458
 1401    1422.563729
 1402    1804.934227
 1403    1913.011572
 1404    1879.394144
 1405    1829.582236
 1406    1698.364620
 1407    1486.113600
 1408    1372.645650
 1409    1752

In [91]:
# Save the printed form of x_prophet, followed by a newline.
with open('x_prophet_outputs_all_v1.txt', 'w') as f:
    f.write(str(x_prophet) + '\n')

<bound method TimeSeriesMultiReg.ProphetFxnNone of <__main__.TimeSeriesMultiReg object at 0x1c2d664d30>>


In [27]:
def ProphetFxnNA(df, daysForecast):
    """Fit a Prophet model with a fixed holiday calendar and return ``yhat``.

    Parameters
    ----------
    df
        Frame handed unchanged to ``Prophet.fit``.
    daysForecast
        Number of periods passed to ``make_future_dataframe``.

    Returns
    -------
    The ``'yhat'`` column of the frame produced by ``Prophet.predict`` for
    the frame built by ``make_future_dataframe``.
    """
    # One entry per holiday: (name, dates, lower_window, upper_window).
    holiday_specs = [
        ('easterSunday',
         ['2010-04-04', '2011-04-24', '2012-04-08', '2013-03-31', '2014-04-20',
          '2015-04-05', '2016-03-27', '2017-04-16', '2018-04-01', '2019-04-21',
          '2020-04-12', '2021-04-04', '2022-04-17', '2023-04-09', '2024-03-31'],
         -2, 0),
        ('memorialMonday',
         ['2010-05-31', '2011-05-30', '2012-05-28', '2013-05-27', '2014-05-26',
          '2015-05-25', '2016-05-30', '2017-05-29', '2018-05-28', '2019-05-27',
          '2020-05-25', '2021-05-31', '2022-05-30', '2023-05-29', '2024-05-27'],
         -2, 0),
        ('laborMonday',
         ['2010-09-6', '2011-09-5', '2012-09-3', '2013-09-2', '2014-09-1',
          '2015-09-7', '2016-09-5', '2017-09-4', '2018-09-3', '2019-09-2',
          '2020-09-7', '2021-09-6', '2022-09-5', '2023-09-4', '2024-09-2'],
         -2, 0),
        ('thanksgiving',
         ['2010-11-25', '2011-11-24', '2012-11-22', '2013-11-28', '2014-11-27',
          '2015-11-26', '2016-11-24', '2017-11-23', '2018-11-22', '2019-11-28',
          '2020-11-26', '2021-11-25', '2022-11-24', '2023-11-23', '2024-11-28'],
         0, 1),
    ]

    # Build one frame per holiday, then stack them in the order listed above.
    holiday_frames = [
        pd.DataFrame({
            'holiday': name,
            'ds': pd.to_datetime(dates),
            'lower_window': lower,
            'upper_window': upper,
        })
        for name, dates, lower, upper in holiday_specs
    ]
    holidays = pd.concat(holiday_frames)

    prophet_model = Prophet(
        holidays=holidays,
        daily_seasonality=False,
        yearly_seasonality=True,
        weekly_seasonality=True,
        seasonality_mode='multiplicative',
    )
    prophet_model.fit(df)

    # Frame of dates to predict on, extended by daysForecast periods.
    horizon = prophet_model.make_future_dataframe(periods=daysForecast)
    prediction = prophet_model.predict(horizon)
    return prediction['yhat']


In [32]:
import pandas as pd

# Load the direct-channel CSV and forecast 365 periods ahead.
# The bare `data.shape` that sat between these two statements was dropped:
# it was not the cell's last expression, so its value was never displayed.
data = pd.read_csv('NA_NB_direct.csv')
px = ProphetFxnNA(data, 365)

In [40]:
import csv

# Write the px forecast values as a single CSV row.
# newline='' is what the csv module expects of the file object; without it
# the writer's own line terminators can be translated a second time.
with open('data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(px)

In [38]:
import pandas as pd

# Load the SEO-channel CSV and forecast 365 periods ahead.
# The bare `data.shape` that sat between these two statements was dropped:
# it was not the cell's last expression, so its value was never displayed.
data = pd.read_csv('NA_NB_SEO.csv')
px_seo = ProphetFxnNA(data, 365)

In [41]:
import csv

# Write the px_seo forecast values as a single CSV row.
# newline='' is what the csv module expects of the file object; without it
# the writer's own line terminators can be translated a second time.
with open('data_seo.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(px_seo)

In [47]:
import csv

# Write the px_seo forecast values as a column: one value per CSV row.
# Transposing a Series returns the Series itself, so the previous
# writerow(px_seo.T) still emitted everything on one row.
# newline='' is what the csv module expects of the file object.
with open('data_seoCol.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows([value] for value in px_seo)

In [33]:
data.head()

Unnamed: 0,ds,RLT Marketing Channel Name,y
0,1/1/16,Direct,1089
1,1/2/16,Direct,1592
2,1/3/16,Direct,1812
3,1/4/16,Direct,2098
4,1/5/16,Direct,2347


In [34]:
px

0       1521.402430
1       1574.719982
2       1908.195875
3       1973.045792
4       2010.224805
5       2059.714305
6       2055.332319
7       2004.220520
8       2039.490061
9       2348.795544
10      2385.624268
11      2390.517748
12      2403.834876
13      2360.219494
14      2267.443187
15      2258.851205
16      2522.172942
17      2513.581869
18      2473.530374
19      2442.981477
20      2357.528251
21      2225.589404
22      2180.654798
23      2410.334445
24      2373.193604
25      2309.207777
26      2259.472000
27      2159.983094
28      2019.200724
29      1970.106561
           ...     
1394    2904.286303
1395    3672.086071
1396    3658.851607
1397    3569.522980
1398    3523.082039
1399    3327.678029
1400    3005.798064
1401    2949.874557
1402    3714.011361
1403    3693.746109
1404    3596.574984
1405    3542.210540
1406    3338.550250
1407    3008.463945
1408    2945.869242
1409    3706.798385
1410    3681.962716
1411    3581.128139
1412    3524.378360


In [None]:
        for subDimension in actualsDictKey:
            # Initialize to an empty list for the specified subdimension.
            mape_results[subDimension] = []
            for model, forecast in forecast_results[subDimension]:
                mape_results[subDimension].append((
                    model, 
                    self.mapeCalc(
                        forecast[:len(actualsDict[subDimension].groupby('ds', as_index=False).sum())],
                        actualsDict[subDimension].groupby('ds', as_index=False).sum()['y']
                    )
                ))