In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stat
import scipy.io as scipio
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime

In [2]:
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.sarimax import SARIMAXResults

  from pandas.core import datetools


In [3]:
## read train & test data
data_train = pd.DataFrame(pd.read_csv("data_train.csv"))
data_test = pd.DataFrame(pd.read_csv("data_test.csv"))
data_train = data_train.dropna(0)
data_test = data_test.dropna(0)

## index by visit_date column 
## the dates are not unique yet
data_train = data_train.set_index("visit_date", drop=True)
data_test = data_test.set_index("visit_date", drop=True)

## convert the dates from string to datetime
arima_dates_train = pd.Series(data_train.index.values).\
apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
arima_dates_test = pd.Series(data_test.index.values).\
apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))

## get the unique date index for full data
data = pd.concat([data_train,data_test],0)
arima_dates_full = pd.Series(data.index.values).\
apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))

## Train-Val Split

In [7]:
import os
## leave out Feb 2017 data for each restaurant df 
## as validation split
def train_val_split(path):
    """Takes as input a directory, splits all the training csv 
    files in the directory as training and validation, appends the 
    dataframes to separate dictionaries, where keys are rest ids"""
    train_dict = {}
    val_dict = {}
    for rest_file in os.listdir(path)[1:]:
        data = pd.DataFrame(pd.read_csv(path + rest_file))
        data = data.set_index("visit_date",drop=True)
        ## convert index to datetime to filter
        data.index = pd.Series(data.index).\
        apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
        ## filter the records before 2017-02-01 for training
        data_train = data[data.index<datetime.strptime("2017-02-01", "%Y-%m-%d")]
        ## filter the records after 2017-02-01 for validation
        data_val = data[data.index>=datetime.strptime("2017-02-01", "%Y-%m-%d")]
        ## return two datasets: training and validation
        train_dict[rest_file] = data_train
        val_dict[rest_file] = data_val
    return train_dict, val_dict

In [8]:
path = "arima_rest_data/train/"
train_dict, val_dict = train_val_split(path)[0], train_val_split(path)[1]

In [9]:
endog_col = ["visitors"]
exog_col = ['Friday', 'Monday', 'Saturday','Sunday', 'Thursday', 
            'Tuesday', 'Wednesday','avg_temperature1','high_temperature1', 
            'holiday_flg', 'hours_sunlight1','low_temperature1',
           'total_area_visitors', 'total_genre_visitors']

## Grid Search

In [13]:
## manual param grid
import itertools

order_p = np.array([1,7])
order_d = np.arange(1,3) ## order of differencing
order_q = np.array([1,7])
s_order_p = np.array([1,7])
s_order_d = np.arange(1,3) ## order of seasonal differencing
s_order_q = np.array([1,7])
s_order_s = np.array([4,1]) ## seasons per year
param_trend = ["n","c"]

grid_lists = [order_p, order_d, order_q, s_order_p, s_order_d,\
             s_order_q, s_order_s, param_trend]

grid = list(itertools.product(*grid_lists))

In [14]:
endog_col = ["visitors"]
exog_col = ['Friday', 'Monday', 'Saturday','Sunday', 'Thursday', 
            'Tuesday', 'Wednesday','avg_temperature1','high_temperature1', 
            'holiday_flg', 'hours_sunlight1','low_temperature1',
           'total_area_visitors', 'total_genre_visitors']

In [17]:
grid = list(itertools.product(*grid_lists))
## delete from directory
# air_28dbe91c4c9656be.csv
# air_2a485b92210c98b5.csv

# grid_model_params = {}
for params in grid:
    print (params)
    ## will average the learned parameters of all 
    ## the restaurants for every set of parameter 
    ## then will pass them to the prediction function
    ## as start params
    grid_model_params[params] = {}
    ## select 150 random restaurants
    for rest in [os.listdir("arima_rest_data/train/")[1:][ix] for ix in \
                 [np.random.choice(665) for x in range(10)]]:
        print (str(rest))
        ## train_dict: previously written training dictionary of all rests 
        data = train_dict[rest]
        if "total_area_visitors" not in data.columns.values or "total_genre_visitors" not in data.columns.values:
            None
        elif data[exog_col].shape[0]<2 or data[exog_col].shape[1]<8\
        or len(data[endog_col])<8: 
            None
        ## params_exog = np.linalg.pinv(exog).dot(endog)
        elif data[endog_col].shape[0] < 1 or data[endog_col].shape[1] < 1:
            None
        elif len((np.linalg.pinv(data[exog_col]).dot(data[endog_col])).shape) < 2: 
            None
        # endog = endog - np.dot(exog, params_exog)
        elif np.dot(data[exog_col], np.linalg.pinv(data[exog_col]).dot(data[endog_col])).shape[0]<1\
        or np.dot(data[exog_col], np.linalg.pinv(data[exog_col]).dot(data[endog_col])).shape[1]<1:
            None
        elif rest == "air_28dbe91c4c9656be.csv":
            None
        elif rest == "air_4dea8d17f6f59c56.csv":
            None
        elif rest == ".ipynb_checkpoints":
            None
        else:
            sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                           order=(int(params[0]),int(params[1]),int(params[2])),seasonal_order=(int(params[3]),int(params[4]),int(params[5]),int(params[6])),\
                                    trend=params[7], mle_regression=True, enforce_invertibility=False)

            grid_model_params[params][rest] = sarimax_model.fit(maxiter=1000,disp=True,return_params=True,full_output=True)
        

In [18]:
def loss_mape(true, pred):
    """args:
    - true: the true array of "visitor" numbers 
            for the determined timeframe - shape: (n_days,)
    - pred: the predicted array of "visitor" numbers
            for the determined timeframe - shape: (n_days,)
       returns:
    - avg_percent_error: average percentage error in terms
                        of visitors"""
    abs_error = np.abs(np.subtract(true,pred))
    loss = np.divide(abs_error,true)
    avg_percent_error = np.mean(loss)
    return avg_percent_error

In [19]:
def loss_squared(true, pred):
    """args:
    - true: the true array of "visitor" numbers 
            for the determined timeframe - shape: (n_days,)
    - pred: the predicted array of "visitor" numbers
            for the determined timeframe - shape: (n_days,)
       returns:
    - avg_square_error: average squared error in terms
                        of visitors, averaged by the number
                        of days"""
    num_days = len(true)
    sq_error = np.square(np.array(true-pred))
    avg_square_error = (1/num_days)*np.sum(sq_error)
    return avg_square_error

In [20]:
grid_search_iters = {}

In [22]:
class SARIMAXGridSearch(object):
    def __init__(self, param_grid=grid, rest_dir="arima_rest_data/train/",\
                 num_rests=10, len_rests=600):
        
        self.param_grid = param_grid
        self.rest_dir = rest_dir
        self.num_rests = num_rests
        self.len_rests = len_rests
        
    def select_setting(self, grid_index):
        
        ## n = int index of param_grid
        param_grid = self.param_grid
        rest_dir = self.rest_dir
        num_rests = self.num_rests
        len_rests = self.len_rests
        ## grid settings
        params = param_grid[grid_index]
        rests = [os.listdir(rest_dir)[1:][ix] for ix in \
                 [np.random.choice(len_rests) for x in range(num_rests)]]
        
        self.params = params
        self.rests = rests
        return self
    
    def averaging_fit(self):
        params = self.params
        rests = self.rests
        model_params = {}
        
        if len(rests) == 1:
            if "total_area_visitors" not in train_dict[rests[0]].columns.values or\
            "total_genre_visitors" not in train_dict[rests[0]].columns.values:
                pass
            elif train_dict[rests[0]].shape[0] < 24 and val_dict[rests[0]].shape[0] < 8:
                pass
            else:
                data = train_dict[rest]
                val_data = val_dict[rest]
                sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                                   order=(int(params[0]),int(params[1]),int(params[2])),\
                                            seasonal_order=(int(params[3]),int(params[4]),\
                                                            int(params[5]),int(params[6])),\
                                            trend=params[7], mle_regression=True, \
                                            enforce_invertibility=False)
                results_object = sarimax_model.fit(maxiter=1000,method="bfgs")
                self.results_object = results_object

        else:
            rest_params = []
            for rest_ix in range(len(rests)):
                if "total_area_visitors" not in train_dict[rests[rest_ix]].columns.values or\
                "total_genre_visitors" not in train_dict[rests[rest_ix]].columns.values:
                    continue
                elif train_dict[rests[rest_ix]].shape[0] < 24 and val_dict[rests[rest_ix]].shape[0] < 24:
                    continue
                else:
                    data = train_dict[rests[rest_ix]]
                    sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                                   order=(int(params[0]),int(params[1]),int(params[2])),\
                                            seasonal_order=(int(params[3]),int(params[4]),\
                                                            int(params[5]),int(params[6])),\
                                            trend=params[7], mle_regression=True, \
                                            enforce_invertibility=False)
                    results_object = sarimax_model.fit(start_params=None,maxiter=1000,method="bfgs",\
                                                          return_params=True)
                    rest_params.append(results_object)
                    final_start_params = np.mean(rest_params,0)
            self.average_params = final_start_params
        return self
    
    def bag_pred(self):
        rests = self.rests
        params = self.params
        average_params = self.average_params
        rest_preds = {}
        for rest in rests:
            data = train_dict[rest]
            val_data = val_dict[rest]
            print (rest)
            sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                               order=(int(params[0]),int(params[1]),int(params[2])),\
                                        seasonal_order=(int(params[3]),int(params[4]),\
                                                        int(params[5]),int(params[6])),\
                                        trend=params[7], mle_regression=True, \
                                        enforce_invertibility=False,time_varying_regression=False)
            final_model = sarimax_model.fit(start_params=average_params,\
                                              maxiter=1000,method="bfgs")
            bag_preds_ = final_model.predict\
                    (start=len(data)-1, end=len(data)+len(val_data)-1,dynamic=True,\
                    exog=val_data[exog_col])
            rest_preds[rest] = bag_preds_
            
        self.preds = rest_preds
        return self, rest_preds
    
    def bag_losses(self):
        rests = self.rests
        params = self.params
        average_params = self.average_params
        preds = self.preds
        bagged_rest_scores = {}
        for rest in self.rests:
            val_data = val_dict[rest]
            pred = preds[rest]
            true = np.array(val_data["visitors"])
            abs_percent_loss = loss_mape(true,pred[:len(true)])
            square_loss = loss_squared(true,pred[:len(true)])
            bagged_rest_scores[rest] = [abs_percent_loss,square_loss] 
        return bagged_rest_scores
    
    def iterative_fit(self):
        ### param yazdirabildigimize gore, ortalama almali yapabilriiz
        ## !!!!!!!!!!! yay 
        ## bir de tekrar fit etmemeli yazmaya calis
        params = self.params
        rests = self.rests
        model_params = {}
        
        if len(rests) == 1:
            data = train_dict[rest]
            val_data = val_dict[rest]
            sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                               order=(int(params[0]),int(params[1]),int(params[2])),\
                                        seasonal_order=(int(params[3]),int(params[4]),\
                                                        int(params[5]),int(params[6])),\
                                        trend=params[7], mle_regression=True, \
                                        enforce_invertibility=False)
            
            results_object = sarimax_model.fit(maxiter=1000,method="bfgs")
            self.results_object = results_object
        else:
            for rest_ix in range(len(rests)):
                if "total_area_visitors" not in train_dict[rests[rest_ix]].columns.values or\
                "total_genre_visitors" not in train_dict[rests[rest_ix]].columns.values:
                    continue
                elif train_dict[rests[rest_ix]].shape[0] < 24 and val_dict[rests[rest_ix]].shape[0] < 24:
                    continue
                else:
                    data = train_dict[rests[rest_ix]]
                    val_data = val_dict[rests[rest_ix]]
                    sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                                   order=(int(params[0]),int(params[1]),int(params[2])),\
                                            seasonal_order=(int(params[3]),int(params[4]),\
                                                            int(params[5]),int(params[6])),\
                                            trend=params[7], mle_regression=True, \
                                            enforce_invertibility=False)
                    if rest_ix == 0:
                        results_object = sarimax_model.fit(start_params=None,maxiter=1000,method="bfgs",\
                                                          return_params=True)
                        model_params[rests[rest_ix]] = results_object
                    elif rest_ix != 0 and rest_ix < len(rests)-1:
                        results_object = sarimax_model.fit(start_params=model_params[rests[rest_ix-1]],maxiter=1000,method="bfgs",\
                                                          return_params=True)
                        model_params[rests[rest_ix]] = results_object
                    elif rest_ix == len(rests)-1:
                        results_object = sarimax_model.fit(start_params=model_params[rests[rest_ix-1]],\
                                                           maxiter=1000,method="bfgs",return_params=True)
                        model_params[rests[rest_ix]] = results_object
            self.results_object = model_params[rests[len(rests)-1]]
        return self
    
    def final_preds(self):
        rests = self.rests
        params = self.params
        results_object = self.results_object
        
        rest_preds = {}
        for rest in rests:
            if "total_area_visitors" not in train_dict[rest].columns.values or\
                "total_genre_visitors" not in train_dict[rest].columns.values:
                continue
            elif train_dict[rest].shape[0] < 24 and val_dict[rest].shape[0] < 24:
                continue
            else:
                data = train_dict[rest]
                val_data = val_dict[rest]
                sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                                   order=(int(params[0]),int(params[1]),int(params[2])),\
                                            seasonal_order=(int(params[3]),int(params[4]),\
                                                            int(params[5]),int(params[6])),\
                                            trend=params[7], mle_regression=True, \
                                            enforce_invertibility=False,time_varying_regression=False)
                final_model = sarimax_model.fit(start_params=results_object,\
                                                  maxiter=1000,method="bfgs")
                preds_ = final_model.predict\
                        (start=len(data)-1, end=len(data)+len(val_data)-1,dynamic=True,\
                        exog=val_data[exog_col])
                rest_preds[rest] = preds_

        self.preds = rest_preds
        return self, rest_preds
    
    def final_scores(self):
        preds = self.preds
#         print (preds)
        rests = self.rests
#         print (rests)
        rest_scores = {}
        for rest in rests:
            val_data = val_dict[rest]
            pred = preds[rest]
            true = np.array(val_data["visitors"])
            abs_percent_loss = loss_mape(true,pred[:len(true)])
            square_loss = loss_squared(true,pred[:len(true)])
            rest_scores[rest] = [abs_percent_loss,square_loss]
        return rest_scores

In [23]:
grid_search = {}
grid_try = [(1,1,1,1,1,1,1,"c")]
for p in range(len(grid_try)):
    print (grid_try[p])
    gsearch = SARIMAXGridSearch(param_grid=grid_try, rest_dir="arima_rest_data/train/",\
                 num_rests=100, len_rests=600)
    gsearch = gsearch.select_setting(p)
    gsearch.iterative_fit()
    gsearch.final_preds()
    grid_search[grid_try[p]] = gsearch.final_scores()
# grid_search

In [26]:
pd.DataFrame(grid_search[(1,1,1,1,1,1,1,"c")]).to_csv("grid_search_preds/scores/iterative/(1,1,1,1,1,1,1,c)")

In [28]:
gsearch = SARIMAXGridSearch(param_grid=grid, rest_dir="arima_rest_data/train/",\
                 num_rests=15, len_rests=600)

In [29]:
gsearch = gsearch.select_setting(7)

In [30]:
gsearch.averaging_fit()

Optimization terminated successfully.
         Current function value: 3.418568
         Iterations: 229
         Function evaluations: 244
         Gradient evaluations: 244
Optimization terminated successfully.
         Current function value: 2.988528
         Iterations: 199
         Function evaluations: 221
         Gradient evaluations: 221
Optimization terminated successfully.
         Current function value: 3.075766
         Iterations: 245
         Function evaluations: 264
         Gradient evaluations: 264
         Current function value: 3.669097
         Iterations: 89
         Function evaluations: 154
         Gradient evaluations: 144




         Current function value: 3.870229
         Iterations: 335
         Function evaluations: 484
         Gradient evaluations: 473




Optimization terminated successfully.
         Current function value: 3.700973
         Iterations: 236
         Function evaluations: 255
         Gradient evaluations: 255
         Current function value: 2.969288
         Iterations: 209
         Function evaluations: 297
         Gradient evaluations: 285




Optimization terminated successfully.
         Current function value: 3.613834
         Iterations: 190
         Function evaluations: 199
         Gradient evaluations: 199
Optimization terminated successfully.
         Current function value: 2.217192
         Iterations: 140
         Function evaluations: 154
         Gradient evaluations: 154
         Current function value: 4.040285
         Iterations: 414
         Function evaluations: 530
         Gradient evaluations: 521




         Current function value: 4.563948
         Iterations: 321
         Function evaluations: 410
         Gradient evaluations: 398




Optimization terminated successfully.
         Current function value: 3.531848
         Iterations: 214
         Function evaluations: 231
         Gradient evaluations: 231
Optimization terminated successfully.
         Current function value: 3.324684
         Iterations: 203
         Function evaluations: 218
         Gradient evaluations: 218
         Current function value: 3.703693
         Iterations: 244
         Function evaluations: 286
         Gradient evaluations: 275




<__main__.SARIMAXGridSearch at 0x1c1abfd198>

In [31]:
gsearch.bag_pred()

air_735bcbe1763d6e98.csv
Optimization terminated successfully.
         Current function value: 3.418581
         Iterations: 226
         Function evaluations: 237
         Gradient evaluations: 237
air_35512c42db0868da.csv
Optimization terminated successfully.
         Current function value: 2.988528
         Iterations: 160
         Function evaluations: 169
         Gradient evaluations: 169
air_b45b8e456f53942a.csv
Optimization terminated successfully.
         Current function value: 3.067858
         Iterations: 185
         Function evaluations: 193
         Gradient evaluations: 193
air_56ea46c14b2dd967.csv
         Current function value: 3.667021
         Iterations: 248
         Function evaluations: 354
         Gradient evaluations: 342
air_3bb99a1fe0583897.csv




Optimization terminated successfully.
         Current function value: 3.869868
         Iterations: 242
         Function evaluations: 263
         Gradient evaluations: 263
air_dea0655f96947922.csv
Optimization terminated successfully.
         Current function value: 3.700973
         Iterations: 231
         Function evaluations: 263
         Gradient evaluations: 263
air_d4d218b451f82c3d.csv
         Current function value: 2.987064
         Iterations: 237
         Function evaluations: 274
         Gradient evaluations: 264
air_8093d0b565e9dbdf.csv




Optimization terminated successfully.
         Current function value: 3.613483
         Iterations: 473
         Function evaluations: 525
         Gradient evaluations: 525
air_caf996ac27206301.csv
Optimization terminated successfully.
         Current function value: 2.214641
         Iterations: 141
         Function evaluations: 151
         Gradient evaluations: 151
air_465bddfed3353b23.csv
         Current function value: 4.042214
         Iterations: 364
         Function evaluations: 632
         Gradient evaluations: 612
air_399904bdb7685ca0.csv




         Current function value: 4.564443
         Iterations: 269
         Function evaluations: 379
         Gradient evaluations: 368
air_9aa92007e3628dbc.csv




         Current function value: 3.531059
         Iterations: 251
         Function evaluations: 313
         Gradient evaluations: 301
air_9c6787aa03a45586.csv




         Current function value: nan
         Iterations: 31
         Function evaluations: 119
         Gradient evaluations: 118
air_c3585b0fba3998d0.csv




Optimization terminated successfully.
         Current function value: 3.324684
         Iterations: 194
         Function evaluations: 215
         Gradient evaluations: 213
air_97b2a9f975fc702c.csv
Optimization terminated successfully.
         Current function value: 3.718183
         Iterations: 307
         Function evaluations: 330
         Gradient evaluations: 330


(<__main__.SARIMAXGridSearch at 0x1c1abfd198>,
 {'air_35512c42db0868da.csv': 337     6.219898
  338     7.491017
  339     7.662160
  340     8.966549
  341    10.885319
  342    10.513021
  343     5.674808
  344     6.363556
  345     8.301537
  346     6.454974
  347     8.572288
  348    12.973392
  349    10.355408
  350     6.220201
  351     6.506216
  352     8.185356
  353     7.337406
  354     8.706348
  355    11.582749
  356    11.119042
  357     5.184369
  358     7.130954
  359     8.078373
  360     7.374720
  361     9.962675
  362    11.462032
  363    11.267487
  364     6.682769
  365     7.085599
  dtype: float64, 'air_399904bdb7685ca0.csv': 378    21.434092
  379    35.863454
  380    16.832199
  381    48.824591
  382    39.443823
  383    31.212278
  384    12.386226
  385    19.694704
  386    25.350725
  387    24.337405
  388    46.250057
  389    54.823268
  390    34.688540
  391    18.268206
  392    22.509926
  393    22.050580
  394    22.070123
  395  

In [32]:
gsearch.bag_losses()

{'air_35512c42db0868da.csv': [0.9050488850149555, 14.803595513455448],
 'air_399904bdb7685ca0.csv': [0.8190052639915087, 408.55234115709425],
 'air_3bb99a1fe0583897.csv': [0.697097741333219, 292.77662859933423],
 'air_465bddfed3353b23.csv': [0.5138629106134974, 225.29325866378471],
 'air_56ea46c14b2dd967.csv': [0.8074954867790721, 195.98230681769328],
 'air_735bcbe1763d6e98.csv': [2.805219513362827, 82.309830539164423],
 'air_8093d0b565e9dbdf.csv': [0.3948572576585378, 196.66957268840667],
 'air_97b2a9f975fc702c.csv': [0.5454366869689788, 323.08739885427008],
 'air_9aa92007e3628dbc.csv': [0.501445254960189, 367.05581942925335],
 'air_9c6787aa03a45586.csv': [0.6910849341954948, 915.29350433938771],
 'air_b45b8e456f53942a.csv': [1.6961959053274809, 92.446616956130214],
 'air_c3585b0fba3998d0.csv': [1.808139008541642, 110.64072580783402],
 'air_caf996ac27206301.csv': [0.7708130735324437, 13.405655818394575],
 'air_d4d218b451f82c3d.csv': [1.831434940349982, 54.230373273943975],
 'air_dea06

In [35]:
def grid_search_losses(param_grid=None,rest_dir="arima_rest_data/train/",\
              num_rests=10, len_rests=665):
    ## initialize grid_model_params dict
    ## keys: (order param lists tuples in the grid)
    ## vals: {dicts with keys=rest_ids: values=corresponding preds}  
    grid_model_preds = {}
    ## for each parameter tuple in the passed grid
    for params in param_grid:
        print (params)
        ## initialize an empty dict
        grid_model_preds[params] = {}
        ## select "num_rests" number of restaurant ids from the directory
        for rest in [os.listdir(rest_dir)[1:][ix] for ix in \
                 [np.random.choice(len_rests) for x in range(num_rests)]]:
            ## the training data of the corresponding rest_id
            print (rest)
            if "total_area_visitors" not in train_dict[rest].columns.values or\
            "total_genre_visitors" not in train_dict[rest].columns.values:
                None
            elif train_dict[rest].shape[0] < 24 and val_dict[rest].shape[0] < 8:
                None
            else:    
                data = train_dict[rest]
                ## the validation data of the corresponding rest_id
                val_data = val_dict[rest]
                ## SARIMAX model for the current parameters and data
                sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                               order=(int(params[0]),int(params[1]),int(params[2])),\
                                        seasonal_order=(int(params[3]),int(params[4]),\
                                                        int(params[5]),int(params[6])),\
                                        trend=params[7], mle_regression=True, \
                                        enforce_invertibility=False)

                results_object = sarimax_model.fit(maxiter=1000,method="bfgs")
                ## predictions: len(preds) = val_data.shape[0]
                
                sarimax_model1 = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
                               order=(int(params[0]),int(params[1]),int(params[2])),\
                                        seasonal_order=(int(params[3]),int(params[4]),\
                                                        int(params[5]),int(params[6])),\
                                        trend=params[7], mle_regression=True, \
                                        enforce_invertibility=False)
                results_object1 = sarimax_model1.fit(start_params=results_object.params,\
                                                    maxiter=1000, method="bfgs")
                
                predictions_ = results_object1.predict\
                (start=len(data)-1, end=len(data)+len(val_data)-1,dynamic=True,\
                exog=val_data[exog_col].head(2))
                
                pred = np.array(predictions_)
                true = np.array(val_data[endog_col])
                
                abs_percent_loss = loss_mape(true,pred)
                print (abs_percent_loss)
                square_loss = loss_squared(true,pred)
                print (square_loss)
                grid_model_preds[params][rest] = [abs_percent_loss,square_loss]

    return grid_model_preds

In [36]:
for iter_num in list(grid_search_iters.keys()): 
    iter_data = pd.DataFrame(grid_search_iters[iter_num])
    iter_data.to_csv("grid_search_preds/" + str(iter_num) + ".csv")

In [37]:
# params = (1,1,1,1,1,1,1,"c")
# sarimax_model = SARIMAX(endog=data[endog_col],exog=data[exog_col],\
#                                order=(int(params[0]),int(params[1]),int(params[2])),\
#                                         seasonal_order=(int(params[3]),int(params[4]),\
#                                                         int(params[5]),int(params[6])),\
#                                         trend=params[7], mle_regression=True, \
#                                         enforce_invertibility=False)
# results_object1 = sarimax_model.fit(maxiter=1000,method="bfgs",\
#                                          return_params=False)
# results_object1.plot_diagnostics(figsize=(10,20))

In [194]:
np.divide(np.abs(np.subtract(true[:24],pred[:24])), np.abs(true[:24]))

array([ 0.02812093,  0.07614396,  0.28566933,  0.98531615,  1.18059439,
        0.02814182,  0.04082045,  0.06383652,  0.74416884,  0.28821706,
        0.66639737,  1.45516273,  0.01043705,  0.37161201,  0.45667246,
        0.51899032,  0.23114242,  0.4349709 ,  7.82004663,  0.6566238 ,
        0.17851903,  0.07183971,  0.11575361,  0.49033753])

In [198]:
for param in grid: 
    if param in list(grid_model_params.keys()):
        param_data = pd.DataFrame(grid_model_params[param])
        param_data.to_csv("param_grid_csv/" + str(param) + ".csv")