### Import Necessary Packages

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import warnings
import os
import math
import itertools
from tqdm import tqdm
from matplotlib.pyplot import figure
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from datetime import datetime, timedelta

### Constants

In [17]:
# Provinces handled by this notebook; each name is used as a directory name
# and, lower-cased, as the prefix of the CSV file names.
LOCS = [
    "BKK",
    "Chiangmai",
    "Rayong",
    "Saraburi",
    "Khonkaen",
    "Surat",
]
# Regions that have a `hotspot_<region in lower case>` column in the processed data.
REGIONS = [
    'Thailand',
    'Myanmar',
    'Cambodia',
    'Lao_PDR',
    'P_Malaysia',
]

# Root folders of the raw dataset and of the processed train/test CSV files.
base_dataset_path = './datasci_dataset_2022'
base_processed_data_path = './processed_data'

## General Functions

In [18]:
def load_data(base_processed_data_path, PROVINCES, train = True):
    '''
    Read the processed train (or test) CSV of every province.

    [Input]
        - base_processed_data_path = folder that holds one sub-folder per province
        - PROVINCES = iterable of province names (sub-folder names)
        - train = truthy -> read '<province>_train.csv', otherwise '<province>_test.csv'
    [Output]
        - dict {province: DataFrame}; each frame is sorted by and indexed on
          'date_time', with the index frequency set to '6h'
    '''
    frames = {}
    for name in PROVINCES:
        csv_path = (
            f'{base_processed_data_path}/{name}/{name.lower()}_train.csv'
            if train else
            f'{base_processed_data_path}/{name}/{name.lower()}_test.csv'
        )

        frame = pd.read_csv(csv_path)
        frame['date_time'] = pd.to_datetime(frame['date_time'], format = '%Y-%m-%d %H:%M:%S')
        frame = frame.sort_values(by = 'date_time').set_index('date_time')
        # declare the sampling frequency on the (now sorted) datetime index
        frame.index.freq = '6h'
        frames[name] = frame
    return frames

def read_groundtruth_data(base_dataset_path, province):
    '''
    Read the raw test CSV of one province (used as ground truth).

    [Input]
        - base_dataset_path = root folder of the raw dataset
        - province = province name (sub-folder name)
    [Output]
        - DataFrame whose first column is renamed to 'date_time' and parsed as
          datetime, sorted by 'date_time', with fully duplicated rows removed
    '''
    gt = pd.read_csv(f'{base_dataset_path}/{province}/test/{province.lower()}_test.csv')
    # the timestamp column is whatever column comes first in the file
    gt = gt.rename(columns = {gt.columns[0]: 'date_time'})
    gt['date_time'] = pd.to_datetime(gt['date_time'], format = '%Y-%m-%d %H:%M:%S')

    return gt.sort_values(by = 'date_time').drop_duplicates()

def get_current_dt():
    '''Return the current time (datetime.now()) formatted as 'YYYY-MM-DD HH:MM:SS'.'''
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')

## Load all the train and test data

In [19]:
# Load the processed train split first, then the test split, for every province in LOCS.
dfs_train, dfs_test = (
    load_data(base_processed_data_path, LOCS, train = is_train_split)
    for is_train_split in (True, False)
)

## Example Train Data

In [20]:
# Display the processed BKK training frame (the cell's last expression is rendered as output).
dfs_train['BKK']

Unnamed: 0_level_0,PM2.5,wind_speed,wind_dir,temp,hotspot_thailand,hotspot_myanmar,hotspot_cambodia,hotspot_lao_pdr,hotspot_p_malaysia,month_1,...,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,wind_dir_sin,wind_dir_cos
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-07-01 06:00:00,23.0,17.0,242.0,27.8,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.8829,-0.4695
2017-07-01 12:00:00,15.0,18.0,250.0,32.6,0.0,0.0,1.0,0.0,1.0,0,...,0,0,1,0,0,0,0,0,-0.9397,-0.3420
2017-07-01 18:00:00,10.0,26.0,255.0,30.1,0.0,0.0,1.0,0.0,1.0,0,...,0,0,1,0,0,0,0,0,-0.9659,-0.2588
2017-07-02 00:00:00,17.0,20.0,262.0,28.1,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9903,-0.1392
2017-07-02 06:00:00,35.0,18.0,247.0,27.4,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9205,-0.3907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-29 18:00:00,8.0,24.0,257.0,30.7,1.0,0.0,1.0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,-0.9744,-0.2250
2020-06-30 00:00:00,9.0,23.0,257.0,28.9,0.0,0.0,0.0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,-0.9744,-0.2250
2020-06-30 06:00:00,14.0,23.0,255.0,28.3,0.0,0.0,0.0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,-0.9659,-0.2588
2020-06-30 12:00:00,17.0,23.0,250.0,31.6,1.0,0.0,0.0,0.0,0.0,0,...,0,1,0,0,0,0,0,0,-0.9397,-0.3420


## Example Test Data

In [22]:
# Display the processed BKK test frame (the cell's last expression is rendered as output).
dfs_test['BKK']

Unnamed: 0_level_0,PM2.5,wind_speed,wind_dir,temp,hotspot_thailand,hotspot_myanmar,hotspot_cambodia,hotspot_lao_pdr,hotspot_p_malaysia,month_1,...,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12,wind_dir_sin,wind_dir_cos
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-07-01 00:00:00,9.0,36.0,262.0,30.9,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9903,-0.1392
2020-07-01 06:00:00,11.0,55.0,270.0,28.3,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-1.0000,-0.0000
2020-07-01 12:00:00,15.0,51.0,260.0,29.7,2.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9848,-0.1736
2020-07-01 18:00:00,14.0,35.0,265.0,29.5,2.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9962,-0.0872
2020-07-02 00:00:00,18.0,60.0,275.0,28.7,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9962,0.0872
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-30 18:00:00,15.0,29.0,270.0,26.0,0.0,0.0,4.0,2.0,0.0,0,...,0,1,0,0,0,0,0,0,-1.0000,-0.0000
2021-07-01 00:00:00,15.0,20.0,265.0,28.4,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9962,-0.0872
2021-07-01 06:00:00,14.0,21.0,275.0,26.5,0.0,0.0,0.0,0.0,0.0,0,...,0,0,1,0,0,0,0,0,-0.9962,0.0872
2021-07-01 12:00:00,14.0,17.0,245.0,32.3,0.0,0.0,5.0,0.0,1.0,0,...,0,0,1,0,0,0,0,0,-0.9063,-0.4226


## SARIMAX Model

### Functions involved

In [6]:
def initialize_SARIMAX_models(df_train, exog_columns, best_order, best_seasonal_order, target = 'PM2.5'):
    '''
    Fit one SARIMAX model for the target and one SARIMAX model per exog variable.

    [Input]
        - df_train = a pandas DataFrame of all training data
        - exog_columns = list of all exog variable names
        - best_order = (p, d, q)
        - best_seasonal_order = (P, D, Q, s)
        - target = name of the target column (default 'PM2.5')
    [Output]
        - target_result = fit result of the target model (uses exog_columns as exog)
        - exog_result_d = dict {exog variable name: fit result of its own model};
          'month_x' columns are skipped (no model is needed to predict them)
    '''
    # every model in this function shares the same orders and constraint flags
    shared_options = dict(order = best_order, seasonal_order = best_seasonal_order,
                          enforce_stationarity = False, enforce_invertibility = False)

    # model for the target variable, regressed on all exog columns
    target_result = SARIMAX(df_train[target], exog = df_train[exog_columns], **shared_options).fit(disp = 0)

    # univariate models (no exog) used later to forecast the exog variables themselves
    exog_result_d = {
        name: SARIMAX(df_train[name], **shared_options).fit(disp = 0)
        for name in exog_columns
        if not name.startswith('month_')
    }

    return target_result, exog_result_d


def get_predictions(df_test, target_result, exog_result_d, exog_columns, is_month_included = True, target = 'PM2.5', n_steps = 12):
    '''
    Walk through the test set one timestep at a time and, after each one,
    forecast the target for the next n_steps timesteps.

    [Input]
        - df_test = a pandas DataFrame of the test data, indexed by timestamp
        - target_result = fit result of the target model
        - exog_result_d = dict {exog variable name: fit result}; it is NOT modified
        - exog_columns = list of all exog variable names used by the target model
        - is_month_included = True if exog_columns contains the month_x columns
        - target = name of the target column in df_test
        - n_steps = number of timesteps to forecast after every test row
    [Output]
        - DataFrame with one row per (test row, forecast step) and the columns
          'Time' (timestamp of the test row), 'Predicted' (forecast timestamp)
          and 'PM2.5_Predicted' (forecast value; the name is fixed whatever `target` is)
    '''
    # The value returned by `append` replaces the previous results object, so the
    # target results are only re-bound here. The exog dict gets new entries
    # assigned on every step, hence the shallow copy: the caller's dict is untouched.
    target_result_update = target_result
    exog_result_d_update = dict(exog_result_d)

    # month_x columns are derived from the calendar, every other exog needs its own forecast
    forecast_exog_columns = [name for name in exog_columns if not name.startswith('month_')]

    all_target_predicted = []
    for i in tqdm(range(df_test.shape[0])):
        current_ts = df_test.index[i]

        # First, append the target / exog data of the current timestep to the target results
        target_result_update = target_result_update.append(endog = df_test[target].iloc[i:i+1],
                                                           exog = df_test[exog_columns].iloc[i:i+1])
        # Next, append the exog data of the current timestep to each exog results object
        for exog_variable in forecast_exog_columns:
            exog_result_d_update[exog_variable] = exog_result_d_update[exog_variable].append(endog = df_test[exog_variable].iloc[i:i+1])

        # Then forecast every exog variable for the next n_steps timesteps
        exog_parts = [
            exog_result_d_update[exog_variable].forecast(steps = n_steps).rename(exog_variable)
            for exog_variable in forecast_exog_columns
        ]
        if (is_month_included):
            month_exog = generate_month_exog_columns(current_ts, n_steps = n_steps)
            if exog_parts:
                # rows are matched by position (step 1..n_steps), so reuse the forecast index
                month_exog.index = exog_parts[0].index
            exog_parts.append(month_exog)
        exog_predicted = pd.concat(exog_parts, axis = 1)

        # Use the predicted exog variables to predict the target for the next n_steps timesteps
        target_predicted = target_result_update.forecast(exog = exog_predicted[exog_columns], steps = n_steps)

        for predict_ts, predicted_value in target_predicted.items():
            all_target_predicted.append([current_ts, predict_ts, predicted_value])

    return pd.DataFrame(all_target_predicted, columns = ['Time', 'Predicted', 'PM2.5_Predicted'])

def remove_missing_and_invalid_date_from_predictions(df_predicted, df_gt, province, base_dataset_path = './datasci_dataset_2022'):
    '''
    Drop the prediction rows that cannot be scored against the ground truth.

    [Input]
        - df_predicted = DataFrame with a datetime 'Predicted' column (forecast timestamps)
        - df_gt = ground-truth DataFrame with a datetime 'date_time' column
        - province = province name; selects 'missing_test_date_<province in lower case>.csv'
        - base_dataset_path = root folder of the raw dataset, which holds the
          '_missing_date' folder (defaults to the path that used to be hard-coded)
    [Output]
        - the rows of df_predicted (all columns kept, index renumbered from 0 before
          filtering) whose 'Predicted' timestamp is not listed in the missing-date
          file and lies between the earliest and latest 'date_time' of df_gt, inclusive
    '''
    # timestamps listed as missing for this province (stored in the CSV column named '0')
    missing_date_path = f'{base_dataset_path}/_missing_date/missing_test_date_{province.lower()}.csv'
    missing_dates = pd.to_datetime(pd.read_csv(missing_date_path, usecols = ['0'])['0'],
                                   format = '%Y-%m-%d %H:%M:%S')

    # renumber the rows first so that the returned index does not depend on the input index
    df_predicted = df_predicted.reset_index(drop = True)
    is_missing = df_predicted['Predicted'].isin(missing_dates)

    # valid range of the ground truth; min/max so df_gt does not have to be sorted
    first_dt = df_gt['date_time'].min()
    last_dt = df_gt['date_time'].max()
    is_in_range = df_predicted['Predicted'].between(first_dt, last_dt)

    return df_predicted[~is_missing & is_in_range]

def combine_grondtruth_data(df_predicted, df_gt):
    '''
    Attach the ground-truth columns to every prediction row.

    The 'date_time' column of df_gt is matched against the 'Predicted' column of
    df_predicted with a left merge, so every prediction row is kept. df_gt itself
    is left unchanged.
    '''
    gt_by_predicted_ts = df_gt.rename(columns = {'date_time': 'Predicted'})
    return pd.merge(df_predicted, gt_by_predicted_ts, on = 'Predicted', how = 'left')

def calculate_RMSE(df_predicted):
    '''
    Compute the RMSE between the 'PM2.5_Predicted' and 'PM2.5' columns.

    Two columns are added to df_predicted itself: 'adiff' (absolute error) and
    'diff2' (squared error).

    [Output]
        - rmse = square root of the mean of 'diff2'
        - df_predicted = the same DataFrame object, with the two extra columns
    '''
    error = df_predicted['PM2.5_Predicted'] - df_predicted['PM2.5']
    df_predicted['adiff'] = error.abs()
    df_predicted['diff2'] = df_predicted['adiff'] ** 2
    return df_predicted['diff2'].mean() ** 0.5, df_predicted

#### Helper functions

In [7]:
def generate_month_exog_columns(current_ts, freq = '6h', n_steps = 12):
    '''
    Build the one-hot month columns for the n_steps timesteps that follow current_ts.

    [Input]
        - current_ts = timestamp of the current timestep (not part of the output)
        - freq = spacing between two timesteps, as a pandas frequency string
        - n_steps = number of future timesteps to generate
    [Output]
        - DataFrame indexed by 'date_time' (current_ts + 1*freq ... current_ts + n_steps*freq)
          with the integer columns month_1 ... month_12; month_k is 1 when the
          timestamp falls in month k and 0 otherwise
    '''
    # honour the `freq` argument instead of a fixed '6h' step
    step = pd.tseries.frequencies.to_offset(freq)
    next_ts = pd.DatetimeIndex([current_ts + step * i for i in range(1, n_steps + 1)], name = 'date_time')

    return pd.DataFrame(
        {f'month_{month}': (next_ts.month == month).astype(int) for month in range(1, 13)},
        index = next_ts,
    )

In [8]:
# Ignore every warning raised from this point on.
# NOTE(review): presumably meant to silence noisy warnings during model fitting, but it
# hides all other warnings too — confirm this is intended.
warnings.filterwarnings("ignore")

#### Overall

In [9]:
def generate_result_on_single_province(province, df_train, df_test, exog_columns, best_order, best_seasonal_order, is_month_included):
    '''
    Run the whole pipeline for one province: fit the models, forecast over the
    test set, keep only the rows that can be scored and compute the RMSE.

    [Input]
        - province = province name
        - df_train / df_test = processed train / test DataFrames of the province
        - exog_columns = list of all exog variable names
        - best_order = (p, d, q)
        - best_seasonal_order = (P, D, Q, s)
        - is_month_included = True if exog_columns contains the month_x columns
    [Output]
        - predictions = DataFrame returned by calculate_RMSE plus a 'province' column
        - rmse = RMSE of the province
    '''
    ground_truth = read_groundtruth_data(base_dataset_path, province)
    fitted_target, fitted_exog = initialize_SARIMAX_models(df_train, exog_columns, best_order, best_seasonal_order)

    # forecast, drop the rows that cannot be scored, then attach the ground truth
    predictions = get_predictions(df_test, fitted_target, fitted_exog, exog_columns, is_month_included = is_month_included)
    predictions = remove_missing_and_invalid_date_from_predictions(predictions, ground_truth, province)
    predictions = combine_grondtruth_data(predictions, ground_truth)

    rmse, predictions = calculate_RMSE(predictions)
    predictions['province'] = province

    return predictions, rmse

def generate_result_on_all_provinces(province_arr, dfs_train, dfs_test, exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d, filename):
    '''
    Run the single-province pipeline for every province, report the RMSE values
    and write the submission file.

    [Input]
        - province_arr = list of province names to process, in order
        - dfs_train / dfs_test = dict {province: train / test DataFrame}
        - exog_columns_d = dict {province: list of exog variable names}
        - best_order_d = dict {province: (p, d, q)}
        - best_seasonal_order_d = dict {province: (P, D, Q, s)}
        - is_month_included_d = dict {province: bool}
        - filename = name of the CSV file written inside './submission'
    [Output]
        - all_predicted = predictions of all provinces concatenated (index renumbered from 0)
        - df = the submission DataFrame with the columns 'id' and 'Predicted'
    '''
    all_predicted = []

    for i, province in enumerate(province_arr, start = 1):
        df_train = dfs_train[province]
        df_test = dfs_test[province]
        exog_columns = exog_columns_d[province]
        best_order = best_order_d[province]
        best_seasonal_order = best_seasonal_order_d[province]
        is_month_included = is_month_included_d[province]

        print(f'Province [{i}/{len(province_arr)}]: {province}')
        print(f'(start at: {get_current_dt()})')
        print(f'Parameters: order = {best_order}, seasonal_order = {best_seasonal_order}')
        print(f'Exog Columns: {exog_columns}')
        df_predicted, rmse = generate_result_on_single_province(province, df_train, df_test, exog_columns, best_order, best_seasonal_order, is_month_included = is_month_included)
        print(f'Test on SARIMAX with RMSE = {rmse:.4f}')
        print(f'(finish at: {get_current_dt()})\n')
        all_predicted.append(df_predicted)

    all_predicted = pd.concat(all_predicted, axis = 0)
    all_predicted.reset_index(drop = True, inplace = True)

    # final evaluation: RMSE over the rows of all provinces pooled together
    final_rmse = (all_predicted['diff2'].mean())**0.5
    print('-----------------------------------------------------------------')
    print(f'Final test on SARIMAX with RMSE = {final_rmse:.4f}')
    print('-----------------------------------------------------------------')

    # save result: 'id' is simply the row number of the concatenated predictions
    df = pd.DataFrame({
        'id': np.arange(all_predicted.shape[0]),
        'Predicted': all_predicted['PM2.5_Predicted'].to_numpy(),
    })

    base_path = './submission'
    # create the folder if needed; no error when it already exists
    os.makedirs(base_path, exist_ok = True)
    path = f'{base_path}/{filename}'

    df.to_csv(path, index = False)
    print(f"Total of {df.shape[0]} predicted rows")

    return all_predicted, df

## Generate Predictions

In [23]:
# Weather exog columns with the raw wind direction.
standard_exog_columns = [
    'wind_speed',
    'wind_dir',
    'temp',
]
# Same, with the wind direction replaced by its sin / cos columns.
standard_exog_columns_2 = [
    'wind_speed',
    'wind_dir_sin',
    'wind_dir_cos',
    'temp',
]
# One-hot month columns month_1 ... month_12.
month_exog_columns = [f'month_{month_no}' for month_no in range(1, 13)]
# Regions that have a hotspot_<region in lower case> column.
REGIONS = [
    'Thailand',
    'Myanmar',
    'Cambodia',
    'Lao_PDR',
    'P_Malaysia',
]

### 0.1 (Baseline - uses only standard exog columns)

In [28]:
#baseline 0.1
# Every province uses the same settings, so each dict maps all of LOCS to one value.
province_arr = LOCS

exog_columns_d = dict.fromkeys(LOCS, standard_exog_columns)
is_month_included_d = dict.fromkeys(LOCS, False)
best_order_d = dict.fromkeys(LOCS, (1, 0, 1))

s = 12
best_seasonal_order_d = dict.fromkeys(LOCS, (0, 1, 1, s))

# -----------------------------------------------------------------------------------------------------------

filename_0_1 = 'submission0_1.csv'
all_predicted_0_1, df_submission_0_1 = generate_result_on_all_provinces(
    province_arr, dfs_train, dfs_test,
    exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d,
    filename_0_1,
)

Province [1/6]: BKK
(start at: 2022-04-21 02:39:10)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [41:23<00:00,  1.70s/it]


Test on SARIMAX with RMSE = 6.0741
(finish at: 2022-04-21 03:20:57)

Province [2/6]: Chiangmai
(start at: 2022-04-21 03:20:57)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [39:43<00:00,  1.63s/it]


Test on SARIMAX with RMSE = 9.7507
(finish at: 2022-04-21 04:00:59)

Province [3/6]: Rayong
(start at: 2022-04-21 04:00:59)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [39:57<00:00,  1.64s/it]


Test on SARIMAX with RMSE = 7.5361
(finish at: 2022-04-21 04:41:19)

Province [4/6]: Saraburi
(start at: 2022-04-21 04:41:19)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [39:33<00:00,  1.62s/it]


Test on SARIMAX with RMSE = 10.7188
(finish at: 2022-04-21 05:21:12)

Province [5/6]: Khonkaen
(start at: 2022-04-21 05:21:12)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [36:04<00:00,  1.48s/it]


Test on SARIMAX with RMSE = 10.1522
(finish at: 2022-04-21 05:57:33)

Province [6/6]: Surat
(start at: 2022-04-21 05:57:33)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp']


100%|███████████████████████████████████████| 1464/1464 [25:29<00:00,  1.05s/it]


Test on SARIMAX with RMSE = 5.4867
(finish at: 2022-04-21 06:23:15)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 8.5169
-----------------------------------------------------------------
Total of 94248 predicted rows


### 0.2 (Baseline 2 - Change wind_dir to wind_dir_sin and wind_dir_cos)

In [30]:
#baseline 0.2
# Same as baseline 0.1, but with the sin / cos wind-direction columns.
province_arr = LOCS

exog_columns_d = dict.fromkeys(LOCS, standard_exog_columns_2)
is_month_included_d = dict.fromkeys(LOCS, False)
best_order_d = dict.fromkeys(LOCS, (1, 0, 1))

s = 12
best_seasonal_order_d = dict.fromkeys(LOCS, (0, 1, 1, s))

# -----------------------------------------------------------------------------------------------------------

filename_0_2 = 'submission0_2.csv'
all_predicted_0_2, df_submission_0_2 = generate_result_on_all_provinces(
    province_arr, dfs_train, dfs_test,
    exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d,
    filename_0_2,
)

Province [1/6]: BKK
(start at: 2022-04-21 10:59:12)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|███████████████████████████████████████| 1464/1464 [50:05<00:00,  2.05s/it]


Test on SARIMAX with RMSE = 6.1158
(finish at: 2022-04-21 11:49:45)

Province [2/6]: Chiangmai
(start at: 2022-04-21 11:49:45)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|███████████████████████████████████████| 1464/1464 [49:31<00:00,  2.03s/it]


Test on SARIMAX with RMSE = 9.7592
(finish at: 2022-04-21 12:39:39)

Province [3/6]: Rayong
(start at: 2022-04-21 12:39:39)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|███████████████████████████████████████| 1464/1464 [55:57<00:00,  2.29s/it]


Test on SARIMAX with RMSE = 7.5783
(finish at: 2022-04-21 13:36:02)

Province [4/6]: Saraburi
(start at: 2022-04-21 13:36:02)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|█████████████████████████████████████| 1464/1464 [1:00:28<00:00,  2.48s/it]


Test on SARIMAX with RMSE = 10.7516
(finish at: 2022-04-21 14:36:59)

Province [5/6]: Khonkaen
(start at: 2022-04-21 14:36:59)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|███████████████████████████████████████| 1464/1464 [56:31<00:00,  2.32s/it]


Test on SARIMAX with RMSE = 10.1081
(finish at: 2022-04-21 15:33:56)

Province [6/6]: Surat
(start at: 2022-04-21 15:33:56)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp']


100%|███████████████████████████████████████| 1464/1464 [39:44<00:00,  1.63s/it]


Test on SARIMAX with RMSE = 5.4792
(finish at: 2022-04-21 16:13:56)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 8.5271
-----------------------------------------------------------------
Total of 94248 predicted rows


### 1. Use month columns and some hotspot columns with standard_exog_columns (s = 4, except Surat which uses s = 12)

In [14]:
province_arr = LOCS

# standard exog columns + the hotspot columns of the listed REGIONS indices + month columns
exog_columns_d = {
    province: standard_exog_columns
              + [f'hotspot_{REGIONS[i].lower()}' for i in region_indices]
              + month_exog_columns
    for province, region_indices in {
        'BKK': [0, 2],
        'Chiangmai': [0, 1, 3],
        'Rayong': [0, 2],
        'Saraburi': [0, 2],
        'Khonkaen': [0, 1, 3],
        'Surat': [0, 4],
    }.items()
}

is_month_included_d = dict.fromkeys(LOCS, True)
best_order_d = dict.fromkeys(LOCS, (1, 0, 1))

s = 4
best_seasonal_order_d = dict.fromkeys(LOCS, (0, 1, 1, s))
# the two provinces that deviate from the common seasonal order
best_seasonal_order_d['Khonkaen'] = (1, 1, 1, s)
best_seasonal_order_d['Surat'] = (0, 1, 1, 12)

# -----------------------------------------------------------------------------------------------------------

filename_1 = 'submission1.csv'
all_predicted_1, df_submission_1 = generate_result_on_all_provinces(
    province_arr, dfs_train, dfs_test,
    exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d,
    filename_1,
)

Province [1/6]: BKK
(start at: 2022-04-20 02:10:56)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 4)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [13:03<00:00,  1.87it/s]


Test on SARIMAX with RMSE = 7.9826
(finish at: 2022-04-20 02:24:15)

Province [2/6]: Chiangmai
(start at: 2022-04-20 02:24:15)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 4)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [15:53<00:00,  1.54it/s]


Test on SARIMAX with RMSE = 13.2276
(finish at: 2022-04-20 02:40:32)

Province [3/6]: Rayong
(start at: 2022-04-20 02:40:32)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 4)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [14:12<00:00,  1.72it/s]


Test on SARIMAX with RMSE = 9.9906
(finish at: 2022-04-20 02:55:06)

Province [4/6]: Saraburi
(start at: 2022-04-20 02:55:06)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 4)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [14:28<00:00,  1.69it/s]


Test on SARIMAX with RMSE = 11.6945
(finish at: 2022-04-20 03:09:57)

Province [5/6]: Khonkaen
(start at: 2022-04-20 03:09:57)
Parameters: order = (1, 0, 1), seasonal_order = (1, 1, 1, 4)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [15:24<00:00,  1.58it/s]


Test on SARIMAX with RMSE = 10.9698
(finish at: 2022-04-20 03:25:51)

Province [6/6]: Surat
(start at: 2022-04-20 03:25:51)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_p_malaysia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [39:41<00:00,  1.63s/it]


Test on SARIMAX with RMSE = 5.3740
(finish at: 2022-04-20 04:06:24)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 10.1866
-----------------------------------------------------------------
Total of 94248 predicted rows


### 2. Use month columns and some hotspot columns with standard_exog_columns (s = 12)

In [15]:
province_arr = LOCS

# standard exog columns + the hotspot columns of the listed REGIONS indices + month columns
exog_columns_d = {
    province: standard_exog_columns
              + [f'hotspot_{REGIONS[i].lower()}' for i in region_indices]
              + month_exog_columns
    for province, region_indices in {
        'BKK': [0, 2],
        'Chiangmai': [0, 1, 3],
        'Rayong': [0, 2],
        'Saraburi': [0, 2],
        'Khonkaen': [0, 1, 3],
        'Surat': [0, 4],
    }.items()
}

is_month_included_d = dict.fromkeys(LOCS, True)
best_order_d = dict.fromkeys(LOCS, (1, 0, 1))

s = 12
best_seasonal_order_d = dict.fromkeys(LOCS, (0, 1, 1, s))
# the two provinces whose seasonal order is spelled out separately
best_seasonal_order_d['Khonkaen'] = (1, 1, 1, s)
best_seasonal_order_d['Surat'] = (0, 1, 1, 12)

# -----------------------------------------------------------------------------------------------------------

filename_2 = 'submission2.csv'
all_predicted_2, df_submission_2 = generate_result_on_all_provinces(
    province_arr, dfs_train, dfs_test,
    exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d,
    filename_2,
)

Province [1/6]: BKK
(start at: 2022-04-20 04:06:24)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:10:18<00:00,  2.88s/it]


Test on SARIMAX with RMSE = 6.6816
(finish at: 2022-04-20 05:18:14)

Province [2/6]: Chiangmai
(start at: 2022-04-20 05:18:14)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:21:43<00:00,  3.35s/it]


Test on SARIMAX with RMSE = 12.0228
(finish at: 2022-04-20 06:41:27)

Province [3/6]: Rayong
(start at: 2022-04-20 06:41:27)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [59:46<00:00,  2.45s/it]


Test on SARIMAX with RMSE = 7.9307
(finish at: 2022-04-20 07:42:43)

Province [4/6]: Saraburi
(start at: 2022-04-20 07:42:43)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:00:03<00:00,  2.46s/it]


Test on SARIMAX with RMSE = 12.6007
(finish at: 2022-04-20 08:44:15)

Province [5/6]: Khonkaen
(start at: 2022-04-20 08:44:15)
Parameters: order = (1, 0, 1), seasonal_order = (1, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:16:55<00:00,  3.15s/it]


Test on SARIMAX with RMSE = 12.4214
(finish at: 2022-04-20 10:02:45)

Province [6/6]: Surat
(start at: 2022-04-20 10:02:45)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir', 'temp', 'hotspot_thailand', 'hotspot_p_malaysia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [46:52<00:00,  1.92s/it]


Test on SARIMAX with RMSE = 5.3740
(finish at: 2022-04-20 10:50:14)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 9.9270
-----------------------------------------------------------------
Total of 94248 predicted rows


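### 3. Use month columns and some hotspot columns with standard_exog_columns_2 (wind_dir_sin and wind_dir_cos, s = 12)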

In [24]:
province_arr = LOCS

# sin / cos exog columns + the hotspot columns of the listed REGIONS indices + month columns
exog_columns_d = {
    province: standard_exog_columns_2
              + [f'hotspot_{REGIONS[i].lower()}' for i in region_indices]
              + month_exog_columns
    for province, region_indices in {
        'BKK': [0, 2],
        'Chiangmai': [0, 1, 3],
        'Rayong': [0, 2],
        'Saraburi': [0, 2],
        'Khonkaen': [0, 1, 3],
        'Surat': [0, 4],
    }.items()
}

is_month_included_d = dict.fromkeys(LOCS, True)
best_order_d = dict.fromkeys(LOCS, (1, 0, 1))

s = 12
best_seasonal_order_d = dict.fromkeys(LOCS, (0, 1, 1, s))
# Khonkaen is the only province with a seasonal AR term
best_seasonal_order_d['Khonkaen'] = (1, 1, 1, s)

# -----------------------------------------------------------------------------------------------------------

filename_3 = 'submission3.csv'
all_predicted_3, df_submission_3 = generate_result_on_all_provinces(
    province_arr, dfs_train, dfs_test,
    exog_columns_d, best_order_d, best_seasonal_order_d, is_month_included_d,
    filename_3,
)

Province [1/6]: BKK
(start at: 2022-04-20 12:26:25)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:19:33<00:00,  3.26s/it]


Test on SARIMAX with RMSE = 6.6024
(finish at: 2022-04-20 13:47:04)

Province [2/6]: Chiangmai
(start at: 2022-04-20 13:47:04)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:31:14<00:00,  3.74s/it]


Test on SARIMAX with RMSE = 12.0486
(finish at: 2022-04-20 15:19:20)

Province [3/6]: Rayong
(start at: 2022-04-20 15:19:20)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:18:50<00:00,  3.23s/it]


Test on SARIMAX with RMSE = 7.8922
(finish at: 2022-04-20 16:39:18)

Province [4/6]: Saraburi
(start at: 2022-04-20 16:39:18)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_cambodia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:13:17<00:00,  3.00s/it]


Test on SARIMAX with RMSE = 12.5936
(finish at: 2022-04-20 17:53:38)

Province [5/6]: Khonkaen
(start at: 2022-04-20 17:53:38)
Parameters: order = (1, 0, 1), seasonal_order = (1, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_myanmar', 'hotspot_lao_pdr', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:23:02<00:00,  3.40s/it]


Test on SARIMAX with RMSE = 12.4263
(finish at: 2022-04-20 19:17:49)

Province [6/6]: Surat
(start at: 2022-04-20 19:17:49)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'hotspot_thailand', 'hotspot_p_malaysia', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [57:31<00:00,  2.36s/it]


Test on SARIMAX with RMSE = 5.3786
(finish at: 2022-04-20 20:16:01)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 9.9183
-----------------------------------------------------------------
Total of 94248 predicted rows


### 4. Use month columns with standard_exog_columns_2, without hotspot columns (s = 12)

In [25]:
# Experiment 4: leave out every hotspot column; each province uses only
# standard_exog_columns_2 plus the month columns as exogenous inputs.
province_arr = LOCS

# One freshly concatenated column list per province (no list object is shared).
exog_columns_d = {
    province: standard_exog_columns_2 + month_exog_columns
    for province in province_arr
}

# Every province is flagged True here; the flag is forwarded unchanged to
# generate_result_on_all_provinces below.
is_month_included_d = dict.fromkeys(province_arr, True)

# Identical non-seasonal order for all provinces.
best_order_d = dict.fromkeys(province_arr, (1, 0, 1))

# `s` is the last element of every seasonal_order tuple.
s = 12
# Identical seasonal order for all provinces.
best_seasonal_order_d = dict.fromkeys(province_arr, (0, 1, 1, s))

# -----------------------------------------------------------------------------------------------------------

# Run the per-province pipeline with the settings above; 'submission4.csv'
# is handed to it as the filename argument.
filename_4 = 'submission4.csv'
all_predicted_4, df_submission_4 = generate_result_on_all_provinces(
    province_arr,
    dfs_train,
    dfs_test,
    exog_columns_d,
    best_order_d,
    best_seasonal_order_d,
    is_month_included_d,
    filename_4,
)

Province [1/6]: BKK
(start at: 2022-04-20 20:58:23)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:10:36<00:00,  2.89s/it]


Test on SARIMAX with RMSE = 6.0023
(finish at: 2022-04-20 22:10:08)

Province [2/6]: Chiangmai
(start at: 2022-04-20 22:10:08)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|█████████████████████████████████████| 1464/1464 [1:08:39<00:00,  2.81s/it]


Test on SARIMAX with RMSE = 9.8863
(finish at: 2022-04-20 23:19:54)

Province [3/6]: Rayong
(start at: 2022-04-20 23:19:54)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [56:08<00:00,  2.30s/it]


Test on SARIMAX with RMSE = 7.5922
(finish at: 2022-04-21 00:17:18)

Province [4/6]: Saraburi
(start at: 2022-04-21 00:17:18)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [51:03<00:00,  2.09s/it]


Test on SARIMAX with RMSE = 10.6020
(finish at: 2022-04-21 01:09:18)

Province [5/6]: Khonkaen
(start at: 2022-04-21 01:09:18)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [46:31<00:00,  1.91s/it]


Test on SARIMAX with RMSE = 10.3298
(finish at: 2022-04-21 01:56:39)

Province [6/6]: Surat
(start at: 2022-04-21 01:56:39)
Parameters: order = (1, 0, 1), seasonal_order = (0, 1, 1, 12)
Exog Columns: ['wind_speed', 'wind_dir_sin', 'wind_dir_cos', 'temp', 'month_1', 'month_2', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8', 'month_9', 'month_10', 'month_11', 'month_12']


100%|███████████████████████████████████████| 1464/1464 [32:45<00:00,  1.34s/it]


Test on SARIMAX with RMSE = 5.3518
(finish at: 2022-04-21 02:29:55)

-----------------------------------------------------------------
Final test on SARIMAX with RMSE = 8.5396
-----------------------------------------------------------------
Total of 94248 predicted rows
