In [1]:
import pandas as pd
import holidays
import numpy as np

from statsforecast.models import ARIMA

from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error,mean_squared_error
import gc
from statsforecast.models import AutoARIMA

  from tqdm.autonotebook import tqdm


In [2]:
df=pd.read_csv('/home/wapun/Documents/demand/collection/Load.csv')
df.rename({'Unnamed: 0':'Time'},axis=1,inplace=True)
df['Time']=pd.to_datetime(df.Time,utc=True)
df.set_index('Time',inplace=True,drop=True)
df.index=df.index.tz_convert('Europe/Warsaw')


In [3]:
holidaysPL=holidays.PL()
df['date'] = df.index.date
df['holiday']=df.date.transform(lambda x: x in holidaysPL)
df.drop('date',axis=1,inplace=True)

In [4]:
np.unique(df.loc[(df.holiday==True)&(df.index.dayofweek==5)].index.date)

array([datetime.date(2015, 8, 15), datetime.date(2015, 12, 26),
       datetime.date(2017, 11, 11), datetime.date(2018, 1, 6),
       datetime.date(2020, 8, 15), datetime.date(2020, 12, 26),
       datetime.date(2021, 5, 1), datetime.date(2021, 12, 25),
       datetime.date(2022, 1, 1), datetime.date(2023, 11, 11),
       datetime.date(2024, 1, 6)], dtype=object)

In [5]:
def dataSplit(dataframe,end_train):
    year=pd.Timedelta(365,'d')
    train_data=df.loc[dataframe.index<end_train]
    test_data=df.loc[(dataframe.index>=end_train)&(dataframe.index<end_train+year)]
    return train_data,test_data


In [6]:
df['TD']=df.Load-df.Load.shift(168)+df.Load.shift(192)-df.Load.shift(24)
df['TG']=df.Load-df.Load.shift(168)+df.Load.shift(169)-df.Load.shift(1)
df.dropna(inplace=True)

In [7]:
end_train=pd.to_datetime('2022-02-28 10:00',utc=True)
trainData,testData=dataSplit(df,end_train)
day=pd.to_timedelta(1,'day')


In [8]:
trainData

Unnamed: 0_level_0,PSEforecast,Load,holiday,TD,TG
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-10 00:00:00+01:00,17000.0,17109.375,False,-2279.250,-48.125
2015-01-10 01:00:00+01:00,16200.0,16438.150,False,-2234.775,-46.600
2015-01-10 02:00:00+01:00,15800.0,16122.300,False,-2148.525,-8.800
2015-01-10 03:00:00+01:00,15700.0,15987.900,False,-2082.875,26.000
2015-01-10 04:00:00+01:00,15700.0,15951.900,False,-2266.525,-220.775
...,...,...,...,...,...
2022-02-28 06:00:00+01:00,21950.0,21262.988,False,328.313,59.112
2022-02-28 07:00:00+01:00,23250.0,22960.638,False,90.263,-331.862
2022-02-28 08:00:00+01:00,23550.0,23395.888,False,-369.699,-878.925
2022-02-28 09:00:00+01:00,23500.0,23273.788,False,-575.663,-693.263


In [9]:
pVal=[0,1,2,3,4,6,8,10,16,24]
dVal=[0,1]
qVal=[0,1,2,3,4,6,8,10,16,24]
orders=[]
testMAE=[]
testMSE=[]
testMAPE=[]
trainMAE=[]
trainMSE=[]
trainMAPE=[]

In [10]:
def grid(column,ex): #enter string contaning names of load data and exogoneus variable
    
    try:
        for d in dVal:
            for p in pVal:
                for q in qVal:
                    
                    order=[p,d,q]
                    
                    model=ARIMA(order=order,freq='H')
                    result=model.fit(trainData[column],exog=trainData[ex])

                        
                    trainPredictions = result.get_prediction(start=0, end=len(trainData)-1, exog=trainData[ex])
                    testPredictions=result.get_forecast(steps=len(testData), exog=testData[ex])

                    trainMSE.append(mean_squared_error(trainPredictions.predicted_mean,trainData[column]))
                    testMSE.append(mean_squared_error(testPredictions.predicted_mean,testData[column]))

                    trainMAE.append(mean_absolute_error(trainPredictions.predicted_mean,trainData[column]))
                    testMAE.append(mean_absolute_error(testPredictions.predicted_mean,testData[column]))
                    
                    trainMAPE.append(mean_absolute_percentage_error(trainPredictions.predicted_mean,trainData[column]))
                    testMAPE.append(mean_absolute_percentage_error(testPredictions.predicted_mean,testData[column]))
                    orders.append(order)
                    del trainPredictions, testPredictions, result
                    gc.collect()
                    print(order)
                    

    except:
        with open ('error.txt','a') as f:
            f.write(str(p)+str(d)+str(q)+'\n')
    data = {
    'order':orders,
    'trainMAPE': trainMAPE,
    'trainMSE': trainMSE,
    'trainMAE': trainMAE,
    'testMAPE':testMAPE,
    'testMSE': testMSE,
    'testMAE': testMAE}
    nameCsv=column+'metrics.csv'
    dataframe=pd.DataFrame(data)
    dataframe.to_csv(nameCsv)
    return dataframe

In [11]:
grid('TD','holiday')

Unnamed: 0,order,trainMAPE,trainMSE,trainMAE,testMAPE,testMSE,testMAE


In [12]:
grid('TG','holiday')

Unnamed: 0,order,trainMAPE,trainMSE,trainMAE,testMAPE,testMSE,testMAE
