In [1]:
import pandas as pd
from textblob import TextBlob
import math
from sklearn.preprocessing import MinMaxScaler
from matplotlib.pyplot import MultipleLocator
import matplotlib.pyplot as plt
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from tqdm import tqdm
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Define RMSE (root-mean-square error)
# l1: true (observed) values, l2: predicted values
def RMSE(l1, l2):
    """Return the root-mean-square error between two sequences.

    l1 holds the reference (true) values and l2 the predicted values. The
    number of compared pairs is ``len(l1)``; elements are read by position.
    Raises ZeroDivisionError when l1 is empty.
    """
    n_points = len(l1)
    # Built-in sum starts at 0 and adds the squared errors left to right.
    squared_error_total = sum(np.square(l1[idx] - l2[idx]) for idx in range(n_points))
    return math.sqrt(squared_error_total / n_points)


# Define MAE (mean absolute error)
def MAE(l1, l2):
    """Return the mean absolute error between l1 and l2.

    Both inputs are converted to NumPy arrays and subtracted element-wise;
    the sum of absolute differences is divided by ``len(l1)``.
    """
    count = len(l1)
    reference = np.array(l1)
    estimate = np.array(l2)
    absolute_errors = np.abs(reference - estimate)
    return sum(absolute_errors) / count


# Define MAPE (mean absolute percentage error)
def MAPE(l1, l2):
    """Return the mean absolute percentage error of l2 relative to l1.

    The result is a fraction (0.25 means 25 %), not multiplied by 100.

    Parameters
    ----------
    l1 : sequence of numbers
        Reference (true) values. Entries equal to zero are replaced by 0.01
        in a private copy so the relative error stays finite.
    l2 : sequence of numbers
        Predicted values, same length as l1.

    Raises
    ------
    ZeroDivisionError
        If l1 is empty.
    """
    n = len(l1)
    # Force a float copy: with integer input an int array would truncate the
    # 0.01 replacement back to 0 and the division below would produce inf.
    actual = np.array(l1, dtype=float)
    predicted = np.array(l2, dtype=float)
    actual[actual == 0] = 0.01
    # Built-in sum keeps the same left-to-right accumulation as before.
    mape = sum(np.abs((actual - predicted) / actual)) / n
    return mape





In [2]:
# Read the integrated series, keep only the date and normalized price columns,
# then convert the date column with pd.to_datetime.
df_price = pd.read_csv("integrated_series.csv")[["date", "normalized_price"]]
df_price["date"] = pd.to_datetime(df_price["date"])

In [6]:
# Preview the first five rows of the loaded series.
df_price.head(n=5)

Unnamed: 0,date,normalized_price
0,2011-03-29,0.896216
1,2011-03-30,0.890023
2,2011-03-31,0.917431
3,2011-04-01,0.933028
4,2011-04-04,0.939794


In [18]:
# Positional split: the first two thirds of the rows form the training series,
# the remaining rows are held out as the test series.
size = int(len(df_price) * 2 / 3)
train_set = df_price["normalized_price"].iloc[:size]
test_set = df_price["normalized_price"].iloc[size:]

In [69]:
# Grid search over non-seasonal ARIMA orders (p, d, q): fit each candidate on
# train_set and record its RMSE / MAE / MAPE against test_set.
#
# NOTE(review): predict(start=size-1, end=n_obs-2) returns values for positions
# size-1 .. n_obs-2, which are compared element-wise with test_set (positions
# size .. n_obs-1), so each forecast is scored against the following
# observation. Confirm this one-position offset is intended.
# NOTE(review): candidate orders are ranked on test_set itself, so the metrics
# of the selected order are not an independent estimate; consider a separate
# validation split.
test_values = test_set.tolist()
n_obs = df_price.shape[0]

# Collect one row per candidate and build the frame once, instead of enlarging
# the DataFrame inside the loop.
grid_rows = []
for p in range(1, 5):
    for d in range(3):
        for q in range(1, 5):
            model = SARIMAX(train_set, order=(p, d, q), seasonal_order=(0, 0, 0, 0), exog=None,
                            enforce_stationarity=False, enforce_invertibility=False)

            # disp=False silences the optimizer's convergence output for each
            # of the 48 fits.
            result = model.fit(disp=False)

            # Only the h1 metrics are recorded in the results table; the h2/h3
            # variants were computed here but never used, so they are dropped.
            pred_h1 = result.predict(start=size - 1, end=n_obs - 2)
            pred_values = pred_h1.tolist()
            rmse_h1 = RMSE(test_values, pred_values)
            mae_h1 = MAE(test_values, pred_values)
            mape_h1 = MAPE(test_values, pred_values)
            grid_rows.append([p, d, q, rmse_h1, mae_h1, mape_h1])

# dtype=float keeps the order columns as floats (1.0, 0.0, ...), matching the
# table produced by the previous row-wise construction.
df = pd.DataFrame(grid_rows, columns=["order1", "order2", "order3", "rmse", "mae", "mape"], dtype=float)



In [70]:
# Preview the first five grid-search results.
df.head(n=5)

Unnamed: 0,order1,order2,order3,rmse,mae,mape
0,1.0,0.0,1.0,0.215551,0.179058,0.468524
1,1.0,0.0,2.0,0.217168,0.18061,0.472996
2,1.0,0.0,3.0,0.218023,0.181403,0.475249
3,1.0,0.0,4.0,0.218732,0.182092,0.477247
4,1.0,1.0,1.0,0.161785,0.127811,0.323588


In [71]:
# Score every candidate order by the unweighted mean of its three error
# metrics and locate the candidate with the smallest score.
rmselist = np.array(df["rmse"])
maelist = np.array(df["mae"])
mapelist = np.array(df["mape"])
newlist = list((rmselist + maelist + mapelist) / 3)
# nanargmin ignores candidates whose score is NaN (e.g. a fit whose forecasts
# are NaN). With min()/list.index() a NaN at the front of the list would be
# returned as the "best" candidate. Ties still resolve to the first occurrence;
# an empty or all-NaN list raises ValueError.
index = int(np.nanargmin(newlist))

In [78]:
# Read the three order columns of the selected row and cast them to int,
# giving the (order1, order2, order3) tuple passed to SARIMAX as `order`.
optimal_order = tuple(
    int(value) for value in df.iloc[index][["order1", "order2", "order3"]]
)

In [79]:
# Refit SARIMAX on the training series with the selected order and report
# RMSE / MAE / MAPE for three prediction windows, each shifted one position
# further back relative to test_set.
# NOTE(review): window h ends h positions before the last row, so its values
# are compared with observations h positions later. Confirm this offset is
# the intended definition of the horizon.
model = SARIMAX(
    train_set,
    order=optimal_order,
    seasonal_order=(0, 0, 0, 0),
    exog=None,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
result = model.fit()

last_position = df_price.shape[0] - 1
observed = test_set.tolist()

# Window shifted back by one position.
pred_h1 = result.predict(start=size - 1, end=last_position - 1)
forecast = pred_h1.tolist()
rmse_h1 = RMSE(observed, forecast)
mae_h1 = MAE(observed, forecast)
mape_h1 = MAPE(observed, forecast)
print(f"rmse_h1={rmse_h1}\n mae_h1 ={mae_h1}\n mape_h1 ={mape_h1}\n")

# Window shifted back by two positions.
pred_h2 = result.predict(start=size - 2, end=last_position - 2)
forecast = pred_h2.tolist()
rmse_h2 = RMSE(observed, forecast)
mae_h2 = MAE(observed, forecast)
mape_h2 = MAPE(observed, forecast)
print(f"rmse_h2={rmse_h2}\n mae_h2 ={mae_h2}\n mape_h2 ={mape_h2}\n")

# Window shifted back by three positions.
pred_h3 = result.predict(start=size - 3, end=last_position - 3)
forecast = pred_h3.tolist()
rmse_h3 = RMSE(observed, forecast)
mae_h3 = MAE(observed, forecast)
mape_h3 = MAPE(observed, forecast)
print(f"rmse_h3={rmse_h3}\n mae_h3 ={mae_h3}\n mape_h3 ={mape_h3}\n")

rmse_h1=0.16160273602544462
 mae_h1 =0.1276106357918228
 mape_h1 =0.3230088934490065

rmse_h2=0.16160296179498784
 mae_h2 =0.12761579051287833
 mape_h2 =0.3230334033734224

rmse_h3=0.16160461700948334
 mae_h3 =0.12763280582008812
 mape_h3 =0.32311616169518714



