In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Read 'GEFCom2014N.csv', discard rows containing any missing value, then
# rescale the two numeric columns: load is divided by 10000 and T by 100.
data = (
    pd.read_csv('GEFCom2014N.csv')
    .dropna()
    .assign(
        load=lambda frame: frame['load'] / 10000,
        T=lambda frame: frame['T'] / 100,
    )
)
data

Unnamed: 0,load,T,Datetime
17544,0.3010,0.2267,2006-01-01 01:00:00
17545,0.2853,0.2067,2006-01-01 02:00:00
17546,0.2758,0.2133,2006-01-01 03:00:00
17547,0.2705,0.1900,2006-01-01 04:00:00
17548,0.2709,0.1933,2006-01-01 05:00:00
...,...,...,...
96427,0.4012,0.1800,2014-12-31 20:00:00
96428,0.3856,0.1667,2014-12-31 21:00:00
96429,0.3671,0.1700,2014-12-31 22:00:00
96430,0.3499,0.1533,2014-12-31 23:00:00


In [3]:
# Parse the timestamp strings in a single vectorized call (same format string
# as before, without a per-row datetime.strptime under .apply)
data['Datetime'] = pd.to_datetime(data['Datetime'], format='%Y-%m-%d %H:%M:%S')

# Index by timestamp and average every column onto an hourly grid
data = data.set_index('Datetime').resample('H').mean()

# Lag features: lag_k holds the load value from k rows earlier, k = 1..24
for i in range(1, 25):
    data['lag_{}'.format(i)] = data['load'].shift(i)

# Rolling mean and standard deviation of load over a 24-row window.
# NOTE(review): the window ends at the current row, so both statistics include
# the current load value -- confirm this is intended if these columns are later
# used as inputs for predicting `load`.
data['rolling_mean'] = data['load'].rolling(window=24).mean()
data['rolling_std'] = data['load'].rolling(window=24).std()

# Calendar features read from the DatetimeIndex
data['weekday'] = data.index.weekday
data['hour'] = data.index.hour

# Drop every row that has a missing value in any column (this includes the
# leading rows where the lag / rolling features are not yet defined)
data = data.dropna()

# Chronological split: first 80% of the rows for training, the rest for testing
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

In [4]:
#ARIMA
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Fit an ARIMA model of order (p=5, d=1, q=0) on the training load series
model = ARIMA(train_data['load'], order=(5, 1, 0))
model_fit = model.fit()

# Forecast one value per test row: positions len(train_data) .. len(data)-1.
# `typ='levels'` is intentionally not passed: it was an option of the legacy
# ARIMA implementation. The results object from statsmodels.tsa.arima.model
# does not define it (it is reported as an unknown keyword) and already
# returns predictions on the original scale of the series.
predictions = model_fit.predict(start=len(train_data), end=len(data)-1)

# Evaluate the forecast against the held-out load values
mae = mean_absolute_error(test_data['load'], predictions)
mse = mean_squared_error(test_data['load'], predictions)
rmse = np.sqrt(mse)

print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', rmse)

# Short names consumed by the manual metric code further down in this cell;
# y_arima is a second reference to the same ARIMA forecast.
ypred=predictions
y_test=test_data['load']
y_arima=ypred

#For MAE
# Mean absolute error, expressed in the units of the scaled load column (it is
# not a percentage, so the label carries no '%').
# Both series are converted to arrays so the subtraction is positional, like
# sklearn's mean_absolute_error; subtracting two pandas Series directly aligns
# them on index labels and yields NaN wherever the labels do not match.
mae=np.mean(np.abs(np.asarray(ypred)-np.asarray(y_test)))
print('MAE = {:.2f}'.format(mae))

#For MAPE
def mean_absolute_percentage_error(y_test, ypred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Computes ``mean(|(y_test - ypred) / y_test|) * 100`` element by element.

    Both inputs are converted to float arrays first, so values are compared by
    position. Without the conversion, two pandas Series carrying different
    index labels would be aligned by label, giving NaN (or an average over
    only the overlapping labels); plain Python lists would not work at all.

    Parameters
    ----------
    y_test : array-like
        Observed values; each one is the denominator of its relative error.
    ypred : array-like
        Predicted values, in the same order as ``y_test``.

    Returns
    -------
    float
        The MAPE as a percentage. A zero in ``y_test`` makes the result
        ``inf`` or ``nan``.
    """
    actual = np.asarray(y_test, dtype=float)
    forecast = np.asarray(ypred, dtype=float)
    return np.mean(np.abs((actual - forecast) / actual)) * 100.
mape = mean_absolute_percentage_error(y_test, ypred)
print('MAPE = {:.2f} (%)'.format(mape))

# For RMSE
# Root mean squared error, in the units of the scaled load column (not a
# percentage). mean_squared_error is already imported earlier in this cell,
# so it is not imported again here.
mse=mean_squared_error(y_test,ypred)
rsme=np.sqrt(mse)  # variable name left unchanged in case later cells read `rsme`
print('RMSE = {:.2f}'.format(rsme))

MAE: 0.06853807750292464
MSE: 0.006906150144835055
RMSE: 0.08310324990537407
MAE = 0.07 (%)
MAPE = 23.79 (%)
RSME = 0.08 (%)
