In [93]:
import pandas as pd
import numpy as np
from math import sqrt
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [87]:
def mape_vectorized_v2(a, b): 
    mask = a != 0
    return (np.fabs(a - b)/a)[mask].mean()

In [52]:
df = pd.read_csv('datasets/pvdaq_2012_2014_hourly.csv', header=0, infer_datetime_format=True, parse_dates=['Date-Time'], index_col=['Date-Time'])

In [62]:
train_start,train_end = '2012-01-01','2012-09-30'
test_start,test_end = '2012-10-01','2012-12-31'
train = df['dc_power'][train_start:train_end].dropna()
test = df['dc_power'][test_start:test_end].dropna()

In [94]:
# ARIMA EVALUATION
arima = sm.tsa.SARIMAX(train,order=(7,1,7),freq='h',seasonal_order=(0,0,0,0),
                                 enforce_stationarity=False, enforce_invertibility=False)
arima_res = arima.fit()
arima_res.summary()


pred = arima_res.predict(train_end,test_end)[1:]
print('ARIMA model MSE:{}, MAE:{}, RMSE:{}'.format(mean_squared_error(test,pred), mean_absolute_error(test,pred), sqrt(mean_squared_error(test,pred))))





ARIMA model MSE:89573462.92861955, MAE:8712.773163146117, RMSE:9464.325804230302


In [111]:
# Holt-Winters' trend and seasonality
mod_hw = sm.tsa.ExponentialSmoothing(train)
mod_hw_res = mod_hw.fit()
mod_hw_res.summary()


pred_mod_hw = mod_hw_res.predict(train_end,test_end)[1:]
print('Holt-Winters model MSE:{}, MAE:{}, RMSE:{}'.format(mean_squared_error(test,pred_mod_hw), mean_absolute_error(test,pred_mod_hw), sqrt(mean_squared_error(test,pred_mod_hw))))



Holt-Winters model MSE:117929125.05661231, MAE:5863.552989130435, RMSE:10859.517717496128


