In [24]:
# Evaluate a univariate persistence model
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot as plt
from pandas import concat
import numpy as np
import scipy.stats as stats
import pandas as pd

plt.rcParams.update({'figure.figsize': (10, 7), 'figure.dpi': 120})


In [11]:
# Plot the forecasts in the context of the original dataset
def plot_forecasts(series, forecasts, test):
    # Plot the entire dataset in blue
    plt.plot(series)
    # Plot the forecasts in red
    for i in range(len(forecasts)):
        # Start offset for x-axis
        off_s = len(series) - len(test) + i - 1
        # End offset for x-axis
        off_e = off_s + len(forecasts[i]) + 1
        # Indexes of x-axis for multi-step forecasts
        xaxis = [x for x in range(off_s, off_e)]
        # Values of muti-step fotecasts
        yaxis = [series[off_s]] + forecasts[i]  
        plt.plot(xaxis, yaxis, color='red')
    # Show the final plot
    plt.show()          

In [12]:
# Evaluate the RMSE for each forecast time step
def evaluate_forecasts(test, forecasts, n_steps):
    for i in range(n_steps):
        # Picking the values from multi-step forecasts
        ypred_ts = [forecast[i] for forecast in forecasts]
        # Sliding window on test series
        ytrue_ts = test[i:len(ypred_ts)+i]
        print('t+%d RMSE: %f' % ((i+1), sqrt(mean_squared_error(ytrue_ts, ypred_ts))))

In [13]:
# Make a multi-step persistence forecast
def persistence(last_ob, n_steps):
    return [last_ob for i in range(n_steps)]


In [25]:
# Load data
train = read_csv('2000_2021_Brazilian_North_Region_hourly_MWmed_hydroelectric_power_series_train.csv',
                     header=0, index_col=0, parse_dates=True, squeeze=True)
test = read_csv('2000_2021_Brazilian_North_Region_hourly_MWmed_hydroelectric_power_series_test.csv',
                     header=0, index_col=0, parse_dates=True, squeeze=True)

In [22]:
# Concatenate train/test series for plotting
series = concat([train, test], axis=0)
# Prepare data
test = test.values
series = series.values
train = [x for x in train]
# Number of multi-step
n_steps = 24
# Store multi-step forecasts
forecasts = list()
# Number of multi-step forecasts
n_forecasts = len(test) - n_steps + 1
# Walk-forward validation
for i in range(n_forecasts):
    # Make a multi-step forecast
    last_ob = train[-1]
    yhat = persistence(last_ob, n_steps)
    # Store the multi-step forecast
    forecasts.append(yhat)
    # Add actual observation to train for the next loop
    obs = test[i]
    train.append(obs)
# Evaluate the forecasts
evaluate_forecasts(test, forecasts, n_steps)
# Plot the forecasts
#plot_forecasts(series, forecasts, test)

NameError: name 'pd' is not defined

In [15]:
train = np.log(train)
test = np.log(test)

In [16]:
# Concatenate train/test series for plotting
series = concat([train, test], axis=0)
# Prepare data
test = test.values
series = series.values
train = [x for x in train]
# Number of multi-step
n_steps = 24
# Store multi-step forecasts
forecasts = list()
# Number of multi-step forecasts
n_forecasts = len(test) - n_steps + 1
# Walk-forward validation
for i in range(n_forecasts):
    # Make a multi-step forecast
    last_ob = train[-1]
    yhat = persistence(last_ob, n_steps)
    # Store the multi-step forecast
    forecasts.append(yhat)
    # Add actual observation to train for the next loop
    obs = test[i]
    train.append(obs)
# Evaluate the forecasts
evaluate_forecasts(test, forecasts, n_steps)
# Plot the forecasts
#plot_forecasts(series, forecasts, test)


t+1 RMSE: 0.066453
t+2 RMSE: 0.113188
t+3 RMSE: 0.149938
t+4 RMSE: 0.181339
t+5 RMSE: 0.208645
t+6 RMSE: 0.232401
t+7 RMSE: 0.252269
t+8 RMSE: 0.268171
t+9 RMSE: 0.280302
t+10 RMSE: 0.289111
t+11 RMSE: 0.295021
t+12 RMSE: 0.298087
t+13 RMSE: 0.298341
t+14 RMSE: 0.295985
t+15 RMSE: 0.291049
t+16 RMSE: 0.283365
t+17 RMSE: 0.272861
t+18 RMSE: 0.259569
t+19 RMSE: 0.244034
t+20 RMSE: 0.227568
t+21 RMSE: 0.211249
t+22 RMSE: 0.196458
t+23 RMSE: 0.184781
t+24 RMSE: 0.180400


In [26]:
train = stats.boxcox(train)[0]
test = stats.boxcox(test)[0]

In [29]:
# Concatenate train/test series for plotting
series = concat([pd.Series(train), pd.Series(test)], axis=0)
train = [x for x in train]
# Number of multi-step
n_steps = 24
# Store multi-step forecasts
forecasts = list()
# Number of multi-step forecasts
n_forecasts = len(test) - n_steps + 1
# Walk-forward validation
for i in range(n_forecasts):
    # Make a multi-step forecast
    last_ob = train[-1]
    yhat = persistence(last_ob, n_steps)
    # Store the multi-step forecast
    forecasts.append(yhat)
    # Add actual observation to train for the next loop
    obs = test[i]
    train.append(obs)
# Evaluate the forecasts
evaluate_forecasts(test, forecasts, n_steps)
# Plot the forecasts
#plot_forecasts(series, forecasts, test)


t+1 RMSE: 1.119174
t+2 RMSE: 1.119185
t+3 RMSE: 1.119194
t+4 RMSE: 1.119218
t+5 RMSE: 1.119238
t+6 RMSE: 1.119215
t+7 RMSE: 1.119221
t+8 RMSE: 1.119230
t+9 RMSE: 1.119245
t+10 RMSE: 1.119243
t+11 RMSE: 1.119241
t+12 RMSE: 1.119224
t+13 RMSE: 1.119209
t+14 RMSE: 1.119188
t+15 RMSE: 1.119176
t+16 RMSE: 1.119143
t+17 RMSE: 1.119149
t+18 RMSE: 1.119156
t+19 RMSE: 1.119151
t+20 RMSE: 1.119153
t+21 RMSE: 1.119165
t+22 RMSE: 1.119178
t+23 RMSE: 1.119241
t+24 RMSE: 1.119258
