In [1]:
# Import packages.
import pandas as pd
from PyForePa import PyForePa

In [2]:
# Load the kitchen-area temperature column (T1) from the UCI energy dataset.
# One reading every ten minutes over roughly 4.5 months, in Celsius.
# Only T1 is read, so `df` is a single-column DataFrame.
df = pd.read_csv(
    filepath_or_buffer='https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv',
    usecols=['T1'],
)

In [3]:
# Summary statistics for the temperature data: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,T1
count,19735.0
mean,21.686571
std,1.606066
min,16.79
25%,20.76
50%,21.6
75%,22.6
max,26.26


In [4]:
# Wrap the full T1 temperature series in a PyForePa object called "data".
data = PyForePa(df['T1'])

In [5]:
# Forecast the next 6 steps with a drift model and keep the predictions (y_pred).
# With one reading every ten minutes, 6 steps cover the next 60 minutes of the series.
y_pred = data.drift_forecast(h=6).y_pred
print(y_pred)

[25.50028428 25.50056856 25.50085284 25.50113712 25.5014214  25.50170569]


In [6]:
# Average the 6 predicted values into a single number.
mean_y_pred = y_pred.mean()
print(mean_y_pred)

25.500994983277593


In [7]:
# Alternatively, do it in one chained expression:
# forecast 6 steps, take the predictions, and average them.
mean_y_pred = (
    data
    .drift_forecast(h=6)
    .y_pred
    .mean()
)
print(mean_y_pred)

25.500994983277593


In [8]:
# Get the upper bounds (y_pred_ub) for the same 6-step drift forecast.
# NOTE(review): the interval/confidence level behind these bounds is not shown
# here — confirm against the PyForePa documentation.
ub_y_pred = data.drift_forecast(h=6).y_pred_ub
print(ub_y_pred)

[25.58060464 25.61415871 25.63997179 25.66177785 25.68102319 25.69844959]


In [9]:
# Take the first 18000 observations as training data:
# head(-1735) keeps everything except the last 1735 of the 19735 rows.
train_set = PyForePa(df['T1'].head(-1735))

In [10]:
# Hold out the final 1735 observations as ground truth for scoring forecasts.
y_true = df['T1'].tail(1735)

In [11]:
# Forecast as many steps as there are held-out observations with a naive model,
# then score the forecast: RMSE of the predictions against the ground truth.
naive_rmse = (
    train_set
    .naive_forecast(h=len(y_true))
    .root_mean_squared_error(y_true)
)
print(naive_rmse)

0.7329037171660147


In [12]:
# Alternatively, keep the forecast object in a variable first,
# then compute its RMSE against the ground truth in a second step.
naive_preds = train_set.naive_forecast(h=len(y_true))
naive_rmse = naive_preds.root_mean_squared_error(y_true)
print(naive_rmse)

0.7329037171660147


In [13]:
# Compare the naive model to a historical mean model on the same held-out data.
# Looks like the naive model does better (its RMSE is lower).
mean_rmse = train_set.mean_forecast(h=len(y_true)).root_mean_squared_error(y_true)
print(mean_rmse)

2.4209621915926416
