Time-series forecasting from IoT Home Automation Data.

Tuomas Eerola - 2019

Data source: https://github.com/eerolat/home-automation-data-logger

# Run the following cell.

Download actual sensor data. 

In [0]:
import pandas as pd

from urllib.request import urlopen

# Location of the raw temperature log produced by the data logger.
log_url = "http://eerola.dy.fi/temp/temperature.log"

# The log is space separated; the first two fields are merged into a single
# parsed timestamp column while reading.
series_own = pd.read_csv(log_url, sep=" ", parse_dates=[[0, 1]])
series_own.columns = [
    'Date Time',
    'SourceInfo1',
    'SourceInfo2',
    'MeasurementInfo1',
    'Temp',
    'MeasurementInfo2',
    'Measurement2',
]

# Keep only the timestamp and the temperature reading, indexed by timestamp.
series_own = series_own[['Date Time', 'Temp']].set_index('Date Time')

# For repeated timestamps keep the last reading, then put the data on a
# regular hourly grid, back-filling hours that have no reading.
series = (
    series_own.reset_index()
    .drop_duplicates(subset='Date Time', keep='last')
    .set_index('Date Time')
    .resample('H')
    .bfill()
)

print("Data loading ready.")

# Visualize the data to see what we've got.

In [0]:
from matplotlib import pyplot


# Line chart of the hourly temperature series, rendered immediately.
series.plot.line()
pyplot.show()

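Edit the following numbers to your preference: `series_days` is how many of the most recent days of hourly data are used to fit the model, and `prediction_days` sets the length of the prediction window.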

In [0]:
# Number of most recent days of hourly samples used to fit the model.
series_days: int = 7

# Length of the prediction window, in days.
prediction_days: int = 1

In [0]:
import statsmodels.api as sm
import itertools

# Grid search over SARIMAX orders: fit every candidate and rank them by AIC.

# Candidate (p, d, q) orders: every combination of 0 and 1.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
# Seasonal candidates reuse the same combinations with a period of 12 samples.
# NOTE(review): the series is resampled hourly, so a daily cycle would be 24
# samples -- confirm that a period of 12 is intended.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in pdq]

# Training window: the most recent `series_days` days of hourly samples.
y = series.tail(series_days*24)

cols = ['aic', 'param', 'param_seasonal']
# Collect one record per successful fit and build the frame once at the end;
# DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
aic_records = []

for param in pdq:
  for param_seasonal in seasonal_pdq:
    try:
      mod = sm.tsa.statespace.SARIMAX(y,order=param,seasonal_order=param_seasonal,enforce_stationarity=False,enforce_invertibility=False)
      results = mod.fit()
    except Exception as err:
      # Best effort: a candidate that cannot be fitted is skipped, but it is
      # reported instead of being silently swallowed. Catching Exception
      # (not a bare except) keeps KeyboardInterrupt working.
      print("Skipping order={} seasonal_order={}: {}".format(param, param_seasonal, err))
      continue
    aic_records.append({'aic': results.aic, 'param': param, 'param_seasonal': param_seasonal})

# Lowest AIC first; the top row is the suggested parameter set.
aics = pd.DataFrame(aic_records, columns=cols).sort_values(by=['aic'])
print(aics.head(1))

Fitting SARIMAX model.

Update the `order` and `seasonal_order` parameters in the next cell to the values printed above (the combination with the lowest AIC).

In [0]:
# Settings for the final model; order and seasonal_order are meant to be
# copied by hand from the best row of the grid search.
sarimax_options = dict(
    order=(0, 1, 1),
    seasonal_order=(1, 0, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(y, **sarimax_options)

# Fit the model and print the second table of the summary.
results = mod.fit()
coefficient_table = results.summary().tables[1]
print(coefficient_table)

See the model fit.

In [0]:
import datetime

# --- In-sample check: one-step-ahead predictions over the last
# `prediction_days` days of the training window, compared with observations.
pred_fit_fit = results.get_prediction(start=y.index[-prediction_days*24], dynamic=False)

# Confidence interval of the in-sample predictions. The original referenced an
# undefined name `pred` here, which raised NameError on a fresh kernel.
pred_ci_fit = pred_fit_fit.conf_int()

ax = y.plot(label='observed')
pred_fit_fit.predicted_mean.plot(ax=ax, label='Forecasted', alpha=.7, figsize=(14, 4))

# Shade the band between the lower and upper confidence bounds.
ax.fill_between(pred_ci_fit.index, pred_ci_fit.iloc[:, 0], pred_ci_fit.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')

pyplot.legend()
pyplot.show()

# --- Out-of-sample forecast for the next `prediction_days` days.
# The series is hourly, so one day is 24 steps; this matches the in-sample
# window above (the original used 12 steps per day, i.e. half a day).
pred_uc = results.get_forecast(steps=prediction_days*24)
pred_ci = pred_uc.conf_int()
ax = y.plot(label='observed', figsize=(14, 4))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')

pyplot.legend()
pyplot.show()

Draw some cool statistics.

In [0]:
from matplotlib import pyplot

# Render the diagnostic plots of the fitted model on a wide figure.
diagnostics_figure = results.plot_diagnostics(figsize=(18, 8))
pyplot.show()