# Data Forecasting

The module is in charge of: 
- forecasting the value of temperature in the next X seconds; 
- forecasting the value of humidity in the next X seconds; 
- forecasting the value of the gas sensor in the next X seconds 

(X is defined by users or developers). 

The time series of predicted values must be saved to InfluxDB and also shown on the Grafana dashboard, e.g. by drawing two lines on the temperature graph: one for the raw measurements and one for the predictions. One or more time-series forecasting techniques can be developed and compared.


## Import Dependencies

In [1]:
import os
import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import random

# Data retrieval from InfluxDB

In [None]:
!pip3 install influxdb_client
!pip install statsmodels

In [3]:
from influxdb_client import InfluxDBClient, Point
from influxdb_client.client.write_api import SYNCHRONOUS
# Connection settings for the InfluxDB instance used by this notebook.
# Token, organisation and URL can each be overridden through an environment
# variable, so the notebook can target another server (or use a rotated token)
# without editing the code; the defaults reproduce the original configuration.
# NOTE(review): the default token is a credential committed in clear text --
# revoke it and supply the replacement only through INFLUXDB_TOKEN.
token = os.environ.get(
    'INFLUXDB_TOKEN',
    '0-boCREZ1XTzYhmT3RYogxFDtraRzOEZsMYNxjp0mBOWHCif47lUv2UddrAfyJUwupk33ci92-aHHFWhjv2pRg==',
)
bucket = 'temperature'
org = os.environ.get('INFLUXDB_ORG', 'iot_group')
url = os.environ.get('INFLUXDB_URL', "http://192.168.43.177:8086")

# One client, plus the query and (synchronous) write handles derived from it.
client = InfluxDBClient(url=url, token=token, org=org)
query_api = client.query_api()
write_api = client.write_api(write_options=SYNCHRONOUS)

In [4]:
# Flux query: everything stored in the "temperature" bucket inside a fixed
# time window (2022-08-10 15:00 UTC to 2022-09-20 15:00 UTC).
query = (
    'from(bucket: "temperature")'
    '|> range(start:2022-08-10T15:00:00Z, stop:2022-09-20T15:00:00Z)'
)

In [5]:
# Run the Flux query against InfluxDB and keep the returned tables.
# NOTE(review): the name `result` is reassigned by the ADF-test cells further
# down, so the query output is lost once those cells run.
result = client.query_api().query(org=org, query=query)

- The query results are then used to build a DataFrame.

## Implementation Temperature Predictor

### ARIMA

In [None]:
# Load the temperature dataset from a CSV file and preview the first rows.
dataset_path = "../datasets/dataset1.csv"
df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Preview the first rows again (same output as the end of the loading cell).
df.head()

In [None]:
# Visualise how the measured value evolves over time.
plot_cols = ['_value']
# Use the '_time' column as the index so it becomes the x-axis of the plot.
plot_features = df[plot_cols].set_index(df['_time'])
# Dump the complete table as text before drawing.
print(df.to_string())
_ = plot_features.plot(subplots=True)



In [None]:
# Summary statistics of the numeric columns.
df.describe()

In [65]:
# Chronological split: the first 60% of the rows form the training series,
# the remaining rows are held out for evaluation.
nrows = len(df)
splitPoint = int(nrows * 0.60)
train = df['_value'].iloc[:splitPoint]
test = df['_value'].iloc[splitPoint:]

In [None]:
 # Check whether the time-series is stationary through the Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the training series; report the statistic
# and the p-value (the first two entries of the returned tuple).
result = adfuller(train)
adf_stat, p_value = result[0], result[1]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
# First-order difference of the training series (the leading NaN is dropped),
# then repeat the Dickey-Fuller test on the differenced values.
train_new = train.diff().dropna()
result = adfuller(train_new)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
from statsmodels.graphics import tsaplots
# Autocorrelation plot of the differenced training series, up to lag 10.
fig = tsaplots.plot_acf(train_new,lags=10)
plt.show()

In [None]:
# Partial autocorrelation plot of the differenced training series, up to lag 10.
fig = tsaplots.plot_pacf(train_new, lags=10)
plt.show()

In [None]:
# Walk-forward evaluation of an ARIMA(0,0,1) model on the temperature series:
# refit on every value seen so far, forecast one step ahead, then add the true
# observation to the history before moving to the next test point.
from statsmodels.tsa.arima.model import ARIMA

history = list(train)
predictions = []
print(test)
for t in test.index:
    model = ARIMA(history, order=(0, 0, 1))
    model_fit = model.fit()
    output = model_fit.forecast()
    yest = output[0]  # single one-step-ahead forecast
    obs = test[t]
    predictions.append(yest)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yest, obs))

In [None]:
import math
from sklearn.metrics import mean_squared_error

# Root-mean-square error of the one-step-ahead forecasts on the held-out data.
mse = mean_squared_error(test, predictions)
rmse = math.sqrt(mse)
print('Test RMSE: %.3f'% rmse)

# Overlay the forecasts (red) on the observed test values; the forecasts are
# given the test index so both curves share the same x-axis.
df2 = pd.DataFrame(predictions, index=test.index)
plt.plot(test)
plt.plot(df2, color='red')
plt.show()

## Implementation Humidity Predictor

In [None]:
# Load the humidity dataset from a CSV file and preview the first rows.
# NOTE(review): the path looks like a mounted Google Drive location -- confirm
# it exists in the environment where the notebook runs.
dataset_path = "/content/drive/MyDrive/Datasets/Forecasting/hum.csv"
df = pd.read_csv(dataset_path)
df.head()

In [None]:
# Give the two CSV columns readable names: 'time' and 'y'.
# (Presumably the header row repeats an identifier, hence the ".1" suffix on
# the second column -- verify against the CSV.)
column_names = {"409151bfa0cc": 'time', "409151bfa0cc.1": 'y'}
df = df.rename(columns=column_names)
df.head()

In [None]:
# Visualise how the 'y' column evolves over the row order.
plot_cols = ['y']
plot_features = df.loc[:, plot_cols]
_ = plot_features.plot(subplots=True)

In [None]:
# Summary statistics, transposed so that each column of df becomes a row.
df.describe().T

In [None]:
# Chronological split: the first 60% of the rows form the training series,
# the remaining rows are held out for evaluation.
nrows = len(df)
splitPoint = int(nrows * 0.60)
train = df['y'].iloc[:splitPoint]
test = df['y'].iloc[splitPoint:]

In [None]:
 # Check whether the time-series is stationary through the Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the training series; report the statistic
# and the p-value (the first two entries of the returned tuple).
result = adfuller(train)
adf_stat, p_value = result[0], result[1]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
# First-order difference of the training series (the leading NaN is dropped),
# then repeat the Dickey-Fuller test on the differenced values.
train_new = train.diff().dropna()
result = adfuller(train_new)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
# Autocorrelation plot of the differenced training series, up to lag 10.
fig = tsaplots.plot_acf(train_new, lags=10)
plt.show()

In [None]:
# Partial autocorrelation plot of the differenced training series, up to lag 10.
fig = tsaplots.plot_pacf(train_new, lags=10)
plt.show()

In [None]:
# Walk-forward evaluation of an ARIMA(1,1,1) model on the humidity series:
# refit on every value seen so far, forecast one step ahead, then add the true
# observation to the history before moving to the next test point.
history = list(train)
predictions = []
print(test)
for t in test.index:
    model = ARIMA(history, order=(1, 1, 1))
    model_fit = model.fit()
    output = model_fit.forecast()
    yest = output[0]  # single one-step-ahead forecast
    obs = test[t]
    predictions.append(yest)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yest, obs))

In [None]:
# Root-mean-square error of the one-step-ahead forecasts on the held-out data.
mse = mean_squared_error(test, predictions)
rmse = math.sqrt(mse)
print('Test RMSE: %.3f'% rmse)

# Overlay the forecasts (red) on the observed test values; the forecasts are
# given the test index so both curves share the same x-axis.
df2 = pd.DataFrame(predictions, index=test.index)
plt.plot(test)
plt.plot(df2, color='red')
plt.show()

## Implementation Gas Predictor

In [None]:
# Load the gas-sensor dataset from a CSV file.
# NOTE(review): the path looks like a mounted Google Drive location -- confirm
# it exists in the environment where the notebook runs.
dataset_path = "/content/drive/MyDrive/Datasets/Forecasting/gas.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Preview the first rows of the gas dataset.
df.head()

In [None]:
# Number of samples in the 'y' column.
len(df['y'])

In [None]:
# Overwrite the first 4288 samples of 'y' with 4095
# (presumably the sensor's full-scale reading -- TODO confirm).
# The write goes through a single positional .iloc call on the DataFrame:
# the chained form df['y'][a:b] = v assigns to an intermediate Series, which
# triggers SettingWithCopyWarning and leaves df untouched once pandas
# copy-on-write is active.
df.iloc[0:4288, df.columns.get_loc('y')] = 4095

In [None]:
# Visualise how the 'y' column evolves over the row order.
plot_cols = ['y']
plot_features = df.loc[:, plot_cols]
_ = plot_features.plot(subplots=True)

In [None]:
# Set samples 3000..3999 of 'y' to 4095.
# A single positional .iloc assignment on the DataFrame replaces the chained
# df['y'][a:b] = [...] form, which writes to an intermediate Series (warned
# about by pandas, and a no-op under copy-on-write). Assigning the scalar also
# avoids a length-mismatch error when fewer than 1000 rows fall in the window.
df.iloc[3000:4000, df.columns.get_loc('y')] = 4095

In [None]:
# Summary statistics, transposed so that each column of df becomes a row.
df.describe().T

In [None]:
# Chronological split: the first half of the rows forms the training series,
# the second half is held out for evaluation.
nrows = len(df)
splitPoint = int(nrows * 0.50)
train = df['y'].iloc[:splitPoint]
test = df['y'].iloc[splitPoint:]

Check for stationarity 

In [None]:
 # Check whether the time-series is stationary through the Dickey-Fuller test
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on the training series; report the statistic
# and the p-value (the first two entries of the returned tuple).
result = adfuller(train)
adf_stat, p_value = result[0], result[1]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
# First-order difference of the training series (the leading NaN is dropped),
# then repeat the Dickey-Fuller test on the differenced values.
train_new = train.diff().dropna()
result = adfuller(train_new)
adf_stat, p_value = result[0], result[1]

print('ADF Statistic: %f' % adf_stat)
print('p-value: %f' % p_value)

In [None]:
from statsmodels.graphics import tsaplots
# Autocorrelation plot of the training series, up to lag 10.
# NOTE(review): the temperature and humidity sections plot the differenced
# series (train_new) here; this cell uses the raw `train` -- confirm intent.
fig = tsaplots.plot_acf(train, lags=10)
plt.show()

In [None]:
# Partial autocorrelation plot of the training series, up to lag 10.
# NOTE(review): the temperature and humidity sections plot the differenced
# series (train_new) here; this cell uses the raw `train` -- confirm intent.
fig = tsaplots.plot_pacf(train, lags=10)
plt.show()

In [None]:
# Walk-forward evaluation of an ARIMA(1,1,1) model on the gas series: refit on
# every value seen so far, forecast one step ahead, then add the true
# observation to the history before moving to the next test point.
#
# ARIMA comes from statsmodels.tsa.arima.model, the same API the temperature
# section uses; the legacy statsmodels.tsa.arima_model implementation is no
# longer supported by current statsmodels releases.
from statsmodels.tsa.arima.model import ARIMA

# Seed the history with the RAW training values. order=(1,1,1) already applies
# first-order differencing inside the model, and the observations appended in
# the loop are raw too, so starting from the pre-differenced train_new would
# splice two differently scaled series together.
history = train.astype('float64').tolist()
predictions = list()
print(test)
for t in test.index:
    model = ARIMA(history, order=(1, 1, 1))
    model_fit = model.fit()
    output = model_fit.forecast()
    yest = output[0]  # single one-step-ahead forecast
    predictions.append(yest)
    obs = float(test[t])  # keep the history homogeneous (float64)
    history.append(obs)
    print('predicted=%f, expected=%f' % (yest, obs))

In [None]:
import math
from sklearn.metrics import mean_squared_error

# Root-mean-square error of the one-step-ahead forecasts on the held-out data.
mse = mean_squared_error(test, predictions)
rmse = math.sqrt(mse)
print('Test RMSE: %.3f'% rmse)

# Overlay the forecasts (red) on the observed test values; the forecasts are
# given the test index so both curves share the same x-axis.
df2 = pd.DataFrame(predictions, index=test.index)
plt.plot(test)
plt.plot(df2, color='red')
plt.show()

In [None]:
# Plot the held-out test series on its own.
plt.plot(test)


In [None]:
import pickle

# Persist the most recently fitted model (model_fit from the last loop
# iteration) to the file 'gas-model'. The context manager guarantees the file
# handle is flushed and closed even if pickling raises.
# NOTE: only unpickle this file from a trusted location; pickle can execute
# arbitrary code on load.
with open('gas-model', 'wb') as model_file:
    pickle.dump(model_fit, model_file)