# Data Forecasting with Prophet


# PROPHET PREDICTION

## TESTO

## Import Dependencies

In [None]:
!pip3 install chart_studio
!pip3 install pystan
!pip3 install fbprophet
!pip3 install glob2
!pip3 install python-dateutil

## Import Libraries

In [None]:
import os
import itertools
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import time
import datetime 
from datetime import timedelta
import math
import numpy as np
import scipy.stats as st
from fbprophet import Prophet
from influxdb_client import InfluxDBClient
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from statsmodels.tsa.stattools import adfuller
from influxdb_client import InfluxDBClient, Point, WriteOptions
from influxdb_client.client.write_api import SYNCHRONOUS
from influxdb_client.client.write_api import WriteType

## Influx Setting

In [None]:
# InfluxDB connection settings are read from the environment so that no host
# names or credentials are stored in the notebook itself.
INFLUXDB_HOST = os.getenv("INFLUX_HOST")
INFLUXDB_PORT = os.getenv("INFLUX_HOST_PORT")
INFLUXDB_ORG = os.getenv("INFLUX_ORG")
INFLUXDB_TOKEN = os.getenv("INFLUX_TOKEN")

# os.getenv returns None for an unset variable; concatenating None into the
# URL would fail with an unhelpful TypeError, so name what is missing instead.
# Only the two values that make up the URL are checked here.
_missing_settings = [
    name
    for name, value in (
        ("INFLUX_HOST", INFLUXDB_HOST),
        ("INFLUX_HOST_PORT", INFLUXDB_PORT),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Shared client used by the query and write helpers defined below.
client = InfluxDBClient(
    url=f"http://{INFLUXDB_HOST}:{INFLUXDB_PORT}",
    token=INFLUXDB_TOKEN,
    org=INFLUXDB_ORG,
)


In [None]:
# Flatten an InfluxDB query result into a two-column pandas DataFrame.
def convert_to_dataframe(result):
    """Return a DataFrame with one row per record found in *result*.

    Every table in *result* is visited in order and each of its records
    contributes a ``(get_time(), get_value())`` pair: the time goes to
    column ``ds`` and the value to column ``y``.
    """
    rows = [
        (rec.get_time(), rec.get_value())
        for tbl in result
        for rec in tbl.records
    ]
    return pd.DataFrame(rows, columns=['ds', 'y'], index=None)

In [None]:
# Names of the InfluxDB buckets to forecast. The evaluation loop iterates over
# this list, and bucket_forecast also uses each name as the "_field" filter
# value in its Flux query.
buckets = ['temperature', 'humidity', 'gas']

In [None]:
# Send forecasting data to InfluxDB.
def send_predictions(prediction, bucket):
    """Write the forecast held in *prediction* to the InfluxDB bucket *bucket*.

    Args:
        prediction: DataFrame-like object with a ``ds`` column of timestamps
            and a ``yhat`` column of forecast values; one point is written
            per row.
        bucket: Name of the destination bucket. It is also used as the field
            name of every point, except for ``"gas"``, whose field is
            ``gasPred``.

    Each row becomes one line-protocol record of measurement ``val`` tagged
    ``prediction=yes``, stamped with a second-resolution timestamp expressed
    in nanoseconds.
    """
    import calendar  # local import: only this helper needs it

    field = "gasPred" if bucket == "gas" else bucket
    prefix = "val,prediction=yes,clientId=diubi-esp-32,lat=999,lng=999 " + field + "="

    # Walk the column values instead of indexing with range(len(...)):
    # prediction['yhat'][d] is a label lookup and fails on any frame whose
    # index is not exactly 0..n-1.
    #
    # calendar.timegm reads the timestamp as UTC. The timestamps built in
    # bucket_forecast are UTC values with the zone stripped, so time.mktime
    # (which assumes the machine's local zone) would shift every point by the
    # local UTC offset.
    lines = [
        prefix + str(yhat) + " " + str(calendar.timegm(ds.utctimetuple())) + "000000000"
        for ds, yhat in zip(prediction['ds'], prediction['yhat'])
    ]

    write_options = WriteOptions(
        batch_size=1000,
        flush_interval=10_000,
        jitter_interval=2_000,
        retry_interval=5_000,
        write_type=WriteType.synchronous,
    )
    # The context manager closes the write API when done, so a fresh writer
    # is not leaked on every call.
    with client.write_api(write_options=write_options) as write_client:
        write_client.write(bucket, INFLUXDB_ORG, lines)

## Train Prophet Model 

In [None]:
# Query one bucket, fit Prophet on the first 80% of the data and forecast the rest.
def bucket_forecast(bucket):
    """Forecast the series stored in *bucket* and publish the results.

    The function queries InfluxDB for the 3-minute means of field *bucket*,
    fits a Prophet model on the first 80% of the rows, predicts at one-minute
    steps across the remaining 20%, sends the forecast to InfluxDB through
    ``send_predictions`` and compares it with the held-out rows.

    Results are published through module-level globals (nothing is returned):
        forecast: full DataFrame returned by ``Prophet.predict``.
        expected / forecasted: observed and predicted values for the test
            timestamps that have a matching forecast row.
        expected_array / forecasted_array: the same values wrapped in a
            one-element list.
        mse_array: one-element list holding the mean squared error.

    Args:
        bucket: Bucket name; also used as the ``_field`` filter value.

    Raises:
        ValueError: if the query returns too few rows to train on.
    """
    global mse_array
    global forecasted_array
    global expected_array
    global forecasted
    global expected
    global forecast

    # Reset the published results before doing any work.
    mse_array = []
    forecasted_array = []
    expected_array = []
    forecasted = []
    expected = []

    print(bucket)
    query = (
        f'from(bucket: "{bucket}")'
        ' |> range(start: 2022-11-29T01:00:00.00Z, stop: 2022-12-01T13:00:00.00Z)'
        ' |> filter(fn: (r) => r["_measurement"] == "val")'
        f' |> filter(fn: (r) => r["_field"] == "{bucket}")'
        ' |> filter(fn: (r) => r["clientId"] == "diubi-esp-32")'
        ' |> filter(fn: (r) => r["lat"] == "42.846290")'
        ' |> aggregateWindow(every: 3m , fn: mean, createEmpty: false)'
        ' |> yield(name: "mean")'
    )

    result = client.query_api().query(org=INFLUXDB_ORG, query=query)
    print(result)

    # Convert the results to a DataFrame with columns ds (time) and y (value).
    df = convert_to_dataframe(result)

    # Train on the first 80% of the rows, test on the remaining 20%.
    split_point = int(len(df) * 0.80)
    if split_point < 2:
        # Without this guard an empty result fails below with an unrelated
        # AttributeError on the .dt accessor.
        raise ValueError(
            "Not enough data in bucket '" + str(bucket) + "' to train a forecast: "
            + str(len(df)) + " row(s) returned"
        )

    # Drop the time-zone information, keeping the same wall-clock values.
    df['ds'] = df['ds'].dt.tz_localize(None)

    train = df.iloc[:split_point]
    # Copy so that the flooring further down does not write into a slice of df.
    test = df.iloc[split_point:].copy()

    # Prophet instance
    m = Prophet(
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=True,
        changepoint_range=1,
        changepoint_prior_scale=0.01
    ).fit(train)

    # Prediction horizon in minutes: the span of the test set plus a 5-minute
    # margin.
    test_span = test['ds'].iloc[-1] - test['ds'].iloc[0]
    test_interval = int(test_span.total_seconds() / 60) + 5

    # Make the prediction at one-minute steps.
    future = m.make_future_dataframe(periods=test_interval,
                                     freq=DateOffset(minutes=1))
    forecast = m.predict(future)

    # Align forecast timestamps on whole minutes so they can be joined below.
    forecast['ds'] = forecast.ds.dt.floor('min')

    # Send the forecasted data to InfluxDB.
    send_predictions(forecast, bucket)

    # Pair every test observation with the forecast for the same minute and
    # drop the observations that have no matching forecast row.
    test['ds'] = test.ds.dt.floor('min')
    metric = test.set_index('ds')[['y']].join(forecast.set_index('ds').yhat).reset_index()
    metric = metric.dropna()

    mse = mean_squared_error(metric['y'], metric['yhat'])
    expected = metric['y'].tolist()
    forecasted = metric['yhat'].tolist()
    mse_array.append(mse)
    expected_array.append(list(expected))
    forecasted_array.append(list(forecasted))

## Prophet Model Results and Evaluation

### RMSE, Mean Value and Confidence Interval computation

In [None]:
# NOTE(review): APE and i are not used in this cell; they are kept in case
# other cells rely on them.
APE = []
i = 0
for bucket in buckets:
    # bucket_forecast publishes `expected` and `forecasted` as globals and
    # already sends the forecast to InfluxDB, so it is not sent again here.
    bucket_forecast(bucket)
    print("\n\nEvaluating " + bucket)
    plt.plot(expected, label="Real Value")
    plt.plot(forecasted, "-r", label="Forecasted Value")

    plt.legend(loc="upper left")
    plt.show()

    # Mean squared error and its root; the RMSE is in the units of the data.
    mse = mean_squared_error(expected, forecasted)
    print('mse')
    print(mse)
    rmse = math.sqrt(mse)
    print('Test RMSE: %.3f'%rmse)

    # MAE: the average of the absolute forecast errors, in the original units
    # of the predicted values. A value of zero indicates no error.
    mae = mean_absolute_error(expected, forecasted)
    print('MAE: %f' % mae)

    # Mean and standard deviation of the forecasted values.
    mean = sum(forecasted) / len(forecasted)
    print("Mean: ",mean)

    std = np.std(forecasted)
    print("Standard Deviation: ",std)

    # 95% confidence interval around the sample mean of the forecasted
    # values: a lower and an upper bound within which, with the stated
    # probability, the population parameter is expected to lie.
    # The level is passed positionally because its keyword name differs
    # between SciPy releases (`alpha` before 1.9, `confidence` afterwards).
    res = st.t.interval(0.95, df=len(forecasted)-1,
                        loc=np.mean(forecasted),
                        scale=st.sem(forecasted))

    print("Confidence intervals: ",res)