# 1. Facebook Prophet Assessment

## I - Setup

In [1]:
# Import of libraries
import os
os.environ['NUMEXPR_MAX_THREADS'] = '16'
import mysql.connector as mariadb
import pandas as pd
from fbprophet import Prophet
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from multiprocessing import Pool
from multiprocessing import freeze_support
import time
import math

# Defining classes and functions

class sql_query:
    """Callable helper that runs one SQL query against the ``db_velib`` database.

    The credentials are read once, at construction time, from a CSV file
    indexed by its ``Field`` column.  The rows ``user``, ``password`` and
    ``host`` are used; the first remaining column of each row holds the value.
    """

    def __init__(self, credentials_path):
        """Load the connection credentials.

        Parameters
        ----------
        credentials_path : str or path-like
            CSV file containing a ``Field`` column with at least the entries
            ``user``, ``password`` and ``host``.
        """
        self.db_credentials = pd.read_csv(credentials_path, index_col="Field")

    def _credential(self, field):
        """Return the first value stored on the ``field`` row of the credentials."""
        # .iloc[0] is explicitly positional; plain [0] on a label-indexed
        # Series only works through a deprecated positional fallback.
        return self.db_credentials.loc[field].iloc[0]

    def __call__(self, query):
        """Execute ``query`` and return the whole result set.

        A new connection is opened for every call and is always closed before
        returning, even when the query fails.

        Parameters
        ----------
        query : str
            SQL statement producing a result set.

        Returns
        -------
        pandas.DataFrame
            One row per fetched record; columns are named after the fields
            reported by the cursor.
        """
        mariadb_connection = mariadb.connect(
            user=self._credential("user"),
            password=self._credential("password"),
            host=self._credential("host"),
            port=3306,
            db="db_velib")

        try:
            cursor = mariadb_connection.cursor()
            # Attribute kept for backward compatibility; the cursor it refers
            # to is closed by the time this method returns.
            self.cursor = cursor
            try:
                cursor.execute(query)
                field_names = [column[0] for column in cursor.description]
                # Materialise the rows before the cursor is released.
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            mariadb_connection.close()

        return pd.DataFrame(rows, columns=field_names)
    
def prophet_prediction(hour, full_dataframe, periods=30, freq='min'):
    """Fit Prophet on the observations before ``hour`` and forecast what follows.

    Parameters
    ----------
    hour : timestamp-like
        Forecast origin.  Rows with ``ds < hour`` are the training history and
        only forecast rows with ``ds >= hour`` are returned.
    full_dataframe : pandas.DataFrame
        Frame with a ``ds`` (timestamp) and a ``y`` (value) column.
    periods : int, optional
        Number of future steps requested from Prophet (default 30).
    freq : str, optional
        Pandas frequency alias of one step (default ``'min'``).

    Returns
    -------
    list
        The ``yhat`` values of the forecast rows located at or after ``hour``.

    Notes
    -----
    The future frame is built by ``make_future_dataframe``, which extends the
    training history.  When the last training observation lies more than one
    step before ``hour``, some future rows fall before ``hour`` and fewer than
    ``periods`` values are returned.
    """
    history = full_dataframe[full_dataframe["ds"] < hour]

    model = Prophet()
    model.fit(history)

    future = model.make_future_dataframe(periods=periods, freq=freq)
    forecast = model.predict(future)

    # Keep only the part of the forecast located at or after the origin.
    return forecast.loc[forecast["ds"] >= hour, "yhat"].tolist()

def measure_rmse(actual, predicted):
    """Return the root-mean-square error of ``predicted`` against ``actual``.

    Both sequences are handed to ``mean_squared_error``; the square root of
    its result is returned.
    """
    squared_error = mean_squared_error(y_true=actual, y_pred=predicted)
    return math.sqrt(squared_error)

def result_creating(station_id, day_of_testing="2020-05-10"):
    """Evaluate Prophet on one station for one day and save the results as CSV.

    The station's observations recorded after midnight of ``day_of_testing``
    are loaded from ``velib_realtime``.  For each of 64 forecast origins
    (every 15 minutes, starting at 06:00) ``prophet_prediction`` is run on the
    data preceding the origin, its output is compared with the first 30
    observations at or after the origin, and the RMSE of the pair is stored.

    Parameters
    ----------
    station_id : int
        Identifier of the station to evaluate.
    day_of_testing : str or timestamp-like, optional
        Day to evaluate (default ``"2020-05-10"``).

    Returns
    -------
    None
        The results are written to a CSV file in a hard-coded directory; the
        file name contains the day and the station id.
    """
    day_start = pd.Timestamp(day_of_testing).normalize()
    day_label = day_start.strftime("%Y-%m-%d")

    request = sql_query("../../aws_mariadb_crendentials.csv")

    # The date has to reach SQL as a quoted literal: unquoted, 2020-05-10 is
    # parsed as the arithmetic expression 2020 - 5 - 10.  int() guarantees
    # that only a number is interpolated for the station id.
    query = """
    SELECT DISTINCT date_of_update, nb_total_free_bikes FROM velib_realtime
    WHERE station_id = {}
    AND date_of_update > DATE('{}')
    ORDER BY date_of_update ASC
    """.format(int(station_id), day_label)

    df = request(query)
    df.columns = ['ds', 'y']

    # Setting max boundary: keep the tested day only
    df_data = df[df["ds"] < (day_start + pd.DateOffset(days=1))]

    # Forecast origins: 64 points, every 15 minutes from 06:00
    origins = pd.date_range(day_start + pd.Timedelta(hours=6), periods=64, freq='15min')

    # One record per origin; the frame is built once at the end.  Writing
    # cells through chained indexing (frame.loc[i]['col'] = value) targets a
    # temporary object and is not guaranteed to reach the frame.
    records = []
    for origin in tqdm(origins):
        prediction = prophet_prediction(origin, df_data)
        real_values = df_data.loc[df_data['ds'] >= origin, 'y'].iloc[:30].tolist()
        records.append({
            'prediction': prediction,
            'real_values': real_values,
            'metrics': measure_rmse(real_values, prediction),
        })

    df_results = pd.DataFrame(
        records,
        index=origins,
        columns=['prediction', 'real_values', 'metrics'],
    )

    df_results.to_csv("/home/exalis/Github/velib-prediction-v2/3. Results/Facebook Prophet/Facebook Prophet Results - {} - {}.csv".format(day_label, station_id))
    

def run_multiprocessing(func, i, n_processors):
    """Map ``func`` over the items of ``i`` with a pool of worker processes.

    Parameters
    ----------
    func : callable
        Function applied to every item.
    i : iterable
        Items to process.
    n_processors : int
        Number of worker processes in the pool.

    Returns
    -------
    list
        The value returned by the pool's ``map`` call.
    """
    worker_pool = Pool(processes=n_processors)
    with worker_pool:
        results = worker_pool.map(func, i)
    return results

def main(list_of_stations, n_processors=14):
    """Run ``result_creating`` for every station using parallel workers.

    Parameters
    ----------
    list_of_stations : iterable
        Station identifiers, each passed to ``result_creating``.
    n_processors : int, optional
        Number of worker processes (default 14).

    Returns
    -------
    None
    """
    run_multiprocessing(result_creating, list_of_stations, n_processors)

Importing plotly failed. Interactive plots will not work.


In [2]:
# Build the list of station ids present in the database

request = sql_query("../../aws_mariadb_crendentials.csv")
query = """
SELECT DISTINCT station_id FROM velib_realtime
"""
# The rows labelled 0 and 1391 hold bad values and are discarded
df = request(query).drop(index=[0, 1391])

list_of_stations = df["station_id"].tolist()

print(list_of_stations)

[1001, 1002, 1003, 1006, 1007, 1008, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1102, 1116, 1117, 1119, 1120, 1122, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017, 2020, 2021, 2022, 2101, 2102, 2108, 2109, 2110, 2111, 2112, 3001, 3002, 3003, 3004, 3005, 3006, 3007, 3008, 3009, 3010, 3011, 3012, 3101, 3102, 4001, 4002, 4003, 4005, 4006, 4007, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4101, 4103, 4104, 4107, 4108, 4109, 4110, 4111, 5001, 5002, 5003, 5004, 5005, 5006, 5007, 5012, 5014, 5015, 5016, 5018, 5019, 5020, 5021, 5022, 5024, 5026, 5027, 5028, 5029, 5030, 5031, 5032, 5033, 5034, 5105, 5106, 5107, 5110, 5114, 5115, 5116, 5119, 5121, 5122, 5123, 5124, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6020, 6021, 6022, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6103, 6104, 6107, 6108, 7001, 7002, 7003, 700

In [4]:
# Entry point of the notebook: evaluate every station in list_of_stations.
if __name__ == "__main__":
    freeze_support()   # per the multiprocessing docs, only has an effect in frozen Windows executables
    main(list_of_stations)

  0%|          | 0/3 [00:00<?, ?it/s]INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
 33%|███▎      | 1/3 [00:15<00:31, 15.99s/it]INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
Process ForkPoolWorker-14:
Process ForkPoolWorker-3:
Process ForkPoolWorker-8:
Process ForkPoolWorker-7:
Process ForkPoolWorker-9:
Process ForkPoolWorker-5:
Process ForkPoolWorker-6:
Process ForkPoolWorker-2:
Process ForkPoolWorker-4:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent

Process ForkPoolWorker-13:
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/pool.py", line 110, in worker
    task = get()
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/queues.py", line 351, in get
    with self._rlock:
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
Traceback (most recent call last):
Traceback (most recent call last):
KeyboardInterrupt
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/exalis/anaconda3/envs/tensorflowenv/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/exalis/anaconda3/envs/tensorflowenv/li

KeyboardInterrupt: 