In [5]:
# Import of librairies
import tensorflow as tf
import mysql.connector as mariadb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime
import math
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tqdm import tqdm
from joblib import load
from timeloop import Timeloop
from datetime import timedelta
import time 

class sql_query:
    def __init__(self, credentials_path):
        self.db_credentials = pd.read_csv(credentials_path, index_col="Field")
      
    
    def __call__(self, query):
        
        mariadb_connection = mariadb.connect(
            user=self.db_credentials.loc["user"][0],
            password=self.db_credentials.loc["password"][0],
            host=self.db_credentials.loc["host"][0],
            port=3306,
            db = "db_velib")
        
        self.cursor = mariadb_connection.cursor()
        cursor = self.cursor
        cursor.execute("SET  time_zone = 'Europe/Paris'")
        cursor.execute(query)
        field_names = [i[0] for i in cursor.description]
        df = pd.DataFrame(cursor, columns=field_names)
        return df
    
# Transforming the input data in the proper format 


def measure_rmse(actual, predicted):
    return math.sqrt(mean_squared_error(actual, predicted))

def list_stations():
    request = sql_query("../../aws_mariadb_crendentials.csv")
    query = """
    SELECT DISTINCT station_id FROM velib_realtime
    """
    df= request(query)
    # Removing bad values
    df= df.drop(0)
    df = df.drop(1391)
    list_of_stations = list(df.station_id)
    return list_of_stations

def loading_models_unique(station_id, day_of_testing):

    try:
        LSTM_A = tf.keras.models.load_model('../4. Models/Tensorflow Univariate - {} - {} - LSTM_A.h5'.format(day_of_testing, station_id))
        LSTM_B = tf.keras.models.load_model('../4. Models/Tensorflow Univariate - {} - {} - LSTM_B.h5'.format(day_of_testing, station_id))
        std = load('../4. Models/Tensorflow Univariate - {} - {} - std.joblib'.format(day_of_testing, station_id))
        return LSTM_A, LSTM_B, std
    
    except:
        print('impossible to load ')


def create_result_df(past_history):
    # Extracting base for prediction 

    request = sql_query("../../aws_mariadb_crendentials.csv")
#
#    query = """
#    SELECT station_id, date_of_update, nb_total_free_bikes FROM db_velib.velib_realtime
#    WHERE date_of_update >= (SELECT * FROM (SELECT distinct date_of_update FROM db_velib.velib_realtime
#    WHERE MINUTE(date_of_update)%5 = 0
#    ORDER BY date_of_update DESC 
#    LIMIT 36) as temp
#    ORDER BY date_of_update ASC 
#    LIMIT 1)
#    AND MINUTE(date_of_update)%5 = 0
#    ORDER BY station_id, date_of_update ASC;
#    """

    query = """
    SELECT station_id, date_of_update, nb_total_free_bikes FROM db_velib.velib_realtime
    WHERE date_of_update >= DATE_SUB(NOW(), INTERVAL 220 Minute) AND MINUTE(date_of_update)%5 = 0
    ORDER BY station_id, date_of_update ASC;
    """
    df = request(query)
    # Selecting only the last 36 date_of_update values. We don't do it in SQL as it is longer!
    df = df[df['date_of_update'].isin(list(df['date_of_update'].unique())[-past_history:])]
    df.index = df.date_of_update
    df = df[['station_id','nb_total_free_bikes']]

    return df


def predict_iteration_unique(list_of_stations, df, LSTM_A, LSTM_B, std):
    # Request for each minutes
    
    df_prediction = pd.DataFrame(columns=['time', 'station_id','model_A', 'model_B'])

    for station_id in tqdm(list_of_stations):
        try:

            df_prediction_temp = pd.DataFrame(columns=['time', 'station_id','model_A', 'model_B'])
            df_prediction_temp["time"] = list(pd.date_range(max(df.index), periods=7, freq='5Min'))[1:]
 
            df_prediction_temp["station_id"] = station_id

            input_data = std.transform(df[df["station_id"] == station_id].nb_total_free_bikes.values[:-36].reshape(-1, 1))
            df_prediction_temp['model_A'] = std.inverse_transform(LSTM_A.predict(input_data.reshape(1,past_history,1))[0])
            df_prediction_temp['model_B'] = std.inverse_transform(LSTM_B.predict(input_data.reshape(1,past_history,1))[0])
            df_prediction = pd.concat([df_prediction, df_prediction_temp])
        
        except:
            print('error on ', station_id)
            
    return df_prediction


def predict_iteration_unique_2(df, LSTM_A, LSTM_B, std):
    # Creating a template for an array the batch size
    data = np.array(df.pivot_table(df, index= 'station_id', columns=df.index))

    # Making nb of station x previous values shape
    data = data.reshape(data.shape[0], data.shape[1])

    # Standard Scaling
    data = std.transform(data)

    # Reshaping for tensor transformation and batchisation
    data = data.reshape(data.shape[0], data.shape[1], 1)

    # Making a prediction

    pred_values_A = std.inverse_transform(LSTM_A.predict(data))
    pred_values_B = std.inverse_transform(LSTM_B.predict(data))


    df_pred_A = pd.DataFrame(pred_values_A, index=df.station_id.unique(), columns=list(pd.date_range(max(df.index), periods=7, freq='5Min'))[1:])
    df_pred_A = df_pred_A.set_index(df_pred_A.index).stack().reset_index(name='model_A').rename(columns={'level_0':'station_id','level_1':'date'})

    df_pred_B = pd.DataFrame(pred_values_B, index=df.station_id.unique(), columns=list(pd.date_range(max(df.index), periods=7, freq='5Min'))[1:])
    df_pred_B = df_pred_B.set_index(df_pred_B.index).stack().reset_index(name='model_B').rename(columns={'level_0':'station_id','level_1':'date'})

    # Merging

    df_prediction = df_pred_A.merge(df_pred_B,left_on=["station_id", "date"],right_on=["station_id", "date"])
    
    return df_prediction
            
# Initialisation

# Variables
day_of_testing = '2020-05-19'

#

In [6]:
%%time

station_id = 'global'
past_history = 36
#future_target = 6
#list_of_stations = list_stations() # inutile 
#BATCH_SIZE = 36

# Loading the models only once
LSTM_A, LSTM_B, std = loading_models_unique(station_id, day_of_testing)
df = create_result_df(past_history)
df_prediction = predict_iteration_unique_2(df, LSTM_A, LSTM_B, std)
df_prediction['date_of_prediction'] = str(pd.Timestamp.now())[:16]
df_prediction.to_csv('../7. Predictions/5min_predictions_global.csv', index=False)

impossible to load 


TypeError: cannot unpack non-iterable NoneType object