In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
# import plotly.express as px
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow as tf

# Reading data files
# Name of the experiment run; it selects the workbook loaded from each data folder.
run_name = 'Run3'

# TSG screen log for the selected run.
# Fix: the original called dropna() without using its return value, so rows with
# missing values were never removed. The cleaned frame is now kept.
# assign() returns a fresh frame, so the 'Time' conversion does not write into a
# possibly-flagged copy produced by dropna().
tsg_screen_data = (
    pd.read_excel('./TSGscreen/' + run_name+'.xlsx')
    .dropna(axis=0)
    .assign(Time=lambda frame: pd.to_datetime(frame['Time'], format='%H:%M:%S'))
)
tsg_screen_data.head()
# Feeder log for the same run. Columns (axis=1) that contain any missing value are
# discarded before the 'Time' strings are parsed into datetimes.
feeder_path = './Feederdata/' + run_name+'.xlsx'
feeder_data = pd.read_excel(feeder_path).dropna(axis=1)
feeder_data['Time'] = pd.to_datetime(
    feeder_data['Time'],
    format='%H:%M:%S',
)
feeder_data.head()
# Eyecon log for the same run, with 'Time' parsed from HH:MM:SS strings.
eyecon_path = './Eyecondata/' + run_name+'.xlsx'
eyecon_data = pd.read_excel(eyecon_path)
eyecon_data['Time'] = pd.to_datetime(
    eyecon_data['Time'],
    format='%H:%M:%S',
)
eyecon_data.head()
# Largest timestamp gap accepted when pairing rows from two different logs.
merge_tolerance = pd.Timedelta('2s')

# Step 1: attach feeder readings to each Eyecon row (as-of join on 'Time'),
# then keep only rows for which a feeder match was found and no value is missing.
feed_eyecon = pd.merge_asof(
    left=eyecon_data,
    right=feeder_data,
    on='Time',
    tolerance=merge_tolerance,
).dropna(axis=0)
len(feed_eyecon)

# Step 2: attach TSG screen readings the same way and drop incomplete rows again.
combined_Data = pd.merge_asof(
    left=feed_eyecon,
    right=tsg_screen_data,
    on='Time',
    tolerance=merge_tolerance,
).dropna(axis=0)
combined_Data.columns.values
# Alternative column selections for the overview plot:
# plot_cols = [' D_v50', 'Torque','Zone 2','Zone 3','Zone 4','Zone 5','Zone 6','Zone 7','Zone 8']
# plot_cols = ['Zone 2','Zone 3','Zone 4','Zone 5','Zone 6','Zone 7','Zone 8']
plot_cols = [' D_v50', 'Torque']

# Remove the second column (by position) from combined_Data and use it as the plot index.
# NOTE(review): presumably this is the 'Time' column — confirm against the merged column order.
time_arr = combined_Data.pop(combined_Data.columns[1])

# One subplot per selected column, indexed by the popped column.
plot_features = combined_Data[plot_cols]
plot_features.index = time_arr
_ = plot_features.plot(subplots=True)

# Drop the timestamp columns first, then every measurement that is not used downstream.
timestamp_cols = [' TimeStamp', 'TimeStamp']
unused_cols = [
    ' D_v10', ' D_v90',
    ' D_n10', ' D_n50', ' D_n90',
    ' Median diameter', ' Std deviation',
    ' Shape mean', ' Shape RSD',
    '7 Massflow', '7 Setpoint', '7 ScrewSpeed', '7 AveFeedFactor', '7 DriveCommand',
]
combined_Data = combined_Data.drop(columns=timestamp_cols)
combined_Data = combined_Data.drop(columns=unused_cols)

In [None]:
# Separate min-max scalers for the model inputs and for the target, so that
# predictions can later be mapped back with Y_scaler alone.
X_scaler, Y_scaler = MinMaxScaler(), MinMaxScaler()

# Model inputs; 'Torque' is both an input feature and the forecast target.
feature_frame = combined_Data[['Mass flow rate','7 NetWeight','Liquid flow rate','Actual RPM','Torque']]
target_frame = combined_Data[['Torque']]

# NOTE(review): both scalers are fitted on every row, i.e. before any
# train/validation split is made — confirm this is acceptable.
X_data = X_scaler.fit_transform(feature_frame)
Y_data = Y_scaler.fit_transform(target_frame)

In [None]:
def custom_ts_multi_data_prep(dataset, target, start, end, window, horizon):
    """Slice aligned arrays into (history window, future target) training pairs.

    For every anchor position ``i`` in ``range(start + window, end)`` one sample
    is produced:

    * input  -- rows ``i - window`` .. ``i - 1`` of ``dataset``
    * output -- rows ``i + 1`` .. ``i + horizon`` of ``target``
      (row ``i`` itself is in neither slice)

    Args:
        dataset: Array of input features, indexed by time step on axis 0.
        target: Array of target values, indexed by time step on axis 0.
        start: First row that may be part of a history window.
        end: Exclusive upper bound for the anchor position; ``None`` means
            ``len(dataset) - horizon``.
        window: Number of past rows in each input sample.
        horizon: Number of future rows in each output sample.

    Returns:
        Tuple ``(X, y)`` of numpy arrays holding the stacked samples.
    """
    first_anchor = start + window
    last_anchor = len(dataset) - horizon if end is None else end
    anchors = range(first_anchor, last_anchor)

    # Index with explicit ranges (integer-array indexing), as in the loop form.
    history = [dataset[range(pos - window, pos)] for pos in anchors]
    future = [target[range(pos + 1, pos + 1 + horizon)] for pos in anchors]
    return np.array(history), np.array(future)

In [None]:
# Windowing configuration.
hist_window = 60   # past time steps fed to the model per sample
horizon = 60       # future time steps predicted per sample
TRAIN_SPLIT = 140  # anchor positions below this index go to training

# Training samples: anchors from hist_window up to TRAIN_SPLIT - 1.
# NOTE(review): each training target window reaches horizon rows past its anchor,
# so it extends beyond TRAIN_SPLIT into rows the validation windows also use.
x_train, y_train = custom_ts_multi_data_prep(
    dataset=X_data,
    target=Y_data,
    start=0,
    end=TRAIN_SPLIT,
    window=hist_window,
    horizon=horizon,
)

# Validation samples: everything from TRAIN_SPLIT to the end of the data.
x_vali, y_vali = custom_ts_multi_data_prep(
    dataset=X_data,
    target=Y_data,
    start=TRAIN_SPLIT,
    end=None,
    window=hist_window,
    horizon=horizon,
)

In [None]:
# Sanity check: show the target window of the first training sample.
# (The matching input window, x_train[0], can be printed the same way.)
print('\n Target horizon\n')
first_target_window = y_train[0]
print(first_target_window)


In [None]:
# Wrap the training and validation arrays in tf.data pipelines, which is a fast
# and efficient way to feed batches to Keras during training.
batch_size = 128
buffer_size = 32

# Training stream: cached, shuffled, batched and repeated indefinitely.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)

# Validation stream: batched and repeated indefinitely, in original order.
val_data = (
    tf.data.Dataset.from_tensor_slices((x_vali, y_vali))
    .batch(batch_size)
    .repeat()
)


In [None]:
# Bidirectional LSTM stack that maps one history window to `horizon` outputs.
lstm_model = tf.keras.models.Sequential()

# First recurrent layer keeps the full sequence so a second LSTM can follow it.
lstm_model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(6, return_sequences=True),
        input_shape=x_train.shape[-2:],
    )
)
lstm_model.add(tf.keras.layers.Dense(6, activation='relu'))

# Second recurrent layer returns only its final output.
lstm_model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(4)))
lstm_model.add(tf.keras.layers.Dense(4, activation='relu'))
lstm_model.add(tf.keras.layers.Dropout(0.1))

# One output unit per forecast step.
lstm_model.add(tf.keras.layers.Dense(units=horizon))

lstm_model.compile(optimizer='adam', loss='mae')
lstm_model.summary()

In [None]:
# File that receives the weights of the best epoch (lowest validation loss).
model_path = 'Bidirectional_LSTM_Multivariate.h5'

# Stop once val_loss has not improved for 10 consecutive epochs.
early_stopings = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='min',
)

# Save the model whenever val_loss reaches a new minimum.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=0,
)
callbacks = [early_stopings, checkpoint]

# Both datasets repeat forever, so the step counts define the length of an epoch.
# (An earlier configuration used steps_per_epoch=10 and validation_steps=50.)
history = lstm_model.fit(
    x=train_data,
    epochs=150,
    steps_per_epoch=100,
    validation_data=val_data,
    validation_steps=100,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
# Training vs. validation loss per epoch, as recorded by Keras during fit().
loss_fig, loss_ax = plt.subplots(figsize=(16,9))
for curve_key in ('loss', 'val_loss'):
    loss_ax.plot(history.history[curve_key])
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train loss', 'validation loss'])
plt.show()

In [None]:
# Forecast from the most recent rows of the combined data.
#
# Fix: use transform(), not fit_transform(). Refitting X_scaler on this short tail
# window would rescale the inputs with a different min/max than the model was trained
# with, and would overwrite the fitted scaler. With transform(), the Torque column
# keeps the same scaling that Y_scaler uses, so it can be inverted consistently later.
#
# NOTE(review): the window has horizon+1 rows, while the model was built with
# input_shape=x_train.shape[-2:] (hist_window rows) — confirm the model accepts it.
data_val = X_scaler.transform(combined_Data[['Mass flow rate','7 NetWeight','Liquid flow rate','Actual RPM','Torque']].tail(horizon+1))

# Add a leading batch axis: (rows, features) -> (1, rows, features).
val_rescaled = data_val.reshape(1, data_val.shape[0], data_val.shape[1])
pred = lstm_model.predict(val_rescaled)

# Map the scaled forecast back to the original Torque scale.
pred_Inverse = Y_scaler.inverse_transform(pred)
pred_Inverse

In [None]:
def timeseries_evaluation_metrics_func(y_true, y_pred):
    """Print MSE, MAE, RMSE, MAPE and R2 of a forecast against reference values.

    Args:
        y_true: Reference values (array-like).
        y_pred: Forecast values, aligned element-wise with ``y_true``.

    Returns:
        None. All results are written to stdout.
    """
    print('Evaluation metric results:-')

    mse = metrics.mean_squared_error(y_true, y_pred)
    print(f'MSE is : {mse}')

    mae = metrics.mean_absolute_error(y_true, y_pred)
    print(f'MAE is : {mae}')

    rmse = np.sqrt(metrics.mean_squared_error(y_true, y_pred))
    print(f'RMSE is : {rmse}')

    # Mean absolute percentage error, computed by hand on numpy arrays.
    truth, forecast = np.array(y_true), np.array(y_pred)
    mape = np.mean(np.abs((truth - forecast) / truth)) * 100
    print(f'MAPE is : {mape}')

    r2 = metrics.r2_score(y_true, y_pred)
    print(f'R2 is : {r2}', end='\n\n')

# Undo the scaling on the window that was fed to the model.
# NOTE(review): Y_scaler was fitted on the single 'Torque' column but is applied here to
# a multi-column window; only the last column is read from the result below — confirm
# that the values in that column are the intended reference.
validate = Y_scaler.inverse_transform(val_rescaled[0])

# Reference series: last column of the window, all rows except the final one.
# NOTE(review): these rows are part of the model input, not rows after it — confirm
# this is the comparison that is wanted.
reference_series = validate[-(horizon+1):-1, -1]
timeseries_evaluation_metrics_func(reference_series, pred_Inverse[0])

In [None]:
# Overlay the reference Torque series and the model forecast.
plt.figure(figsize=(16,9))
plt.plot( list(validate[-(horizon+1):-1,-1]))
plt.plot( list(pred_Inverse[0]))
plt.title("Actual vs Predicted")
# Fix: the plotted quantity is Torque (the column Y_scaler was fitted on); the previous
# "Traffic volume" label did not describe this data.
plt.ylabel("Torque")
plt.legend(('Actual','predicted'))
plt.show()