# Model Training Script for Currency Exchange Rate Prediction

### Import Dependencies

In [None]:
import os
import yaml
from numpy import array
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.layers import LSTM, Dropout,Dense,RepeatVector,TimeDistributed,Input,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adam as adam
from sklearn.model_selection import train_test_split
import datetime

## Load the Data

In [None]:
# Read the CSV, relabel its unnamed first column as "time" and use that
# column as the index of the dataframe.
data = (
    pd.read_csv("data_resampled_hour.csv")
    .rename(columns={"Unnamed: 0": "time"})
    .set_index("time")
)

print(data.shape)
data.head()

### Preprocess the Data

In [None]:
# Look into the last 48 hours and predict the next 1 hour.
n_past = window = 48
n_future = 1

# Number of feature columns, taken from the loaded data rather than
# hard-coded: the reshapes and the model's input/output width below only work
# when this equals the actual column count.
n_features = data.shape[1]

In [None]:
# Scaling the Data
from sklearn import preprocessing
import joblib

scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

# Fit the scaler on the training rows only, so that the min/max of the
# validation and test periods do not leak into training. The two chronological
# cuts here must mirror the fractions used in the "Train-Test Split" cell.
_scaler_fit_rows, _ = train_test_split(data, test_size=0.1, shuffle=False)
_scaler_fit_rows, _ = train_test_split(_scaler_fit_rows, test_size=0.35, shuffle=False)
scaler.fit(_scaler_fit_rows)

# Apply the training-set scaling to every row; values outside the training
# range may therefore fall slightly outside [0, 1].
data = scaler.transform(data)

### Train-Test Split

In [None]:
# Chronological hold-out (shuffle disabled): first cut off 10% of the rows as
# the test set, then cut 35% of the remaining rows off as the validation set.
train_val_df, test_df = train_test_split(data, test_size=0.1, shuffle=False)
train_df, val_df = train_test_split(train_val_df, test_size=0.35, shuffle=False)

print(train_df.shape, val_df.shape, test_df.shape)

### Helper Functions

In [None]:
def split_series(series, n_past, n_future):
    """Slice a series into (past, future) windows for supervised learning.

    Every window start ``i`` for which the series still holds
    ``n_past + n_future`` rows yields one sample: rows ``i .. i + n_past - 1``
    as input and the following ``n_future`` rows as target.

    Args:
        series: array-like indexed by time step along its first axis,
            typically 2-D ``(timesteps, features)``. Converted with
            ``np.asarray``, so lists and DataFrames are accepted too.
        n_past: number of past observations in each input window.
        n_future: number of future observations in each target window.

    Returns:
        Tuple ``(X, y)`` of arrays shaped ``(n_windows, n_past, ...)`` and
        ``(n_windows, n_future, ...)``. When the series is too short for a
        single window, both arrays have zero samples but keep the window and
        feature dimensions, so callers can still read ``X.shape[1]``.
    """
    series = np.asarray(series)
    n_steps = len(series)

    # A window starting at i needs rows i .. i + n_past + n_future - 1; the
    # count is capped at n_steps because starts never run past the series.
    n_windows = min(n_steps, n_steps - n_past - n_future + 1)

    if n_windows <= 0:
        feature_dims = series.shape[1:]
        X = np.empty((0, n_past) + feature_dims, dtype=series.dtype)
        y = np.empty((0, n_future) + feature_dims, dtype=series.dtype)
        return X, y

    X = np.array([series[start:start + n_past] for start in range(n_windows)])
    y = np.array([
        series[start + n_past:start + n_past + n_future]
        for start in range(n_windows)
    ])
    return X, y

In [None]:
def plot_training(history):
    """Plot training and validation curves over the epochs.

    Prints the recorded metric names, then shows two figures in turn: one for
    the mean absolute error and one for the loss, each with a training and a
    validation line.

    Args:
        history: object whose ``history`` attribute is a mapping holding the
            series 'mae', 'val_mae', 'loss' and 'val_loss'.

    Returns:
        None.
    """
    curves = history.history
    print(curves.keys())

    # (training key, validation key, figure title, y-axis label)
    figures = (
        ('mae', 'val_mae', 'model mae', 'mae'),    # MAE: Mean Absolute Error
        ('loss', 'val_loss', 'model loss', 'loss'),
    )
    for train_key, val_key, title, y_label in figures:
        plt.plot(curves[train_key])
        plt.plot(curves[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

In [None]:
# Window each chronological split into (past, future) samples and pin both
# tensors to the (samples, timesteps, n_features) layout.

train = train_df
test = test_df


def _as_windows(rows):
    # Build the windows for one split and apply the explicit 3-D reshape.
    past, future = split_series(rows, n_past, n_future)
    past = past.reshape((past.shape[0], past.shape[1], n_features))
    future = future.reshape((future.shape[0], future.shape[1], n_features))
    return past, future


X_train, y_train = _as_windows(train)
X_test, y_test = _as_windows(test)
X_val, y_val = _as_windows(val_df)

print("Input Shape: ", X_train.shape, X_test.shape, X_val.shape)
print("Output Shape: ", y_train.shape, y_test.shape, y_val.shape)

### Create the CNN-LSTM Model

In [None]:
# define model
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Flatten

# Encoder: two Conv1D layers over the (window, n_features) input, followed by
# dropout, max-pooling and flattening into a single vector.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(window, n_features)))
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(Dropout(0.2))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())

# Decoder: repeat the encoded vector once per forecast step. The length comes
# from n_future (the configured horizon) rather than from the shape of the
# test targets, so the architecture does not depend on the test tensors.
model.add(RepeatVector(n_future))
model.add(LSTM(50, activation='relu', return_sequences=True))

# Per-step dense head that emits n_features values for every forecast step.
model.add(TimeDistributed(Dense(20, activation='relu')))
model.add(TimeDistributed(Dense(n_features)))


In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

### Start the Model Training

In [None]:
def training(model, X_train, y_train, X_val, y_val, *, epochs=50,
             batch_size=16, learning_rate=0.0001):
    """Compile and fit ``model``, then plot the training curves.

    The model is compiled in place with the Adam optimizer at a constant
    learning rate, mean-squared-error loss and a mean-absolute-error metric.

    Args:
        model: Keras model to compile and train.
        X_train: training inputs.
        y_train: training targets.
        X_val: validation inputs, passed to ``fit`` as ``validation_data``.
        y_val: validation targets, passed to ``fit`` as ``validation_data``.
        epochs: number of training epochs (keyword-only).
        batch_size: mini-batch size used by ``fit`` (keyword-only).
        learning_rate: constant learning rate for Adam (keyword-only).

    Returns:
        None. The training curves are shown through ``plot_training``.
    """
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # compile the model
    model.compile(optimizer=opt, loss='mse', metrics=['mae'])
    history = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_val, y_val),
        batch_size=batch_size,
        verbose=1,
    )

    # plot training performance over the epochs
    plot_training(history)

In [None]:
# Fit the model on the training windows, validating on the validation windows.
training(model, X_train, y_train, X_val, y_val)

In [None]:
# Save the Model
# Write the model to "final_model_hour.h5" (relative to the working directory).
model.save("final_model_hour.h5")

### Model Evaluation/Testing

In [None]:
# Reload the saved model through tensorflow.keras, the same API used to build
# and save it; mixing the standalone `keras` package with `tensorflow.keras`
# can fail when the two installed versions differ.
from tensorflow.keras.models import load_model

model = load_model('final_model_hour.h5')

In [None]:
# Print the summary of the model reloaded from disk.
model.summary()

In [None]:
# Score the reloaded model on the held-out test windows.
# NOTE(review): `results` presumably holds [mse loss, mae], matching the
# compile() call in training() -- confirm the compile state survives reload.
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=8)
print("Losses are:", results)

### Evaluate and Plot some Data

In [None]:
# Predict on the test windows and flatten predictions and targets to 2-D
# (samples, n_features) arrays.
# NOTE(review): these reshapes only succeed when n_future == 1. They also
# overwrite y_test, so the evaluation cell above would presumably fail if
# re-run after this cell -- confirm.
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(y_pred.shape[0], n_features)
y_test = y_test.reshape(y_test.shape[0], n_features)

In [None]:
# Map the scaled targets and predictions back to the original value range
# using the in-memory `scaler` fitted earlier in this notebook.
# NOTE(review): no joblib.dump of the scaler (e.g. to "scaler.gz") is visible
# in this file, so a separate session cannot reload it -- confirm whether it
# should be persisted next to the model.
y_test_unscaled= scaler.inverse_transform(y_test)
y_pred_unscaled= scaler.inverse_transform(y_pred)

In [None]:
# Wrap the unscaled arrays in DataFrames; columns are integer positions
# (0 .. n_features - 1) because no column names are supplied.
df_test = pd.DataFrame(y_test_unscaled)
df_pred = pd.DataFrame(y_pred_unscaled)


In [None]:
# Compare actual and predicted values for column 1 on the test set. Labels,
# legend and an explicit show() mirror plot_training, so the two lines can be
# told apart.
plt.plot(df_test[1], label='actual')
plt.plot(df_pred[1], label='predicted')
plt.title('test set: actual vs predicted (column 1)')
plt.xlabel('test sample')
plt.ylabel('unscaled value')
plt.legend(loc='upper left')
plt.show()