# Foreign Exchange Forecasting using LSTMs

In [None]:
import pandas as pd
import numpy as np

from pylab import rcParams
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rc

from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.metrics import RootMeanSquaredError
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

%matplotlib inline

# Apply a seaborn theme to every plot in the notebook: white grid background,
# the 'muted' colour palette, and fonts scaled to 1.5x.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)

# Default matplotlib figure size, given as (width, height).
rcParams['figure.figsize'] = 14, 8

In [None]:
# Google Spreadsheet ID; it is interpolated into the CSV export URL that the
# data-loading cell builds.
SHEET_ID = '1JDNv_mArl-GPIpxuWS5GxgVEwvjXocS1MrXGc6TYs8M'
# Sheet (tab) names, one per currency pair. The visible code only reads
# SHEET_NAME[0]; the other entries are presumably for re-running the notebook
# on another pair -- confirm.
SHEET_NAME = ['USD/IDR', 'EUR/IDR', 'JPY/IDR']

## Data Overview
The exchange-rate data comes from Google Finance and is read from a Google Sheets CSV export.

In [None]:
# CSV export endpoint for the first sheet listed in SHEET_NAME.
url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME[0]}'

# Download the sheet and discard the unused 'Unnamed: 2' column in one chain.
df = pd.read_csv(url).drop('Unnamed: 2', axis=1)

# Parse the Date strings (day/month/year hour:minute:second) into datetimes,
# then print a summary of the resulting frame.
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y %H:%M:%S')
df.info()

In [None]:
# Colours available for plotting; only the first entry is used below.
color_pal = ["#F8766D", "#D39200", "#93AA00", "#00BA38", "#00C19F", "#00B9E3", "#619CFF", "#DB72FB"]

# Work on a copy of df: keep the first row of each calendar day and index the
# result by Date so the plot uses dates on the x-axis.
dfplot = (
    df.copy()
    .groupby([pd.Grouper(key='Date', freq='D')])
    .first()
    .reset_index()
    .set_index('Date')
)

_ = dfplot.plot(style='', figsize=(20,5), color=color_pal[0], title='USD/IDR by Days')

## Data preprocessing

### Normalize

In [None]:
# Scale the closing prices with MinMaxScaler (default feature range 0..1).
scaler = MinMaxScaler()
# reshape(-1, 1) turns the 1-D Close column into a single-feature 2-D array,
# which is the layout fit_transform works on.
close_price = df.Close.values.reshape(-1, 1)
# NOTE(review): the scaler is fit on the entire series, before any train/test
# split, so the min/max of rows later used for testing influence the scaling.
# Confirm this look-ahead is acceptable for the reported metrics.
scaled_close = scaler.fit_transform(close_price)
# Displayed as the cell output: (number of rows, 1).
scaled_close.shape

### Hyperparameters

In [None]:
# Experiment settings. The trailing comments presumably list the alternative
# values that were tried -- confirm.

# Length of each sliding window, counting the final step that becomes the target.
SEQ_LEN = 6 # 6, 11, 21
# Number of splits passed to TimeSeriesSplit.
FOLD = 5 # 5, 10
# Number of stacked LSTM layers; the model cell picks from a list of three
# layer widths, so values above 3 add no further layers.
LSTM_Layer = 3 # 1, 2, 3
# Timesteps fed to the model per sample: every step of a window except the
# last one, which is held out as the target.
WINDOW_SIZE = SEQ_LEN - 1

### Sliding Window

In [None]:
def to_sequences(data, seq_len):
    """Cut ``data`` into every overlapping window of ``seq_len`` consecutive rows.

    Args:
        data: Array-like whose first axis is time, e.g. shape
            ``(n_samples, n_features)``.
        seq_len: Number of consecutive rows in each window.

    Returns:
        ``numpy.ndarray`` of shape ``(n_windows, seq_len, *data.shape[1:])``
        with ``n_windows = len(data) - seq_len + 1``. The final window, which
        ends on the last row of ``data``, is included. When ``data`` has fewer
        than ``seq_len`` rows the result is empty but keeps the same number of
        axes, so callers can still slice it along all of them.
    """
    data = np.asarray(data)
    # +1 so that the window ending on the very last row is not dropped.
    num_windows = len(data) - seq_len + 1
    if num_windows <= 0:
        # Keep the rank stable instead of returning a shape-(0,) array.
        return np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)
    return np.array([data[start:start + seq_len] for start in range(num_windows)])

def preprocess(data_raw, seq_len, train_split=0.90):
    """Window ``data_raw`` and split the windows chronologically.

    Args:
        data_raw: Series to window; it is passed straight to ``to_sequences``.
            The three-axis slicing below requires the windows to be 3-D.
        seq_len: Window length. The last step of each window is the target and
            the preceding steps are the model input.
        train_split: Fraction of windows, taken from the start, that go to the
            training set; the remainder form the test set.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    sequences = to_sequences(data_raw, seq_len)
    cutoff = int(train_split * sequences.shape[0])

    # Earlier windows train the model, later ones test it.
    train_part = sequences[:cutoff]
    test_part = sequences[cutoff:]

    return (
        train_part[:, :-1, :],
        train_part[:, -1, :],
        test_part[:, :-1, :],
        test_part[:, -1, :],
    )

# Window the scaled closing prices and split them with preprocess' default
# 90/10 train/test ratio.
X_train, y_train, X_test, y_test = preprocess(scaled_close, SEQ_LEN)

## Building LSTM Model with Cross-Validation

In [None]:
# Settings handed to model.fit in the cross-validation loop.
EPOCH = 50
BATCH_SIZE = 32
VAL_SPLIT = 0.1

# Splitter that produces FOLD train/test index pairs.
tscv = TimeSeriesSplit(n_splits=FOLD)

# Join the train and test parts back together along the sample axis so the
# splitter can partition the full set of windows.
inputs = np.concatenate([X_train, X_test])
targets = np.concatenate([y_train, y_test])

In [None]:
# Train and score one fresh model per TimeSeriesSplit fold.
#
# Each of the two containers holds three lists of per-fold values, in the order
# [MAE, MSE, RMSE]: metrics_per_fold on the scaled values, and
# metrics_inverse_per_fold on values mapped back through
# scaler.inverse_transform.
metrics_per_fold = [[] for _ in range(3)]
metrics_inverse_per_fold = [[] for _ in range(3)]

# Widths of the stacked LSTM layers, truncated to the requested depth.
layer_units = [128, 64, 32][:LSTM_Layer]

for fold_no, (train, test) in enumerate(tscv.split(inputs, targets), start=1):
    model = Sequential()

    for i, units in enumerate(layer_units):
        # Decide "last layer" from the layers actually built rather than from
        # LSTM_Layer: if LSTM_Layer exceeds the number of available widths, the
        # final LSTM must still collapse the sequence before Dense(1).
        is_last_layer = i == len(layer_units) - 1
        # Only the first layer declares the input shape; later layers take
        # their input from the layer before them.
        shape_kwargs = {'input_shape': (WINDOW_SIZE, 1)} if i == 0 else {}
        model.add(LSTM(units, return_sequences=not is_last_layer, **shape_kwargs))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error',
                  metrics=['mae', RootMeanSquaredError()],
                  optimizer='adam')

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # A VAL_SPLIT share of this fold's training windows is held out by Keras
    # for validation during fitting.
    history = model.fit(inputs[train],
                        targets[train],
                        epochs=EPOCH,
                        batch_size=BATCH_SIZE,
                        validation_split=VAL_SPLIT,
                        verbose=0)

    # Model Evaluation: the loss followed by the compiled metrics.
    scores = model.evaluate(inputs[test], targets[test], verbose=0)

    # Model Prediction, plus the same values mapped back to the original scale.
    y_hat = model.predict(inputs[test])
    y_test_inverse = scaler.inverse_transform(targets[test])
    y_hat_inverse = scaler.inverse_transform(y_hat)

    # Model Prediction Metrics on the original scale.
    mse_inverse = mean_squared_error(y_test_inverse, y_hat_inverse)
    mae_inverse = mean_absolute_error(y_test_inverse, y_hat_inverse)
    rmse_inverse = np.sqrt(mse_inverse)

    # The same metrics on the scaled values.
    mse = mean_squared_error(targets[test], y_hat)
    mae = mean_absolute_error(targets[test], y_hat)
    rmse = np.sqrt(mse)

    # NOTE(review): the indexing below assumes model.metrics_names exposes three
    # entries (loss, mae, rmse) -- confirm on the installed Keras version.
    print("Model Evaluate (model.evaluate) Result")
    print(f'Score for fold {fold_no}: {model.metrics_names[1]} is {scores[1]}; {model.metrics_names[0]}/mse is {scores[0]}; {model.metrics_names[2]} is {scores[2]}\n')

    print("Model Predict (model.predict) Result")
    print(f'Score for fold {fold_no}: mae is {mae}; mse is {mse}; rmse is {rmse}')
    print(f'Score for fold {fold_no}: mae is {mae_inverse}; mse is {mse_inverse}; rmse is {rmse_inverse}\n')

    metrics_inverse_per_fold[0].append(mae_inverse) # MAE Inverse
    metrics_inverse_per_fold[1].append(mse_inverse) # MSE Inverse
    metrics_inverse_per_fold[2].append(rmse_inverse) # RMSE Inverse

    metrics_per_fold[0].append(mae) # MAE
    metrics_per_fold[1].append(mse) # MSE
    metrics_per_fold[2].append(rmse) # RMSE

In [None]:
# Metric labels, in the same order as the lists inside metrics_per_fold.
title = ['MAE', 'MSE', 'RMSE']

# Print the per-fold values and their mean for each scaled-value metric.
for label, fold_values in zip(title, metrics_per_fold):
    print(f"----------- {label} -----------")
    print(f"Value per Fold : {fold_values}")
    print(f"Average Training Value : {np.mean(fold_values)}\n")

In [None]:
# Print the per-fold values and their mean for each metric computed on the
# inverse-transformed (original-scale) values; labels come from `title`.
for label, fold_values in zip(title, metrics_inverse_per_fold):
    print(f"----------- {label} -----------")
    print(f"Value per Fold : {fold_values}")
    print(f"Average Training Value : {np.mean(fold_values)}\n")

## Model Evaluation

In [None]:
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model Loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'])
# plt.show()

## Prediction

In [None]:
# y_hat = model.predict(X_test)
# y_test_inverse = scaler.inverse_transform(y_test)
# y_hat_inverse = scaler.inverse_transform(y_hat)

In [None]:
# plt.plot(y_test_inverse, label="Actual Price", color='green')
# plt.plot(y_hat_inverse, label="Predicted Price", color='red')
 
# plt.title('United States Dollar Price Prediction')
# plt.xlabel('Time [days]')
# plt.ylabel('Price')
# plt.legend(loc='best')
 
# plt.show()

In [None]:
# def matrices(actual, predicted):
#     mse = mean_squared_error(actual, predicted)
#     mae = mean_absolute_error(actual, predicted)
#     rmse = np.sqrt(mse)
#     print("Mean Absolute Error for prediction :", mae)
#     print("Mean Squared Error for prediction :", mse)
#     print("Root Mean Squared Error for prediction :", rmse)

# # matrices(y_test_inverse, y_hat_inverse)
# print("----------------- Normalized Error -----------------")
# matrices(y_test, y_hat)
# print("----------------- Actual Error -----------------")
# matrices(y_test_inverse, y_hat_inverse)