# Foreign Exchange Forecasting using LSTMs

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.metrics import RootMeanSquaredError

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

## Hyperparameters

In [None]:
# ID of the Google Spreadsheet that the data-loading cell reads from
SHEET_ID = '1JDNv_mArl-GPIpxuWS5GxgVEwvjXocS1MrXGc6TYs8M'
SHEET_NAME = 'SGD/IDR' # worksheet to load; values used so far: 'USD/IDR', 'EUR/IDR', 'SGD/IDR'

SEQ_LEN = 6 # length of each sliding window, inputs plus the one target step; tried: 6, 11, 21
FOLD = 5 # number of TimeSeriesSplit splits; tried: 5, 10
LSTM_Layer = 2 # number of stacked LSTM layers; tried: 1, 2, 3
# Number of input timesteps fed to the model (the window minus its target step)
WINDOW_SIZE = SEQ_LEN - 1

## Data Overview
Exchange-rate data sourced from Google Finance.

In [None]:
from urllib.parse import quote

# Build the CSV-export URL for the selected worksheet. The sheet name is
# free text placed inside a query string, so it is percent-encoded first;
# a name containing a space, '&' or '#' would otherwise corrupt the URL.
sheet_param = quote(SHEET_NAME, safe='')
url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={sheet_param}'
df = pd.read_csv(url)

# Parse the Date column into datetimes using an explicit day/month/year format
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y %H:%M:%S')
df.info()

In [None]:
# Reduce the data to one row per calendar day (first non-null value of each
# column within the day) and index the result by Date for plotting.
daily_groups = df.copy().groupby([pd.Grouper(key='Date', freq='D')])
dfplot = daily_groups.first().reset_index().set_index('Date')

# Plot palette; only the first colour is used by the line chart below.
color_pal = [
    "#F8766D", "#D39200", "#93AA00", "#00BA38",
    "#00C19F", "#00B9E3", "#619CFF", "#DB72FB",
]
_ = dfplot.plot(
    style='',
    figsize=(20, 5),
    color=color_pal[0],
    title=f'{SHEET_NAME} by Days',
)

## Data preprocessing

### Outlier Detection

In [None]:
def replace_outliers(data):
    """
    Caps outliers at the Tukey fences (Q1 - 1.5*IQR and Q3 + 1.5*IQR).

    The quartiles are computed over all values of ``data``. Values below the
    lower fence are replaced by the lower fence and values above the upper
    fence by the upper fence. The input object is left untouched; a new
    object is returned.

    Args:
        data: A pandas Series/DataFrame, numpy array, or sequence of numbers.

    Returns:
        A capped copy of ``data``. pandas inputs come back as the same pandas
        type; anything else comes back as a float numpy array (float so that
        fractional fence values are not truncated for integer input).
    """
    q1, q3 = np.percentile(data, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    if isinstance(data, (pd.Series, pd.DataFrame)):
        # clip() returns a new object, so the caller's data is never modified
        # and no chained-assignment write into a DataFrame column happens.
        return data.clip(lower=lower_bound, upper=upper_bound)

    # Work on a float copy so the caller's array is not mutated.
    capped = np.array(data, dtype=float)
    capped[capped < lower_bound] = lower_bound
    capped[capped > upper_bound] = upper_bound
    return capped

# Cap extreme Close values at the IQR-based bounds computed by replace_outliers
df['Close'] = replace_outliers(df['Close'])

In [None]:
# Box plot of the Close column as a visual outlier check
ax = plt.gca()
ax.boxplot(df['Close'])
ax.set_title(f"{SHEET_NAME} Boxplot for Outlier Detection")
ax.set_xlabel("Close")
ax.set_ylabel("Values")
plt.show()

### Normalize

In [None]:
# Min-max scale the Close prices. The scaler works on 2-D input, so the
# 1-D column is reshaped into a single-feature column vector first.
# NOTE(review): the scaler is fitted on the whole series before the
# time-series folds are created, so every fold's test range contributes to
# the scaling parameters — consider fitting per training fold.
close_price = df['Close'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler()
scaled_close = scaler.fit(close_price).transform(close_price)

In [None]:
# Show the shape of the scaled array followed by its contents
print(
    "----------- Normalize Data Shape -----------",
    scaled_close.shape,
    "\n----------- Normalize Data -----------",
    scaled_close,
    sep="\n",
)

### Sliding Window and Time-Series Cross-Validation (TSCV)

In [None]:
def to_sequences(data, seq_len):
    """
    Slices the data into overlapping windows of equal length.

    Window ``k`` is ``data[k : k + seq_len]`` for ``k`` in
    ``range(len(data) - seq_len)``.

    Args:
        data: A sequence or numpy array of values; the first axis is the one
            that is windowed.
        seq_len: An integer indicating the length of each window.

    Returns:
        A numpy array of shape (len(data) - seq_len, seq_len, *data.shape[1:]).
        When the data is too short to produce any window, an empty array of
        shape (0, seq_len, *data.shape[1:]) is returned so callers can still
        slice along the window and feature axes.
    """
    values = np.asarray(data)
    # NOTE(review): this count never emits the window ending on the final
    # element (that would need "+ 1"). Kept unchanged because the number of
    # samples, and therefore every fold boundary, depends on it — confirm
    # whether dropping the last window is intended.
    n_windows = len(values) - seq_len
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,) and break 3-axis indexing.
        return np.empty((0, seq_len) + values.shape[1:], dtype=values.dtype)
    return np.stack([values[start:start + seq_len] for start in range(n_windows)])

def preprocess(data_raw, seq_len):
    """
    Turns the raw series into model inputs and next-step targets.

    The series is cut into windows with ``to_sequences``; within each window
    the last step is the target and all preceding steps are the input.

    Args:
        data_raw: The series to window, indexed as (time, feature).
        seq_len: An integer indicating the length of each window
            (input steps plus the one target step).

    Returns:
        A tuple of two numpy arrays: (inputs, target).
        inputs has shape (n_windows, seq_len - 1, n_features).
        target has shape (n_windows, n_features).
    """
    windows = to_sequences(data_raw, seq_len)
    features, labels = windows[:, :-1, :], windows[:, -1, :]
    return features, labels
    
# Build the supervised samples: each input holds SEQ_LEN - 1 consecutive
# scaled values and its target is the value that follows them.
inputs, targets = preprocess(scaled_close, SEQ_LEN)

# Time-series cross-validation splitter producing FOLD train/test splits
tscv = TimeSeriesSplit(n_splits=FOLD)

In [None]:
# Report the input/target array shapes of every train/test split
for fold_number, (train, test) in enumerate(tscv.split(inputs, targets), start=1):
    print(f"Fold No - {fold_number}")
    for subset_name, subset_idx in (("Train", train), ("Test", test)):
        print(f"----------- {subset_name} Data Shape -----------")
        print(inputs[subset_idx].shape)
        print(targets[subset_idx].shape)
    print()

## Modeling

### Building LSTM Model with Cross-Validation

In [None]:
# Training configuration shared by every fold
BATCH_SIZE = 32  # passed to model.fit as batch_size
VAL_SPLIT = 0.1  # passed to model.fit as validation_split
EPOCH = 50  # passed to model.fit as epochs

# Per-fold metric accumulators, one inner list per metric:
# index 0 = MAE, index 1 = MSE, index 2 = RMSE.
metrics_per_fold = [[], [], []]  # computed on scaled values
metrics_inverse_per_fold = [[], [], []]  # computed after inverse scaling

# Per-fold actual values, predictions (both inverse-scaled) and fit histories
y_test_per_fold = []
y_hat_inverse_per_fold = []
history_per_fold = []

In [None]:
# Widths of the stacked LSTM layers; only the first LSTM_Layer entries are used.
lstm_units = [128, 64, 32][:LSTM_Layer]

# Train and evaluate one freshly built model per time-series split.
for fold_no, (train, test) in enumerate(tscv.split(inputs, targets)):
    # Reset Keras global state so nothing carries over from the previous fold
    tf.keras.backend.clear_session()

    model = Sequential()
    for i, units in enumerate(lstm_units):
        # Only the first layer needs to declare the input shape.
        layer_kwargs = {'input_shape': (WINDOW_SIZE, 1)} if i == 0 else {}
        # Every LSTM except the last one actually added must return the full
        # sequence so the next LSTM receives 3-D input. The comparison uses
        # len(lstm_units), not LSTM_Layer: with LSTM_Layer > 3 the list is
        # still capped at three layers, and the final one would otherwise
        # keep emitting sequences into the Dense head.
        is_last_lstm = i == len(lstm_units) - 1
        model.add(LSTM(units, return_sequences=not is_last_lstm, **layer_kwargs))
    model.add(Dense(1))

    model.compile(loss='mean_squared_error',
                  metrics=['mae', RootMeanSquaredError()],
                  optimizer='adam')

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no+1} ...')

    history = model.fit(inputs[train],
                        targets[train],
                        epochs=EPOCH,
                        batch_size=BATCH_SIZE,
                        validation_split=VAL_SPLIT,
                        verbose=0)

    history_per_fold.append(history)

    # Model Evaluation on the held-out split (scaled values)
    scores = model.evaluate(inputs[test], targets[test], verbose=0)

    # Model Prediction; actual and predicted values are mapped back to the
    # original price scale with the fitted scaler
    y_hat = model.predict(inputs[test])
    y_test_inverse = scaler.inverse_transform(targets[test])
    y_hat_inverse = scaler.inverse_transform(y_hat)

    y_test_per_fold.append(y_test_inverse)
    y_hat_inverse_per_fold.append(y_hat_inverse)

    # Model Prediction Metrics on the original price scale
    mse_inverse = mean_squared_error(y_test_inverse, y_hat_inverse)
    mae_inverse = mean_absolute_error(y_test_inverse, y_hat_inverse)
    rmse_inverse = np.sqrt(mse_inverse)

    # Same metrics on the scaled values
    mse = mean_squared_error(targets[test], y_hat)
    mae = mean_absolute_error(targets[test], y_hat)
    rmse = np.sqrt(mse)

    # NOTE(review): indexing model.metrics_names[1] and [2] assumes Keras
    # lists each compiled metric as its own entry — confirm on the installed
    # Keras version.
    print("Model Evaluate (model.evaluate) Result")
    print(f'Score for fold {fold_no+1}: {model.metrics_names[1]} is {scores[1]}; {model.metrics_names[0]}/mse is {scores[0]}; {model.metrics_names[2]} is {scores[2]}\n')

    # First line: metrics on scaled values; second line: metrics after inverse scaling
    print("Model Predict (model.predict) Result")
    print(f'Score for fold {fold_no+1}: mae is {mae}; mse is {mse}; rmse is {rmse}')
    print(f'Score for fold {fold_no+1}: mae is {mae_inverse}; mse is {mse_inverse}; rmse is {rmse_inverse}\n')

    metrics_inverse_per_fold[0].append(mae_inverse) # MAE Inverse
    metrics_inverse_per_fold[1].append(mse_inverse) # MSE Inverse
    metrics_inverse_per_fold[2].append(rmse_inverse) # RMSE Inverse

    metrics_per_fold[0].append(mae) # MAE
    metrics_per_fold[1].append(mse) # MSE
    metrics_per_fold[2].append(rmse) # RMSE

## Model Evaluation

In [None]:
# Summarise the scaled-value metrics: the per-fold list and its mean.
# The values in metrics_per_fold were computed on each fold's test indices.
title = ['MAE', 'MSE', 'RMSE']
for metric_name, fold_values in zip(title, metrics_per_fold):
    print(f"----------- {metric_name} -----------")
    print(f"Value per Fold : {fold_values}")
    print(f"Average Training Value : {np.mean(fold_values)}\n")

In [None]:
# Same summary for the metrics computed after inverse scaling
# (reuses the `title` labels defined in the previous cell).
for metric_name, fold_values in zip(title, metrics_inverse_per_fold):
    print(f"----------- {metric_name} -----------")
    print(f"Value per Fold : {fold_values}")
    print(f"Average Training Value : {np.mean(fold_values)}\n")

In [None]:
# One subplot per fold showing that fold's training and validation loss curves.
fig, axs = plt.subplots(FOLD, figsize=(12, FOLD*5))
for i in range(FOLD):
    # Use the history of fold i; indexing with 0 here would draw the first
    # fold's curves in every subplot.
    fold_history = history_per_fold[i].history
    axs[i].plot(fold_history['loss'])
    axs[i].plot(fold_history['val_loss'])

    axs[i].set_title(f'{SHEET_NAME} Model Loss --- Fold {i+1}')
    axs[i].set_xlabel('epoch')
    axs[i].set_ylabel('loss')
    # NOTE(review): the second curve is val_loss from validation_split, not a
    # test-set loss, although the legend calls it 'test'.
    axs[i].legend(['train', 'test'], loc='best')

plt.tight_layout()
# NOTE(review): absolute, machine-specific output directory.
plt.savefig(f'D:/Collage/Courses/Skripsi/Gambar/Hasil Skenario/{SHEET_NAME[:3]} Model Loss_LSTM_{LSTM_Layer}_CV_{FOLD}_Window_{WINDOW_SIZE}.png')
plt.show()

In [None]:
# One subplot per fold comparing actual and predicted prices (original scale)
fig, axs = plt.subplots(FOLD, figsize=(12, FOLD*5))
for i, ax in enumerate(axs):
    curves = (
        (y_test_per_fold[i], "Actual Price", 'green'),
        (y_hat_inverse_per_fold[i], "Predicted Price", 'red'),
    )
    for series, curve_label, curve_color in curves:
        ax.plot(series, label=curve_label, color=curve_color)

    ax.set_title(f'{SHEET_NAME} Price Prediction --- Fold {i+1}')
    ax.set_xlabel('Time [days]')
    ax.set_ylabel('Price')
    ax.legend(loc='best')

plt.tight_layout()
prediction_png = f'D:/Collage/Courses/Skripsi/Gambar/Hasil Skenario/{SHEET_NAME[:3]} Price Prediction_LSTM_{LSTM_Layer}_CV_{FOLD}_Window_{WINDOW_SIZE}.png'
plt.savefig(prediction_png)
plt.show()

In [None]:
from pathlib import Path

# One result row for this run: configuration plus the mean of each
# inverse-scaled metric across folds. 'Split' is left empty (NaN).
error_result = pd.DataFrame(
    [[
        SHEET_NAME,
        LSTM_Layer,
        WINDOW_SIZE,
        np.nan,
        FOLD,
        np.mean(metrics_inverse_per_fold[0]),
        np.mean(metrics_inverse_per_fold[1]),
        np.mean(metrics_inverse_per_fold[2]),
    ]],
    columns=['Type', 'LSTM Layer', 'Window', 'Split', 'CV (Fold)', 'MAE', 'MSE', 'RMSE'],
)

# Append the row to the results workbook. On the very first run the workbook
# does not exist yet, so it is created from this row instead of failing on read.
results_path = Path('Hasil - 2.0.xlsx')
hasil = pd.read_excel(results_path) if results_path.exists() else None
if hasil is None:
    final = error_result
else:
    final = pd.concat([hasil, error_result], ignore_index=True)
final.to_excel(results_path, index=False)

In [None]:
# Play an audio file; presumably an audible "run finished" notification,
# since this is the last cell visible here.
# NOTE(review): the path is an absolute, user-specific Windows location —
# confirm it exists on the machine running this notebook.
import playsound

playsound.playsound('C:/Users/danie/Downloads/Music/iphone_14.mp3')