# Foreign Exchange Forecasting using LSTMs

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.metrics import RootMeanSquaredError

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

## Hyperparameters

In [None]:
# Google Spreadsheet ID
SHEET_ID = '1JDNv_mArl-GPIpxuWS5GxgVEwvjXocS1MrXGc6TYs8M'
SHEET_NAME = 'SGD/IDR' # worksheet (currency pair) to load; options: 'USD/IDR', 'EUR/IDR', 'SGD/IDR'

SEQ_LEN = 21 # length of each sliding window, including the final target step; options: 6, 11, 21
SPLIT = 0.90 # fraction of the windows used for training; options: 0.80, 0.90
LSTM_Layer = 3 # number of stacked LSTM layers; options: 1, 2, 3
WINDOW_SIZE = SEQ_LEN - 1 # number of input time steps per sample (the window minus its target step)

## Data Overview
Source: Google Finance

In [None]:
# Build the CSV-export URL for the selected worksheet and load it into a DataFrame
url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'
df = pd.read_csv(url)

# Convert the Date column to datetime using an explicit day/month/year hour:minute:second format
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y %H:%M:%S')
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
df.info()

In [None]:
# Work on a copy: keep the first record of each calendar day and index the
# result by date so the plot gets a datetime x-axis.
dfplot = (
    df.copy()
    .groupby([pd.Grouper(key='Date', freq='D')])
    .first()
    .reset_index()
    .set_index('Date')
)

# Shared colour palette; only the first colour is used for this plot.
color_pal = [
    "#F8766D",
    "#D39200",
    "#93AA00",
    "#00BA38",
    "#00C19F",
    "#00B9E3",
    "#619CFF",
    "#DB72FB",
]
_ = dfplot.plot(
    style='',
    figsize=(20,5),
    color=color_pal[0],
    title=f'{SHEET_NAME} by Days',
)

## Data preprocessing

### Outlier Detection

In [None]:
def replace_outliers(data):
    """
    Cap outliers at the 1.5 * IQR fences.

    Values below ``Q1 - 1.5 * IQR`` are raised to that lower bound and values
    above ``Q3 + 1.5 * IQR`` are lowered to that upper bound. The quartiles
    are computed over all values in ``data``, ignoring NaNs.

    The input is never modified; a capped copy is returned.

    Args:
        data: A numpy array, pandas Series or pandas DataFrame of numeric values.

    Returns:
        A pandas object of the same type as ``data`` when a Series/DataFrame
        is passed, otherwise a float numpy array, with outliers replaced by
        the lower/upper bound value. NaN entries stay NaN.
    """
    values = np.asarray(data, dtype=float)

    # nanpercentile: a single missing value must not turn both bounds into
    # NaN, which would silently disable the capping altogether.
    q1, q3 = np.nanpercentile(values, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    if isinstance(data, (pd.Series, pd.DataFrame)):
        # clip() returns a new object that keeps the index/columns and leaves
        # the caller's data untouched.
        return data.clip(lower=lower_bound, upper=upper_bound)

    # Clipping the float view avoids truncating fractional bounds when the
    # input has an integer dtype.
    return np.clip(values, lower_bound, upper_bound)

# Cap outliers in the 'Close' column at the IQR bounds computed by replace_outliers
df['Close'] = replace_outliers(df['Close'])

In [None]:
# Boxplot of the 'Close' column (drawn after the outlier capping above) to
# inspect the resulting distribution
plt.boxplot(df['Close'])
plt.title(f"{SHEET_NAME} Boxplot for Outlier Detection")
plt.xlabel("Close")
plt.ylabel("Values")
plt.show()

### Normalize

In [None]:
# Min-max scale the closing prices; the fitted scaler is kept so predictions
# can be mapped back to the original price scale later.
scaler = MinMaxScaler()
# reshape(-1, 1): turn the 1-D price vector into a single-feature 2-D column
close_price = df.Close.values.reshape(-1, 1)
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split performed in preprocess(), so the test period's min/max
# influence the scaling of the training data — consider fitting on the
# training portion only.
scaled_close = scaler.fit_transform(close_price)

In [None]:
# Show the shape and then the contents of the scaled series, each under its own banner.
print("----------- Normalize Data Shape -----------", scaled_close.shape, sep="\n")
print("\n----------- Normalize Data -----------", scaled_close, sep="\n")

### Making sequences

In [None]:
def to_sequences(data, seq_len):
    """
    Slice a series into every overlapping window of ``seq_len`` consecutive steps.

    Window ``i`` is ``data[i : i + seq_len]``. All complete windows are
    returned, including the last one that ends on the final observation.

    Args:
        data: A sequence or numpy array of observations, indexed along its
            first axis (e.g. shape ``(n,)`` or ``(n, n_features)``).
        seq_len: A positive integer giving the length of each window.

    Returns:
        A numpy array of shape ``(len(data) - seq_len + 1, seq_len, ...)``,
        where ``...`` are the trailing dimensions of ``data``. If ``data`` is
        shorter than ``seq_len`` the result has zero windows but still the
        full number of dimensions.

    Raises:
        ValueError: If ``seq_len`` is not positive.
    """
    if seq_len <= 0:
        raise ValueError("seq_len must be a positive integer")

    data = np.asarray(data)
    # "+ 1" so the window starting at len(data) - seq_len (the one ending on
    # the most recent observation) is not dropped.
    num_windows = len(data) - seq_len + 1
    if num_windows <= 0:
        # Keep the dimensionality callers index into even when nothing fits.
        return np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)

    return np.array([data[start:start + seq_len] for start in range(num_windows)])

def preprocess(data_raw, seq_len, train_split):
    """
    Turn a series into windowed train/test inputs and targets.

    The series is cut into overlapping windows of ``seq_len`` steps with
    ``to_sequences``. Within each window the first ``seq_len - 1`` steps are
    the model input and the last step is the target. The windows are split
    chronologically: the first ``train_split`` fraction is used for training
    and the remainder for testing.

    Args:
        data_raw: The series, either 1-D (a list or array of values) or 2-D
            with shape ``(n_samples, n_features)``. 1-D input is treated as a
            single feature.
        seq_len: An integer indicating the length of each window (inputs plus
            one target step).
        train_split: A float between 0 and 1 indicating the fraction of
            windows to use for training.

    Returns:
        A tuple of four numpy arrays: (X_train, y_train, X_test, y_test).
        X_train and X_test have shape ``(n, seq_len - 1, n_features)``;
        y_train and y_test have shape ``(n, n_features)``.

    Raises:
        ValueError: If ``data_raw`` is too short to form a single window.
    """
    data_raw = np.asarray(data_raw)
    if data_raw.ndim == 1:
        # The slicing below needs a trailing feature axis.
        data_raw = data_raw.reshape(-1, 1)

    data = to_sequences(data_raw, seq_len)
    if data.shape[0] == 0:
        raise ValueError(
            f"data_raw is too short to form any window of length {seq_len}"
        )

    num_train = int(train_split * data.shape[0])
    # Last step of every window is the target, everything before it the input.
    inputs, targets = data[:, :-1, :], data[:, -1, :]
    X_train, y_train = inputs[:num_train], targets[:num_train]
    X_test, y_test = inputs[num_train:], targets[num_train:]
    return X_train, y_train, X_test, y_test

# Window the scaled series into SEQ_LEN-step sequences and split them into train/test sets
X_train, y_train, X_test, y_test = preprocess(scaled_close, SEQ_LEN, train_split = SPLIT)

In [None]:
# Report the input/target shapes of each split, one banner per split.
print("----------- Train Data Shape -----------", X_train.shape, y_train.shape, sep="\n")
print("----------- Test Data Shape -----------", X_test.shape, y_test.shape, sep="\n")

## Modeling

### Building LSTM model

In [None]:
# Start from a clean Keras state so re-running this cell does not pile up
# layers/graphs from earlier runs.
tf.keras.backend.clear_session()
model = Sequential()

# Unit sizes of the stacked LSTM layers. The slice can never yield more than
# three layers, so the "is this the last LSTM layer?" test below is based on
# the list actually built rather than on LSTM_Layer itself; otherwise a value
# above 3 would leave the last LSTM layer emitting sequences into Dense.
lstm_units = [128, 64, 32][:LSTM_Layer]
for i, units in enumerate(lstm_units):
    is_last = i == len(lstm_units) - 1
    # Only the first layer declares the input shape: WINDOW_SIZE time steps
    # with one feature each.
    layer_kwargs = {'input_shape': (WINDOW_SIZE, 1)} if i == 0 else {}
    # Every LSTM layer except the last returns the full sequence so that the
    # next LSTM layer receives 3-D input.
    model.add(LSTM(units, return_sequences=not is_last, **layer_kwargs))
# Single output unit: the next (scaled) closing price.
model.add(Dense(units=1))

model.summary()

### Training

In [None]:
# Training configuration
BATCH_SIZE = 32
EPOCH = 50
VAL_SPLIT = 0.1  # fraction of the training data passed to fit() as validation_split

# Optimise mean squared error with Adam; MAE and RMSE are tracked as extra metrics.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae', RootMeanSquaredError()],
)

# The returned History object holds the per-epoch losses plotted later.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCH,
    validation_split=VAL_SPLIT,
)

### Model Evaluation

In [None]:
# Evaluate on the held-out test windows; values are on the scaled data and
# cover the compiled loss (MSE) and metrics (MAE, RMSE)
model.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title(f'Model Loss --- {SHEET_NAME}')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve is the validation loss (the slice of the training data held
# out via validation_split), not the test set, so label it accordingly.
plt.legend(['train', 'validation'])
# SHEET_NAME[:3] keeps only the base currency code, so the '/' in the pair
# name does not end up in the file name.
# NOTE(review): the output directory is a hard-coded absolute path and must
# already exist, otherwise savefig fails.
plt.savefig(f'D:/Collage/Courses/Skripsi/Gambar/Hasil Skenario/{SHEET_NAME[:3]} Model Loss_LSTM_{LSTM_Layer}_Split_{SPLIT}_Window_{WINDOW_SIZE}.png')
plt.show()

## Prediction

In [None]:
# Predict the scaled closing price for every test window
y_hat = model.predict(X_test)
# Map targets and predictions back to the original price scale using the
# scaler fitted during normalization
y_test_inverse = scaler.inverse_transform(y_test)
y_hat_inverse = scaler.inverse_transform(y_hat)

In [None]:
# Actual vs. predicted prices over the test period, on the original price scale
plt.plot(y_test_inverse, label="Actual Price", color='green')
plt.plot(y_hat_inverse, label="Predicted Price", color='red')
 
# The title records the experiment configuration (layers, split, window)
plt.title(f'{SHEET_NAME} Price Prediction\nLSTM = {LSTM_Layer}, Split Data = {SPLIT}, Window = {WINDOW_SIZE}', fontsize=15)
plt.xlabel('Time [days]')
plt.ylabel('Price')
plt.legend(loc='best')
# Save the figure under a file name that encodes the configuration.
# NOTE(review): the output directory is a hard-coded absolute path and must already exist.
plt.savefig(f'D:/Collage/Courses/Skripsi/Gambar/Hasil Skenario/{SHEET_NAME[:3]} Price Prediction_LSTM_{LSTM_Layer}_Split_{SPLIT}_Window_{WINDOW_SIZE}.png')
plt.show()

In [None]:
def matrices(actual, predicted):
    """
    Compute and print the regression error metrics for a set of predictions.

    Args:
        actual: The ground-truth values.
        predicted: The predicted values, aligned with ``actual``.

    Returns:
        A tuple ``(mae, mse, rmse)``: mean absolute error, mean squared error
        and root mean squared error (the square root of the MSE).
    """
    abs_err = mean_absolute_error(actual, predicted)
    sq_err = mean_squared_error(actual, predicted)
    root_sq_err = np.sqrt(sq_err)

    # Report in the fixed order MAE, MSE, RMSE.
    report = (
        ("Mean Absolute Error for prediction :", abs_err),
        ("Mean Squared Error for prediction :", sq_err),
        ("Root Mean Squared Error for prediction :", root_sq_err),
    )
    for label, value in report:
        print(label, value)

    return abs_err, sq_err, root_sq_err

# Print the experiment configuration, then the errors on the scaled data
# followed by the errors on the original price scale
print(f"LSTM = {LSTM_Layer}, Split Data = {SPLIT}, Window = {WINDOW_SIZE}")
print("\n----------------- Normalized Error -----------------")
mae, mse, rmse = matrices(y_test, y_hat)
print("\n----------------- Actual Error -----------------")
# These inverse-transformed metrics are the ones written to the results workbook
mae_inverse, mse_inverse, rmse_inverse = matrices(y_test_inverse, y_hat_inverse)

In [None]:
# One-row summary of this run: configuration plus the errors on the original
# price scale. 'CV (Fold)' is left empty (NaN) for this run.
error_result = pd.DataFrame([[SHEET_NAME, LSTM_Layer, WINDOW_SIZE, SPLIT, np.nan, mae_inverse, mse_inverse, rmse_inverse]],
                            columns=['Type', 'LSTM Layer', 'Window', 'Split', 'CV (Fold)', 'MAE', 'MSE', 'RMSE'])

# Append the row to the running results workbook. On the very first run the
# workbook does not exist yet; in that case it is created from this row alone
# instead of failing with FileNotFoundError.
try:
    hasil = pd.read_excel('Hasil - 2.0.xlsx')
except FileNotFoundError:
    hasil = None

if hasil is None:
    final = error_result
else:
    final = pd.concat([hasil, error_result], ignore_index=True)
final.to_excel('Hasil - 2.0.xlsx', index=False)