Training recurrent neural network (RNN) models to predict bilateral foreign exchange rates.

# Setup

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import optuna

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import copy
import json
import os

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
'''Hyperparameters'''
# -- data windowing / splitting --
lookback = 2 * 365               # number of past timesteps in each input window
percent_training_data = 0.9      # fraction of the windows used for training
scale = True                     # whether get_forex_df() applies the scaler to 'rate'

# -- network shape --
model_architecture = 'lstm'      # options: 'lstm', 'gru'
num_lstm_layers = 3
num_lstm_units = 100
dropout_rate = 0.2

# -- optimisation --
loss_func = nn.MSELoss()
learning_rate = 0.001
weight_decay = 0.0
batch_size = 512                 # batch size of the training DataLoader
num_epochs = 50

# Models

In [None]:
class LSTM(nn.Module):
    '''Stacked LSTM followed by a linear layer applied to the last timestep.

    Args:
        input_num_cols: number of features per timestep; also the output width.
        hidden_size: number of hidden units in every LSTM layer.
        num_layers: number of stacked LSTM layers.
        device: kept for backward compatibility and stored on the instance.
            The initial states are now created on the input's device, so this
            value no longer has to match where the model actually lives.
        dropout: dropout probability between stacked layers. When None, the
            notebook-level ``dropout_rate`` hyperparameter is used.
    '''

    def __init__(self, input_num_cols, hidden_size, num_layers=1, device='cpu',
                 dropout=None):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        if dropout is None:
            # fall back to the global hyperparameter, as the original code did
            dropout = dropout_rate
        # add lstm layers
        self.lstm = nn.LSTM(
            input_num_cols,
            hidden_size,
            num_layers,
            # nn.LSTM only applies dropout between layers; with a single layer
            # a non-zero value has no effect and just triggers a warning.
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True)
        # add fully-connected linear layer
        self.fc = nn.Linear(hidden_size, input_num_cols)
        self.device = device

    def forward(self, x):
        '''Return the prediction for the step after the window ``x``.

        ``x`` is indexed as (batch, timestep, feature) because the LSTM is
        built with ``batch_first=True``; the result has shape
        (batch, input_num_cols).
        '''
        state_shape = (self.num_layers, x.shape[0], self.hidden_size)
        # Create the zero initial states on the same device/dtype as the input
        # so a mismatch between `self.device` and the real device cannot crash.
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))

        # keep only the last timestep's output
        return self.fc(out[:, -1, :])

class GRU(nn.Module):
    '''Stacked GRU followed by a linear layer applied to the last timestep.

    Args:
        input_num_cols: number of features per timestep; also the output width.
        hidden_size: number of hidden units in every GRU layer.
        num_layers: number of stacked GRU layers.
        device: kept for backward compatibility and stored on the instance.
            The initial state is now created on the input's device, so callers
            that omit this argument no longer crash when the model is on a GPU.
        dropout: dropout probability between stacked layers. When None, the
            notebook-level ``dropout_rate`` hyperparameter is used (previously
            a literal 0.2 that ignored the hyperparameter).
    '''

    def __init__(self, input_num_cols, hidden_size, num_layers=1, device='cpu',
                 dropout=None):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        if dropout is None:
            # follow the same global hyperparameter the LSTM model uses
            dropout = dropout_rate
        # add gru layers
        self.gru = nn.GRU(
            input_num_cols,
            hidden_size,
            num_layers,
            # nn.GRU only applies dropout between layers; with a single layer
            # a non-zero value has no effect and just triggers a warning.
            dropout=dropout if num_layers > 1 else 0.0,
            batch_first=True)
        # add fully-connected linear layer
        self.fc = nn.Linear(hidden_size, input_num_cols)

        self.device = device

    def forward(self, x):
        '''Return the prediction for the step after the window ``x``.

        ``x`` is indexed as (batch, timestep, feature) because the GRU is
        built with ``batch_first=True``; the result has shape
        (batch, input_num_cols).
        '''
        # zero initial hidden state, created where the input already lives
        h0 = torch.zeros(
            (self.num_layers, x.shape[0], self.hidden_size),
            device=x.device,
            dtype=x.dtype)
        out, _ = self.gru(x, h0)

        # keep only the last timestep's output
        return self.fc(out[:, -1, :])

# Shared Helper Functions

In [None]:
'''Helper Functions'''
def add_multiindex(forex_df):
    '''Index the frame by (slug, date) and fill in dates missing for a slug.

    The result contains every combination of the slugs and dates present in
    ``forex_df``. Missing rows are filled from the same slug only: first with
    the next available observation (back-fill), then, for gaps at the end of
    a slug's history, with its last available observation (forward-fill).

    Args:
        forex_df: DataFrame with at least the columns 'slug' and 'date'.

    Returns:
        DataFrame indexed by a sorted ('slug', 'date') MultiIndex.
    '''
    mi_df = forex_df.set_index(['slug', 'date'])

    new_index = pd.MultiIndex.from_product(
        [mi_df.index.levels[0], mi_df.index.levels[1]], names=['slug', 'date'])
    mi_df = mi_df.reindex(new_index, fill_value=None)

    # Fill per slug. A frame-wide bfill() would copy the first value of the
    # NEXT slug into the trailing gaps of the previous one.
    mi_df = mi_df.groupby(level='slug').bfill()
    # Trailing gaps have nothing after them to back-fill from; carry the last
    # known value forward so no NaN reaches the training data.
    mi_df = mi_df.groupby(level='slug').ffill()

    return mi_df.sort_index()

def convert_to_np(mi_df, num_cols, num_dates):
    '''Pivot a (slug, date)-indexed frame into a float32 array.

    The frame is unstacked so that each slug becomes one row and each date
    one column, reshaped to ``(num_cols, num_dates)`` and then transposed.

    Returns:
        np.ndarray of dtype float32 with shape ``(num_dates, num_cols)``.
    '''
    wide = (
        mi_df.reset_index()
        .set_index(['slug', 'date'])
        .unstack(level='date')
    )

    # one row per slug, one column per date
    by_slug = wide.to_numpy().reshape((num_cols, num_dates))

    # flip to one row per date, one column per slug
    data_np = by_slug.astype(np.float32).T
    print(f"Data shape: {data_np.shape}")
    return data_np

def get_generators(data_np):
    '''Build the train and test DataLoaders of sliding windows.

    The series is first extended along its first axis: it is followed by its
    own reverse, and that pair is repeated 10 times. Every run of ``lookback``
    consecutive rows becomes one sample whose label is the row right after it.
    The first ``percent_training_data`` share of the samples is the training
    set, the rest the test set.

    Side effect: the module-level ``train_data_x``, ``train_data_y``,
    ``test_data_x`` and ``test_data_y`` arrays are overwritten.

    Args:
        data_np: 2-D array; assumed to be (num_dates, num_cols) as produced
            by convert_to_np -- TODO confirm for other callers.

    Returns:
        (train_generator, test_generator); the training loader uses
        ``batch_size``, the test loader yields one sample at a time, and
        neither is shuffled.
    '''
    global train_data_x, train_data_y, test_data_x, test_data_y

    # mirror the series and repeat it to enlarge the dataset
    # NOTE(review): because the series is repeated, the windows at the end
    # (the test split) look like duplicates of training windows -- confirm
    # this is intended.
    series = np.transpose(data_np)
    mirrored = np.concatenate((series, series[:, ::-1]), axis=1)
    data_np = np.transpose(np.tile(mirrored, 10))

    # create the features: row i of `window_idx` selects steps i .. i+lookback-1
    num_windows = len(data_np) - lookback
    window_idx = np.arange(num_windows)[:, None] + np.arange(lookback)
    x = data_np[window_idx]

    # create the labels: the step that follows each window
    y = data_np[lookback:]
    print(f"x shape: {x.shape}")
    print(f"y shape: {y.shape}")

    train_size = int(percent_training_data * x.shape[0])

    train_data_x, test_data_x = x[:train_size], x[train_size:]
    train_data_y, test_data_y = y[:train_size], y[train_size:]

    print(f"Training set shape: {train_data_x.shape}")
    print(f"Test set shape: {test_data_x.shape}")

    train_set = TensorDataset(torch.from_numpy(train_data_x), torch.from_numpy(train_data_y))
    test_set = TensorDataset(torch.from_numpy(test_data_x), torch.from_numpy(test_data_y))

    train_generator = DataLoader(train_set, batch_size=batch_size, shuffle=False)
    test_generator = DataLoader(test_set, batch_size=1, shuffle=False)

    return train_generator, test_generator

def fit_and_predict(model, train_generator, test_generator):
    '''Train ``model`` and roll out predictions over the test horizon.

    Training runs for ``num_epochs`` epochs using the module-level
    ``optimizer``, ``criterion`` and ``device``. Afterwards the model is put
    in eval mode and predicts one step per test sample: the first prediction
    uses the first real test window, every later one uses the most recent
    ``lookback`` rows of the rolling buffer, so the model feeds on its own
    earlier outputs.

    Args:
        model: the network to train.
        train_generator: DataLoader yielding (inputs, labels) batches.
        test_generator: DataLoader yielding one (inputs, labels) sample per
            step; only the first sample's inputs are actually read.

    Returns:
        Array with one predicted row per test sample (an empty list when the
        test loader is empty).
    '''
    for epoch in range(num_epochs):
        model.train()
        loss = None
        for inputs, labels in train_generator:
            inputs, labels = inputs.to(device), labels.to(device)
            # clear the old gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # loss of the last batch of the epoch; NaN if the loader was empty
        last_loss = loss.item() if loss is not None else float('nan')
        print(f"Epoch {epoch+1}, Loss: {last_loss}")

    model.eval()
    prediction_list = []

    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_generator):
            if i == 0:
                # seed the rolling buffer with the first real test window;
                # use num_cols (not a literal 1) to match the branch below
                prediction_list = inputs.numpy().reshape(lookback, num_cols).copy()
                inputs = inputs.to(device)
            else:
                window = copy.deepcopy(prediction_list[-lookback:])
                window = window.reshape((1, lookback, num_cols))
                inputs = torch.from_numpy(window).to(device)
            output = model(inputs).cpu().numpy()
            prediction_list = np.append(prediction_list, output, axis=0)

    # drop the seed window so only predictions remain
    prediction_list = prediction_list[lookback:]
    return prediction_list

def save_model(model, currency_pair):
    '''Save the model weights and the hyperparameters used to train them.

    The weights go to ``<directory>trained/<pair><i>.pt`` where ``i`` is the
    smallest non-negative integer for which that file name does not exist
    yet. The current hyperparameters are written as JSON to
    ``<directory>model_metadata/<pair><i>``. Slashes in the pair name are
    replaced by underscores.
    '''
    safe_name = currency_pair.replace('/', '_')
    dir_models = directory + 'trained/'
    existing = set(os.listdir(dir_models))

    # first version number that is still free
    version = 0
    while safe_name + str(version) + '.pt' in existing:
        version += 1

    torch.save(model.state_dict(), dir_models + safe_name + str(version) + '.pt')

    metadata = dict(
        num_epochs=num_epochs,
        lookback=lookback,
        learning_rate=learning_rate,
        batch_size=batch_size,
        num_lstm_layers=num_lstm_layers,
        num_lstm_units=num_lstm_units,
        dropout_rate=dropout_rate,
        weight_decay=weight_decay,
        scale=scale,
        model_architecture=model_architecture,
    )

    # write dictionary to json file
    metadata_fpath = directory + 'model_metadata/' + safe_name + str(version)
    with open(metadata_fpath, 'w') as json_file:
        json.dump(metadata, json_file)

def check_if_completed(currency_pair):
    '''Return True if a first saved model already exists for this pair.

    Looks in ``<directory>trained/`` for exactly the file name save_model()
    uses for version 0, i.e. ``<pair>0.pt`` with slashes in the pair replaced
    by underscores. An exact match is used instead of a substring test so
    that an unrelated file whose name merely contains ``<pair>0`` is not
    mistaken for a finished model.
    '''
    fname = currency_pair.replace('/', '_') + str(0) + '.pt'
    return fname in os.listdir(directory + 'trained/')

def generate_sine_wave_array(length, wave_frequency):
    '''Return ``sin(2*pi*wave_frequency*t)`` for t = 0 .. length-1.'''
    angular_frequency = 2 * np.pi * wave_frequency
    return np.sin(angular_frequency * np.arange(length))

def generate_monotonically_increasing_array(length):
    '''Return the ramp 0, 0.001, 0.002, ... with ``length`` entries.'''
    return np.arange(length) / 1000

def get_forex_df(fname, scaler):
    '''Load the exchange-rate CSV and derive the 'rate' column.

    'rate' is the mean of 'high' and 'low'. Depending on the module-level
    ``use_dummy_data`` flag it is replaced by a linear ramp (1) or a sine
    wave (2) for testing. When the module-level ``scale`` flag is set,
    ``scaler`` is fitted on the rates and 'rate' holds the scaled values.
    When ``filter_to_one`` is truthy only the 'USD/MWK' rows are kept.

    Args:
        fname: path of the CSV; must contain the columns 'slug', 'date',
            'high', 'low' and 'currency'.
        scaler: object with a scikit-learn style ``fit_transform``; it is
            fitted in place when scaling is enabled.

    Returns:
        (forex_df, unscaled_rates): the frame sorted by slug and date with
        'rate' as first column and without 'high'/'low', and a copy of the
        rates taken before scaling.
    '''
    forex_df = pd.read_csv(fname, usecols=['slug', 'date', 'high', 'low', 'currency'])

    if filter_to_one:
        forex_df = forex_df[forex_df['slug'].isin(['USD/MWK'])].reset_index(drop=True)

    forex_df['date'] = pd.to_datetime(forex_df['date'])
    forex_df = forex_df.sort_values(by=['slug', 'date'], ascending=True)

    # mid-point of the daily range, kept as the first column
    forex_df.insert(0, 'rate', (forex_df['high'] + forex_df['low']) / 2)

    # Dummy data is assigned as a plain array so it lands in row order.
    # Assigning a Series would align on the index, which no longer runs
    # 0..n-1 in row order after the sort above, and scramble the pattern.

    # monotonically-increasing dummy data (for testing purposes)
    if use_dummy_data == 1:
        forex_df['rate'] = generate_monotonically_increasing_array(len(forex_df))

    # wave-shaped dummy data (for testing purposes)
    elif use_dummy_data == 2:
        forex_df['rate'] = generate_sine_wave_array(len(forex_df), 1/10)

    unscaled_rates = forex_df['rate'].copy()

    if scale:
        forex_df['rate'] = scaler.fit_transform(forex_df[['rate']])[:, 0]

    forex_df = forex_df.drop(columns=['high', 'low'])
    return forex_df, unscaled_rates

In [None]:
'''Global Vars'''

# base folder for the input CSV, the saved models and their metadata
directory = '/content/gdrive/MyDrive/Colab Notebooks/exchange_rates/'

# train/test arrays; overwritten by get_generators()
train_data_x = None
train_data_y = None
test_data_x = None
test_data_y = None

# switches read by get_forex_df()
filter_to_one = None
use_dummy_data = 0

# number of series fed to the model at once
num_cols = 1

# prefer the GPU when one is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using: {device}")

# Approach 1

Approach 1: Creating one model and feeding all exchange rates into it

In [None]:
def instantiate_loss_and_opt(num_cols):
    '''Return the loss function and a fresh Adam optimizer for ``model``.

    The optimizer is bound to the module-level ``model`` and configured from
    the ``learning_rate`` and ``weight_decay`` hyperparameters. ``num_cols``
    is accepted but not used.
    '''
    adam = optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
    )
    return loss_func, adam


'''__main__ '''

if __name__ == "__main__":

    scaler = StandardScaler()

    fname = directory + "forex_filtered.csv"
    forex_df, unscaled_rates = get_forex_df(fname, scaler)

    currency_pairs = forex_df['slug'].unique()
    # currency_pairs = forex_df['currency'].unique()

    num_prediction = 30
    forecast = None

    # One shared model that is trained on every currency pair in turn.
    # Build only the requested architecture and hand it the detected device:
    # a hard-coded 'cuda' breaks on CPU-only hosts, and omitting the device
    # leaves the GRU's initial state on the CPU while the model is on the GPU.
    if model_architecture == 'gru':
        model = GRU(num_cols, num_lstm_units, num_lstm_layers, device)
    else:
        model = LSTM(num_cols, num_lstm_units, num_lstm_layers, device)
    model.to(device)

    for currency_pair in currency_pairs:
        # skip pairs that already have a saved model
        if check_if_completed(currency_pair):
            continue

        print(f"Beginning training for: {currency_pair}")
        df = forex_df[forex_df['slug'] == currency_pair]
        df = df[['rate', 'date', 'slug']]

        mi_df = add_multiindex(df)

        num_cols = len(df['slug'].unique())
        num_dates = len(mi_df.index.levels[1])

        data_np = convert_to_np(mi_df, num_cols, num_dates)
        train_generator, test_generator = get_generators(data_np)

        # a fresh optimizer per pair, as before; the model weights carry over
        criterion, optimizer = instantiate_loss_and_opt(num_cols)
        predictions = fit_and_predict(model, train_generator, test_generator)

        save_model(model, currency_pair)

# Approach 2

Approach 2: Creating a separate model for each currency pair

In [None]:
def instantiate_model(num_cols):
    '''Create a fresh model with its loss function and optimizer.

    The architecture is chosen by the module-level ``model_architecture``
    ('gru' selects the GRU, anything else the LSTM). The model is moved to
    the module-level ``device``.

    Args:
        num_cols: number of input (and output) features of the model.

    Returns:
        (model, criterion, optimizer)
    '''
    # Build only the requested architecture, and pass `device` to both:
    # without it the GRU falls back to its 'cpu' default for the initial
    # hidden state, which does not match a model moved to the GPU.
    if model_architecture == 'gru':
        model = GRU(num_cols, num_lstm_units, num_lstm_layers, device)
    else:
        model = LSTM(num_cols, num_lstm_units, num_lstm_layers, device)
    model.to(device)
    criterion = loss_func
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    return model, criterion, optimizer


if __name__ == "__main__":

    # prefer the GPU when one is available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(f"Using: {device}")

    scaler = StandardScaler()

    fname = directory + "forex_filtered.csv"
    forex_df, unscaled_rates = get_forex_df(fname, scaler)

    currency_pairs = forex_df['slug'].unique()

    num_prediction = 30
    forecast = None

    # NOTE(review): unlike Approach 1 this loop never calls save_model(), so
    # only the last pair's model and predictions remain afterwards -- confirm
    # this is intended.
    for currency_pair in currency_pairs:
        # skip pairs that already have a saved model
        if check_if_completed(currency_pair):
            continue

        print(f"Beginning training for: {currency_pair}")
        is_current_pair = forex_df['slug'] == currency_pair
        df = forex_df[is_current_pair][['rate', 'date', 'slug']]

        mi_df = add_multiindex(df)

        num_cols = len(df['slug'].unique())
        num_dates = len(mi_df.index.levels[1])

        data_np = convert_to_np(mi_df, num_cols, num_dates)
        train_generator, test_generator = get_generators(data_np)

        # every pair gets its own freshly initialised model
        model, criterion, optimizer = instantiate_model(num_cols)
        predictions = fit_and_predict(model, train_generator, test_generator)

# Displaying Results

In [None]:
def forecast_future_sequence(num_prediction, model, input_data, num_cols, lookback):
    '''Forecast ``num_prediction`` future steps by feeding predictions back in.

    The last ``lookback`` rows of ``input_data`` seed a rolling buffer. At
    every step the model sees the most recent ``lookback`` rows of the buffer
    and its output is appended, so later steps depend on earlier predictions.
    The model's train/eval mode is not changed here.

    Args:
        num_prediction: number of steps to forecast.
        model: torch module mapping a (1, lookback, num_cols) float32 tensor
            to a (1, num_cols) tensor.
        input_data: array-like holding at least ``lookback`` observations of
            ``num_cols`` values each.
        num_cols: number of features per timestep.
        lookback: window length the model expects.

    Returns:
        Array of shape (num_prediction, num_cols) with the forecast.
    '''
    # Run on the device that holds the model's weights instead of relying on
    # a global `device`; models without parameters are evaluated on the CPU.
    first_param = next(iter(model.parameters()), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    # honour num_cols here as well (previously a hard-coded single column)
    prediction_list = np.asarray(input_data[-lookback:]).reshape((lookback, num_cols))

    with torch.no_grad():
        for _ in range(num_prediction):
            x = copy.deepcopy(prediction_list[-lookback:])
            x = x.reshape((1, lookback, num_cols))
            x = torch.from_numpy(x).to(torch.float32).to(model_device)
            output = model(x)
            prediction_list = np.append(prediction_list, output.cpu().numpy(), axis=0)

    # drop the seed window so only the forecast remains
    return prediction_list[lookback:]

def calculate_mse(y_true, y_pred):
    '''Return the mean of the squared differences between the two inputs.'''
    residual = y_true - y_pred
    return np.mean(np.square(residual))


def graph_predicted_and_actual_historical(predictions, test_data_y):
    '''Plot the last 365 predicted values against the last 365 actual ones.'''
    recent_predicted = predictions[-365:]
    recent_actual = test_data_y[-365:]

    # shared x axis: one tick per plotted prediction
    time_steps = list(range(len(recent_predicted)))
    plt.plot(time_steps, recent_predicted, color='blue', label='Predicted Rates')
    plt.plot(time_steps, recent_actual, color='red', label='Historical Rates')

    plt.xlabel('Day #')
    plt.ylabel('Exchange Rate')
    plt.title('Predicted Values')

    plt.legend()
    plt.show()


def graph_forecasted_sequence(num_prediction, model, lookback, forex_df):
    '''Displaying Forecasted Future Sequence

    Plots the last ``lookback`` rates of ``forex_df`` followed by the
    ``num_prediction`` steps forecast from them by ``model``.
    '''
    num_cols = 1
    initial_lookback = forex_df['rate'][-lookback:].to_numpy()

    forecast = forecast_future_sequence(num_prediction, model, initial_lookback, num_cols, lookback)

    # the forecast continues the x axis right after the observed window
    num_past = len(initial_lookback)
    past_steps = list(range(num_past))
    future_steps = list(range(num_past, num_past + len(forecast)))

    plt.plot(past_steps, initial_lookback, label='Past', color='blue')
    plt.plot(future_steps, forecast[:, 0], label='Predicted', color='red')

    plt.xlabel('Time Steps')
    plt.ylabel('Values')
    plt.title('Actual vs. Predicted Values')

    plt.legend()
    plt.show()

def graph_historical(forex_df):
    '''Plot the whole 'rate' column of ``forex_df`` against its row number.'''
    historical = forex_df['rate']
    time_steps = list(range(len(historical)))

    plt.plot(time_steps, historical, label='Historical', color='red')

    plt.xlabel('Time Steps')
    plt.ylabel('Values')
    # NOTE(review): only historical data is drawn here; this title looks
    # copied from graph_forecasted_sequence -- confirm before changing it.
    plt.title('Actual vs. Predicted Values')

    plt.legend()
    plt.show()