# One model, one currency pair, all study periods

### Imports

In [None]:
import os
import time
import warnings
import math

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from IPython.display import Image
from keras.utils import plot_model, to_categorical
from livelossplot import PlotLossesKeras
from keras_sequential_ascii import keras2ascii 

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, GRU, Dropout
from keras.layers import CuDNNLSTM, CuDNNGRU
from keras.callbacks import EarlyStopping, ModelCheckpoint

### Display options

In [None]:
# Notebook display settings: default matplotlib style, and printed
# DataFrames truncated to at most 5 rows.
plt.style.use('default')
pd.options.display.max_rows = 5

## Function definitions

### Data preprocessing
#### Scaling

In [None]:
def scale_data(time_series, scaler, train_len):
    """
    Scale a time series without leaking trading-period information.

    The first train_len observations form the training set, the rest the
    trading (= test) set. The scaler (e.g. sklearn's MinMaxScaler or
    StandardScaler) is fitted on the training part only and then applied
    to both parts.

    Returns the scaled series as one column vector (training part followed
    by trading part) and the fitted scaler, which can be used for the
    inverse transformation later on.
    """

    # column vectors of shape (n_observations, 1), as the scaler expects
    train_col = time_series[:train_len].values
    train_col = train_col.reshape(train_col.shape[0], 1)
    trade_col = time_series[train_len:].values
    trade_col = trade_col.reshape(trade_col.shape[0], 1)

    # the scaler only ever sees the training data when being fitted
    fitted_scaler = scaler.fit(train_col)

    # ... but is applied to both parts, training part first
    scaled_parts = [fitted_scaler.transform(part)
                    for part in (train_col, trade_col)]

    return np.concatenate(scaled_parts, axis=0), fitted_scaler

#### Input sequence creation

In [None]:
def _encode_targets(scaled_targets, fitted_scaler, targets):
    """
    Turns scaled target returns into the requested target encoding.
    "regression" targets are returned unchanged (still scaled).
    """
    if "classification" not in targets:
        return scaled_targets

    # the sign is taken from the real (unscaled) returns
    is_non_negative = fitted_scaler.inverse_transform(scaled_targets) >= 0

    # one-hot encoding: col 0: returns < 0, col 1: returns >= 0
    # num_classes is fixed so that a set containing only one class
    # still yields two columns
    one_hot = to_categorical(is_non_negative, num_classes=2)

    if targets == "classification_1D":
        # if real returns >= 0: 1, else: 0 (only take col 1)
        return one_hot[:, 1]
    return one_hot


def create_input_sequences(time_series, fitted_scaler, train_len, seq_len,
                           targets="classification_1D", pred_steps=1):
    """
    Converts a (scaled) time series to a supervised problem for recurrent
    neural networks:
    Creates the X's (windows of length seq_len)
    and the respective y's (the observation pred_steps steps after the windows)
    for both the train and trade set.

    time_series: scaled series, e.g. the column vector returned by scale_data
    fitted_scaler: scaler used to recover the real returns for the class labels
                   (not used for targets="regression")
    train_len: number of observations belonging to the training period
    targets: "regression", "classification_1D" (sparse, value in [0, 1]),
    "classification_2D" (one-hot encoding)

    Returns [x_train, y_train, x_trade, y_trade]; the x's are 3D arrays of
    shape (samples, seq_len, 1). A window belongs to the training set if its
    target lies within the first train_len observations.

    Raises ValueError if the series is too short for a single window.
    """

    window_len = seq_len + pred_steps
    n_windows = len(time_series) - window_len + 1
    if n_windows < 1:
        raise ValueError(
            f"time series of length {len(time_series)} is too short for "
            f"seq_len={seq_len} and pred_steps={pred_steps}")

    # only complete windows: seq_len inputs followed by pred_steps future
    # observations, the last of which is the target
    all_windows = np.array([time_series[i:i + window_len]
                            for i in range(n_windows)])

    # first window whose target lies in the trading period
    # (equals train_len - seq_len for pred_steps=1)
    split_at_row = int(train_len - seq_len - pred_steps + 1)

    train_windows = all_windows[:split_at_row]
    trade_windows = all_windows[split_at_row:]

    # inputs are the first seq_len steps only, so that no observation
    # between the window and the target leaks into the inputs
    x_train = train_windows[:, :seq_len]
    x_trade = trade_windows[:, :seq_len]

    y_train = _encode_targets(train_windows[:, -1], fitted_scaler, targets)
    y_trade = _encode_targets(trade_windows[:, -1], fitted_scaler, targets)

    # reshape seq.s into 3D: (samples, sequence_length/time steps, features)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_trade = np.reshape(x_trade, (x_trade.shape[0], x_trade.shape[1], 1))

    return [x_train, y_train, x_trade, y_trade]

#### Scaling and sequence creation

In [None]:
def data_prep(time_series, scaler, train_len, seq_len,
              targets="classification_1D", pred_steps=1):
    """
    Full data preparation for one study period:
    first scales the series (scaler fitted on the training part only),
    then cuts the scaled series into input sequences and targets.

    Returns [scaled_ts, fitted_scaler, x_train, y_train, x_trade, y_trade].
    """
    # step 1: scaling; the fitted scaler is kept for inverse scaling
    scaled_ts, fitted_scaler = scale_data(time_series, scaler, train_len)

    # step 2: inputs and targets for the training and the trading data
    sequences = create_input_sequences(scaled_ts, fitted_scaler,
                                       train_len, seq_len,
                                       targets=targets,
                                       pred_steps=pred_steps)
    x_train, y_train, x_trade, y_trade = sequences

    return [scaled_ts, fitted_scaler, x_train, y_train, x_trade, y_trade]

### Model building functions
#### Feedforward network (Multi-layer perceptron)

In [None]:
def build_fnn(input_dim, hidden_layers, neurons, dropout, loss,
              output_activation, optimizer='adam', summary=False):
    """
    Builds and compiles a feedforward neural network model
    for binary classification.

    input_dim: number of observations (for comparison with RNNs: sequence length)
    hidden_layers: number of dense hidden layers (at least one is always added)
    neurons: units per hidden layer
    dropout: dropout rate applied to the inputs and after every hidden layer
    loss, output_activation, optimizer: passed on to Keras
    summary: if True, prints the layers and parameters

    Returns the compiled model.
    """

    model = Sequential()

    # input dropout
    # The input shape has to be declared on the FIRST layer: otherwise the
    # model stays unbuilt until fit() and model.summary() raises.
    model.add(Dropout(dropout, input_shape=(input_dim,)))

    # hidden layers, each followed by dropout
    for _ in range(max(hidden_layers, 1)):
        model.add(Dense(neurons, activation='relu'))
        model.add(Dropout(dropout))

    # output layer
    model.add(Dense(1, activation=output_activation))

    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    # print summary of layers and parameters
    if summary:
        model.summary()

    return model

#### Recurrent Neural Networks (Simple RNN, LSTM, GRU)

In [None]:
def build_rnn(rnn_type, input_shape, hidden_layers, neurons, dropout, loss,
              output_activation, optimizer='adam', summary=False):
    """
    Builds and compiles a recurrent neural network model with a single
    output unit (compiled with binary accuracy as metric).

    rnn_type: recurrent layer class, e.g. SimpleRNN, LSTM, GRU,
    CuDNNLSTM, CuDNNGRU
    (see https://keras.io/layers/recurrent/)
    input_shape: (sequence length, number of features)
    hidden_layers: number of stacked recurrent layers (at least one is added)
    neurons: units per recurrent layer
    dropout: dropout rate applied to the inputs and after every hidden layer
    loss, output_activation, optimizer: passed on to Keras
    summary: if True, prints the layers and parameters

    Returns the compiled model.
    """

    model = Sequential()

    # input dropout
    # The input shape has to be declared on the FIRST layer: otherwise the
    # model stays unbuilt until fit() and model.summary() raises.
    model.add(Dropout(dropout, input_shape=input_shape))

    n_recurrent = max(hidden_layers, 1)
    for layer_no in range(n_recurrent):
        # every recurrent layer but the last one hands the full sequence
        # to the next recurrent layer; the last one only returns its
        # final state for the dense output layer
        is_last = layer_no == n_recurrent - 1
        model.add(rnn_type(neurons, return_sequences=not is_last))
        model.add(Dropout(dropout))

    # output layer
    model.add(Dense(1, activation=output_activation))

    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    # print summary of layers and parameters
    if summary:
        model.summary()

    return model

### Model Training

In [None]:
def training_one_period(model, x_train, y_train, batch_size, max_epochs=100, 
             val_split=0.2, verbose=1, patience=10):
    """
    Fits a compiled model on the training data of one study period.

    The last val_split share of the training data is used for validation.
    Training stops early once the validation loss has not improved for
    `patience` epochs; the best weights are then restored. The losses are
    plotted live while training.

    Returns the trained model and the training time in seconds.
    """
    started_at = time.time()

    # stop on stagnating validation loss and roll back to the best epoch
    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=patience,
                                   verbose=verbose,
                                   mode='auto',
                                   restore_best_weights=True)
    # live plot of the loss curves
    live_plot = PlotLossesKeras()
    # (a ModelCheckpoint callback could be added to this list 
    # in order to save the best weights to disk)

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=max_epochs,
              verbose=verbose,
              validation_split=val_split,
              callbacks=[early_stopping, live_plot])

    training_time = time.time() - started_at

    if verbose > 0:
        print(f"Time: {round(training_time/60)} minutes")

    return model, training_time

In [None]:
def training_all_periods(currency, hidden_layer_type, hidden_layers, neurons, dropout):
    """
    Trains and evaluates one model specification (type and number of hidden
    layers, neurons per hidden layer, dropout rate) on every study period
    of one currency pair.

    ATTENTION:
    This function mainly exists to keep the later training cells readable.
    It is not generic: it reads variables defined outside of the function
    (returns, scaler, train_len, trade_len, sequence_len, batch_size,
    max_epochs, validation_split, verbose, patience, metrics, model_str)
    and writes into the global results_dict.
    Use only after all of these have been defined.

    Returns results_dict.
    """

    # timing
    start_all = time.time()

    # returns of the chosen currency pair, without missing values
    ts = returns[currency].dropna()

    # number of study periods that fit into the series
    study_periods = int((len(ts) - train_len) / trade_len)

    # one study period = training window followed by trading window
    period_len = train_len + trade_len

    # FNNs are built and fed differently from the recurrent models
    is_feedforward = hidden_layer_type == Dense

    # settings shared by both model builders
    build_settings = dict(hidden_layers=hidden_layers,
                          neurons=neurons,
                          dropout=dropout,
                          loss='binary_crossentropy',
                          output_activation='sigmoid',
                          optimizer='adam',
                          summary=False)

    for period_no in reversed(range(study_periods)):

        #### Data Preprocessing ####

        # cut out the study period, counted from the end of the series
        sp_stop = len(ts) - period_no * trade_len
        time_series = ts[sp_stop - period_len : sp_stop]

        # scaling and creating a supervised problem
        prepared = data_prep(time_series=time_series,
                             scaler=scaler,
                             train_len=train_len,
                             seq_len=sequence_len,
                             targets='classification_1D',
                             pred_steps=1)
        scaled_ts, fitted_scaler, x_train, y_train, x_trade, y_trade = prepared

        #### Model building ####

        if is_feedforward:
            model_to_train = build_fnn(input_dim=sequence_len, **build_settings)

            # FNNs don't use sequences as inputs: flatten to 2D.
            # x_trade keeps its generic name for the generic evaluation below.
            x_fit = np.reshape(x_train, (x_train.shape[0], x_train.shape[1]))
            x_trade = np.reshape(x_trade, (x_trade.shape[0], x_trade.shape[1]))
        else:
            model_to_train = build_rnn(rnn_type=hidden_layer_type,
                                       input_shape=(sequence_len, 1),
                                       **build_settings)
            x_fit = x_train

        #### Model training ####

        trained_model, training_time = training_one_period(model_to_train,
                                                           x_fit,
                                                           y_train,
                                                           batch_size=batch_size,
                                                           max_epochs=max_epochs,
                                                           val_split=validation_split,
                                                           verbose=verbose,
                                                           patience=patience)

        # progress message for every fifth period
        if (period_no + 1) % 5 == 0:
            print(f'{currency}, period {period_no + 1}/{study_periods}, {round(training_time/60, 1)} min')

        #### Evaluation ####

        # log_loss, accuracy, roc_auc, profits, stdev, sharpe ratio
        results_this_model = list(evaluation(model=trained_model,
                                             time_series=time_series,
                                             x_true=x_trade,
                                             y_true=y_trade))

        # ... followed by the training time
        results_this_model.append(training_time)

        # one cell per metric in this model's row of the results dataframe
        for j, metric in enumerate(metrics):
            results_dict[currency].loc[(model_str, period_no + 1), metric] = results_this_model[j]

    # total training time for all study periods
    print(f'Done. Time: {round((time.time() - start_all)/60)} minutes')

    return results_dict

### Evaluation
#### Economic evaluation

In [None]:
def trading_strategy(y_true_returns, y_pred, midpoint=0.5, threshold=0):
    """
    Evaluates a simple long/short trading strategy:
    go long when the prediction is on or above midpoint + threshold
    (Default: 0.5 + 0), go short when it is below midpoint - threshold,
    and do not trade predictions in between.
    threshold = 0 (Default) means trading every prediction.

    y_true_returns: realised returns, aligned with y_pred
                    (surplus trailing entries are ignored)
    y_pred: predicted probabilities of a non-negative return

    Returns (profits, stdev, sharpe_ratio):
    profits: compounded return over all trades (0 if nothing was traded)
    stdev: standard deviation of the per-trade returns (NaN without trades)
    sharpe_ratio: mean / stdev of the per-trade returns; NaN where it is
                  undefined (no trades or zero standard deviation)

    Raises IndexError if there are fewer returns than predictions.
    """

    predictions = np.asarray(y_pred, dtype=float).ravel()
    realised = np.asarray(y_true_returns, dtype=float).ravel()

    if realised.shape[0] < predictions.shape[0]:
        raise IndexError(
            f"{predictions.shape[0]} predictions but only "
            f"{realised.shape[0]} realised returns")
    realised = realised[:predictions.shape[0]]

    # long has priority over short (only matters for negative thresholds)
    go_long = predictions >= midpoint + threshold
    go_short = ~go_long & (predictions < midpoint - threshold)
    traded = go_long | go_short

    # a short position earns the negative of the realised return
    strategy_returns = np.where(go_long, realised, 0 - realised)[traded]

    # compounded return; the empty product is 1, i.e. zero profit
    profits = float(np.prod(1 + strategy_returns) - 1)

    if strategy_returns.size == 0:
        return profits, float('nan'), float('nan')

    stdev = float(np.std(strategy_returns))
    if stdev > 0:
        sharpe_ratio = float(np.mean(strategy_returns) / stdev)
    else:
        # avoids a division by zero (and a meaningless infinite ratio)
        sharpe_ratio = float('nan')

    return profits, stdev, sharpe_ratio

#### Evaluation metrics

In [None]:
def evaluation(model, time_series, x_true, y_true):
    """
    Evaluates a model's predictions on the trading data of a study period.

    model: trained model, compiled with a single metric (accuracy)
    time_series: unscaled returns of the study period; its last
                 len(y_true) observations are the trading-period returns
    x_true: trading inputs in the shape the model expects
    y_true: true binary labels of the trading period

    Returns (log_loss, accuracy, roc_auc, profits, stdev, sharpe_ratio).

    Raises ValueError if there are more labels than observations.
    """
    # get true returns (i.e. not the binary variable y_true)
    # from time_series (before scaling); the length of the trading period
    # follows from the labels instead of being hard-coded
    n_trade = len(y_true)
    if n_trade > len(time_series):
        raise ValueError(
            f"{n_trade} labels but only {len(time_series)} observations")
    y_trade_returns = time_series[len(time_series) - n_trade:].values

    # predicted probabilities (single output unit -> first column)
    y_pred = model.predict(x_true, verbose=0)[:, 0]

    # profits: true returns, predicted probabilities
    profits, stdev, sharpe_ratio = trading_strategy(y_trade_returns, y_pred)

    # log loss and accuracy: x_trade sequences, true binary labels
    log_loss, accuracy = model.evaluate(x_true, y_true, verbose=0)

    # area under ROC curve: true binary labels, predicted probabilities
    roc_auc = roc_auc_score(y_true, y_pred)

    return log_loss, accuracy, roc_auc, profits, stdev, sharpe_ratio

### Helper functions
#### Listproduct

In [None]:
def listproduct(lst):
    """
    Multiplies all elements of a list with each other
    and returns the result (1 for an empty list).
    """
    running_product = 1
    for factor in lst:
        running_product *= factor
    return running_product

#### Multi-index dataframe for results

In [None]:
def create_results_df(study_periods, metrics, models=['FNN', 'SRNN', 'LSTM', 'GRU']):
    """
    Returns a multi-index pd.DataFrame with '-' as placeholder in each cell.
    Columns: evaluation metrics
    Row levels: models (level 0), study periods (level 1, numbered from 1)
    """

    # study periods are numbered 1, ..., study_periods
    period_numbers = list(range(1, study_periods + 1))

    # one row per (model, study period) combination
    row_index = pd.MultiIndex.from_product([models, period_numbers],
                                           names=['Model', 'Study period'])

    # placeholder-filled results dataframe
    return pd.DataFrame(data='-', index=row_index, columns=metrics)

#### Dataframe aggregation by model

In [None]:
def aggregate_results_by_model(granular_df, models=['FNN', 'SRNN', 'LSTM', 'GRU']):
    """
    Aggregates the values in a granular dataframe
    (data for every study period) by model.

    granular_df: results with row levels (model, study period) and one
                 column per metric
    models: models (level 0 row labels) to aggregate

    Returns a dataframe with one row per model and the columns of
    granular_df, holding the mean over the study periods. Cells that are not
    numeric (e.g. the '-' placeholder of periods that were never trained)
    are ignored in the mean.
    """
    # The metric columns are taken from the dataframe itself instead of
    # relying on a global variable.
    per_model_means = {}
    for nn in models:
        # placeholders become NaN, which mean() skips
        numeric = granular_df.loc[nn].apply(pd.to_numeric, errors='coerce')
        per_model_means[nn] = numeric.mean(axis=0)

    if not per_model_means:
        return pd.DataFrame(columns=granular_df.columns)

    # models as rows, metrics as columns
    return pd.concat(per_model_means, axis=1).T

## Data

In [None]:
# Read the FED exchange rate file (column names in row 3 of the file,
# dates in the first column), drop the first two data rows and
# label every column as a currency pair or index.
dataset_raw = (
    pd.read_csv('exchange_rates_FED.csv', header=3, index_col=0)
    .iloc[2:, :]
    .add_prefix('USD/')
    .rename(index=str,
            columns={"USD/USD": "EUR/USD",
                     "USD/USD.1": "GBP/USD",
                     "USD/USD.2": "AUD/USD",
                     "USD/USD.3": "NZD/USD",
                     "USD/Unnamed: 19": "NBDI",
                     "USD/Unnamed: 20": "NMCDI",
                     "USD/Unnamed: 21": "NOITPI"})
)

dates = dataset_raw.index  # save index

In [None]:
# to numeric
# Numeric price table with the columns of the raw data set;
# entries that cannot be parsed as numbers become NaN.
prices = pd.DataFrame(columns=dataset_raw.columns)
for col in dataset_raw.columns:
    prices[col] = pd.to_numeric(
        dataset_raw[col].astype(str),
        errors="coerce",
    )

### Dataset overview
- 12170 rows (days)
    - start date: January 4, 1971
    - end date: August 25, 2017
- 26 columns (23 currencies vs. USD, plus 3 indices*)
    - series of different length, rest NaN
    - e.g. EUR/USD starts in 1999
    - most other main currency pairs in 1971

    *(Nominal Broad Dollar Index, Nominal Major Currencies Dollar Index, Nominal Other Important Trading Partners Dollar Index)

#### Create one-day returns

- create a separate dataframe for daily returns
- skip `NaN`s instead of padding them with zeroes - that would mislead the models with artificial data

In [None]:
# Daily returns, same layout as the price table.
returns = pd.DataFrame(index=prices.index, columns=prices.columns)

for col in returns.columns:
    # percentage change between consecutive *observed* prices;
    # days without a price stay NaN in the returns table
    returns[col] = prices[col].dropna().pct_change()

## Modeling and prediction
### Parameters
#### Fixed

In [None]:
# parameters for data preprocessing
# data preprocessing: days per training window, days per trading window,
# and length of one input sequence
train_len, trade_len, sequence_len = 750, 250, 240
scaler = MinMaxScaler(feature_range=(-1, 1))

# evaluation metrics, in the order in which the results are stored
metrics = [
    'Log loss',
    'Accuracy',
    'AUC',
    'Returns',
    'Standard deviation',
    'Sharpe ratio',
    'Time',
]

# model alternatives: layer classes and their labels (same order)
cell_types = [Dense, SimpleRNN, LSTM, GRU]
# GPU alternative: cell_types = [Dense, SimpleRNN, CuDNNLSTM, CuDNNGRU]
models_str = ['FNN', 'SRNN', 'LSTM', 'GRU']

# currency pair alternatives
currencies = ["EUR/USD", "GBP/USD", "USD/JPY", "USD/CHF", "USD/CAD"]

#### Create structure to store results

In [None]:
# empty dictionary for results: one df for each currency
# results storage: one placeholder-filled dataframe per currency pair
results_dict = {}

for curr in currencies:
    # number of study periods that fit into this pair's return series
    study_periods = int((len(returns[curr].dropna()) - train_len) / trade_len)
    results_dict[curr] = create_results_df(study_periods, metrics)

#### Model Choice

In [None]:
# define index combination
# positions of the chosen model (h) and currency pair (c)
# in the lists of alternatives
h, c = 2, 0

# model class, model label and currency pair used for training/testing
hidden_layer_type, model_str = cell_types[h], models_str[h]
currency = currencies[c]

#### Tuned parameters

In [None]:
# hyperparameters for model building
# model building: number of hidden layers, neurons per layer, dropout rate
hidden_layers, neurons, dropout = 3, 1000, 0

# model training
batch_size, max_epochs = 64, 100
validation_split = 0.2
verbose = 0
patience = 10

## Training

In [None]:
# train and evaluate the chosen model on all study periods of the chosen pair
results_dict = training_all_periods(currency=currency,
                                    hidden_layer_type=hidden_layer_type,
                                    hidden_layers=hidden_layers,
                                    neurons=neurons,
                                    dropout=dropout)

#### Aggregate results

In [None]:
results_agg_dict = {}

# Aggregate the currency pair that was actually trained. The leftover loop
# variable `curr` always points at the last entry of `currencies` and only
# coincides with `currency` if that pair was chosen.
results_agg_dict[currency] = aggregate_results_by_model(results_dict[currency],
                                                        models=[model_str])

# # when having trained multiple currency pairs
# for c_pair in currencies:
#     results_agg_dict[c_pair] = aggregate_results_by_model(results_dict[c_pair], models=models_str)

#### Concatenate to global dataframe

In [None]:
results_global = pd.concat(results_agg_dict)

# The file name is derived from the trained model and currency pair, so the
# results cannot end up in a file labelled with another model or pair.
# The '/' of the pair name is replaced: it would be read as a directory.
pair_tag = currency.replace('/', '-')
filename = f'results/results_{model_str}_{pair_tag}_{hidden_layers}hl_{neurons}n_{dropout}d.csv'

# make sure the target directory exists before writing
os.makedirs(os.path.dirname(filename), exist_ok=True)
results_global.to_csv(filename)
results_global