# Naïve Benchmark

### Imports

In [1]:
import os
import time
import warnings
import math

import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, roc_auc_score

## Function definitions

### Evaluation
#### Economic evaluation

In [5]:
def trading_strategy(y_true_returns, y_pred, midpoint=0.5, threshold=0):
    """
    Evaluate a simple long/short trading strategy driven by predictions.

    For every prediction a position is chosen:

    - long  (realizes ``+r``) when ``y_pred >= midpoint + threshold``
    - short (realizes ``-r``) when ``y_pred <  midpoint - threshold``
    - no trade otherwise (only possible when ``threshold > 0``)

    ``threshold = 0`` (default) means trading every prediction.

    Both inputs are read strictly by position, so pandas Series with any
    kind of index (dates, strings, integers not starting at 0) can be
    passed without label look-ups going wrong.

    Parameters
    ----------
    y_true_returns : array-like
        Realized one-period returns; must be at least as long as ``y_pred``.
        Elements beyond ``len(y_pred)`` are ignored.
    y_pred : array-like
        Predictions (probabilities or 0/1 / boolean labels), one per period.
    midpoint : float, default 0.5
        Center of the decision band.
    threshold : float, default 0
        Half-width of the no-trade band around ``midpoint``.

    Returns
    -------
    profits : float
        Compounded return over all executed trades, ``prod(1 + r_i) - 1``
        (0.0 when no trade is executed).
    stdev : float
        Standard deviation of the per-trade returns (``np.std`` default,
        i.e. population standard deviation); nan when no trade is executed.
    sharpe_ratio : float
        Mean per-trade return divided by ``stdev`` (no risk-free rate, not
        annualized); nan when undefined (no trades or zero dispersion).

    Raises
    ------
    ValueError
        If ``y_true_returns`` is shorter than ``y_pred``.
    """
    # plain 1-D float arrays => purely positional access
    realized = np.asarray(y_true_returns, dtype=float).ravel()
    signals = np.asarray(y_pred, dtype=float).ravel()

    if realized.size < signals.size:
        raise ValueError(
            "y_true_returns must contain at least one return per prediction"
        )
    realized = realized[:signals.size]

    # long takes precedence over short, mirroring an if/elif decision
    go_long = signals >= midpoint + threshold
    go_short = ~go_long & (signals < midpoint - threshold)

    # per-trade returns in chronological order; untraded periods are dropped
    signed = np.where(go_long, realized, 0 - realized)
    returns = signed[go_long | go_short]

    # empty product is 1, so no trades => zero profit
    profits = np.prod(1 + returns) - 1

    if returns.size == 0:
        return profits, float('nan'), float('nan')

    stdev = np.std(returns)
    # the ratio is undefined without dispersion; avoid a division by zero
    sharpe_ratio = np.mean(returns) / stdev if stdev > 0 else float('nan')

    return profits, stdev, sharpe_ratio

### Helper functions
#### Listproduct

In [24]:
def listproduct(lst):
    """
    Multiply all elements of ``lst`` together and return the result.

    An empty ``lst`` yields 1 (the empty product).
    """
    running = 1
    for factor in lst:
        running *= factor
    return running

#### Multi-index dataframe for results

In [25]:
def create_results_df(study_periods, metrics, models=None):
    """
    Returns a multi-index pd.DataFrame filled with the placeholder '-'
    in each cell.

    Columns: evaluation metrics
    Row levels: models (level 0, named 'Model'),
                study periods 1..study_periods (level 1, named 'Study period')

    Parameters
    ----------
    study_periods : int
        Number of study periods; one row per model and period is created.
    metrics : list of str
        Column labels.
    models : sequence of str, optional
        Model names for the outer row level.
        Defaults to ['FNN', 'SRNN', 'LSTM', 'GRU'].
    """
    # build the default inside the call so no list object is shared
    # between invocations
    if models is None:
        models = ['FNN', 'SRNN', 'LSTM', 'GRU']

    # multi-index: every model combined with every study period
    idx = pd.MultiIndex.from_product(
        [models, list(range(1, study_periods + 1))],
        names=['Model', 'Study period'],
    )

    # placeholder-filled results dataframe
    return pd.DataFrame('-', index=idx, columns=metrics)

## Data

In [26]:
# Read the exchange-rate CSV (column labels taken from file row 3, first
# column as row index), discard the first two data rows, prefix every column
# with 'USD/' and give a handful of columns their final names.
# Row labels are converted to str.
dataset_raw = (
    pd.read_csv('data/exchange_rates_FED.csv', header=3, index_col=0)
    .iloc[2:, :]
    .add_prefix('USD/')
    .rename(
        index=str,
        columns={
            "USD/USD": "EUR/USD",
            "USD/USD.1": "GBP/USD",
            "USD/USD.2": "AUD/USD",
            "USD/USD.3": "NZD/USD",
            "USD/Unnamed: 19": "NBDI",
            "USD/Unnamed: 20": "NMCDI",
            "USD/Unnamed: 21": "NOITPI",
        },
    )
)

# keep the row labels for later use
dates = dataset_raw.index

In [27]:
# Convert every column to numeric values; cells that cannot be parsed
# as numbers are coerced to NaN.
prices = pd.DataFrame(
    {
        name: pd.to_numeric(dataset_raw[name].astype(str), errors="coerce")
        for name in dataset_raw.columns
    },
    columns=dataset_raw.columns,
)

### Dataset overview
- 12170 rows (days)
    - start date: January 4, 1971
    - end date: August 25, 2017
- 26 columns (23 currencies vs. USD, plus 3 indices*)
    - series of different length, rest NaN
    - e.g. EUR/USD starts in 1999
    - most other main currency pairs in 1971

    *(Nominal Broad Dollar Index, Nominal Major Currencies Dollar Index, Nominal Other Important Trading Partners Dollar Index)

#### Create one-day returns

- create a separate dataframe for daily returns
- skip `NaN`s instead of padding them with zeroes - padding would mislead the models with artificial data

In [28]:
# One-day percentage changes per column. Missing prices are removed before
# the change is computed, so each return spans two consecutive *available*
# observations; rows without a return stay NaN after alignment to the
# full price index.
returns = pd.DataFrame(
    {name: prices[name].dropna().pct_change() for name in prices.columns},
    index=prices.index,
    columns=prices.columns,
)

## Benchmark: Naive Forecast

In [29]:
# currency pairs (columns of `returns`) evaluated by the naive benchmark
currencies = ["EUR/USD", "GBP/USD", "USD/JPY", "USD/CHF"]

In [30]:
# parameters for data preprocessing (all measured in observations / trading days)
# train_len + trade_len is the length of one study-period window
train_len = 750
# length of the out-of-sample part and step between consecutive study periods
trade_len = 250
# NOTE(review): not referenced by the naive benchmark in this notebook -
# presumably kept for parity with the model notebooks; confirm before removing
sequence_len = 240

# metrics to be evaluated (column labels of the results dataframes)
metrics = ['Accuracy', 'AUC', 'Returns', 'Standard deviation', 'Sharpe ratio']

# empty dictionary for results: currency pair -> results dataframe
results_dict = {}

In [57]:
# timing
start_all = time.time()

# loop through selected currency pairs
for curr in currencies:

    # isolate currency pair (days without a return are dropped)
    ts = returns[curr].dropna()

    # number of trading windows of trade_len observations that still leave
    # train_len observations in front of the earliest one
    study_periods = int((len(ts) - train_len) / trade_len)

    # create a dataframe to store results
    # NOTE: 'Naive FC' is not one of the default models of
    # create_results_df; its rows are appended by the .loc assignments below
    results_dict[curr] = create_results_df(study_periods, metrics)

    # loop through all study periods (period_no == 0 is the most recent window)
    for period_no in reversed(range(study_periods)):

        # isolate study period (positional slice)
        sp_stop = len(ts) - period_no * trade_len
        sp_start = sp_stop - (train_len + trade_len)
        time_series = ts.iloc[sp_start:sp_stop]

        # binary direction (True = non-negative return) for the trading
        # window plus the single day preceding it
        directions = time_series.iloc[-(trade_len + 1):] >= 0

        # naive forecast: today's direction equals yesterday's direction
        naive_predictions = directions.iloc[:-1]

        # actual directions of the trading window
        targets = directions.iloc[1:]

        # match indices (required for auc computation)
        naive_predictions.index = targets.index

        # log loss is not reported: the naive predictions are hard 0/1
        # labels, for which log(0) would occur
        acc = accuracy_score(targets, naive_predictions)
        auc = roc_auc_score(targets, naive_predictions)

        # real returns of the trading window
        rr = time_series.iloc[-trade_len:]

        # profits from real returns and naive predictions; plain arrays are
        # passed so the strategy reads them by position, not by index label
        profits, stdev, sharpe_ratio = trading_strategy(
            rr.values, naive_predictions.values
        )

        # collect: accuracy, roc_auc, profits, stdev, sharpe ratio
        # (same order as `metrics`)
        results_this_model = acc, auc, profits, stdev, sharpe_ratio

        # write results into dataframe
        for metric, value in zip(metrics, results_this_model):
            results_dict[curr].loc[('Naive FC', period_no + 1), metric] = value


# total run time over all currency pairs and all study periods
print(f'Time: {round((time.time() - start_all))} seconds')

Time: 3 seconds


In [58]:
# mean of every metric over all EUR/USD study periods of the naive forecast
results_dict['EUR/USD'].loc['Naive FC'].mean(axis=0)

Accuracy              0.474400
AUC                   0.471813
Returns              -0.020230
Standard deviation    0.006000
Sharpe ratio         -0.018790
dtype: float64

In [59]:
# summary table: one row per currency pair, one column per metric
results_naive = pd.DataFrame(columns = metrics)

# each row holds the metrics averaged over that pair's 'Naive FC' study periods
for curr in currencies:
    results_naive.loc[curr] = results_dict[curr].loc['Naive FC'].mean(axis=0)

In [60]:
results_naive

Unnamed: 0,Accuracy,AUC,Returns,Standard deviation,Sharpe ratio
EUR/USD,0.4744,0.471813,-0.02023,0.006,-0.01879
GBP/USD,0.501023,0.497063,0.048086,0.005875,0.031028
USD/JPY,0.494047,0.488804,0.048776,0.006348,0.027989
USD/CHF,0.487256,0.483862,0.013099,0.007103,0.001446


#### Weighted average

In [61]:
# Weighted mean of every metric across the four currency pairs.
# The weights are hard-coded: 15 for EUR/USD and 43 for each of the other
# three pairs (15 + 3 * 43 = 144) - presumably the number of study periods
# per pair; TODO confirm against the data.
weighted_avg = [
    (15 * results_naive.loc['EUR/USD', metric]
     + 43 * (results_naive.loc['GBP/USD', metric]
             + results_naive.loc['USD/JPY', metric]
             + results_naive.loc['USD/CHF', metric]))
    / 144
    for metric in results_naive.columns
]
weighted_avg

[0.49205555555555563,
 0.4880244872612939,
 0.030728352681113562,
 0.006396180062758912,
 0.016097718325528833]

In [62]:
# append the weighted averages as an additional summary row labelled 'Ø'
results_naive.loc['Ø'] = weighted_avg

In [63]:
results_naive

Unnamed: 0,Accuracy,AUC,Returns,Standard deviation,Sharpe ratio
EUR/USD,0.4744,0.471813,-0.02023,0.006,-0.01879
GBP/USD,0.501023,0.497063,0.048086,0.005875,0.031028
USD/JPY,0.494047,0.488804,0.048776,0.006348,0.027989
USD/CHF,0.487256,0.483862,0.013099,0.007103,0.001446
Ø,0.492056,0.488024,0.030728,0.006396,0.016098


### Write to file and LaTeX table

In [64]:
# CSV export is currently disabled; uncomment the next two lines to write the table to disk
# filename = 'results/naive_forecast.csv'
# results_naive.to_csv(filename)
# LaTeX source of the summary table, values rounded to four decimals
# (the string is only returned/displayed here, not written to a file)
results_naive.round(4).to_latex()

'\\begin{tabular}{lrrrrr}\n\\toprule\n{} &  Accuracy &     AUC &  Returns &  Standard deviation &  Sharpe ratio \\\\\n\\midrule\nEUR/USD &    0.4744 &  0.4718 &  -0.0202 &              0.0060 &       -0.0188 \\\\\nGBP/USD &    0.5010 &  0.4971 &   0.0481 &              0.0059 &        0.0310 \\\\\nUSD/JPY &    0.4940 &  0.4888 &   0.0488 &              0.0063 &        0.0280 \\\\\nUSD/CHF &    0.4873 &  0.4839 &   0.0131 &              0.0071 &        0.0014 \\\\\nØ       &    0.4921 &  0.4880 &   0.0307 &              0.0064 &        0.0161 \\\\\n\\bottomrule\n\\end{tabular}\n'