In [2]:
#from ib_insync import *
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels import regression,stats
import math
import datetime 
import statsmodels.formula.api as smf 
from datetime import date, time, datetime, timedelta
from collections import deque
from os import listdir
from os.path import isfile, join
from collections import defaultdict
from operator import itemgetter 
import itertools
from strategies import *
from helpers import *
from joblib import Parallel, delayed
from tqdm import tqdm

In [4]:
mypath = 'eric_jh_data/'
countries = sorted(['Australia', 'Japan', 'China'])
fx_dict = {'Australia':('eric_jh_data/Forex/AUD_USD_new.csv',0),
           'Japan':('eric_jh_data/Forex/USD_JPY_new.csv',1),
           'China':('eric_jh_data/Forex/USD_HKD_new.csv',1)}

list_pairs = []
for country in countries:
    countrypath = mypath + country
    adr_names =  [f for f in listdir(countrypath) if not isfile(join(countrypath, f))] #grab all adr names of the country
    for adr in sorted(adr_names):
        list_pairs.append((country, adr))

In [5]:
# Store ratios
for (country, adr) in list_pairs:
    ratio_geq_1, ratio = get_ratio(country, adr, fx_dict)
    if adr == "ACH_2600" or adr == "BGNE_6160":
        rounded_ratio = float('%.2g' % ratio)
    else:
        rounded_ratio = float('%.1g' % ratio)
    print("Country: {}, ADR_Stock: {}, Estimated Ratio (4 d.p.): {}, Implied Ratio (2 s.f.): {}".format(country, adr, ratio, rounded_ratio))
    ratio_df = pd.DataFrame({"ratio_geq_1" : [ratio_geq_1], "ratio" : [rounded_ratio]})
    ratio_df.to_csv(f'eric_jh_data/{country}/{adr}/ratio.csv')
        
# This shows the empircally estimated ratio, and the implied ratio we shall assume.
# These values corroborate with the select few we checked online, like GENE_GTG and BGNE_6160.

Country: Australia, ADR_Stock: ATHE_ATH, Estimated Ratio (4 d.p.): 59.4889, Implied Ratio (2 s.f.): 60.0
Country: Australia, ADR_Stock: GENE_GTG, Estimated Ratio (4 d.p.): 595.5978, Implied Ratio (2 s.f.): 600.0
Country: Australia, ADR_Stock: IMMP_IMM, Estimated Ratio (4 d.p.): 9.9082, Implied Ratio (2 s.f.): 10.0
Country: Australia, ADR_Stock: IMRN_IMC, Estimated Ratio (4 d.p.): 39.4289, Implied Ratio (2 s.f.): 40.0
Country: Australia, ADR_Stock: JHX_JHX, Estimated Ratio (4 d.p.): 1.0043, Implied Ratio (2 s.f.): 1.0
Country: Australia, ADR_Stock: KZIA_KZA, Estimated Ratio (4 d.p.): 10.0346, Implied Ratio (2 s.f.): 10.0
Country: Australia, ADR_Stock: MESO_MSB, Estimated Ratio (4 d.p.): 5.0155, Implied Ratio (2 s.f.): 5.0
Country: Australia, ADR_Stock: PLL_PLL, Estimated Ratio (4 d.p.): 101.5788, Implied Ratio (2 s.f.): 100.0
Country: Australia, ADR_Stock: WBK_WBC, Estimated Ratio (4 d.p.): 0.9997, Implied Ratio (2 s.f.): 1.0
Country: China, ADR_Stock: ACH_2600, Estimated Ratio (4 d.p.)

In [7]:
# stock_num_per_unit is how many stocks we would buy for 1 "unit" of trade
# avg_bid_non_us_before is how much foreign currency we can buy with 1 USD, 1 minute before the Asian market opens
# avg_bid_non_us_at is how much foreign currency we can buy with 1 USD, when the Asian market opens
# avg_bid_us_before is how much foreign currency we can buy with 1 USD, 1 minute before the US market opens
# avg_bid_us_at is how much foreign currency we can buy with 1 USD, when the US market opens
# All dates are in local time: so in sequential order (for each row), it will go stock_open, stock_close, adr_open, adr_close
merged_df = data_processing(*list_pairs[1], fx_dict)
merged_df.head()

Unnamed: 0,date,adr_open,adr_close,adr_volume,stock_open,stock_close,stock_volume,avg_bid_non_us_before,avg_ask_non_us_before,avg_non_us_before,...,avg_bid_us_at,avg_ask_us_at,avg_us_at,ir,stock_num_per_unit,adr_num_per_unit,stock_open_per_unit,stock_close_per_unit,adr_open_per_unit,adr_close_per_unit
0,2015-04-10,18.08,17.36,1873,0.04,0.039,987003,1.296948,1.299612,1.29828,...,1.302409,1.305068,1.303738,,600.0,1,24.0,23.4,18.08,17.36
1,2015-04-13,17.76,16.16,2762,0.039,0.039,1586945,1.30331,1.305987,1.304648,...,1.31553,1.318215,1.316872,,600.0,1,23.4,23.4,17.76,16.16
2,2015-04-14,16.96,17.08,2545,0.036,0.038,2905099,1.3169,1.319606,1.318253,...,1.30982,1.312529,1.311174,,600.0,1,21.6,22.8,16.96,17.08
3,2015-04-15,17.68,17.52,2106,0.039,0.038,2069419,1.312108,1.314822,1.313465,...,1.310301,1.312994,1.311647,,600.0,1,23.4,22.8,17.68,17.52
4,2015-04-16,17.4,17.0,1068,0.038,0.036,2378678,1.301459,1.304133,1.302796,...,1.282035,1.284634,1.283335,,600.0,1,22.8,21.6,17.4,17.0


In [10]:
def plot_ret(date, cash, country, adr, num_xticks = 15, save = False, filename = "pnl_plot.png"):
    fig = plt.figure(figsize = (20, 8))
    ax = fig.add_subplot(111)
    ax.plot(date, cash)
    plt.xticks(np.arange(0, len(date), (len(date) - 1 )// num_xticks), rotation = 30, ha = 'right', fontsize = 14)
    plt.xlim(0, len(date))
    plt.yticks(fontsize = 14)
    plt.grid(True)
    plt.title(f'PnL Chart for {adr} pair from {country}', fontsize = 18)
    if save:
        fig.savefig(f'eric_jh_data/{country}/{adr}/plots/{filename}')
    else:
        plt.show();
    plt.close(fig)

# Hyperparameter Tuning

Hyperparameters:
1. Lookback window
2. Entry threshold
3. Exit threshold
4. Stop-loss threshold

Steps:
1. HP Tune each strategy for each of the pairs
2. Store results for each pair in hp_log_sfx{version}.txt
3. Store results for each strategy in results_sfx{version}.txt
4. Store best strategy for each pair in results_sfx_all.txt

In [53]:
window_grid = [30, 60, 100]
entry_grid = [1, 1.5, 2]
exit_grid = [-0.5, 0, 0.5]
stop_loss_grid = [2.5, 3, 3.5]
oos_start_date = "2019-02-22"
oos_end_date = "2020-02-22"

"""
HP Tune on coarse grid
"""

def hp_tune(pairs_trade_strategy, version, country, adr, window_grid = [30,60,100], 
            entry_grid = [1,1.5,2], exit_grid = [-0.5,0,0.5], stop_loss_grid = [2.5,3,3.5], fine = False):
    hps = list(itertools.product(*[window_grid, entry_grid, exit_grid, stop_loss_grid]))
    merged_df = data_processing(country, adr, fx_dict)
    hp_log = []
    max_ret = -10000
    max_ret_hps = ()
    max_ret_port = []
    max_ret_hr = 0
    max_ret_dd = 0
    max_ret_dates = []
    for hp in hps:
        if version == '1' or version == '2':
            ret1, _, portfolio_values1, hits1, dates1 = pairs_trade_strategy(merged_df, 
                                                                         lookback = hp[0], 
                                                                         entry = hp[1], 
                                                                         exit = hp[2], 
                                                                         stop_loss = hp[3], 
                                                                         end_date = oos_start_date)
            ret2, _, portfolio_values2, hits2, dates2 = pairs_trade_strategy(merged_df, 
                                                                             cash = 250000 if len(portfolio_values1) == 0 else portfolio_values1[-1],
                                                                             lookback = hp[0], 
                                                                             entry = hp[1], 
                                                                             exit = hp[2], 
                                                                             stop_loss = hp[3],
                                                                             start_date = oos_end_date)
            ret = (portfolio_values2[-1] - 250000) / 250000
            portfolio_values = portfolio_values1 + portfolio_values2
            hits = hits1 + hits2
            dates = dates1 + dates2
        else:
            ret1, _, portfolio_values1, hits1, dates1 = pairs_trade_strategy(merged_df, 
                                                                         lookback = hp[0], 
                                                                         entry_cond1_val = hp[1], 
                                                                         entry_cond2_val = hp[1],
                                                                         exit_cond1_val = hp[2],
                                                                         exit_cond2_val = hp[2],
                                                                         stop_loss_cond1 = hp[3],
                                                                         stop_loss_cond2 = hp[3],
                                                                         end_date = oos_start_date)
            ret2, _, portfolio_values2, hits2, dates2 = pairs_trade_strategy(merged_df, 
                                                                             cash = 250000 if len(portfolio_values1) == 0 else portfolio_values1[-1],
                                                                             lookback = hp[0], 
                                                                             entry_cond1_val = hp[1], 
                                                                             entry_cond2_val = hp[1],
                                                                             exit_cond1_val = hp[2],
                                                                             exit_cond2_val = hp[2],
                                                                             stop_loss_cond1 = hp[3],
                                                                             stop_loss_cond2 = hp[3],
                                                                             start_date = oos_end_date)
            ret = (portfolio_values2[-1] - 250000) / 250000
            portfolio_values = portfolio_values1 + portfolio_values2
            hits = hits1 + hits2
            dates = dates1 + dates2
        
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(portfolio_values)*100,2)
        if ret > max_ret:
            max_ret = ret
            max_ret_hps = hp
            max_ret_port = portfolio_values
            max_ret_dates = dates
            max_ret_hr = hit_ratio
            max_ret_dd = max_drawdown
        hp_log.append(f'{hp}: Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%\n')
    logs = [f'(Lookback, Entry, Exit, Stop-loss)\n',
            f'Best HPs: {max_ret_hps}, Return: {max_ret}%, Hit Ratio: {max_ret_hr}%, Max Drawdown: {max_ret_dd}%\n']
    logs = logs + hp_log 
    if fine:
        fname = f'eric_jh_data/{country}/{adr}/logs/hp1_log_fine_sfx{version}.txt' 
    else:
        fname = f'eric_jh_data/{country}/{adr}/logs/hp1_log_sfx{version}.txt' 
    f = open(fname, 'w')
    f.writelines(logs)
    f.close()
    if fine:
        plot_ret(max_ret_dates, max_ret_port, country, adr, save = True, filename = f'is_sfx_fine_hp1_pnl_plot_v{version}.png')
    else:
        plot_ret(max_ret_dates, max_ret_port, country, adr, save = True, filename = f'is_sfx_hp1_pnl_plot_v{version}.png')
    
    best_hps = f'Country: {country}, ADR_Stock: {adr}, HPs: {max_ret_hps}, Return: {max_ret}%, Hit Ratio: {max_ret_hr}%, Max Drawdown: {max_ret_dd}%\n'
    if fine:
        fname = f'logs/results1_fine_sfx{version}.txt'
    else:
        fname = f'logs/results1_sfx{version}.txt'
    f = open(fname, 'a')
    f.writelines(best_hps)
    f.close()

In [54]:
v1 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v1, '1', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.2min remaining: 10.4min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  2.1min remaining:  7.8min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  2.2min remaining:  4.6min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  2.3min remaining:  3.1min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  2.5min remaining:  2.1min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  3.4min remaining:  1.9min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  3.9min remaining:  1.3min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  4.0min remaining:   39.8s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  4.1min finished


In [55]:
v2 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v2, '2', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.3min remaining: 11.1min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  2.4min remaining:  8.9min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  2.5min remaining:  5.4min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  2.6min remaining:  3.4min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  2.7min remaining:  2.4min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  3.6min remaining:  2.0min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  4.1min remaining:  1.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  4.2min remaining:   42.2s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  4.3min finished


In [56]:
v3a = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v3a, '3a', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.6min remaining: 13.5min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  2.8min remaining: 10.3min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  3.1min remaining:  6.6min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  3.2min remaining:  4.3min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  3.3min remaining:  2.9min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  4.5min remaining:  2.5min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  5.0min remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  5.3min remaining:   53.2s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  5.4min finished


In [57]:
v3b = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v3b, '3b', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.7min remaining: 14.0min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  3.0min remaining: 10.9min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  3.3min remaining:  7.0min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  3.5min remaining:  4.7min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  3.5min remaining:  3.1min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  4.8min remaining:  2.6min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  5.2min remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  5.5min remaining:   55.4s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  5.7min finished


### Consolidate results for each pair

In [59]:
def consolidate(filename):
    results = []
    variants = ['1', '2', '3a', '3b']
    for v in variants:
        fname = f'logs/{filename}{v}.txt'
        with open(fname) as f:
            results.append(f.read())

    summ = []        
    for (country, adr) in list_pairs:
        summ.append(f'Country: {country}, ADR_Stock: {adr}\n')
        for v in range(4):
            ind = results[v].find(f'Country: {country}, ADR_Stock: {adr}')
            end = results[v][ind:].find('\n')
            res = results[v][ind + len(f'Country: {country}, ADR_Stock: {adr}, '):ind + end]
            summ.append(f'Variant {variants[v]}: ' + res + '\n')

    fname = f'logs/{filename}_all.txt'
    f = open(fname, 'w')
    f.writelines(summ)
    f.close()

### Sort results for each pair based on return

In [60]:
variants = ['1', '2', '3a', '3b']

def sort_res(string):
    ind = string.find('Return: ')
    end = string[ind:].find('%')
    return float(string[ind + 8:ind + end])

def sort_results(filename):
    for (country, adr) in list_pairs:
        for v in variants:
            fname = f'eric_jh_data/{country}/{adr}/logs/{filename}{v}.txt' 
            f = open(fname, 'r')
            res = f.readlines()
            f.close()
            sorted_res = sorted(res[2:], key = sort_res, reverse = True)
            res = res[:2] + sorted_res
            f = open(fname, 'w')
            f.writelines(res)
            f.close()

In [22]:
consolidate('results_sfx')
sort_results('hp_log_sfx')

In [61]:
consolidate('results1_sfx')
sort_results('hp1_log_sfx')

## Finer HP Tuning

In [27]:
from operator import add

"""
HP tune fine grid around coarse values
"""

def hp_tune_fine(pairs_trade_strategy, version, country, adr):
    fname = f'eric_jh_data/{country}/{adr}/logs/hp_log_sfx{version}.txt' 
    f = open(fname, 'r')
    res = f.readlines()
    f.close()
    ind = res[1].find(', Return: ')
    hps = res[1][:ind]
    coarse_hps = tuple(map(float, hps[11:-1].split(', ')))
    fine_window = tuple( map(add, [int(coarse_hps[0])]*5, [-10, 5, 0, 5, 10]) )
    fine_entry = tuple( map(add, [coarse_hps[1]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    fine_exit =  tuple( map(add, [coarse_hps[2]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    fine_stop_loss =  tuple( map(add, [coarse_hps[3]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    
    hp_tune(pairs_trade_strategy, version, country, adr, fine_window, fine_entry, fine_exit, fine_stop_loss, fine = True)

In [28]:
v1 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v1, '1', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 10.3min remaining: 86.2min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 14.5min remaining: 53.0min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 17.3min remaining: 36.5min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 19.0min remaining: 25.4min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 20.9min remaining: 18.1min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 24.8min remaining: 13.8min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 26.2min remaining:  8.7min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 27.1min remaining:  4.5min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 29.2min finished


In [29]:
v2 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v2, '2', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 11.1min remaining: 92.9min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 15.3min remaining: 56.0min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 21.3min remaining: 44.9min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 23.0min remaining: 30.7min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 24.6min remaining: 21.3min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 27.2min remaining: 15.1min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 31.3min remaining: 10.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 33.5min remaining:  5.6min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 36.8min finished


In [30]:
v3a = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v3a, '3a', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 12.6min remaining: 105.1min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 21.0min remaining: 76.9min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 25.5min remaining: 53.9min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 27.0min remaining: 36.0min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 32.5min remaining: 28.2min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 34.7min remaining: 19.3min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 39.2min remaining: 13.1min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 41.3min remaining:  6.9min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 46.1min finished


In [31]:
v3b = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v3b, '3b', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 15.0min remaining: 124.8min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 20.9min remaining: 76.8min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 26.1min remaining: 55.1min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 33.6min remaining: 44.8min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 36.7min remaining: 31.8min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 38.9min remaining: 21.6min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 43.4min remaining: 14.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 45.0min remaining:  7.5min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 51.2min finished


In [32]:
consolidate('results_fine_sfx')
sort_results('hp_log_fine_sfx')

## Generate IS trade logs

In [29]:
pairs_trade_strategy = [pairs_trade_v1, pairs_trade_v2, pairs_trade_v3a, pairs_trade_v3b]

for (country, adr) in list_pairs:
    merged_df = data_processing(country, adr, fx_dict)
    for i in range(4):
        v = variants[i]
        strat = pairs_trade_strategy[i]
        fname = f'eric_jh_data/{country}/{adr}/logs/hp_log_fine_sfx{v}.txt' 
        f = open(fname, 'r')
        res = f.readlines()
        f.close()
        ind = res[1].find(', Return: ')
        hps = res[1][:ind]
        best_hps = tuple(map(float, hps[11:-1].split(', ')))
        if i < 2:
            ret1, tr1, pv1, hits1, dates1 = strat(merged_df, 
                                                 cash = 250000,
                                                 lookback = int(best_hps[0]), 
                                                 entry = best_hps[1], 
                                                 exit = best_hps[2],
                                                 stop_loss = best_hps[3],
                                                 end_date = oos_start_date)
            ret2, tr2, pv2, hits2, dates2 = strat(merged_df, 
                                                 cash = 250000 if len(pv1) == 0 else pv1[-1],
                                                 lookback = int(best_hps[0]), 
                                                 entry = best_hps[1], 
                                                 exit = best_hps[2],
                                                 stop_loss = best_hps[3],
                                                 start_date = oos_end_date)
            ret = (pv2[-1] - 250000) / 250000
            tr = tr1 + tr2
            pv = pv1 + pv2
            hits = hits1 + hits2
            dates = dates1 + dates2
            
        else:
            ret1, tr1, pv1, hits1, dates1 = strat(merged_df, 
                                                 cash = 250000,
                                                 lookback = int(best_hps[0]), 
                                                 entry_cond1_val = best_hps[1], 
                                                 entry_cond2_val = best_hps[1],
                                                 exit_cond1_val = best_hps[2],
                                                 exit_cond2_val = best_hps[2],
                                                 stop_loss_cond1 = best_hps[3],
                                                 stop_loss_cond2 = best_hps[3],
                                                 end_date = oos_start_date)
            ret2, tr2, pv2, hits2, dates2 = strat(merged_df, 
                                                 cash = 250000 if len(pv1) == 0 else pv1[-1],
                                                 lookback = int(best_hps[0]), 
                                                 entry_cond1_val = best_hps[1], 
                                                 entry_cond2_val = best_hps[1],
                                                 exit_cond1_val = best_hps[2],
                                                 exit_cond2_val = best_hps[2],
                                                 stop_loss_cond1 = best_hps[3],
                                                 stop_loss_cond2 = best_hps[3],
                                                 start_date = oos_end_date)
            ret = (pv2[-1] - 250000) / 250000
            tr = tr1 + tr2
            pv = pv1 + pv2
            hits = hits1 + hits2
            dates = dates1 + dates2
            
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(pv)*100,2)
            
        plot_returns(dates, pv, country, adr, save = True, filename = f'is_sfx_hp_pnl_plot_v{v}.png')
        
        result = f'Country: {country}\nADR: {adr}\nReturn: {ret}%\nHit Ratio: {hit_ratio}%\nMax Drawdown: {max_drawdown}%\n'
        result += f'\nTrades\n\n'
        
        fname = f'eric_jh_data/{country}/{adr}/logs/is_log_sfx{v}.txt' 
        f = open(fname, 'w')
        f.writelines(result)
        f.writelines(tr)
        f.writelines(f'\nHyperparameters\n\n{best_hps}')
        f.close()

## Testing Out-of-sample Returns

In [30]:
pairs_trade_strategy = [pairs_trade_v1, pairs_trade_v2, pairs_trade_v3a, pairs_trade_v3b]

for (country, adr) in list_pairs:
    merged_df = data_processing(country, adr, fx_dict)
    for i in range(4):
        v = variants[i]
        strat = pairs_trade_strategy[i]
        fname = f'eric_jh_data/{country}/{adr}/logs/hp_log_fine_sfx{v}.txt' 
        f = open(fname, 'r')
        res = f.readlines()
        f.close()
        ind = res[1].find(', Return: ')
        hps = res[1][:ind]
        best_hps = tuple(map(float, hps[11:-1].split(', ')))
        if i < 2:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry = best_hps[1], 
                             exit = best_hps[2],
                             stop_loss = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
        else:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry_cond1_val = best_hps[1], 
                             entry_cond2_val = best_hps[1],
                             exit_cond1_val = best_hps[2],
                             exit_cond2_val = best_hps[2],
                             stop_loss_cond1 = best_hps[3],
                             stop_loss_cond2 = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
            
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(pv)*100,2)
            
        plot_returns(dates, pv, country, adr, save = True, filename = f'oos_sfx_hp_pnl_plot_v{v}.png')
        
        result = f'Country: {country}\nADR: {adr}\nReturn: {ret}%\nHit Ratio: {hit_ratio}%\nMax Drawdown: {max_drawdown}%\n'
        result += f'\nTrades\n\n'
        
        fname = f'eric_jh_data/{country}/{adr}/logs/oos_log_sfx{v}.txt' 
        f = open(fname, 'w')
        f.writelines(result)
        f.writelines(tr)
        f.writelines(f'\nHyperparameters\n\n{best_hps}')
        f.close()
        
        fname = 'logs/oos_sfx_cum_results.txt'
        f = open(fname, 'a')
        f.writelines(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%\n')
        f.close()
        print(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%')

Australia ATHE_ATH Variant1, Return: 0.08%, Hit Ratio: 78.95%, Max Drawdown: 0.01%
Australia ATHE_ATH Variant2, Return: 0.05%, Hit Ratio: 84.62%, Max Drawdown: 0.02%
Australia ATHE_ATH Variant3a, Return: 0.04%, Hit Ratio: 82.35%, Max Drawdown: 0.03%
Australia ATHE_ATH Variant3b, Return: 0.02%, Hit Ratio: 76.92%, Max Drawdown: 0.03%
Australia GENE_GTG Variant1, Return: 0.17%, Hit Ratio: 86.67%, Max Drawdown: 0.02%
Australia GENE_GTG Variant2, Return: 0.11%, Hit Ratio: 81.25%, Max Drawdown: 0.02%
Australia GENE_GTG Variant3a, Return: 0.22%, Hit Ratio: 88.89%, Max Drawdown: 0.06%
Australia GENE_GTG Variant3b, Return: 0.15%, Hit Ratio: 76.92%, Max Drawdown: 0.04%
Australia IMMP_IMM Variant1, Return: 0.01%, Hit Ratio: 100.0%, Max Drawdown: 0.01%
Australia IMMP_IMM Variant2, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMMP_IMM Variant3a, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMMP_IMM Variant3b, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMRN_IMC Var

  
  
  
  


China BGNE_6160 Variant3b, Return: 0%, Hit Ratio: 0%, Max Drawdown: 0%
China CEA_670 Variant1, Return: 0.15%, Hit Ratio: 66.67%, Max Drawdown: 0.1%
China CEA_670 Variant2, Return: 0.14%, Hit Ratio: 65.52%, Max Drawdown: 0.04%
China CEA_670 Variant3a, Return: 0.02%, Hit Ratio: 57.14%, Max Drawdown: 0.1%
China CEA_670 Variant3b, Return: -0.12%, Hit Ratio: 57.14%, Max Drawdown: 0.19%
China HNP_902 Variant1, Return: 0.01%, Hit Ratio: 37.5%, Max Drawdown: 0.15%
China HNP_902 Variant2, Return: -0.0%, Hit Ratio: 50.0%, Max Drawdown: 0.09%
China HNP_902 Variant3a, Return: 0.01%, Hit Ratio: 42.11%, Max Drawdown: 0.14%
China HNP_902 Variant3b, Return: -0.06%, Hit Ratio: 43.75%, Max Drawdown: 0.13%
China LFC_2628 Variant1, Return: 0.01%, Hit Ratio: 50.0%, Max Drawdown: 0.13%
China LFC_2628 Variant2, Return: -0.31%, Hit Ratio: 41.18%, Max Drawdown: 0.5%
China LFC_2628 Variant3a, Return: -0.38%, Hit Ratio: 41.18%, Max Drawdown: 0.5%
China LFC_2628 Variant3b, Return: -0.38%, Hit Ratio: 27.27%, Max D

## Out-of-sample returns on coarse values

In [62]:
pairs_trade_strategy = [pairs_trade_v1, pairs_trade_v2, pairs_trade_v3a, pairs_trade_v3b]

for (country, adr) in list_pairs:
    merged_df = data_processing(country, adr, fx_dict)
    for i in range(4):
        v = variants[i]
        strat = pairs_trade_strategy[i]
        fname = f'eric_jh_data/{country}/{adr}/logs/hp1_log_sfx{v}.txt' 
        f = open(fname, 'r')
        res = f.readlines()
        f.close()
        ind = res[1].find(', Return: ')
        hps = res[1][:ind]
        best_hps = tuple(map(float, hps[11:-1].split(', ')))
        if i < 2:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry = best_hps[1], 
                             exit = best_hps[2],
                             stop_loss = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
        else:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry_cond1_val = best_hps[1], 
                             entry_cond2_val = best_hps[1],
                             exit_cond1_val = best_hps[2],
                             exit_cond2_val = best_hps[2],
                             stop_loss_cond1 = best_hps[3],
                             stop_loss_cond2 = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
            
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(pv)*100,2)
            
        plot_returns(dates, pv, country, adr, save = True, filename = f'oos_sfx_hp1_coarse_pnl_plot_v{v}.png')
        
        result = f'Country: {country}\nADR: {adr}\nReturn: {ret}%\nHit Ratio: {hit_ratio}%\nMax Drawdown: {max_drawdown}%\n'
        result += f'\nTrades\n\n'
        
        fname = f'eric_jh_data/{country}/{adr}/logs/oos_coarse1_log_sfx{v}.txt' 
        f = open(fname, 'w')
        f.writelines(result)
        f.writelines(tr)
        f.writelines(f'\nHyperparameters\n\n{best_hps}')
        f.close()
        
        fname = 'logs/oos_sfx_coarse1_cum_results.txt'
        f = open(fname, 'a')
        f.writelines(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%\n')
        f.close()
        print(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%')

Australia ATHE_ATH Variant1, Return: 0.06%, Hit Ratio: 81.82%, Max Drawdown: 0.02%
Australia ATHE_ATH Variant2, Return: 0.05%, Hit Ratio: 88.89%, Max Drawdown: 0.01%
Australia ATHE_ATH Variant3a, Return: 0.06%, Hit Ratio: 90.91%, Max Drawdown: 0.02%
Australia ATHE_ATH Variant3b, Return: 0.05%, Hit Ratio: 81.82%, Max Drawdown: 0.02%
Australia GENE_GTG Variant1, Return: 0.01%, Hit Ratio: 60.0%, Max Drawdown: 0.13%
Australia GENE_GTG Variant2, Return: 0.91%, Hit Ratio: 92.86%, Max Drawdown: 0.16%
Australia GENE_GTG Variant3a, Return: 0.15%, Hit Ratio: 71.43%, Max Drawdown: 0.03%
Australia GENE_GTG Variant3b, Return: 0.94%, Hit Ratio: 84.62%, Max Drawdown: 0.63%
Australia IMMP_IMM Variant1, Return: 0.04%, Hit Ratio: 60.0%, Max Drawdown: 0.03%
Australia IMMP_IMM Variant2, Return: 0.07%, Hit Ratio: 88.89%, Max Drawdown: 0.06%
Australia IMMP_IMM Variant3a, Return: 0.13%, Hit Ratio: 85.71%, Max Drawdown: 0.02%
Australia IMMP_IMM Variant3b, Return: 0.11%, Hit Ratio: 76.47%, Max Drawdown: 0.03%


Japan SONY_6758 Variant3a, Return: -0.57%, Hit Ratio: 25.0%, Max Drawdown: 1.28%
Japan SONY_6758 Variant3b, Return: -2.26%, Hit Ratio: 0.0%, Max Drawdown: 2.27%
Japan TAK_4502 Variant1, Return: 0.08%, Hit Ratio: 50.0%, Max Drawdown: 1.3%
Japan TAK_4502 Variant2, Return: 3.39%, Hit Ratio: 60.0%, Max Drawdown: 1.92%
Japan TAK_4502 Variant3a, Return: 2.03%, Hit Ratio: 63.64%, Max Drawdown: 1.35%
Japan TAK_4502 Variant3b, Return: 3.71%, Hit Ratio: 58.06%, Max Drawdown: 0.73%
Japan TM_7203 Variant1, Return: -0.27%, Hit Ratio: 100.0%, Max Drawdown: 0.42%
Japan TM_7203 Variant2, Return: 0.12%, Hit Ratio: 75.0%, Max Drawdown: 0.1%
Japan TM_7203 Variant3a, Return: -0.22%, Hit Ratio: 66.67%, Max Drawdown: 0.46%
Japan TM_7203 Variant3b, Return: 0.03%, Hit Ratio: 52.63%, Max Drawdown: 0.38%


### Store best variant + HPs to csv

In [72]:
fname = 'logs/results1_sfx_all.txt'
f = open(fname, 'r')
res = f.readlines()
f.close()

fname = 'logs/oos_sfx_coarse1_cum_results.txt'
f = open(fname, 'r')
oos_res = f.readlines()
f.close()

column_names = ['country', 'adr_stock', 'lookback', 'entry', 'exit', 'stoploss', 'variant', 'is_ret', 
                'is_hr', 'is_max_dd', 'oos_ret', 'oos_hr', 'oos_max_dd']
summ_df = pd.DataFrame(columns = column_names)

for (country, adr) in list_pairs:
    best_ret = -1
    variant = ""
    lookback = 100
    entry = 1
    exit = 0
    stoploss = 3
    hr = 0
    max_dd = 0
    ind = res.index(f'Country: {country}, ADR_Stock: {adr}\n')
    for v in range(1,5):
        ret_ind = res[ind + v].find('Return: ')
        ret = float(res[ind + v][ret_ind + 8 : -2])
        if ret > best_ret:
            variant = variants[v - 1]
            hp_ind_start = res[ind + v].find('HPs: ')
            hp_ind_end = res[ind + v].find(', Return: ')
            
            hps = res[ind + v][hp_ind_start + 5:hp_ind_end]
            best_hps = hps[1:-1].split(', ')
            lookback = int(best_hps[0])
            entry = float(best_hps[1])
            exit = float(best_hps[2])
            stoploss = float(best_hps[3])
            
            best_ret = ret
            hr_start = res[ind + v].find('%, Hit')
            dd_start = res[ind + v].find('%, Max')
            hr = float(res[ind + v][hr_start + 13:dd_start])
            max_dd = float(res[ind + v][dd_start + 16, -2])
    for i in range(len(oos_res)):
        if f'{country} {adr} Variant{variant}' in oos_res[i]:
            oos_ret_ind = i
            break
    oos_ret_start = oos_res[oos_ret_ind].find('Return: ')
    oos_hr_start = oos_res[oos_ret_ind].find('%, Hit')
    oos_dd_start = oos_res[oos_ret_ind].find('%, Max')
    oos_ret = float(oos_res[oos_ret_ind][oos_ret_start + 8:oos_hr_start])
    oos_hr = float(oos_res[oos_ret_ind][oos_hr_start + 13:oos_dd_start])
    oos_dd = float(oos_res[oos_ret_ind][oos_dd_start + 16:-2])
    temp_df = pd.DataFrame([[country, adr, lookback, entry, exit, stoploss, variant, best_ret, hr, max_dd, oos_ret, oos_hr, oos_dd]], columns = column_names)
    summ_df = summ_df.append(temp_df, ignore_index = True)

In [73]:
summ_df

Unnamed: 0,country,adr_stock,lookback,entry,exit,stoploss,variant,is_ret,oos_ret,oos_hr,oos_max_dd
0,Australia,ATHE_ATH,100,1.0,-0.5,2.5,3a,0.51,0.06,90.91,0.02
1,Australia,GENE_GTG,100,1.5,-0.5,3.5,3a,5.94,0.15,71.43,0.03
2,Australia,IMMP_IMM,60,1.0,-0.5,3.0,1,2.67,0.04,60.0,0.03
3,Australia,IMRN_IMC,60,1.0,0.0,3.0,3b,1.18,0.07,84.21,0.02
4,Australia,JHX_JHX,100,1.0,0.0,3.5,3a,0.22,0.01,44.0,0.01
5,Australia,KZIA_KZA,100,1.0,-0.5,3.0,3b,1.81,-0.03,60.0,0.04
6,Australia,MESO_MSB,30,1.0,-0.5,3.5,3b,4.39,0.24,88.0,0.03
7,Australia,PLL_PLL,60,2.0,0.5,3.0,3a,25.13,0.02,71.43,0.0
8,Australia,WBK_WBC,100,1.5,0.5,3.5,3b,2.14,0.17,80.0,0.29
9,China,ACH_2600,30,1.5,-0.5,2.5,3a,0.42,-0.0,53.33,0.02


In [74]:
summ_df.to_csv('hps1_sfx.csv', index = False)