In [2]:
#from ib_insync import *
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels import regression,stats
import math
import datetime 
import statsmodels.formula.api as smf 
from datetime import date, time, datetime, timedelta
from collections import deque
from os import listdir
from os.path import isfile, join
from collections import defaultdict
from operator import itemgetter 
import itertools
from joblib import Parallel, delayed
from tqdm import tqdm

In [3]:
def get_ratio(country, adr, fx_dict):
    adr_path = f'eric_jh_data/{country}/{adr}/adr.csv'
    stock_path =  f'eric_jh_data/{country}/{adr}/underlying.csv'
    fx_path = fx_dict[country][0]
    fx_type =  fx_dict[country][1]

    adr_df = pd.read_csv(adr_path, index_col = 0).rename(columns = {'close':'adr_close', 'open':'adr_open'})
    stock_df = pd.read_csv(stock_path, index_col = 0).rename(columns = {'close':'stock_close', 'open':'stock_open'})
    fx_df = pd.read_csv(fx_path, index_col = 0)

    merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close']], stock_df.loc[:,['date', 'stock_open','stock_close']])
    merged_df = pd.merge(merged_df, fx_df)

    if fx_type == 1:
        merged_df['stock_open_usd'] = merged_df['stock_open']/((merged_df['avg_bid_non_us_at'] + merged_df['avg_ask_non_us_at'])/2)
    else:
        merged_df['stock_open_usd'] = merged_df['stock_open']*((merged_df['avg_bid_non_us_at'] + merged_df['avg_ask_non_us_at'])/2)
    merged_df["ratio"] = merged_df['stock_open_usd']/merged_df['adr_close']
    
    ratio_geq_1 = True
    if np.mean(merged_df["ratio"] < 1):
        merged_df["ratio"] = 1/merged_df["ratio"]
        ratio_geq_1 = False
    
    return ratio_geq_1, np.round(np.mean(merged_df["ratio"]), 4)

In [4]:
mypath = 'eric_jh_data/'
countries = sorted(['Australia', 'Japan', 'China'])
fx_dict = {'Australia':('eric_jh_data/Forex/AUD_USD_new.csv',0),
           'Japan':('eric_jh_data/Forex/USD_JPY_new.csv',1),
           'China':('eric_jh_data/Forex/USD_HKD_new.csv',1)}

list_pairs = []
for country in countries:
    countrypath = mypath + country
    adr_names =  [f for f in listdir(countrypath) if not isfile(join(countrypath, f))] #grab all adr names of the country
    for adr in sorted(adr_names):
        list_pairs.append((country, adr))

In [5]:
# Store ratios
for (country, adr) in list_pairs:
    ratio_geq_1, ratio = get_ratio(country, adr, fx_dict)
    if adr == "ACH_2600" or adr == "BGNE_6160":
        rounded_ratio = float('%.2g' % ratio)
    else:
        rounded_ratio = float('%.1g' % ratio)
    print("Country: {}, ADR_Stock: {}, Estimated Ratio (4 d.p.): {}, Implied Ratio (2 s.f.): {}".format(country, adr, ratio, rounded_ratio))
    ratio_df = pd.DataFrame({"ratio_geq_1" : [ratio_geq_1], "ratio" : [rounded_ratio]})
    ratio_df.to_csv(f'eric_jh_data/{country}/{adr}/ratio.csv')
        
# This shows the empircally estimated ratio, and the implied ratio we shall assume.
# These values corroborate with the select few we checked online, like GENE_GTG and BGNE_6160.

Country: Australia, ADR_Stock: ATHE_ATH, Estimated Ratio (4 d.p.): 59.4889, Implied Ratio (2 s.f.): 60.0
Country: Australia, ADR_Stock: GENE_GTG, Estimated Ratio (4 d.p.): 595.5978, Implied Ratio (2 s.f.): 600.0
Country: Australia, ADR_Stock: IMMP_IMM, Estimated Ratio (4 d.p.): 9.9082, Implied Ratio (2 s.f.): 10.0
Country: Australia, ADR_Stock: IMRN_IMC, Estimated Ratio (4 d.p.): 39.4289, Implied Ratio (2 s.f.): 40.0
Country: Australia, ADR_Stock: JHX_JHX, Estimated Ratio (4 d.p.): 1.0043, Implied Ratio (2 s.f.): 1.0
Country: Australia, ADR_Stock: KZIA_KZA, Estimated Ratio (4 d.p.): 10.0346, Implied Ratio (2 s.f.): 10.0
Country: Australia, ADR_Stock: MESO_MSB, Estimated Ratio (4 d.p.): 5.0155, Implied Ratio (2 s.f.): 5.0
Country: Australia, ADR_Stock: PLL_PLL, Estimated Ratio (4 d.p.): 101.5788, Implied Ratio (2 s.f.): 100.0
Country: Australia, ADR_Stock: WBK_WBC, Estimated Ratio (4 d.p.): 0.9997, Implied Ratio (2 s.f.): 1.0
Country: China, ADR_Stock: ACH_2600, Estimated Ratio (4 d.p.)

In [6]:
def data_processing(country, adr, fx_dict, forex_bps = 10, adjust_forex_expense = True):
    adr_path = f'eric_jh_data/{country}/{adr}/adr.csv'
    stock_path =  f'eric_jh_data/{country}/{adr}/underlying.csv'
    ratio_path = f'eric_jh_data/{country}/{adr}/ratio.csv'
    fx_path = fx_dict[country][0]
    fx_type =  fx_dict[country][1]

    adr_df = pd.read_csv(adr_path, index_col = 0).rename(columns = {'close':'adr_close', 'open':'adr_open', 'volume' : 'adr_volume'})
    stock_df = pd.read_csv(stock_path, index_col = 0).rename(columns = {'close':'stock_close', 'open':'stock_open', 'volume' : 'stock_volume'})
    fx_df = pd.read_csv(fx_path, index_col = 0)
    ratio_df = pd.read_csv(ratio_path, index_col = 0)

    # Invert fx data so that all prices are reflected in USD
    if fx_type == 0:
        inverted_fx_df = 1/fx_df.iloc[:,[2,1,3,5,4,6,8,7,9,11,10,12]].copy()
        inverted_fx_df.columns = fx_df.columns[1:-1]
        fx_df.iloc[:,1:-1] = inverted_fx_df
    merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close', 'adr_volume']], stock_df.loc[:,['date', 'stock_open','stock_close', 'stock_volume']])
    merged_df = pd.merge(merged_df, fx_df)
    ratio_geq_1, ratio = ratio_df["ratio_geq_1"].item(), ratio_df["ratio"].item()

#     ratio is (stock price in USD)/(ADR price)
#     If ratio >= 1, we shall buy 1 stock, and sell multiple adrs
#     If ratio < 1, we shall sell 1 adr, and buy multiple stocks
    if ratio_geq_1:
        merged_df["stock_num_per_unit"] = 1
        merged_df["adr_num_per_unit"] = ratio
        merged_df["stock_open_per_unit"] = merged_df["stock_open"]
        merged_df["stock_close_per_unit"] = merged_df["stock_close"]
        merged_df["adr_open_per_unit"] = merged_df["adr_open"]*ratio
        merged_df["adr_close_per_unit"] = merged_df["adr_close"]*ratio
    else:
        merged_df["stock_num_per_unit"] = ratio
        merged_df["adr_num_per_unit"] = 1
        merged_df["stock_open_per_unit"] = merged_df["stock_open"]*ratio
        merged_df["stock_close_per_unit"] = merged_df["stock_close"]*ratio
        merged_df["adr_open_per_unit"] = merged_df["adr_open"]
        merged_df["adr_close_per_unit"] = merged_df["adr_close"]    
    
    if adjust_forex_expense:
        # Added expense for trading small amounts in forex market
        forex_bid_multiplier = 1 - 0.0001*forex_bps
        forex_ask_multiplier = 1 + 0.0001*forex_bps
        merged_df.loc[:,merged_df.columns.str.contains("bid")] *= forex_bid_multiplier
        merged_df.loc[:,merged_df.columns.str.contains("ask")] *= forex_ask_multiplier
        
    return merged_df

In [7]:
# stock_num_per_unit is how many stocks we would buy for 1 "unit" of trade
# avg_bid_non_us_before is how much foreign currency we can buy with 1 USD, 1 minute before the Asian market opens
# avg_bid_non_us_at is how much foreign currency we can buy with 1 USD, when the Asian market opens
# avg_bid_us_before is how much foreign currency we can buy with 1 USD, 1 minute before the US market opens
# avg_bid_us_at is how much foreign currency we can buy with 1 USD, when the US market opens
# All dates are in local time: so in sequential order (for each row), it will go stock_open, stock_close, adr_open, adr_close
merged_df = data_processing(*list_pairs[1], fx_dict)
merged_df.head()

Unnamed: 0,date,adr_open,adr_close,adr_volume,stock_open,stock_close,stock_volume,avg_bid_non_us_before,avg_ask_non_us_before,avg_non_us_before,...,avg_bid_us_at,avg_ask_us_at,avg_us_at,ir,stock_num_per_unit,adr_num_per_unit,stock_open_per_unit,stock_close_per_unit,adr_open_per_unit,adr_close_per_unit
0,2015-04-10,18.08,17.36,1873,0.04,0.039,987003,1.296948,1.299612,1.29828,...,1.302409,1.305068,1.303738,,600.0,1,24.0,23.4,18.08,17.36
1,2015-04-13,17.76,16.16,2762,0.039,0.039,1586945,1.30331,1.305987,1.304648,...,1.31553,1.318215,1.316872,,600.0,1,23.4,23.4,17.76,16.16
2,2015-04-14,16.96,17.08,2545,0.036,0.038,2905099,1.3169,1.319606,1.318253,...,1.30982,1.312529,1.311174,,600.0,1,21.6,22.8,16.96,17.08
3,2015-04-15,17.68,17.52,2106,0.039,0.038,2069419,1.312108,1.314822,1.313465,...,1.310301,1.312994,1.311647,,600.0,1,23.4,22.8,17.68,17.52
4,2015-04-16,17.4,17.0,1068,0.038,0.036,2378678,1.301459,1.304133,1.302796,...,1.282035,1.284634,1.283335,,600.0,1,22.8,21.6,17.4,17.0


In [8]:
def calc_max_drawdown(portfolio_values, method = "percentage"):
    peak, trough = portfolio_values[0], portfolio_values[0]
    max_drawdown = 0
    for i in range(1, len(portfolio_values)):
        if portfolio_values[i] < trough:
            trough = portfolio_values[i]
            if method == "percentage":
                max_drawdown = max(max_drawdown, (peak - trough)/peak)
            else:
                max_drawdown = max(max_drawdown, peak - trough)
        elif portfolio_values[i] > peak:
            peak, trough = portfolio_values[i], portfolio_values[i]
    return max_drawdown

In [9]:
def get_risk_statistics(stock_values, adr_values, var_ci):
    port_stock = stock_values - adr_values
    port = pd.DataFrame(data = port_stock)
    port_diff = port - port.shift(1)
    pnl = pd.DataFrame(port_diff).dropna()
    sigma = pnl.std()[0]
    pnl['pct_rank'] = pnl.rank(pct=True)
    pnl.columns =['daily_pl', 'pct_rank']
    var = abs(pnl[pnl.pct_rank< 1-var_ci].daily_pl.max())
    max_drawdown_abs = calc_max_drawdown(port_stock, "absolute")
    return sigma, var, max_drawdown_abs

In [10]:
def plot_returns(date, cash, country, adr, num_xticks = 15, save = False, filename = "pnl_plot.png"):
    fig = plt.figure(figsize = (20, 8))
    ax = fig.add_subplot(111)
    ax.plot(date, cash)
    plt.xticks(np.arange(0, len(date), (len(date) - 1 )// num_xticks), rotation = 30, ha = 'right', fontsize = 14)
    plt.xlim(0, len(date))
    plt.yticks(fontsize = 14)
    plt.grid(True)
    plt.title(f'PnL Chart for {adr} pair from {country}', fontsize = 18)
    if save:
        fig.savefig(f'eric_jh_data/{country}/{adr}/{filename}')
    else:
        plt.show();
    plt.close(fig)

In [11]:
"""
Variant 1 - Begin each trade on Asian market open (Evaluate after US market closes)

To open a position, we check the CLOSE price of adr, compared it to CLOSE px of 
stock of the same row. We buy the stock on the next trading OPEN for Asian/US market

To close a position,  we check the CLOSE price of adr, compared it to CLOSE px of 
stock of the same row. We sell the stock on the next trading next OPEN for Asian/US market

For each row:
    stock_open, stock_close, adr_open, adr_close
    After these 4 events, assess condition (right before the Asian market opens ~ 6.59PM EST)
    Place trade on next row (First trade stock on Asian market open, then trade ADR on US market open)
    
start_date: First date (EST) we may place a trade
end_date: Last date (EST) we may place a trade
portfolio_values: Stores value of portfolio at each date from one day before the start_date, to the end_date (inclusive), when Asian market opens (EST ~ 7:00 PM)
"""
def pairs_trade_v1(merged_df, lookback = 100, cash = 250000, entry = 1, exit = 0, stop_loss = 3, 
                   start_date = "2016-01-01", end_date = "2021-01-31", slippage_bps = 10, 
                   borrowing_bps = 50, risk_lookback = 100, var_ci = 0.95, var_limit = 0.1, max_drawdown_limit = 0.2, 
                   sigma_limit = 0.05, maximum_holding_period = 30, volume_lookback = 5):
    
    # Accounts for slippage and transaction costs
    short_multiplier = 1 - 0.0001*slippage_bps
    long_multiplier = 1 + 0.0001*slippage_bps
    starting_cash = cash
    stock_pos, adr_pos = 0, 0
    # For book-keeping, since we shall store the portfolio value of the day before
    prev_cash, prev_adr_pos = cash, adr_pos
    forex_cash = 0
    holding_period = None
    diff_record = deque(maxlen = lookback)
    trade_records = []
    portfolio_values = []
    dates = []
    hits = []
    
    # Make sure that merged_df before end date is not empty
    if merged_df[merged_df['date'] < end_date].empty:
        return 0, trade_records, portfolio_values, hits, dates

    for index, row in merged_df.iterrows():

        if index+1 < len(merged_df) and index > 0:
            
            # Add portfolio value for the day before
            prev_date = merged_df.loc[index - 1, "date"]
            if row["date"] >= start_date and prev_date <= end_date:
                dates.append(prev_date)
                prev_forex_value = forex_cash + stock_pos*row["stock_open"]
                if prev_forex_value > 0:
                    prev_forex_value /= row['avg_ask_non_us_at']
                else:
                    prev_forex_value /= row['avg_bid_non_us_at']
                
                portfolio_values.append(prev_cash + prev_adr_pos*merged_df.loc[index - 1, 'adr_close'] 
                                        + prev_forex_value)

            diff_record.append(row['adr_close_per_unit'] 
                                   - row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before'])

            # We place one trade the day itself (Asian), one trade the day after (US)
            if len(diff_record) < lookback or row["date"] < start_date or merged_df.loc[index+1, "date"] > end_date:
                continue
            
            # Update cash/adr position after portfolio values has been updated
            if stock_pos > 0:
                holding_period += 1
                cash -= 0.0001*borrowing_bps*(1/252)*abs(adr_pos)*merged_df.loc[index - 1, 'adr_close']
                multiplier = (1 + 0.01*(2 + row["ir"])*(1/252))
                forex_cash *= multiplier
            prev_cash, prev_adr_pos = cash, adr_pos

            mean = np.array(diff_record).mean()
            std = np.array(diff_record).std()
            
            # If we have passed the initial lookback window and are in the specified dates
            # enter the position if diff is significant
            if diff_record[-1] > mean + entry*std and diff_record[-1] <= mean + stop_loss*std:
                if stock_pos == 0 and adr_pos == 0:
                    portfolio_value_before_entering = cash
                    adr_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["adr_volume"].median()/row["adr_num_per_unit"])
                    stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                    units = int(min(cash/row['adr_close_per_unit'],
                                    cash/(row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before']), 
                                    adr_volume, 
                                    stock_volume))
                    adr_quantity = int(units*row["adr_num_per_unit"])
                    stock_quantity = int(units*row["stock_num_per_unit"])
                    
                    # Take portfolio value for each previous day (till today) right before the Asian market opens
                    temp_risk_lookback = min(risk_lookback, index)
                    current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                    next_day = merged_df.loc[(index - temp_risk_lookback + 2):(index + 1)].copy()
                    stock_values = (np.array((current["stock_close"])/np.array(next_day["avg_non_us_before"]))*stock_quantity) 
                    adr_values = np.array(current["adr_close"]*adr_quantity)
                    sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                    if (var > portfolio_value_before_entering*var_limit or 
                        max_drawdown_abs > max_drawdown_limit*starting_cash or 
                        sigma > portfolio_value_before_entering*sigma_limit):
                        frac = min((portfolio_value_before_entering*var_limit)/var, 
                                   (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                  (portfolio_value_before_entering*sigma_limit)/sigma)
                        units = int(frac*units)
                        if units == 0:
                            continue
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"])                        
                    
                    stock_pos += stock_quantity
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                    forex_cash -= stock_px_fx*stock_quantity
                    # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                    prev_cash, prev_adr_pos = cash, adr_pos
                    
                    adr_pos -= adr_quantity
                    adr_px = merged_df.loc[index+1,'adr_open']*short_multiplier
                    cash += adr_quantity*adr_px
                    
                    holding_period = 0
                    trade_records.append("Opening positions:\n")
                    # Times in EST
                    trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                    trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")

            # Liquidation condition
            elif (diff_record[-1] < mean + exit*std or 
                  diff_record[-1] > mean + stop_loss*std or 
                  holding_period == maximum_holding_period):
                if stock_pos > 0 and adr_pos < 0 : 
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                    forex_cash += stock_px_fx*stock_pos
                    if forex_cash > 0:
                        forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                    else:
                        forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                    cash += forex_cash
                    forex_cash = 0
                    
                    # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                    prev_cash, prev_adr_pos = cash, adr_pos
                    
                    adr_px = merged_df.loc[index+1,'adr_open']*long_multiplier
                    cash -= abs(adr_pos)*adr_px
                    trade_records.append("Closing positions:\n")
                    # Times in EST
                    trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                    trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")
                    stock_pos, adr_pos = 0, 0
                    holding_period = None
                    if cash > portfolio_value_before_entering:
                        hits.append(1)
                    else:
                        hits.append(0)

    ret = (portfolio_values[-1] - starting_cash)/starting_cash
    
    return ret, trade_records, portfolio_values, hits, dates

In [12]:
"""
Variant 2 - Begin each trade on US market open (Evaluate after Asian market closes)

To open a position, we check the CLOSE price of adr of the previous row, compared to CLOSE px of 
stock of the current row. We buy the stock on the next trading OPEN for Asian/US market

To close a position, we check the CLOSE price of adr of the previous row, compared to CLOSE px of 
stock of the current row. We sell the stock on the next trading next OPEN for Asian/US market

For each row:
    stock_open, stock_close, (assess), adr_open, adr_close
    After first 2 events events, assess condition (right before the US market opens ~ 9.29AM EST)
    Place trade on current and next row (First trade ADR on US market open, then trade stock on Asian market open)
    
start_date: First date (EST) we may place a trade
end_date: Last date (EST) we may place a trade
portfolio_values: Stores value of portfolio at each date from one day before the start_date, to the end_date, when the Asian market opens
"""
def pairs_trade_v2(merged_df, lookback = 100, cash = 250000, entry = 1, exit = 0, stop_loss = 3, 
                   start_date = "2016-01-01", end_date = "2021-01-31", slippage_bps = 10, 
                   borrowing_bps = 50, risk_lookback = 100, var_ci = 0.95, var_limit = 0.1, max_drawdown_limit = 0.2, 
                   sigma_limit = 0.05, maximum_holding_period = 30, volume_lookback = 5):
    
    # Accounts for slippage and transaction costs
    short_multiplier = 1 - 0.0001*slippage_bps
    long_multiplier = 1 + 0.0001*slippage_bps
    starting_cash = cash
    stock_pos, adr_pos = 0, 0
    holding_period = None
    forex_cash = 0
    diff_record = deque(maxlen = lookback)
    trade_records = []
    portfolio_values = []
    dates = []
    hits = []
    
    # Make sure that merged_df before end date is not empty
    if merged_df[merged_df['date'] < end_date].empty:
        return 0, trade_records, portfolio_values, hits, dates

    for index, row in merged_df.iterrows():
        
        if index+1 < len(merged_df) and index > 0:
            
            # Add portfolio value for the day before
            prev_date = merged_df.loc[index - 1, "date"]
            if row["date"] >= start_date and prev_date <= end_date:
                dates.append(prev_date)
                prev_forex_value = forex_cash + stock_pos*row["stock_open"]
                if prev_forex_value > 0:
                    prev_forex_value /= row['avg_ask_non_us_at']
                else:
                    prev_forex_value /= row['avg_bid_non_us_at']
                
                portfolio_values.append(cash + adr_pos*merged_df.loc[index - 1, 'adr_close'] 
                                        + prev_forex_value)

            diff_record.append(merged_df.loc[index-1,'adr_close_per_unit'] 
                                   - row['stock_close_per_unit']/row['avg_us_before'])
            
            # We place both trades the day itself
            if len(diff_record) < lookback or row["date"] < start_date or row["date"] > end_date:
                continue

            if stock_pos > 0:
                holding_period += 1
                cash -= 0.0001*borrowing_bps*(1/252)*abs(adr_pos)*merged_df.loc[index - 1, 'adr_close']
                multiplier = (1 + 0.01*(2 + row["ir"])*(1/252))
                forex_cash *= multiplier
                
            mean = np.array(diff_record).mean()
            std = np.array(diff_record).std()
            
            # If we have passed the initial lookback window and are in the specified dates
            # enter the position if diff is significant
            if diff_record[-1] > mean + entry*std and diff_record[-1] <= mean + stop_loss*std:
                if stock_pos == 0 and adr_pos == 0:
                    portfolio_value_before_entering = cash
                    adr_volume = 0.2*(merged_df.loc[index-volume_lookback:index - 1,:]["adr_volume"].median()/row["adr_num_per_unit"])
                    stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                    units = int(min(cash/merged_df.loc[index-1,'adr_close_per_unit'],
                                    cash/(row['stock_close_per_unit']/row['avg_us_before']), 
                                    adr_volume, 
                                    stock_volume))
                    adr_quantity = int(units*row["adr_num_per_unit"])
                    stock_quantity = int(units*row["stock_num_per_unit"])
                    # Take portfolio value for each previous day (till today) right before the US market opens
                    temp_risk_lookback = min(risk_lookback, index)
                    current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                    stock_values = np.array((current["stock_close"]/current["avg_us_before"])*stock_quantity) 
                    adr_values = np.array(merged_df.loc[(index - temp_risk_lookback):(index-1)]["adr_close"]*adr_quantity)
                    sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                    if (var > portfolio_value_before_entering*var_limit or 
                        max_drawdown_abs > max_drawdown_limit*starting_cash or 
                        sigma > portfolio_value_before_entering*sigma_limit):
                        frac = min((portfolio_value_before_entering*var_limit)/var, 
                                   (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                  (portfolio_value_before_entering*sigma_limit)/sigma)
                        units = int(frac*units)
                        if units == 0:
                            continue
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"])  
                    
                    adr_pos -= adr_quantity
                    adr_px = row['adr_open']*short_multiplier
                    cash += adr_quantity*adr_px
                    
                    stock_pos += stock_quantity
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                    forex_cash -= stock_px_fx*stock_quantity
#                     stock_px = stock_px_fx/merged_df.loc[index+1,'avg_bid_non_us_at']
#                     cash -= stock_px*stock_quantity
                    
                    holding_period = 0
                    trade_records.append("Opening positions:\n")
                    # Times in EST
                    trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {row['date']}\n")
                    trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")

            # Liquidation condition
            elif (diff_record[-1] < mean + exit*std or 
                  diff_record[-1] > mean + stop_loss*std or 
                  holding_period == maximum_holding_period):
                if stock_pos > 0 and adr_pos < 0 : 
                    adr_px = row['adr_open']*long_multiplier
                    cash -= abs(adr_pos)*adr_px
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                    forex_cash += stock_px_fx*stock_pos
                    if forex_cash > 0:
                        forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                    else:
                        forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                    cash += forex_cash
                    forex_cash = 0
                    
                    trade_records.append("Closing positions:\n")
                    # Times in EST
                    trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {row['date']}\n")
                    trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                    stock_pos, adr_pos = 0, 0
                    holding_period = None
                    if cash > portfolio_value_before_entering:
                        hits.append(1)
                    else:
                        hits.append(0)

    ret = (portfolio_values[-1] - starting_cash)/starting_cash
    
    return ret, trade_records, portfolio_values, hits, dates

In [13]:
"""
Variant 3a 
- Begin each trade on either US market open or Asian market open
- Regressions are done for the similar "type" of trade
    i.e. if we are entering at a certain time, we do a regression based on the values obtained at the same time each day

For each row:
    stock_open, stock_close, (assess condition 1), adr_open, adr_close, (assess condition 2)
    If not condition 2 - No action taken: 
        After first 2 events, Assess condition 1 (right before the US market opens ~ 9.29AM EST)
        If condition 1:
            Place trade on current and next row (First trade ADR on US market open, then trade stock on Asian market open)
    If not condition 1 - No action taken:
        After next 2 events occur, assess condition 2
        If condition 2:
            Place trade on next row (First trade ADR on Asian market open, then trade stock on US market open)
    
start_date: First date (EST) we may place a trade
end_date: Last date (EST) we may place a trade
portfolio_values: Stores value of portfolio at each date from one day before the start_date, to the end_date, when the Asian market opens
"""
def pairs_trade_v3a(merged_df, lookback = 100, cash = 250000, entry_cond1_val = 1, entry_cond2_val = 1, 
                    exit_cond1_val = 0, exit_cond2_val = 0, stop_loss_cond1 = 3, stop_loss_cond2 = 3, 
                    start_date = "2016-01-01", end_date = "2021-01-31", slippage_bps = 10, 
                    borrowing_bps = 50, risk_lookback = 100, var_ci = 0.95, var_limit = 0.1, max_drawdown_limit = 0.2, 
                    sigma_limit = 0.05, maximum_holding_period = 30, volume_lookback = 5):

    # Accounts for slippage and transaction costs
    short_multiplier = 1 - 0.0001*slippage_bps
    long_multiplier = 1 + 0.0001*slippage_bps
    starting_cash = cash
    stock_pos, adr_pos = 0, 0
    holding_period = None
    trade_type = None
    forex_cash = 0
    # For book-keeping, since we shall store the portfolio value of the day before
    prev_cash, prev_adr_pos = cash, adr_pos
    diff_record_cond1 = deque(maxlen = lookback)
    diff_record_cond2 = deque(maxlen = lookback)
    trade_records = []
    portfolio_values = []
    dates = []
    hits = []
    enter_cond1, exit_cond1, enter_cond2, exit_cond2 = False, False, False, False
    
    # Make sure that merged_df before end date is not empty
    if merged_df[merged_df['date'] < end_date].empty:
        return 0, trade_records, portfolio_values, hits, dates
    
    for index, row in merged_df.iterrows():
                    
        if index+1 < len(merged_df) and index > 0:
            
            # Add portfolio value for the day before
            prev_date = merged_df.loc[index - 1, "date"]
            if row["date"] >= start_date and prev_date <= end_date:
                dates.append(prev_date)
                prev_forex_value = forex_cash + stock_pos*row["stock_open"]
                if prev_forex_value > 0:
                    prev_forex_value /= row['avg_ask_non_us_at']
                else:
                    prev_forex_value /= row['avg_bid_non_us_at']
                portfolio_values.append(prev_cash + prev_adr_pos*merged_df.loc[index - 1, 'adr_close'] 
                                        + prev_forex_value)
            
            # Before US Market Opens
            diff_record_cond1.append(merged_df.loc[index-1,'adr_close_per_unit'] - row['stock_close_per_unit']/row['avg_us_before'])
            # Before Asian Market Opens
            diff_record_cond2.append(row['adr_close_per_unit'] 
                                   - row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before'])


            # row["date"] is between start_date (inclusive) and end_date (inclusive)
            if len(diff_record_cond1) < lookback or row["date"] < start_date or row["date"] > end_date:
                continue

            # Update cash/adr position after portfolio values has been updated
            if stock_pos > 0:
                holding_period += 1
                cash -= 0.0001*borrowing_bps*(1/252)*abs(adr_pos)*merged_df.loc[index - 1, 'adr_close']
                multiplier = (1 + 0.01*(2 + row["ir"])*(1/252))
                forex_cash *= multiplier
            prev_cash, prev_adr_pos = cash, adr_pos
                
            mean_cond1 = np.array(diff_record_cond1).mean()
            std_cond1 = np.array(diff_record_cond1).std()
            mean_cond2 = np.array(diff_record_cond2).mean()
            std_cond2 = np.array(diff_record_cond2).std()
            
            # If a concurrent trade is not already being placed
            if not (enter_cond2 or exit_cond2):
                enter_cond1 = (diff_record_cond1[-1] > mean_cond1 + entry_cond1_val*std_cond1 
                               and diff_record_cond1[-1] <= mean_cond1 + stop_loss_cond1*std_cond1
                               and stock_pos == 0 and adr_pos == 0)
                exit_cond1 = ((diff_record_cond1[-1] < mean_cond1 + exit_cond1_val*std_cond1 
                              or diff_record_cond1[-1] > mean_cond1 + stop_loss_cond1*std_cond1
                              or (holding_period == maximum_holding_period and trade_type == 1))
                              and stock_pos > 0 and adr_pos < 0)
                    
                if enter_cond1:
                    portfolio_value_before_entering = cash
                    adr_volume = 0.2*(merged_df.loc[index-volume_lookback:index - 1,:]["adr_volume"].median()/row["adr_num_per_unit"])
                    stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                    units = int(min(cash/row['adr_close_per_unit'],
                                    cash/(row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before']), 
                                    adr_volume, 
                                    stock_volume))
                    adr_quantity = int(units*row["adr_num_per_unit"])
                    stock_quantity = int(units*row["stock_num_per_unit"])
                    
                    # Take portfolio value for each previous day when the Asian market opens
                    temp_risk_lookback = min(risk_lookback, index)
                    current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                    stock_values = np.array((current["stock_close"]/current["avg_us_before"])*stock_quantity) 
                    adr_values = np.array(merged_df.loc[(index - temp_risk_lookback):(index-1)]["adr_close"]*adr_quantity)
                    sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                    if (var > portfolio_value_before_entering*var_limit or 
                        max_drawdown_abs > max_drawdown_limit*starting_cash or 
                        sigma > portfolio_value_before_entering*sigma_limit):
                        frac = min((portfolio_value_before_entering*var_limit)/var, 
                                   (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                  (portfolio_value_before_entering*sigma_limit)/sigma)
                        units = int(frac*units)
                        if units == 0:
                            enter_cond1 = False
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"])
                    if units != 0:
                                
                        adr_pos -= adr_quantity
                        adr_px = row['adr_open']*short_multiplier
                        cash += adr_quantity*adr_px

                        stock_pos += stock_quantity
                        stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                        forex_cash -= stock_px_fx*stock_quantity
                        prev_cash, prev_adr_pos = cash, adr_pos
                        holding_period = 0
                        trade_type = 1
                        
                        trade_records.append("Opening positions:\n")
                        # Times in EST
                        trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {row['date']}\n")
                        trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")

                elif exit_cond1:
                    
                    adr_px = row['adr_open']*long_multiplier
                    cash -= abs(adr_pos)*adr_px
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                    forex_cash += stock_px_fx*stock_pos
                    if forex_cash > 0:
                        forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                    else:
                        forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                    cash += forex_cash
                    forex_cash = 0
                    trade_records.append("Closing positions:\n")
                    # Times in EST
                    trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {row['date']}\n")
                    trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                    stock_pos, adr_pos = 0, 0
                    holding_period = None
                    trade_type = None
                    prev_cash, prev_adr_pos = cash, adr_pos
                    if cash > portfolio_value_before_entering:
                        hits.append(1)
                    else:
                        hits.append(0)
                    
            # If a concurrent trade is not already being placed
            # The 2nd trade of condition 2 falls on the next day
            if not (enter_cond1 or exit_cond1) and merged_df.loc[index+1, "date"] <= end_date:
                # Check and possibly trade condition 2
                enter_cond2 = (diff_record_cond2[-1] > mean_cond2 + entry_cond2_val*std_cond2 
                               and diff_record_cond2[-1] <= mean_cond2 + stop_loss_cond2*std_cond2
                               and stock_pos == 0 and adr_pos == 0)
                exit_cond2 = ((diff_record_cond2[-1] < mean_cond2 + exit_cond2_val*std_cond2 
                              or diff_record_cond2[-1] > mean_cond2 + stop_loss_cond2*std_cond2
                              or (holding_period == maximum_holding_period and trade_type == 2))
                              and stock_pos > 0 and adr_pos < 0)
                    
                if enter_cond2:
                    portfolio_value_before_entering = cash
                    adr_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["adr_volume"].median()/row["adr_num_per_unit"])
                    stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                    units = int(min(cash/merged_df.loc[index-1,'adr_close_per_unit'],
                                    cash/(row['stock_close_per_unit']/row['avg_us_before']), 
                                    adr_volume, 
                                    stock_volume))
                    adr_quantity = int(units*row["adr_num_per_unit"])
                    stock_quantity = int(units*row["stock_num_per_unit"])
                    
                    # Take portfolio value for each previous day when the Asian market opens
                    temp_risk_lookback = min(risk_lookback, index)
                    current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                    next_day = merged_df.loc[(index - temp_risk_lookback + 2):(index + 1)].copy()
                    stock_values = (np.array((current["stock_close"])/np.array(next_day["avg_non_us_before"]))*stock_quantity) 
                    adr_values = np.array(current["adr_close"]*adr_quantity)
                    sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                    if (var > portfolio_value_before_entering*var_limit or 
                        max_drawdown_abs > max_drawdown_limit*starting_cash or 
                        sigma > portfolio_value_before_entering*sigma_limit):
                        frac = min((portfolio_value_before_entering*var_limit)/var, 
                                   (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                  (portfolio_value_before_entering*sigma_limit)/sigma)
                        units = int(frac*units)
                        if units == 0:
                            enter_cond2 = False
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"]) 
                    if units != 0:
                        stock_pos += stock_quantity
                        stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                        forex_cash -= stock_px_fx*stock_quantity
                        # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                        prev_cash, prev_adr_pos = cash, adr_pos
                        
                        adr_pos -= adr_quantity
                        adr_px = merged_df.loc[index+1,'adr_open']*short_multiplier
                        cash += adr_quantity*adr_px
                        
                        holding_period = 0
                        trade_type = 2
                        trade_records.append("Opening positions:\n")
                        # Times in EST
                        trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                        trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")

                elif exit_cond2:
                    stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                    forex_cash += stock_px_fx*stock_pos
                    if forex_cash > 0:
                        forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                    else:
                        forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                    cash += forex_cash
                    forex_cash = 0
                    # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                    prev_cash, prev_adr_pos = cash, adr_pos
                    
                    adr_px = merged_df.loc[index+1,'adr_open']*long_multiplier
                    cash -= abs(adr_pos)*adr_px
                    trade_records.append("Closing positions:\n")
                    # Times in EST
                    trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                    trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")
                    stock_pos, adr_pos = 0, 0
                    holding_period = None
                    trade_type = None
                    if cash > portfolio_value_before_entering:
                        hits.append(1)
                    else:
                        hits.append(0)

    ret = (portfolio_values[-1] - starting_cash)/starting_cash

    return ret, trade_records, portfolio_values, hits, dates

In [14]:
"""
Variant 3b
- Begin each trade on either US market open or Asian market open
- Regressions are done for all data collected in lookback window

For each row:
    stock_open, stock_close, (assess condition 1), adr_open, adr_close, (assess condition 2)
    If not condition 2 - No action taken: 
        After first 2 events, Assess condition 1 (right before the US market opens ~ 9.29AM EST)
        If condition 1:
            Place trade on current and next row (First trade ADR on US market open, then trade stock on Asian market open)
    If not condition 1 - No action taken:
        After next 2 events occur, assess condition 2
        If condition 2:
            Place trade on next row (First trade ADR on Asian market open, then trade stock on US market open)
    
start_date: First date (EST) we may place a trade
end_date: Last date (EST) we may place a trade
portfolio_values: Stores value of portfolio at each date from one day before the start_date, to the end_date, when the Asian market opens
"""
def pairs_trade_v3b(merged_df, lookback = 100, cash = 250000, entry_cond1_val = 1, entry_cond2_val = 1, 
                    exit_cond1_val = 0, exit_cond2_val = 0, stop_loss_cond1 = 3, stop_loss_cond2 = 3, 
                    start_date = "2016-01-01", end_date = "2021-01-31", slippage_bps = 10, 
                    borrowing_bps = 50, risk_lookback = 100, var_ci = 0.95, var_limit = 0.1, max_drawdown_limit = 0.2,
                    sigma_limit = 0.05, maximum_holding_period = 30, volume_lookback = 5):
    
    # Accounts for slippage and transaction costs
    short_multiplier = 1 - 0.0001*slippage_bps
    long_multiplier = 1 + 0.0001*slippage_bps
    # We assume lookback is given in terms of days
    lookback *= 2
    starting_cash = cash
    stock_pos, adr_pos = 0, 0
    holding_period = None
    trade_type = None
    forex_cash = 0
    # For book-keeping, since we shall store the portfolio value of the day before
    prev_cash, prev_adr_pos = cash, adr_pos
    diff_record = deque(maxlen = lookback)
    trade_records = []
    portfolio_values = []
    dates = []
    hits = []
    enter_cond1, exit_cond1, enter_cond2, exit_cond2 = False, False, False, False
    
    # Make sure that merged_df before end date is not empty
    if merged_df[merged_df['date'] < end_date].empty:
        return 0, trade_records, portfolio_values, hits, dates

    for index, row in merged_df.iterrows():

        if index+1 < len(merged_df) and index > 0:
            
            # Add portfolio value for the day before
            prev_date = merged_df.loc[index - 1, "date"]
            if row["date"] >= start_date and prev_date <= end_date:
                dates.append(prev_date)
                prev_forex_value = forex_cash + stock_pos*row["stock_open"]
                if prev_forex_value > 0:
                    prev_forex_value /= row['avg_ask_non_us_at']
                else:
                    prev_forex_value /= row['avg_bid_non_us_at']
                portfolio_values.append(prev_cash + prev_adr_pos*merged_df.loc[index - 1, 'adr_close'] 
                                        + prev_forex_value)
                
            # Before US Market Opens
            diff_record.append(merged_df.loc[index-1,'adr_close_per_unit'] - row['stock_close_per_unit']/row['avg_us_before'])
            if len(diff_record) == lookback and row["date"] >= start_date and row["date"] <= end_date:
                if stock_pos > 0:
                    holding_period += 1
                    cash -= 0.0001*borrowing_bps*(1/252)*abs(adr_pos)*merged_df.loc[index - 1, 'adr_close']
                    multiplier = (1 + 0.01*(2 + row["ir"])*(1/252))
                    forex_cash *= multiplier
                prev_cash, prev_adr_pos = cash, adr_pos
                    
                mean = np.array(diff_record).mean()
                std = np.array(diff_record).std()
                
                # If a concurrent trade is not already being placed
                if not (enter_cond2 or exit_cond2):
                    enter_cond1 = (diff_record[-1] > mean + entry_cond1_val*std
                                   and diff_record[-1] <= mean + stop_loss_cond1*std
                                   and stock_pos == 0 and adr_pos == 0)
                    exit_cond1 = ((diff_record[-1] < mean + exit_cond1_val*std
                                  or diff_record[-1] > mean + stop_loss_cond1*std
                                  or (holding_period == maximum_holding_period and trade_type == 1))
                                  and stock_pos > 0 and adr_pos < 0)
                    
                    if enter_cond1:
                        portfolio_value_before_entering = cash
                        adr_volume = 0.2*(merged_df.loc[index-volume_lookback:index - 1,:]["adr_volume"].median()/row["adr_num_per_unit"])
                        stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                        units = int(min(cash/row['adr_close_per_unit'],
                                        cash/(row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before']), 
                                        adr_volume, 
                                        stock_volume))
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"])
                        
                        temp_risk_lookback = min(risk_lookback, index)
                        current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                        stock_values = np.array((current["stock_close"]/current["avg_us_before"])*stock_quantity) 
                        adr_values = np.array(merged_df.loc[(index - temp_risk_lookback):(index-1)]["adr_close"]*adr_quantity)
                        sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                        if (var > portfolio_value_before_entering*var_limit or 
                            max_drawdown_abs > max_drawdown_limit*starting_cash or 
                            sigma > portfolio_value_before_entering*sigma_limit):
                            frac = min((portfolio_value_before_entering*var_limit)/var, 
                                       (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                      (portfolio_value_before_entering*sigma_limit)/sigma)
                            units = int(frac*units)
                            if units == 0:
                                enter_cond1 = False
                            adr_quantity = int(units*row["adr_num_per_unit"])
                            stock_quantity = int(units*row["stock_num_per_unit"]) 
                        if units != 0:
                            adr_pos -= adr_quantity
                            adr_px = row['adr_open']*short_multiplier
                            cash += adr_quantity*adr_px

                            stock_pos += stock_quantity
                            stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                            forex_cash -= stock_px_fx*stock_quantity
                            prev_cash, prev_adr_pos = cash, adr_pos
                            holding_period = 0
                            trade_type = 1
                            trade_records.append("Opening positions:\n")
                            # Times in EST
                            trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {row['date']}\n")
                            trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")

                    elif exit_cond1:

                        adr_px = row['adr_open']*long_multiplier
                        cash -= abs(adr_pos)*adr_px
                        stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                        forex_cash += stock_px_fx*stock_pos
                        if forex_cash > 0:
                            forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                        else:
                            forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                        cash += forex_cash
                        forex_cash = 0
                        trade_records.append("Closing positions:\n")
                        # Times in EST
                        trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {row['date']}\n")
                        trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                        stock_pos, adr_pos = 0, 0
                        holding_period = None
                        trade_type = None
                        prev_cash, prev_adr_pos = cash, adr_pos
                        if cash > portfolio_value_before_entering:
                            hits.append(1)
                        else:
                            hits.append(0)
                        
            # Before Asian Market Opens
            diff_record.append(row['adr_close_per_unit'] - row['stock_close_per_unit']/merged_df.loc[index+1,'avg_non_us_before'])
            # The 2nd trade of condition 2 falls on the next day
            if len(diff_record) == lookback and row["date"] >= start_date and merged_df.loc[index+1,"date"] <= end_date:
                mean = np.array(diff_record).mean()
                std = np.array(diff_record).std()
                # If a concurrent trade is not already being placed
                if not (enter_cond1 or exit_cond1):
                    enter_cond2 = (diff_record[-1] > mean + entry_cond2_val*std
                                   and diff_record[-1] <= mean + stop_loss_cond2*std
                                   and stock_pos == 0 and adr_pos == 0)
                    exit_cond2 = ((diff_record[-1] < mean + exit_cond2_val*std
                                  or diff_record[-1] > mean + stop_loss_cond2*std
                                  or (holding_period == maximum_holding_period and trade_type == 2))
                                  and stock_pos > 0 and adr_pos < 0)

                    if enter_cond2:
                        portfolio_value_before_entering = cash
                        adr_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["adr_volume"].median()/row["adr_num_per_unit"])
                        stock_volume = 0.2*(merged_df.loc[index-volume_lookback+1:index,:]["stock_volume"].median()/row["stock_num_per_unit"])
                        units = int(min(cash/merged_df.loc[index-1,'adr_close_per_unit'],
                                        cash/(row['stock_close_per_unit']/row['avg_us_before']), 
                                        adr_volume, 
                                        stock_volume))
                        adr_quantity = int(units*row["adr_num_per_unit"])
                        stock_quantity = int(units*row["stock_num_per_unit"])
                        temp_risk_lookback = min(risk_lookback, index)
                        current = merged_df.loc[(index - temp_risk_lookback + 1):index].copy()
                        next_day = merged_df.loc[(index - temp_risk_lookback + 2):(index + 1)].copy()
                        stock_values = (np.array((current["stock_close"])/np.array(next_day["avg_non_us_before"]))*stock_quantity) 
                        adr_values = np.array(current["adr_close"]*adr_quantity)
                        sigma, var, max_drawdown_abs = get_risk_statistics(stock_values, adr_values, var_ci)
                        if (var > portfolio_value_before_entering*var_limit or 
                            max_drawdown_abs > max_drawdown_limit*starting_cash or 
                            sigma > portfolio_value_before_entering*sigma_limit):
                            frac = min((portfolio_value_before_entering*var_limit)/var, 
                                       (max_drawdown_limit*starting_cash)/max_drawdown_abs,
                                      (portfolio_value_before_entering*sigma_limit)/sigma)
                            units = int(frac*units)
                            if units == 0:
                                enter_cond2 = False
                            adr_quantity = int(units*row["adr_num_per_unit"])
                            stock_quantity = int(units*row["stock_num_per_unit"])  
                        if units != 0:
                            stock_pos += stock_quantity
                            stock_px_fx = merged_df.loc[index+1,'stock_open']*long_multiplier
                            forex_cash -= stock_px_fx*stock_quantity
                            # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                            prev_cash, prev_adr_pos = cash, adr_pos

                            adr_pos -= adr_quantity
                            adr_px = merged_df.loc[index+1,'adr_open']*short_multiplier
                            cash += adr_quantity*adr_px
                            holding_period = 0
                            trade_type = 2
                            trade_records.append("Opening positions:\n")
                            # Times in EST
                            trade_records.append(f"We bought {stock_quantity} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                            trade_records.append(f"We sold {adr_quantity} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")

                    elif exit_cond2:
                        stock_px_fx = merged_df.loc[index+1,'stock_open']*short_multiplier
                        forex_cash += stock_px_fx*stock_pos
                        if forex_cash > 0:
                            forex_cash /= merged_df.loc[index+1,'avg_ask_non_us_at']
                        else:
                            forex_cash /= merged_df.loc[index+1,'avg_bid_non_us_at']
                        cash += forex_cash
                        forex_cash = 0
                        # We store the current cash/adr position, because the trade below will occur on the next day (EST)
                        prev_cash, prev_adr_pos = cash, adr_pos
                        
                        adr_px = merged_df.loc[index+1,'adr_open']*long_multiplier
                        cash -= abs(adr_pos)*adr_px
                        trade_records.append("Closing positions:\n")
                        # Times in EST
                        trade_records.append(f"We sold {stock_pos} shares of underlying stock at the price of {stock_px_fx} foreign dollars on {row['date']}\n")
                        trade_records.append(f"We bought {-adr_pos} shares of ADR at the price of {adr_px} on {merged_df.loc[index+1,'date']}\n")
                        stock_pos, adr_pos = 0, 0
                        holding_period = None
                        trade_type = None
                        if cash > portfolio_value_before_entering:
                            hits.append(1)
                        else:
                            hits.append(0)

    ret = (portfolio_values[-1] - starting_cash)/starting_cash

    return ret, trade_records, portfolio_values, hits, dates

# Hyperparameter Tuning

Hyperparameters:
1. Lookback window
2. Entry threshold
3. Exit threshold
4. Stop-loss threshold

Steps:
1. HP Tune each strategy for each of the pairs
2. Store results for each pair in hp_log_sfx{version}.txt
3. Store results for each strategy in results_sfx{version}.txt
4. Store best strategy for each pair in results_sfx_all.txt

In [15]:
window_grid = [30, 60, 100]
entry_grid = [1, 1.5, 2]
exit_grid = [-0.5, 0, 0.5]
stop_loss_grid = [2.5, 3, 3.5]
oos_start_date = "2017-04-11"
oos_end_date = "2018-04-11"

"""
HP Tune on coarse grid
"""

def hp_tune(pairs_trade_strategy, version, country, adr, window_grid = [30,60,100], 
            entry_grid = [1,1.5,2], exit_grid = [-0.5,0,0.5], stop_loss_grid = [2.5,3,3.5], fine = False):
    hps = list(itertools.product(*[window_grid, entry_grid, exit_grid, stop_loss_grid]))
    merged_df = data_processing(country, adr, fx_dict)
    hp_log = []
    max_ret = -10000
    max_ret_hps = ()
    max_ret_port = []
    max_ret_hr = 0
    max_ret_dd = 0
    max_ret_dates = []
    for hp in hps:
        if version == '1' or version == '2':
            ret1, _, portfolio_values1, hits1, dates1 = pairs_trade_strategy(merged_df, 
                                                                         lookback = hp[0], 
                                                                         entry = hp[1], 
                                                                         exit = hp[2], 
                                                                         stop_loss = hp[3], 
                                                                         end_date = oos_start_date)
            ret2, _, portfolio_values2, hits2, dates2 = pairs_trade_strategy(merged_df, 
                                                                             cash = 250000 if len(portfolio_values1) == 0 else portfolio_values1[-1],
                                                                             lookback = hp[0], 
                                                                             entry = hp[1], 
                                                                             exit = hp[2], 
                                                                             stop_loss = hp[3],
                                                                             start_date = oos_end_date)
            ret = (portfolio_values2[-1] - 250000) / 250000
            portfolio_values = portfolio_values1 + portfolio_values2
            hits = hits1 + hits2
            dates = dates1 + dates2
        else:
            ret1, _, portfolio_values1, hits1, dates1 = pairs_trade_strategy(merged_df, 
                                                                         lookback = hp[0], 
                                                                         entry_cond1_val = hp[1], 
                                                                         entry_cond2_val = hp[1],
                                                                         exit_cond1_val = hp[2],
                                                                         exit_cond2_val = hp[2],
                                                                         stop_loss_cond1 = hp[3],
                                                                         stop_loss_cond2 = hp[3],
                                                                         end_date = oos_start_date)
            ret2, _, portfolio_values2, hits2, dates2 = pairs_trade_strategy(merged_df, 
                                                                             cash = 250000 if len(portfolio_values1) == 0 else portfolio_values1[-1],
                                                                             lookback = hp[0], 
                                                                             entry_cond1_val = hp[1], 
                                                                             entry_cond2_val = hp[1],
                                                                             exit_cond1_val = hp[2],
                                                                             exit_cond2_val = hp[2],
                                                                             stop_loss_cond1 = hp[3],
                                                                             stop_loss_cond2 = hp[3],
                                                                             start_date = oos_end_date)
            ret = (portfolio_values2[-1] - 250000) / 250000
            portfolio_values = portfolio_values1 + portfolio_values2
            hits = hits1 + hits2
            dates = dates1 + dates2
        
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(portfolio_values)*100,2)
        if ret > max_ret:
            max_ret = ret
            max_ret_hps = hp
            max_ret_port = portfolio_values
            max_ret_dates = dates
            max_ret_hr = hit_ratio
            max_ret_dd = max_drawdown
        hp_log.append(f'{hp}: Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%\n')
    logs = [f'(Lookback, Entry, Exit, Stop-loss)\n',
            f'Best HPs: {max_ret_hps}, Return: {max_ret}%, Hit Ratio: {max_ret_hr}%, Max Drawdown: {max_ret_dd}%\n']
    logs = logs + hp_log 
    if fine:
        fname = f'eric_jh_data/{country}/{adr}/hp_log_fine_sfx{version}.txt' 
    else:
        fname = f'eric_jh_data/{country}/{adr}/hp_log_sfx{version}.txt' 
    f = open(fname, 'w')
    f.writelines(logs)
    f.close()
    if fine:
        plot_returns(max_ret_dates, max_ret_port, country, adr, save = True, filename = f'is_sfx_fine_hp_pnl_plot_v{version}.png')
    else:
        plot_returns(max_ret_dates, max_ret_port, country, adr, save = True, filename = f'is_sfx_hp_pnl_plot_v{version}.png')
    
    best_hps = f'Country: {country}, ADR_Stock: {adr}, HPs: {max_ret_hps}, Return: {max_ret}%\n'
    if fine:
        fname = f'results_fine_sfx{version}.txt'
    else:
        fname = f'results_sfx{version}.txt'
    f = open(fname, 'a')
    f.writelines(best_hps)
    f.close()

In [17]:
v1 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v1, '1', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.2min remaining: 10.1min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  2.1min remaining:  7.9min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  2.2min remaining:  4.7min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  2.4min remaining:  3.2min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  2.8min remaining:  2.4min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  3.6min remaining:  2.0min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  4.0min remaining:  1.3min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  4.1min remaining:   40.7s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  4.2min finished


In [18]:
v2 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v2, '2', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.3min remaining: 10.5min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  2.3min remaining:  8.6min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  2.4min remaining:  5.0min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  2.5min remaining:  3.3min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  2.6min remaining:  2.3min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  3.6min remaining:  2.0min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  4.1min remaining:  1.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  4.2min remaining:   41.4s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  4.3min finished


In [19]:
v3a = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v3a, '3a', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.8min remaining: 14.6min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  3.1min remaining: 11.6min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  3.3min remaining:  7.1min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  3.4min remaining:  4.5min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  3.6min remaining:  3.1min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  4.8min remaining:  2.7min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  5.4min remaining:  1.8min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  5.7min remaining:   56.7s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  5.7min finished


In [20]:
v3b = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune)(pairs_trade_v3b, '3b', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed:  1.6min remaining: 13.5min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed:  3.0min remaining: 11.1min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed:  3.4min remaining:  7.3min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed:  3.6min remaining:  4.8min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed:  3.6min remaining:  3.2min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed:  4.8min remaining:  2.7min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed:  5.4min remaining:  1.8min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed:  5.8min remaining:   57.8s
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed:  5.9min finished


### Consolidate results for each pair

In [21]:
def consolidate(filename):
    results = []
    variants = ['1', '2', '3a', '3b']
    for v in variants:
        fname = f'{filename}{v}.txt'
        with open(fname) as f:
            results.append(f.read())

    summ = []        
    for (country, adr) in list_pairs:
        summ.append(f'Country: {country}, ADR_Stock: {adr}\n')
        for v in range(4):
            ind = results[v].find(f'Country: {country}, ADR_Stock: {adr}')
            end = results[v][ind:].find('\n')
            res = results[v][ind + len(f'Country: {country}, ADR_Stock: {adr}, '):ind + end]
            summ.append(f'Variant {variants[v]}: ' + res + '\n')

    fname = f'{filename}_all.txt'
    f = open(fname, 'w')
    f.writelines(summ)
    f.close()

In [22]:
consolidate('results_sfx')

### Sort results for each pair based on return

In [17]:
variants = ['1', '2', '3a', '3b']

def sort_res(string):
    ind = string.find('Return: ')
    end = string[ind:].find('%')
    return float(string[ind + 8:ind + end])

def sort_results(filename):
    for (country, adr) in list_pairs:
        for v in variants:
            fname = f'eric_jh_data/{country}/{adr}/{filename}{v}.txt' 
            f = open(fname, 'r')
            res = f.readlines()
            f.close()
            sorted_res = sorted(res[2:], key = sort_res, reverse = True)
            res = res[:2] + sorted_res
            f = open(fname, 'w')
            f.writelines(res)
            f.close()

In [26]:
sort_results('hp_log_sfx')

## Finer HP Tuning

In [27]:
from operator import add

"""
HP tune fine grid around coarse values
"""

def hp_tune_fine(pairs_trade_strategy, version, country, adr):
    fname = f'eric_jh_data/{country}/{adr}/hp_log_sfx{version}.txt' 
    f = open(fname, 'r')
    res = f.readlines()
    f.close()
    ind = res[1].find(', Return: ')
    hps = res[1][:ind]
    coarse_hps = tuple(map(float, hps[11:-1].split(', ')))
    fine_window = tuple( map(add, [int(coarse_hps[0])]*5, [-10, 5, 0, 5, 10]) )
    fine_entry = tuple( map(add, [coarse_hps[1]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    fine_exit =  tuple( map(add, [coarse_hps[2]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    fine_stop_loss =  tuple( map(add, [coarse_hps[3]]*5, [-0.2, -0.1, 0, 0.1, 0.2]) )
    
    hp_tune(pairs_trade_strategy, version, country, adr, fine_window, fine_entry, fine_exit, fine_stop_loss, fine = True)

In [28]:
v1 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v1, '1', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 10.3min remaining: 86.2min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 14.5min remaining: 53.0min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 17.3min remaining: 36.5min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 19.0min remaining: 25.4min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 20.9min remaining: 18.1min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 24.8min remaining: 13.8min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 26.2min remaining:  8.7min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 27.1min remaining:  4.5min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 29.2min finished


In [29]:
v2 = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v2, '2', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 11.1min remaining: 92.9min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 15.3min remaining: 56.0min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 21.3min remaining: 44.9min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 23.0min remaining: 30.7min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 24.6min remaining: 21.3min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 27.2min remaining: 15.1min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 31.3min remaining: 10.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 33.5min remaining:  5.6min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 36.8min finished


In [30]:
v3a = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v3a, '3a', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 12.6min remaining: 105.1min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 21.0min remaining: 76.9min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 25.5min remaining: 53.9min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 27.0min remaining: 36.0min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 32.5min remaining: 28.2min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 34.7min remaining: 19.3min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 39.2min remaining: 13.1min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 41.3min remaining:  6.9min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 46.1min finished


In [31]:
v3b = Parallel(n_jobs=-1, verbose = 10)(delayed(hp_tune_fine)(pairs_trade_v3b, '3b', country, adr) for (country, adr) in list_pairs)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of  28 | elapsed: 15.0min remaining: 124.8min
[Parallel(n_jobs=-1)]: Done   6 out of  28 | elapsed: 20.9min remaining: 76.8min
[Parallel(n_jobs=-1)]: Done   9 out of  28 | elapsed: 26.1min remaining: 55.1min
[Parallel(n_jobs=-1)]: Done  12 out of  28 | elapsed: 33.6min remaining: 44.8min
[Parallel(n_jobs=-1)]: Done  15 out of  28 | elapsed: 36.7min remaining: 31.8min
[Parallel(n_jobs=-1)]: Done  18 out of  28 | elapsed: 38.9min remaining: 21.6min
[Parallel(n_jobs=-1)]: Done  21 out of  28 | elapsed: 43.4min remaining: 14.4min
[Parallel(n_jobs=-1)]: Done  24 out of  28 | elapsed: 45.0min remaining:  7.5min
[Parallel(n_jobs=-1)]: Done  28 out of  28 | elapsed: 51.2min finished


In [32]:
consolidate('results_fine_sfx')
sort_results('hp_log_fine_sfx')

## Generate IS trade logs

In [29]:
pairs_trade_strategy = [pairs_trade_v1, pairs_trade_v2, pairs_trade_v3a, pairs_trade_v3b]

for (country, adr) in list_pairs:
    merged_df = data_processing(country, adr, fx_dict)
    for i in range(4):
        v = variants[i]
        strat = pairs_trade_strategy[i]
        fname = f'eric_jh_data/{country}/{adr}/hp_log_fine_sfx{v}.txt' 
        f = open(fname, 'r')
        res = f.readlines()
        f.close()
        ind = res[1].find(', Return: ')
        hps = res[1][:ind]
        best_hps = tuple(map(float, hps[11:-1].split(', ')))
        if i < 2:
            ret1, tr1, pv1, hits1, dates1 = strat(merged_df, 
                                                 cash = 250000,
                                                 lookback = int(best_hps[0]), 
                                                 entry = best_hps[1], 
                                                 exit = best_hps[2],
                                                 stop_loss = best_hps[3],
                                                 end_date = oos_start_date)
            ret2, tr2, pv2, hits2, dates2 = strat(merged_df, 
                                                 cash = 250000 if len(pv1) == 0 else pv1[-1],
                                                 lookback = int(best_hps[0]), 
                                                 entry = best_hps[1], 
                                                 exit = best_hps[2],
                                                 stop_loss = best_hps[3],
                                                 start_date = oos_end_date)
            ret = (pv2[-1] - 250000) / 250000
            tr = tr1 + tr2
            pv = pv1 + pv2
            hits = hits1 + hits2
            dates = dates1 + dates2
            
        else:
            ret1, tr1, pv1, hits1, dates1 = strat(merged_df, 
                                                 cash = 250000,
                                                 lookback = int(best_hps[0]), 
                                                 entry_cond1_val = best_hps[1], 
                                                 entry_cond2_val = best_hps[1],
                                                 exit_cond1_val = best_hps[2],
                                                 exit_cond2_val = best_hps[2],
                                                 stop_loss_cond1 = best_hps[3],
                                                 stop_loss_cond2 = best_hps[3],
                                                 end_date = oos_start_date)
            ret2, tr2, pv2, hits2, dates2 = strat(merged_df, 
                                                 cash = 250000 if len(pv1) == 0 else pv1[-1],
                                                 lookback = int(best_hps[0]), 
                                                 entry_cond1_val = best_hps[1], 
                                                 entry_cond2_val = best_hps[1],
                                                 exit_cond1_val = best_hps[2],
                                                 exit_cond2_val = best_hps[2],
                                                 stop_loss_cond1 = best_hps[3],
                                                 stop_loss_cond2 = best_hps[3],
                                                 start_date = oos_end_date)
            ret = (pv2[-1] - 250000) / 250000
            tr = tr1 + tr2
            pv = pv1 + pv2
            hits = hits1 + hits2
            dates = dates1 + dates2
            
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(pv)*100,2)
            
        plot_returns(dates, pv, country, adr, save = True, filename = f'is_sfx_hp_pnl_plot_v{v}.png')
        
        result = f'Country: {country}\nADR: {adr}\nReturn: {ret}%\nHit Ratio: {hit_ratio}%\nMax Drawdown: {max_drawdown}%\n'
        result += f'\nTrades\n\n'
        
        fname = f'eric_jh_data/{country}/{adr}/is_log_sfx{v}.txt' 
        f = open(fname, 'w')
        f.writelines(result)
        f.writelines(tr)
        f.writelines(f'\nHyperparameters\n\n{best_hps}')
        f.close()

## Testing Out-of-sample Returns

In [30]:
pairs_trade_strategy = [pairs_trade_v1, pairs_trade_v2, pairs_trade_v3a, pairs_trade_v3b]

for (country, adr) in list_pairs:
    merged_df = data_processing(country, adr, fx_dict)
    for i in range(4):
        v = variants[i]
        strat = pairs_trade_strategy[i]
        fname = f'eric_jh_data/{country}/{adr}/hp_log_fine_sfx{v}.txt' 
        f = open(fname, 'r')
        res = f.readlines()
        f.close()
        ind = res[1].find(', Return: ')
        hps = res[1][:ind]
        best_hps = tuple(map(float, hps[11:-1].split(', ')))
        if i < 2:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry = best_hps[1], 
                             exit = best_hps[2],
                             stop_loss = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
        else:
            ret, tr, pv, hits, dates = strat(merged_df, 
                             cash = 250000,
                             lookback = int(best_hps[0]), 
                             entry_cond1_val = best_hps[1], 
                             entry_cond2_val = best_hps[1],
                             exit_cond1_val = best_hps[2],
                             exit_cond2_val = best_hps[2],
                             stop_loss_cond1 = best_hps[3],
                             stop_loss_cond2 = best_hps[3],
                             start_date = oos_start_date, 
                             end_date = oos_end_date)
            
        ret = np.round(ret*100, 2)
        hit_ratio = 0
        max_drawdown = 0
        if hits:
            hit_ratio = np.round(np.mean(hits)*100,2)
            max_drawdown = np.round(calc_max_drawdown(pv)*100,2)
            
        plot_returns(dates, pv, country, adr, save = True, filename = f'oos_sfx_hp_pnl_plot_v{v}.png')
        
        result = f'Country: {country}\nADR: {adr}\nReturn: {ret}%\nHit Ratio: {hit_ratio}%\nMax Drawdown: {max_drawdown}%\n'
        result += f'\nTrades\n\n'
        
        fname = f'eric_jh_data/{country}/{adr}/oos_log_sfx{v}.txt' 
        f = open(fname, 'w')
        f.writelines(result)
        f.writelines(tr)
        f.writelines(f'\nHyperparameters\n\n{best_hps}')
        f.close()
        
        fname = 'oos_sfx_cum_results.txt'
        f = open(fname, 'a')
        f.writelines(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%\n')
        f.close()
        print(f'{country} {adr} Variant{v}, Return: {ret}%, Hit Ratio: {hit_ratio}%, Max Drawdown: {max_drawdown}%')

Australia ATHE_ATH Variant1, Return: 0.08%, Hit Ratio: 78.95%, Max Drawdown: 0.01%
Australia ATHE_ATH Variant2, Return: 0.05%, Hit Ratio: 84.62%, Max Drawdown: 0.02%
Australia ATHE_ATH Variant3a, Return: 0.04%, Hit Ratio: 82.35%, Max Drawdown: 0.03%
Australia ATHE_ATH Variant3b, Return: 0.02%, Hit Ratio: 76.92%, Max Drawdown: 0.03%
Australia GENE_GTG Variant1, Return: 0.17%, Hit Ratio: 86.67%, Max Drawdown: 0.02%
Australia GENE_GTG Variant2, Return: 0.11%, Hit Ratio: 81.25%, Max Drawdown: 0.02%
Australia GENE_GTG Variant3a, Return: 0.22%, Hit Ratio: 88.89%, Max Drawdown: 0.06%
Australia GENE_GTG Variant3b, Return: 0.15%, Hit Ratio: 76.92%, Max Drawdown: 0.04%
Australia IMMP_IMM Variant1, Return: 0.01%, Hit Ratio: 100.0%, Max Drawdown: 0.01%
Australia IMMP_IMM Variant2, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMMP_IMM Variant3a, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMMP_IMM Variant3b, Return: 0.0%, Hit Ratio: 0%, Max Drawdown: 0%
Australia IMRN_IMC Var

  
  
  
  


China BGNE_6160 Variant3b, Return: 0%, Hit Ratio: 0%, Max Drawdown: 0%
China CEA_670 Variant1, Return: 0.15%, Hit Ratio: 66.67%, Max Drawdown: 0.1%
China CEA_670 Variant2, Return: 0.14%, Hit Ratio: 65.52%, Max Drawdown: 0.04%
China CEA_670 Variant3a, Return: 0.02%, Hit Ratio: 57.14%, Max Drawdown: 0.1%
China CEA_670 Variant3b, Return: -0.12%, Hit Ratio: 57.14%, Max Drawdown: 0.19%
China HNP_902 Variant1, Return: 0.01%, Hit Ratio: 37.5%, Max Drawdown: 0.15%
China HNP_902 Variant2, Return: -0.0%, Hit Ratio: 50.0%, Max Drawdown: 0.09%
China HNP_902 Variant3a, Return: 0.01%, Hit Ratio: 42.11%, Max Drawdown: 0.14%
China HNP_902 Variant3b, Return: -0.06%, Hit Ratio: 43.75%, Max Drawdown: 0.13%
China LFC_2628 Variant1, Return: 0.01%, Hit Ratio: 50.0%, Max Drawdown: 0.13%
China LFC_2628 Variant2, Return: -0.31%, Hit Ratio: 41.18%, Max Drawdown: 0.5%
China LFC_2628 Variant3a, Return: -0.38%, Hit Ratio: 41.18%, Max Drawdown: 0.5%
China LFC_2628 Variant3b, Return: -0.38%, Hit Ratio: 27.27%, Max D

## Store best variant + HPs to csv

In [45]:
fname = 'results_sfx_all.txt'
f = open(fname, 'r')
res = f.readlines()
f.close()

column_names = ['country', 'adr_stock', 'lookback', 'entry', 'exit', 'stoploss', 'variant']
summ_df = pd.DataFrame(columns = column_names)

for (country, adr) in list_pairs:
    best_ret = -1
    variant = ""
    lookback = 100
    entry = 1
    exit = 0
    stoploss = 3
    ind = res.index(f'Country: {country}, ADR_Stock: {adr}\n')
    for v in range(1,5):
        ret_ind = res[ind + v].find('Return: ')
        ret = float(res[ind + v][ret_ind + 8 : -2])
        if ret > best_ret:
            variant = variants[v - 1]
            hp_ind_start = res[ind + v].find('HPs: ')
            hp_ind_end = res[ind + v].find(', Return: ')
            hps = res[ind + v][hp_ind_start + 5:hp_ind_end]
            best_hps = hps[1:-1].split(', ')
            lookback = int(best_hps[0])
            entry = float(best_hps[1])
            exit = float(best_hps[2])
            stoploss = float(best_hps[3])
            best_ret = ret
    temp_df = pd.DataFrame([[country, adr, lookback, entry, exit, stoploss, variant]], columns = column_names)
    summ_df = summ_df.append(temp_df, ignore_index = True)

In [46]:
summ_df

Unnamed: 0,country,adr_stock,lookback,entry,exit,stoploss,variant
0,Australia,ATHE_ATH,100,1.0,-0.5,2.5,3a
1,Australia,GENE_GTG,30,1.0,-0.5,2.5,3a
2,Australia,IMMP_IMM,60,1.0,-0.5,3.0,1
3,Australia,IMRN_IMC,30,1.0,0.0,2.5,3b
4,Australia,JHX_JHX,100,1.0,0.5,3.5,3a
5,Australia,KZIA_KZA,100,1.0,-0.5,3.5,3b
6,Australia,MESO_MSB,30,1.0,-0.5,3.5,3b
7,Australia,PLL_PLL,60,2.0,0.5,3.0,3a
8,Australia,WBK_WBC,100,1.5,0.5,3.5,3b
9,China,ACH_2600,30,1.5,0.5,2.5,2


In [48]:
summ_df.to_csv('hps_sfx.csv', index = False)