In [1]:
#from ib_insync import *
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels import regression,stats
import math
import datetime 
import statsmodels.formula.api as smf 
from datetime import date, time, datetime, timedelta
from collections import deque

In [5]:
def _read_price_csv(path, prefix):
    """Read one price CSV and rename its open/close columns to `<prefix>_open` / `<prefix>_close`.

    Returns None when the file cannot be opened or parsed (OSError covers missing or
    unreadable files; pandas' EmptyDataError / ParserError are ValueError subclasses).
    """
    try:
        frame = pd.read_csv(path, index_col = 0)
    except (OSError, ValueError):
        return None
    return frame.rename(columns = {'close': f'{prefix}_close', 'open': f'{prefix}_open'})


def data_processing(country, adr, fx_dict):
    """
    Build the merged ADR / underlying / FX price table for one pair.

    Parameters
    ----------
    country : str
        Key into `fx_dict` and name of the country folder under `eric_jh_data/`.
    adr : str
        Name of the pair folder (holds `adr.csv` and `underlying.csv`).
    fx_dict : dict
        Maps country -> (path to FX csv, fx_type). With fx_type == 1 the local price is
        divided by the FX rate to get USD; otherwise it is multiplied.

    Returns
    -------
    pandas.DataFrame or None
        One row per date present in all three files (inner join on `date`), with the
        columns date, adr_open, adr_close, stock_open, stock_close, fx_open, fx_close,
        stock_open_usd and stock_close_usd. One side is rescaled by an integer ratio so
        that ADR and underlying are quoted on a comparable basis.
        None is returned (after printing a message) when the ADR or underlying file
        cannot be read, when fewer than two common dates exist, or when the reference
        prices used for the ratio are unusable.

    Notes
    -----
    A missing or broken FX file is NOT swallowed: it is a configuration error and raises.
    """
    adr_path = f'eric_jh_data/{country}/{adr}/adr.csv'
    stock_path =  f'eric_jh_data/{country}/{adr}/underlying.csv'
    fx_path = fx_dict[country][0]
    fx_type =  fx_dict[country][1]

    adr_df = _read_price_csv(adr_path, 'adr')
    if adr_df is None:
        print(f"no data for ADR data of {adr} from {country}")
        return None

    stock_df = _read_price_csv(stock_path, 'stock')
    if stock_df is None:
        print(f"no data for underlying data of {adr} from {country}")
        return None

    fx_df = pd.read_csv(fx_path, index_col = 0).rename(columns = {'close':'fx_close', 'open':'fx_open'})

    # 'date' is the only column the frames share, so naming it explicitly keeps the
    # same inner join while guarding against an accidental extra shared column.
    merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close']],
                         stock_df.loc[:,['date', 'stock_open','stock_close']], on = 'date')
    merged_df = pd.merge(merged_df, fx_df.loc[:,['date', 'fx_open','fx_close']], on = 'date')

    # The ratio below is taken from the row labelled 1, so at least two rows are needed.
    if len(merged_df) < 2:
        print(f"not enough overlapping data for {adr} from {country}")
        return None

    if fx_type == 1:
        merged_df['stock_open_usd'] = merged_df['stock_open']/merged_df['fx_open']
        merged_df['stock_close_usd'] = merged_df['stock_close']/merged_df['fx_close']
    else:
        merged_df['stock_open_usd'] = merged_df['stock_open']*merged_df['fx_open']
        merged_df['stock_close_usd'] = merged_df['stock_close']*merged_df['fx_close']

    # 1 ADR is not necessarily 1 underlying share: estimate the integer conversion
    # ratio from the second row and scale up whichever side is the cheaper one.
    ref_stock = merged_df.loc[1,'stock_close_usd']
    ref_adr = merged_df.loc[1,'adr_open']
    usable = np.isfinite(ref_stock) and np.isfinite(ref_adr) and ref_stock > 0 and ref_adr > 0
    if not usable:
        # round() would raise on NaN/inf and a zero price would divide by zero.
        print(f"unusable reference prices for {adr} from {country}")
        return None

    ratio = round(ref_stock/ref_adr)
    if ratio >= 1:
        merged_df['adr_open'] = merged_df['adr_open']*ratio
        merged_df['adr_close'] = merged_df['adr_close']*ratio
    else:
        ratio = round(ref_adr/ref_stock)
        merged_df['stock_open_usd'] = merged_df['stock_open_usd']*ratio
        merged_df['stock_close_usd'] = merged_df['stock_close_usd']*ratio

    return merged_df
    

In [14]:
def pairs_trade(merged_df, lookback = 100, cash = 100000, entry = 1, exit = 0, stop_loss = 3):
    """
    Naive ADR / underlying pairs-trading backtest (short the ADR, long the underlying).

    The signal on each row is ``diff = adr_close - stock_close_usd``, compared with the
    mean and (population) standard deviation of the previous `lookback` diffs.

    * Open:  when flat and ``mean + entry*std < diff <= mean + stop_loss*std``.
      The ADR is sold at this row's `adr_close`; the underlying is bought at the NEXT
      row's `stock_open_usd`, because the Asian market only opens on the next session.
      Diffs already beyond the stop-loss level are not entered, otherwise a stopped-out
      trade would simply be reopened on the following row. As a consequence no trade is
      ever opened when `stop_loss <= entry`.
    * Close: when in a position and either ``diff < mean + exit*std`` (spread converged)
      or ``diff > mean + stop_loss*std`` (stop-loss). The ADR is bought back at this
      row's `adr_close`; the underlying is sold at the next row's `stock_open_usd`.
    * The first `lookback` rows only fill the rolling window; no trade is placed there.
    * No trade is placed on the last row, since there is no next open to execute at.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Needs the columns date, adr_close, stock_close_usd, stock_open_usd, stock_open.
        Rows are taken in their existing order; the index itself is ignored.
    lookback : int
        Length of the rolling window of diffs.
    cash : number
        Starting cash; half of it sizes each leg.
    entry, exit, stop_loss : number
        Thresholds expressed in standard deviations from the rolling mean.

    Returns
    -------
    (ret, trade_records)
        `ret` is (final value - starting cash) / starting cash, where any open position
        is marked at the last row's close prices. `trade_records` is a list of log lines.
        An empty `merged_df` yields ``(0.0, [])``.
    """
    starting_cash = cash
    stock_pos, adr_pos = 0, 0
    diff_record = deque(maxlen = lookback)
    trade_records = []

    # Work on a 0..n-1 index so that `index + 1` always addresses the next row.
    frame = merged_df.reset_index(drop = True)
    n_rows = len(frame)
    if n_rows == 0:
        return 0.0, trade_records

    for index, row in frame.iterrows():
        diff = row['adr_close'] - row['stock_close_usd']

        # Warm-up: only collect diffs. (`not diff_record` also covers lookback == 0,
        # where the window can never hold data and no statistics exist.)
        if index < lookback or not diff_record:
            diff_record.append(diff)
            continue

        window = np.array(diff_record)
        mean = window.mean()
        std = window.std()

        has_next = index + 1 < n_rows  # the stock leg executes at the next row's open
        in_position = stock_pos > 0 and adr_pos < 0

        if in_position:
            stopped_out = diff > mean + stop_loss * std
            converged = diff < mean + exit * std
            if has_next and (stopped_out or converged):
                next_open_usd = frame.loc[index+1,'stock_open_usd']
                cash -= abs(adr_pos)*row['adr_close']
                cash += stock_pos*next_open_usd

                # NOTE: the "Japanese dollars" label is hard-coded in the log text even
                # though the notebook also runs this for other countries.
                trade_records.append("Closing positions:\n")
                trade_records.append(f"We bought the {stock_pos} shares of ADR at the price of {row['adr_close']} on {row['date']}\n")
                trade_records.append(f"We sold the {stock_pos} shares of underlying stock at the price of {next_open_usd} USD ({frame.loc[index+1,'stock_open']} Japanese dollars) on {frame.loc[index+1,'date']}\n")
                stock_pos, adr_pos = 0, 0

        elif stock_pos == 0 and adr_pos == 0:
            if has_next and mean + entry * std < diff <= mean + stop_loss * std:
                quantity = min(int(0.5*cash/row['adr_close']),int(0.5*cash/row['stock_close_usd']))
                adr_pos -= quantity
                cash += quantity*row['adr_close']

                stock_px = frame.loc[index+1,'stock_open_usd'] # The actual px we get to trade is on the next day for asian market
                cash -= stock_px*quantity
                stock_pos += quantity
                trade_records.append("Opening positions:\n")
                trade_records.append(f"We sold the {quantity} shares of ADR at the price of {row['adr_close']} on {row['date']}\n")
                trade_records.append(f"We bought the {quantity} shares of underlying stock at the price of {stock_px} USD ({frame.loc[index+1,'stock_open']} Japanese dollars) on {frame.loc[index+1,'date']}\n")

        # The current diff joins the window only after it has been evaluated.
        diff_record.append(diff)

    last = n_rows - 1
    final_val = cash + adr_pos*frame.loc[last, 'adr_close'] + stock_pos*frame.loc[last, 'stock_close_usd']
    ret = (final_val - starting_cash)/starting_cash

    return ret, trade_records

In [8]:
# Run the default-parameter backtest for every ADR/underlying pair of every country
# and write one logs.txt per pair next to its data files.
from os import listdir
from os.path import isfile, join
from collections import defaultdict

mypath = 'eric_jh_data/'
countries = ['Australia', 'Japan', 'China']
adr_dict = {}
return_dict = defaultdict(list)
# country -> (FX csv path, fx_type); fx_type 1 means "divide local price by the rate",
# anything else means "multiply" (see data_processing).
fx_dict = {'Australia':('eric_jh_data/Forex/AUD_USD.csv',0),
           'Japan':('eric_jh_data/Forex/USD_JPY.csv',1),
           'China':('eric_jh_data/Forex/USD_HKD.csv',1)}
for country in countries:
    countrypath = mypath + country
    # Every non-file entry of the country folder is one pair. listdir() returns entries
    # in arbitrary order, so sort to make the run order reproducible.
    adr_names = sorted(f for f in listdir(countrypath) if not isfile(join(countrypath, f)))
    adr_dict[country] = adr_names

    for adr in adr_names:
        merged_df = data_processing(country, adr, fx_dict)
        # data_processing returns None (after printing why) when a pair has no usable data.
        if isinstance(merged_df, pd.core.frame.DataFrame):
            ret, trade_records = pairs_trade(merged_df)
            return_dict[country].append([adr, ret])
            logs = [f'The return of ADR_underlying pairs trading for {adr} from {country} is {ret*100}%\n']
            logs = logs + trade_records

            fname = f'eric_jh_data/{country}/{adr}/logs.txt'
            # Context manager closes the file even if writing fails part-way.
            with open(fname, 'w') as f:
                f.writelines(logs)

no data for underlying data of ATHE_ATH from Australia
no data for underlying data of IMMP_IMM from Australia
no data for underlying data of KZIA_KZA from Australia
no data for underlying data of PLL_PLL from Australia


In [9]:
# Because 1 ADR is not necessarily 1 underlying share, rescale one side by the rounded
# price ratio taken from the row labelled 1.
# NOTE(review): this repeats the ratio adjustment that data_processing() already applies
# before returning, and it acts on whatever `merged_df` the preceding loop cell left
# behind (the last pair that loaded successfully). Presumably a leftover scratch cell —
# confirm before relying on it.
ratio =round(merged_df.loc[1,'stock_close_usd']/merged_df.loc[1,'adr_open'])
if ratio >= 1:
    # Underlying is at least as expensive as the ADR: scale the ADR prices up.
    merged_df['adr_open'] = merged_df['adr_open']*ratio
    merged_df['adr_close'] = merged_df['adr_close']*ratio
else:
    # ADR is the more expensive side: invert the ratio and scale the underlying instead.
    ratio = round(merged_df.loc[1,'adr_open']/merged_df.loc[1,'stock_close_usd'])
    merged_df['stock_open_usd'] = merged_df['stock_open_usd']*ratio
    merged_df['stock_close_usd'] = merged_df['stock_close_usd']*ratio

# Hyperparameter Tuning

Hyperparameters:
1. Lookback window
2. Entry threshold
3. Exit threshold
4. Stop-loss threshold

In [15]:
# Exhaustive grid search over (lookback, entry, exit, stop-loss) for every pair.
# For each pair the best combination is stored in return_dict and the full grid is
# written to hp_log.txt next to the pair's data files.
import itertools
from collections import defaultdict
from os import listdir
from os.path import isfile, join

from tqdm import tqdm

window_grid = [30, 60, 100]
entry_grid = [1, 1.5, 2]
exit_grid = [-0.5, 0, 0.5]
stop_loss_grid = [2.5, 3, 3.5]

hps = list(itertools.product(window_grid, entry_grid, exit_grid, stop_loss_grid))

mypath = 'eric_jh_data/'
countries = ['Australia', 'Japan', 'China']
adr_dict = {}
return_dict = defaultdict(list)
fx_dict = {'Australia':('eric_jh_data/Forex/AUD_USD.csv',0),
           'Japan':('eric_jh_data/Forex/USD_JPY.csv',1),
           'China':('eric_jh_data/Forex/USD_HKD.csv',1)}
for country in countries:
    countrypath = mypath + country
    # Every non-file entry of the country folder is one pair; sorted because listdir()
    # returns entries in arbitrary order.
    adr_names = sorted(f for f in listdir(countrypath) if not isfile(join(countrypath, f)))
    adr_dict[country] = adr_names

    for adr in tqdm(adr_names):
        merged_df = data_processing(country, adr, fx_dict)
        if isinstance(merged_df, pd.core.frame.DataFrame):
            # Start from "nothing seen yet" rather than 0, so that a pair whose returns
            # are all non-positive still reports its true best combination instead of a
            # fabricated 0% attributed to the default hyperparameters.
            max_ret = None
            max_ret_hps = None
            hp_log = []
            for hp in hps:
                hp_lookback, hp_entry, hp_exit, hp_stop = hp
                ret = pairs_trade(merged_df, lookback = hp_lookback, entry = hp_entry,
                                  exit = hp_exit, stop_loss = hp_stop)[0]
                # Strict '>' keeps the first combination on ties.
                if max_ret is None or ret > max_ret:
                    max_ret = ret
                    max_ret_hps = hp
                hp_log.append(f'Lookback: {hp[0]}\tEntry: {hp[1]}\tExit: {hp[2]}\tStop-loss: {hp[3]}\tReturn: {ret*100}%\n')
            return_dict[country].append([adr, max_ret, max_ret_hps])
            logs = [f'The return of ADR_underlying pairs trading for {adr} from {country} is {max_ret*100}%\n',
                    f'Hyperparameters: Lookback: {max_ret_hps[0]}\tEntry: {max_ret_hps[1]}\tExit: {max_ret_hps[2]}\tStop-loss: {max_ret_hps[3]}\n']
            logs = logs + hp_log

            fname = f'eric_jh_data/{country}/{adr}/hp_log.txt'
            # Context manager closes the file even if writing fails part-way.
            with open(fname, 'w') as f:
                f.writelines(logs)

  0%|          | 0/9 [00:00<?, ?it/s]

no data for underlying data of ATHE_ATH from Australia


 22%|██▏       | 2/9 [00:22<01:19, 11.41s/it]

no data for underlying data of IMMP_IMM from Australia


 56%|█████▌    | 5/9 [00:55<00:51, 12.88s/it]

no data for underlying data of KZIA_KZA from Australia


 78%|███████▊  | 7/9 [01:14<00:23, 11.91s/it]

no data for underlying data of PLL_PLL from Australia


100%|██████████| 9/9 [01:34<00:00, 10.47s/it]
100%|██████████| 10/10 [03:11<00:00, 19.14s/it]
100%|██████████| 9/9 [02:48<00:00, 18.73s/it]


In [22]:
# Write one summary line per pair: best return and the hyperparameters that produced it.
# Each return_dict entry must be [adr, best_return, (lookback, entry, exit, stop_loss)],
# i.e. the shape produced by the grid-search cell (the plain backtest cell stores only
# [adr, ret] and would fail on pairs[2]).
fname = 'returns.txt'
with open(fname, 'w') as f:  # closed automatically, even if formatting a line raises
    for country in return_dict:
        for pairs in return_dict[country]:
            string = f'Best return for {pairs[0]} from {country} is {pairs[1] * 100}% with lookback {pairs[2][0]}, entry {pairs[2][1]}, exit {pairs[2][2]}, stop-loss {pairs[2][3]}\n'
            # write(), not writelines(): `string` is a single line, and writelines()
            # on a str would iterate it character by character.
            f.write(string)

# Portfolio Allocation



In [None]:
# NOTE: this cell is a single bare string literal, so nothing in it is executed.
# It keeps an earlier, hard-coded single-pair (Japan/IX_8591) version of the loading and
# merge steps for reference only.
"""
Old code snippets

# Grab the csv data for the stocks
adr_path = 'eric_jh_data/Japan/IX_8591/adr.csv'
stock_path = 'eric_jh_data/Japan/IX_8591/underlying.csv'
fx_path = 'eric_jh_data/Forex/USD_JPY.csv'
adr_df = pd.read_csv(adr_path, index_col = 0).rename(columns = {'close':'adr_close', 'open':'adr_open'})
stock_df = pd.read_csv(stock_path, index_col = 0).rename(columns = {'close':'stock_close', 'open':'stock_open'})
fx_df = pd.read_csv(fx_path, index_col = 0).rename(columns = {'close':'fx_close', 'open':'fx_open'})

# Find the ratio between adr and stock:


merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close']], stock_df.loc[:,['date', 'stock_open','stock_close']])
merged_df = pd.merge(merged_df, fx_df.loc[:,['date', 'fx_open','fx_close']])
merged_df['stock_open_usd'] = merged_df['stock_open']/merged_df['fx_open']
merged_df['stock_close_usd'] = merged_df['stock_close']/merged_df['fx_close']



"""