In [24]:
#from ib_insync import *
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
from statsmodels import regression,stats
import math
import datetime 
import statsmodels.formula.api as smf 
from datetime import date, time, datetime, timedelta
from collections import deque

In [91]:
from os import listdir
from os.path import isfile, join
from collections import defaultdict

# Driver cell: run the pairs-trading backtest for every ADR folder of every
# country and write a per-ADR log file next to the data.
# NOTE: data_processing and pairs_trade are defined in cells further down the
# notebook; on a fresh kernel those cells must be executed before this one.

mypath = 'eric_jh_data/'
countries = ['Australia', 'Japan', 'China']
adr_dict = {}                     # country -> list of ADR folder names
return_dict = defaultdict(list)   # country -> list of [adr, return]
# country -> (fx csv path, fx_type); fx_type is forwarded to data_processing,
# which divides local prices by the rate when it is 1 and multiplies otherwise.
fx_dict = {'Australia':('eric_jh_data/Forex/AUD_USD.csv',0),
           'Japan':('eric_jh_data/Forex/USD_JPY.csv',1),
           'China':('eric_jh_data/Forex/USD_HKD.csv',1)}

for country in countries:
    countrypath = mypath + country
    # Every non-file entry (i.e. sub-directory) of the country folder is one ADR.
    adr_names = [entry for entry in listdir(countrypath) if not isfile(join(countrypath, entry))]
    adr_dict[country] = adr_names

    for adr in adr_names:
        merged_df = data_processing(country, adr, fx_dict)
        # data_processing returns None when input data is missing; skip those.
        if not isinstance(merged_df, pd.DataFrame):
            continue

        ret, trade_records = pairs_trade(merged_df)
        return_dict[country].append([adr, ret])
        logs = [f'The return of ADR_underlying pairs trading for {adr} from {country} is {ret*100}%\n']
        logs = logs + trade_records

        fname = f'eric_jh_data/{country}/{adr}/logs.txt'
        # Context manager guarantees the handle is closed even if writing fails.
        with open(fname, 'w') as f:
            f.writelines(logs)
            
        
        

There is no underlying data for Australia, PLL_PLL
There is no underlying data for Australia, ATHE_ATH
There is no underlying data for Australia, IMMP_IMM
There is no underlying data for Australia, KZIA_KZA


In [34]:
def data_processing(country, adr, fx_dict):
    """
    Load ADR, underlying-stock and FX prices for one pair and align them.

    Parameters
    ----------
    country : str
        Country folder name under ``eric_jh_data/``; also the key into ``fx_dict``.
    adr : str
        ADR folder name under ``eric_jh_data/{country}/``.
    fx_dict : dict
        Maps country -> ``(fx_csv_path, fx_type)``. When ``fx_type == 1`` the
        local stock price is divided by the FX rate to obtain USD, otherwise
        it is multiplied by it.

    Returns
    -------
    pandas.DataFrame or None
        One row per date present in all three files, with columns ``date``,
        ``adr_open``, ``adr_close``, ``stock_open``, ``stock_close``,
        ``fx_open``, ``fx_close``, ``stock_open_usd`` and ``stock_close_usd``.
        Either the ADR columns or the USD stock columns are scaled by an
        integer ratio so that both legs are quoted on a comparable scale.
        ``None`` is returned (after printing a message) when the ADR or
        underlying file cannot be read, or when fewer than two dates overlap.
    """
    adr_path = f'eric_jh_data/{country}/{adr}/adr.csv'
    stock_path = f'eric_jh_data/{country}/{adr}/underlying.csv'
    fx_path = fx_dict[country][0]
    fx_type = fx_dict[country][1]

    # OSError covers a missing/unreadable file; pandas' EmptyDataError and
    # ParserError are ValueError subclasses and cover empty/malformed CSVs.
    try:
        adr_df = pd.read_csv(adr_path, index_col = 0).rename(columns = {'close':'adr_close', 'open':'adr_open'})
    except (OSError, ValueError):
        print(f"no data for ADR data of {adr} from {country}")
        return None
    try:
        stock_df = pd.read_csv(stock_path, index_col = 0).rename(columns = {'close':'stock_close', 'open':'stock_open'})
    except (OSError, ValueError):
        print(f"no data for underlying data of {adr} from {country}")
        return None
    fx_df = pd.read_csv(fx_path, index_col = 0).rename(columns = {'close':'fx_close', 'open':'fx_open'})

    # Inner-join the three price series on their shared 'date' column.
    merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close']],
                         stock_df.loc[:,['date', 'stock_open','stock_close']],
                         on = 'date')
    merged_df = pd.merge(merged_df, fx_df.loc[:,['date', 'fx_open','fx_close']], on = 'date')

    # The ratio below is read from row label 1, so at least two rows are needed.
    if len(merged_df) < 2:
        print(f"not enough overlapping data for {adr} from {country}")
        return None

    # Convert the local-currency stock prices to USD.
    if fx_type == 1:
        merged_df['stock_open_usd'] = merged_df['stock_open']/merged_df['fx_open']
        merged_df['stock_close_usd'] = merged_df['stock_close']/merged_df['fx_close']
    else:
        merged_df['stock_open_usd'] = merged_df['stock_open']*merged_df['fx_open']
        merged_df['stock_close_usd'] = merged_df['stock_close']*merged_df['fx_close']

    # One ADR is not necessarily one underlying share: estimate the integer
    # conversion ratio from the second row and scale the cheaper leg up.
    stock_ref = merged_df.loc[1,'stock_close_usd']
    adr_ref = merged_df.loc[1,'adr_open']
    ratio = round(stock_ref/adr_ref)
    if ratio >= 1:
        merged_df['adr_open'] = merged_df['adr_open']*ratio
        merged_df['adr_close'] = merged_df['adr_close']*ratio
    else:
        ratio = round(adr_ref/stock_ref)
        merged_df['stock_open_usd'] = merged_df['stock_open_usd']*ratio
        merged_df['stock_close_usd'] = merged_df['stock_close_usd']*ratio

    return merged_df
    

In [130]:
# One ADR does not necessarily represent exactly one underlying share, so an
# integer conversion ratio is estimated from the second row and the cheaper leg
# is scaled up. This cell repeats the logic at the end of data_processing and
# operates on whatever `merged_df` currently exists in the kernel; it mutates
# that frame in place, so re-running it scales the prices again.
ratio = round(merged_df.loc[1, 'stock_close_usd'] / merged_df.loc[1, 'adr_open'])
if ratio >= 1:
    # Stock leg is the more expensive one: scale the ADR prices up.
    merged_df[['adr_open', 'adr_close']] = merged_df[['adr_open', 'adr_close']].mul(ratio)
else:
    # ADR leg is the more expensive one: invert the ratio, scale the stock prices up.
    ratio = round(merged_df.loc[1, 'adr_open'] / merged_df.loc[1, 'stock_close_usd'])
    merged_df[['stock_open_usd', 'stock_close_usd']] = merged_df[['stock_open_usd', 'stock_close_usd']].mul(ratio)

In [87]:
# Scratch check that writelines() writes each list item verbatim
# (the items must carry their own trailing newline).
test = ['a\n','b\n','c\n']
fname = 'text.txt'
# Context manager closes the file even if the write raises.
with open(fname, "w") as f:
    f.writelines(test)

In [89]:
"""
Naive strategy, first tested on the SONY ADR and stock pair.
To open a position, we check the CLOSE price of the ADR and compare it to the CLOSE price of the
stock. We buy the stock at the next trading day's OPEN in the Asian market.

To close a position, we check the CLOSE price of the ADR and compare it to the CLOSE price of the
stock. We sell the stock at the next trading day's OPEN in the Asian market.
"""
def pairs_trade(merged_df, lookback = 100, cash = 100000):
    """
    Backtest a naive ADR / underlying-stock pairs trade.

    The signal is the spread ``adr_close - stock_close_usd``. The first
    ``lookback`` rows only seed the rolling window and never trade. After that:

    * Open: when flat and the spread exceeds the rolling mean plus one
      standard deviation, short the ADR at that row's ``adr_close`` and buy
      the same quantity of stock at the NEXT row's ``stock_open_usd``.
    * Close: when a position is held and the spread drops below the rolling
      mean, buy back the ADR at that row's ``adr_close`` and sell the stock
      at the NEXT row's ``stock_open_usd``.

    No trade is placed on the last row because there is no next row to fill
    the stock leg. Any open position is marked to the last row's close.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain ``date``, ``adr_close``, ``stock_close_usd``,
        ``stock_open_usd`` and ``stock_open``. Rows are processed in order.
    lookback : int
        Number of past spreads kept in the rolling window.
    cash : number
        Starting cash; each leg is sized to at most half of the current cash.

    Returns
    -------
    (ret, trade_records)
        ``ret`` is ``(final_value - starting_cash) / starting_cash`` and
        ``trade_records`` is a list of newline-terminated log lines.
        An empty ``merged_df`` yields ``(0.0, [])``.
    """
    starting_cash = cash
    trade_records = []

    n_rows = len(merged_df)
    if n_rows == 0:
        # Nothing to trade and no last row to mark positions against.
        return 0.0, trade_records

    # Positional labels 0..n-1 so that `index + 1` always addresses the next row.
    merged_df = merged_df.reset_index(drop = True)

    stock_pos, adr_pos = 0, 0
    diff_record = deque(maxlen = lookback)

    for index, row in merged_df.iterrows():
        spread = row['adr_close'] - row['stock_close_usd']

        # Warm-up: only collect spreads, do not evaluate any trade signal.
        if index < lookback:
            diff_record.append(spread)
            continue

        # Statistics of the previous `lookback` spreads (current one excluded).
        window = np.array(diff_record)
        mean_spread = window.mean()
        has_next_row = index + 1 < n_rows

        # Enter the position if the spread is significantly wide.
        if spread > mean_spread + window.std():
            if stock_pos == 0 and adr_pos == 0 and has_next_row:
                quantity = min(int(0.5*cash/row['adr_close']),int(0.5*cash/row['stock_close_usd']))

                adr_pos -= quantity
                cash += quantity*row['adr_close']

                # The stock leg is filled on the next row's open (Asian market).
                stock_px = merged_df.loc[index+1,'stock_open_usd']
                cash -= stock_px*quantity
                stock_pos += quantity

                # NOTE(review): the currency label in the log lines is
                # hard-coded regardless of which country is being traded.
                trade_records.append("Opening positions:\n")
                trade_records.append(f"We sold the {quantity} shares of ADR at the price of {row['adr_close']} on {row['date']}\n")
                trade_records.append(f"We bought the {quantity} shares of underlying stock at the price of {stock_px} USD ({merged_df.loc[index+1,'stock_open']} Japanese dollars) on {merged_df.loc[index+1,'date']}\n")

        # Exit once the spread has reverted below its rolling mean.
        elif spread < mean_spread:
            if stock_pos > 0 and adr_pos < 0 and has_next_row:
                stock_px = merged_df.loc[index+1,'stock_open_usd']
                cash -= abs(adr_pos)*row['adr_close']
                cash += stock_pos*stock_px

                trade_records.append("Closing positions:\n")
                trade_records.append(f"We bought the {stock_pos} shares of ADR at the price of {row['adr_close']} on {row['date']}\n")
                trade_records.append(f"We sold the {stock_pos} shares of underlying stock at the price of {stock_px} USD ({merged_df.loc[index+1,'stock_open']} Japanese dollars) on {merged_df.loc[index+1,'date']}\n")
                stock_pos, adr_pos = 0, 0

        diff_record.append(spread)

    # Mark any still-open position to the last available close.
    last = n_rows - 1
    final_val = cash + adr_pos*merged_df.loc[last, 'adr_close'] + stock_pos*merged_df.loc[last, 'stock_close_usd']
    ret = (final_val - starting_cash)/starting_cash

    return ret, trade_records

In [134]:
"""
Old code snippets (dead code kept for reference only; superseded by data_processing)

# Grab the csv data for the stocks
adr_path = 'eric_jh_data/Japan/IX_8591/adr.csv'
stock_path = 'eric_jh_data/Japan/IX_8591/underlying.csv'
fx_path = 'eric_jh_data/Forex/USD_JPY.csv'
adr_df = pd.read_csv(adr_path, index_col = 0).rename(columns = {'close':'adr_close', 'open':'adr_open'})
stock_df = pd.read_csv(stock_path, index_col = 0).rename(columns = {'close':'stock_close', 'open':'stock_open'})
fx_df = pd.read_csv(fx_path, index_col = 0).rename(columns = {'close':'fx_close', 'open':'fx_open'})

# Find the ratio between adr and stock:


merged_df = pd.merge(adr_df.loc[:,['date', 'adr_open','adr_close']], stock_df.loc[:,['date', 'stock_open','stock_close']])
merged_df = pd.merge(merged_df, fx_df.loc[:,['date', 'fx_open','fx_close']])
merged_df['stock_open_usd'] = merged_df['stock_open']/merged_df['fx_open']
merged_df['stock_close_usd'] = merged_df['stock_close']/merged_df['fx_close']



"""

110513.74525800598