In [1]:
## anaconda3 (Python 3.12.0) Kernel
import numpy as np

# pair trade packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from datetime import datetime

# Load Pairs Data


In [2]:
def custom_date_parser(date_str):
    return datetime.strptime(date_str, '%d/%m/%Y')

# Load the dictionary from the pickle file
with open('pairsOutcome.pkl', 'rb') as file:
    pairsOutcome = pickle.load(file)

print("Dictionary loaded from pairsOutcome.pkl")


# Load stock data and get return 
tpxData = pd.read_csv('TPX_prices.csv', index_col=0, parse_dates=True, date_parser=custom_date_parser)
tpxData = tpxData.dropna(axis='columns')
return_df = (tpxData / tpxData.shift(1)) - 1

Dictionary loaded from pairsOutcome.pkl


  tpxData = pd.read_csv('TPX_prices.csv', index_col=0, parse_dates=True, date_parser=custom_date_parser)


# Get Pair Trade Portfolio
`pairsOutcome` already have TOPIX stocks with highest liquidity and are tested for stationarity over a 1 year window

Choose top 10 known pair trades by returns in the total dataset

In [3]:
# Sort the keys by their cumpnl[-2] values in descending order
top_keys = sorted(
    pairsOutcome,
    key=lambda k: pairsOutcome[k].cumpnl.iloc[-2],  # Access cumpnl[-2] safely
    reverse=True
)[:10]  # Get the top 10 keys

# Print the top 10 performing trades
print("Top 10 performing trades:")
for i, key in enumerate(top_keys, 1):
    print(f"{i}. Key: {key}, Value: {pairsOutcome[key].cumpnl.iloc[-2]}")

Top 10 performing trades:
1. Key: 1801 JP Equity 2670 JP Equity, Value: 2.5797887367591246
2. Key: 3778 JP Equity 6701 JP Equity, Value: 2.537242032391529
3. Key: 2760 JP Equity 6254 JP Equity, Value: 2.3688208386917404
4. Key: 5706 JP Equity 6954 JP Equity, Value: 2.2676474298290237
5. Key: 7951 JP Equity 9684 JP Equity, Value: 2.0657325467200596
6. Key: 1808 JP Equity 6481 JP Equity, Value: 1.9929348941248262
7. Key: 3099 JP Equity 5831 JP Equity, Value: 1.939742664925484
8. Key: 1808 JP Equity 6971 JP Equity, Value: 1.9132602773493155
9. Key: 4021 JP Equity 9843 JP Equity, Value: 1.8675031161000868
10. Key: 5929 JP Equity 6504 JP Equity, Value: 1.811533049967201


In [4]:
## Get pair stock data
def custom_date_parser(date_str):
    return datetime.strptime(date_str, '%d/%m/%Y')
valid = pd.read_csv('validPairs4.csv', 
                    index_col=0, 
                    parse_dates=True, 
                    date_parser=custom_date_parser)
## get list of pair stocks
validPairsList = [
    [item.strip() + ' Equity' for item in pair.split('Equity') if item.strip()]
    for pair in top_keys
]

  valid = pd.read_csv('validPairs4.csv',


In [5]:
rollingWindow = 262
cutLossSd = 2

In [6]:
for pair in validPairsList:
    df = pd.DataFrame()

    #Calculate Standard Deviations
    df['spread'] = valid[f'spread_{pair[0]}_{pair[1]}']
    df['mid'] =  df['spread'].rolling(rollingWindow).mean()
    df['1sd high'] = df['spread'].rolling(rollingWindow).mean() + df['spread'].rolling(rollingWindow).std()
    df['1sd low'] = df['spread'].rolling(rollingWindow).mean() - df['spread'].rolling(rollingWindow).std()
    df['2sd high'] = df['spread'].rolling(rollingWindow).mean() + df['spread'].rolling(rollingWindow).std() * cutLossSd
    df['2sd low'] = df['spread'].rolling(rollingWindow).mean() - df['spread'].rolling(rollingWindow).std() * cutLossSd
    df['position'] = 0

    df.loc[(df['spread'] > df['1sd high']) & (df['spread'] < df['2sd high']), 'position'] = -1
    df.loc[(df['spread']< df['1sd low']) & (df['spread'] > df['2sd low']), 'position'] = 1

    #Calculate PnL
    df[f'{pair[0]} position'] = df['position']
    df[f'{pair[1]} position'] = df['position'] * -1
    df['dailypnl'] = df[f'{pair[1]} position']*return_df[f'{pair[1]}'].shift(-1) + df[f'{pair[0]} position']*return_df[f'{pair[0]}'].shift(-1)
    df['cumpnl'] = df['dailypnl'].cumsum()

    pairsOutcome[f'{pair[0]} {pair[1]}'] = df

## Make indicators and spread stationary around 0
Deduct the mean from all values to translate to 0 axis

In [7]:
workingPairOutcome = {}

for pair in top_keys:
    dummy_df = pairsOutcome[top_keys[0]].iloc[::,:6]
    dummy_df = dummy_df.subtract(dummy_df['mid'], axis=0).drop(columns=['mid']) # centre spread and SD
    dummy_df = dummy_df.div(dummy_df['2sd high']-dummy_df['1sd high'],axis=0)   # express SD as integers, give spread as propotionate
    dummy_df['1sd_high_boolean'] = (dummy_df['spread']>dummy_df['1sd high']).astype(int)
    dummy_df['2sd_high_boolean'] = (dummy_df['spread']>dummy_df['2sd high']).astype(int)
    dummy_df['1sd_low_boolean'] =  (dummy_df['spread']<dummy_df['1sd low'] ).astype(int)
    dummy_df['2sd_low_boolean'] =  (dummy_df['spread']<dummy_df['2sd low'] ).astype(int)
    dummy_df = dummy_df.drop(columns=['spread','1sd high', '1sd low', '2sd high', '2sd low'])
    workingPairOutcome[pair] = dummy_df.to_numpy()

In [8]:
dummy_df.head()

Unnamed: 0_level_0,1sd_high_boolean,2sd_high_boolean,1sd_low_boolean,2sd_low_boolean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013-01-01,0,0,0,0
2013-01-02,0,0,0,0
2013-01-03,0,0,0,0
2013-01-04,0,0,0,0
2013-01-07,0,0,0,0


In [9]:
workingPairOutcome[top_keys[5]][-5:]     # spread is not a proportion and direction of SD

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

# Machine Learning Challenge

## Background
Initial evaluation of the baseline portfolio shows that draw downs are small. Originally team had the idea of using Machine Learning to optimise for sizing of these pair trades. However since there was no significant drawdowns the returns are linearly increasing with investment sizing i.e. greater nominal investment in the the pair trade the proportionate increase in returns without realising significant drawdown risk.

Instead of optimising for sizing, we can explore Machine Learning in terms of strategy on this stationary dataset. Whereas our prescribed strategy is to enter at +/- 1 std dev, exit at 0 with +/- 2 std dev stop loss. These are only suggestions and arbitrary levels.

With Machine Learning, we can discover if it will uncover the mean reverting nature and recommend another threshhold. We use Q Learner to understand state space with the same spread, mid, std dev parameters as the baseline.

### Q Value table

In [32]:
class PairTradeEnv1:
    def __init__(self):
        # Initialize environment variables and parameters
        self.num_actions = 3
        self.num_states = 2**4
        self.earliest_step = 261  # hot start
        self.last_step = 2868

        self.state = np.zeros(4)
        self.current_step = self.earliest_step

    def reset(self,pair_idx):
        # Reset the environment to its initial state
        self.current_step = self.earliest_step
        self.state = workingPairOutcome[top_keys[pair_idx]][self.current_step]
        return self.state

    def step(self, action,pair_idx):
        # Advance the time step
        self.current_step += 1
        next_state = workingPairOutcome[top_keys[pair_idx]][self.current_step]
        reward = self.calculate_reward(action,pair_idx)
        done = self.current_step >= self.last_step
        return next_state, reward, done
    
    def calculate_reward(self, position,pair_idx):
        """
        Give one _previous_ day's return
        Input:
            position: position for idx (current step)
            idx: usually current timestp 
            pair: tuple of tpx stock
        Output:
            dailypnl
        """
        pair = validPairsList[pair_idx]
        # position = position_vector @ np.array([-1,0,1])
        position_0 = position
        position_1 = position * -1
        ## return_df gives the return for the previous day for the given idx
        dailypnl = position_0*return_df[f'{pair[0]}'].iloc[self.current_step] + position_1*return_df[f'{pair[1]}'].iloc[self.current_step] 

        return dailypnl
    
class Agent:
    def __init__(self, num_states, num_actions, alpha=0.1, gamma=0.1, epsilon=0.3):
        self.num_actions = num_actions
        self.num_states = num_states
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        # Action to index mapping
        self.action_to_index = {-1: 0, 0: 1, 1: 2}
        self.index_to_action = {0: -1, 1: 0, 2: 1}

        # Initialize Q-table
        self.Q = np.zeros((num_states, num_actions))

    def b_to_d_state(self,binary_state):
        """
        Converts a 4-bit binary state to a decimal index.
        Args:
            binary_state: A list of 4 binary values (e.g., [0, 1, 1, 0])
        Returns:
            The corresponding decimal index.
        """

        decimal_index = 0
        for i in range(4):
            decimal_index += binary_state[i] * 2**(3-i)
        return decimal_index

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            # Explore: Choose a random action
            action_index = np.random.randint(self.num_actions)
        else:
            # Exploit: Choose the action with the highest Q-value for the current state
            state_index = self.b_to_d_state(state)
            action_index = np.argmax(self.Q[state_index])
            
        action = self.index_to_action[action_index]  # Map index to action
        
        return action

    def update_Q(self, state, action, reward, next_state):
        # Update the Q-value for the current state-action pair
        state_index = self.b_to_d_state(state)
        next_state_index = self.b_to_d_state(next_state)
        self.Q[state_index, action] += self.alpha * (reward + self.gamma * np.max(self.Q[next_state_index]) - self.Q[state_index, action])

    def learn(self, num_episodes, env):
        for episode in range(num_episodes):
            for pair_idx in range(10):
                state = env.reset(pair_idx)
                done = False

                while not done:
                    action = self.choose_action(state)
                    next_state, reward, done = env.step(action,pair_idx)
                    self.update_Q(state, action, reward, next_state)
                    state = next_state


num_episodes = 1000

env = PairTradeEnv1()
agent = Agent(num_states=env.num_states, num_actions=env.num_actions)
agent.learn(num_episodes, env=env)

In [33]:
agent.Q

array([[ 0.00028252,  0.00042251,  0.0005758 ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.00019244, -0.00327649,  0.00269196],
       [ 0.00024409,  0.00335633,  0.00133598],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.00046989,  0.00243035,  0.00113966],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.0008878 ,  0.00603251,  0.01223529],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ]])

In [34]:

import pandas as pd

# Create a list of binary representations for the row indices
binary_labels = [f"{i:04b}" for i in range(16)]

# Create a DataFrame from the array with binary labels
df = pd.DataFrame(agent.Q, index=binary_labels, columns=["Short", "Flat", "Long"])

# Print the DataFrame
print(df)

         Short      Flat      Long
0000  0.000283  0.000423  0.000576
0001  0.000000  0.000000  0.000000
0010  0.000192 -0.003276  0.002692
0011  0.000244  0.003356  0.001336
0100  0.000000  0.000000  0.000000
0101  0.000000  0.000000  0.000000
0110  0.000000  0.000000  0.000000
0111  0.000000  0.000000  0.000000
1000  0.000470  0.002430  0.001140
1001  0.000000  0.000000  0.000000
1010  0.000000  0.000000  0.000000
1011  0.000000  0.000000  0.000000
1100  0.000888  0.006033  0.012235
1101  0.000000  0.000000  0.000000
1110  0.000000  0.000000  0.000000
1111  0.000000  0.000000  0.000000


In [35]:

df['mean_reversion'] = [0, 0,1,0,0,0,0,0,-1,0,0,0,0,0,0,0]

In [36]:
df['Q_choice'] = df[['Short', 'Flat', 'Long']].idxmax(axis=1)
zero_rows = df[['Short', 'Flat', 'Long']].eq(0).all(axis=1)
df.loc[zero_rows, ['Q_choice']] = 'Flat'
df

Unnamed: 0,Short,Flat,Long,mean_reversion,Q_choice
0,0.000283,0.000423,0.000576,0,Long
1,0.0,0.0,0.0,0,Flat
10,0.000192,-0.003276,0.002692,1,Long
11,0.000244,0.003356,0.001336,0,Flat
100,0.0,0.0,0.0,0,Flat
101,0.0,0.0,0.0,0,Flat
110,0.0,0.0,0.0,0,Flat
111,0.0,0.0,0.0,0,Flat
1000,0.00047,0.00243,0.00114,-1,Flat
1001,0.0,0.0,0.0,0,Flat


table is taking a long time to generalise. using one pair is not good enough to get any poistioning as everything is flat. only after adding all 10 pairs then performance churns out shorting on 1 SD high cross.

In [23]:
dummy_df.columns

Index(['1sd_high_boolean', '2sd_high_boolean', '1sd_low_boolean',
       '2sd_low_boolean'],
      dtype='object')

In [26]:
# Get baseline results
t_pair = validPairsList[0]
max_steps_per_episode = 3000
agent.epsilon = 0

def get_baseline(env, max_steps_per_episode, pair_idx):
    agent.epsilon = 0
    env.reset(pair_idx)
    total_reward = 0
    current_step = 261
    env.current_step = current_step
    env.last_step = 2868

    for step in range(max_steps_per_episode):
        action = pairsOutcome[top_keys[pair_idx]].iloc[env.current_step]['position']
        _, reward, done = env.step(action,pair_idx)
        total_reward += reward

        if done:
            break

    print(f"Baseline {t_pair}, Total Reward: {total_reward}, step {step}")

get_baseline(env, 3000, pair_idx=0)

Baseline ['1801 JP Equity', '2670 JP Equity'], Total Reward: 2.3267375595549673, step 2606


In [None]:
pairsOutcome[top_keys[0]].iloc[999:2002]

Unnamed: 0_level_0,spread,mid,1sd high,1sd low,2sd high,2sd low,position,1801 JP Equity position,2670 JP Equity position,dailypnl,cumpnl
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-10-31,-147.947042,-449.701335,-126.072766,-773.329904,197.555804,-1096.958473,0,0,0,-0.000000,0.939600
2016-11-01,-123.049055,-448.633919,-124.389156,-772.878682,199.855607,-1097.123444,-1,-1,1,0.004531,0.944131
2016-11-02,-136.255059,-447.603892,-122.795412,-772.412372,202.013068,-1097.220852,0,0,0,0.000000,0.944131
2016-11-03,-136.255059,-446.573866,-121.205918,-771.941813,204.162029,-1097.309760,0,0,0,0.000000,0.944131
2016-11-04,-331.433022,-445.283121,-120.132645,-770.433598,205.017832,-1095.584074,0,0,0,0.000000,0.944131
...,...,...,...,...,...,...,...,...,...,...,...
2020-08-27,-74.212095,-244.358511,-39.366446,-449.350575,165.625618,-654.342640,0,0,0,0.000000,1.653724
2020-08-28,48.167917,-241.181553,-38.147293,-444.215813,164.886967,-647.250073,-1,-1,1,-0.026273,1.627451
2020-08-31,141.271907,-238.106065,-35.406198,-440.805932,167.293669,-643.505798,-1,-1,1,0.013071,1.640522
2020-09-01,91.373920,-235.442408,-33.016984,-437.867833,169.408441,-640.293257,-1,-1,1,0.000302,1.640824
