In [2]:
## anaconda3 (Python 3.9.13) Kernel

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# pair trade packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from datetime import datetime

# Load Pairs Data


In [3]:
def custom_date_parser(date_str):
    return datetime.strptime(date_str, '%d/%m/%Y')

# Load the dictionary from the pickle file
with open('pairsOutcome.pkl', 'rb') as file:
    pairsOutcome = pickle.load(file);

print("Dictionary loaded from pairsOutcome.pkl")


# Load stock data and get return 
tpxData = pd.read_csv('TPX_prices.csv', index_col=0, parse_dates=True, date_parser=custom_date_parser)
tpxData = tpxData.dropna(axis='columns')
return_df = (tpxData / tpxData.shift(1)) - 1

Dictionary loaded from pairsOutcome.pkl


  tpxData = pd.read_csv('TPX_prices.csv', index_col=0, parse_dates=True, date_parser=custom_date_parser)


In [4]:
len(pairsOutcome)

508

In [5]:
# Sort the keys by their cumpnl[-2] values in descending order
top_keys = sorted(
    pairsOutcome,
    key=lambda k: pairsOutcome[k].cumpnl.iloc[-2],  # Access cumpnl[-2] safely
    reverse=True
)[:10]  # Get the top 10 keys

# Print the top 10 performing trades
print("Top 10 performing trades:")
for i, key in enumerate(top_keys, 1):
    print(f"{i}. Key: {key}, Value: {pairsOutcome[key].cumpnl.iloc[-2]}")

Top 10 performing trades:
1. Key: 1801 JP Equity 2670 JP Equity, Value: 2.5797887367591246
2. Key: 3778 JP Equity 6701 JP Equity, Value: 2.537242032391529
3. Key: 2760 JP Equity 6254 JP Equity, Value: 2.3688208386917404
4. Key: 5706 JP Equity 6954 JP Equity, Value: 2.2676474298290237
5. Key: 7951 JP Equity 9684 JP Equity, Value: 2.0657325467200596
6. Key: 1808 JP Equity 6481 JP Equity, Value: 1.9929348941248262
7. Key: 3099 JP Equity 5831 JP Equity, Value: 1.939742664925484
8. Key: 1808 JP Equity 6971 JP Equity, Value: 1.9132602773493155
9. Key: 4021 JP Equity 9843 JP Equity, Value: 1.8675031161000868
10. Key: 5929 JP Equity 6504 JP Equity, Value: 1.811533049967201


In [6]:
## greatest return

working_pair = pairsOutcome[top_keys[0]]

In [7]:
working_pair = working_pair.iloc[261:]
working_pair = working_pair.iloc[:, :-2]
working_pair

Unnamed: 0_level_0,spread,mid,1sd high,1sd low,2sd high,2sd low,position,1801 JP Equity position,2670 JP Equity position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1/1/2014,-447.679644,-609.316380,-482.301096,-736.331665,-355.285811,-863.346949,-1,-1,1
2/1/2014,-447.679644,-607.859204,-481.189674,-734.528735,-354.520143,-861.198265,-1,-1,1
3/1/2014,-447.679644,-606.402028,-480.096073,-732.707984,-353.790117,-859.013940,-1,-1,1
6/1/2014,-498.128143,-605.137404,-479.415988,-730.858820,-353.694572,-856.580237,0,0,0
7/1/2014,-499.575654,-604.036501,-478.660197,-729.412805,-353.283893,-854.789110,0,0,0
...,...,...,...,...,...,...,...,...,...
27/5/2024,-39.304773,-46.289714,319.903354,-412.482783,686.096423,-778.675852,0,0,0
28/5/2024,204.731719,-44.231112,321.850855,-410.313080,687.932823,-776.395048,0,0,0
29/5/2024,345.042254,-41.541554,324.802844,-407.885953,691.147243,-774.230351,-1,-1,1
30/5/2024,178.588811,-39.334254,326.585734,-405.254242,692.505722,-771.174230,0,0,0


In [8]:
def count_consecutive(col):
  """
  This function counts the length of consecutive -1 and 1 in a pandas Series.

  Args:
      col: The pandas Series containing the position values.

  Returns:
      A dictionary containing two DataFrames:
          - 'minus_one_consecutive': DataFrame with lengths of consecutive -1.
          - 'plus_one_consecutive': DataFrame with lengths of consecutive 1.
  """
  minus_one_consecutive = []
  plus_one_consecutive = []
  count = 0
  prev = None
  for val in col:
    if val == prev:
      count += 1
    else:
      if prev == -1:
        minus_one_consecutive.append(count)
      elif prev == 1:
        plus_one_consecutive.append(count)
      count = 1
    prev = val
  if prev == -1:
    minus_one_consecutive.append(count)
  elif prev == 1:
    plus_one_consecutive.append(count)

  return {
      'minus_one_consecutive': pd.DataFrame(minus_one_consecutive),
      'plus_one_consecutive': pd.DataFrame(plus_one_consecutive)
  }


In [9]:

# Get results
consecutive_lengths = count_consecutive(working_pair['position'])


In [10]:
consecutive_lengths

{'minus_one_consecutive':      0
 0    3
 1    1
 2    1
 3    1
 4    1
 ..  ..
 110  3
 111  9
 112  2
 113  3
 114  1
 
 [115 rows x 1 columns],
 'plus_one_consecutive':     0
 0   5
 1   3
 2   1
 3   1
 4   5
 .. ..
 76  6
 77  5
 78  4
 79  2
 80  5
 
 [81 rows x 1 columns]}

In [11]:
consecutive_lengths['minus_one_consecutive'].describe()


Unnamed: 0,0
count,115.0
mean,4.208696
std,5.130739
min,1.0
25%,1.0
50%,2.0
75%,4.0
max,33.0


In [12]:
consecutive_lengths['plus_one_consecutive'].describe()


Unnamed: 0,0
count,81.0
mean,4.506173
std,5.657127
min,1.0
25%,1.0
50%,2.0
75%,5.0
max,27.0


In [13]:
consecutive_lengths['minus_one_consecutive'].values.sum()

484

In [14]:
consecutive_lengths['plus_one_consecutive'].values.sum()

365

In [15]:
consecutive_lengths['minus_one_consecutive'].values.sum() + consecutive_lengths['plus_one_consecutive'].values.sum()

849

taking position 1038 out of 2718 days of trading

In [16]:
# Count changes in position values
position_changes = (working_pair['position'] != working_pair['position'].shift(1)).sum()

# Print result
print("\nTotal number of position changes:", position_changes)



Total number of position changes: 392


In [17]:
f"over {2718/261:.1f} years means {350/(2718/261):.1f} ave trades per year"

'over 10.4 years means 33.6 ave trades per year'

In [18]:
working_pair.isna().sum()

spread                     0
mid                        0
1sd high                   0
1sd low                    0
2sd high                   0
2sd low                    0
position                   0
1801 JP Equity position    0
2670 JP Equity position    0
dtype: int64

```   
df.loc[(df['spread'] > df['1sd high']) & (df['spread'] < df['2sd high']), 'position'] = -1 
df.loc[(df['spread']< df['1sd low']) & (df['spread'] > df['2sd low']), 'position'] = 1
```
### Level 1

above is the line where we assign some weightage to the position size. 

1. If we can assign a challenge to the Q learner to let it decide how much to make this number for 10 pairs.
2. Then we measure the total cum pnl 
3. Use that data to give a reward function

### Level 2
1. Only get learner to size when entering a trade. only give the Q learner state on the trade date

### Level 3
1. Daily sizing of trade

In [19]:
working_pair

Unnamed: 0_level_0,spread,mid,1sd high,1sd low,2sd high,2sd low,position,1801 JP Equity position,2670 JP Equity position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1/1/2014,-447.679644,-609.316380,-482.301096,-736.331665,-355.285811,-863.346949,-1,-1,1
2/1/2014,-447.679644,-607.859204,-481.189674,-734.528735,-354.520143,-861.198265,-1,-1,1
3/1/2014,-447.679644,-606.402028,-480.096073,-732.707984,-353.790117,-859.013940,-1,-1,1
6/1/2014,-498.128143,-605.137404,-479.415988,-730.858820,-353.694572,-856.580237,0,0,0
7/1/2014,-499.575654,-604.036501,-478.660197,-729.412805,-353.283893,-854.789110,0,0,0
...,...,...,...,...,...,...,...,...,...
27/5/2024,-39.304773,-46.289714,319.903354,-412.482783,686.096423,-778.675852,0,0,0
28/5/2024,204.731719,-44.231112,321.850855,-410.313080,687.932823,-776.395048,0,0,0
29/5/2024,345.042254,-41.541554,324.802844,-407.885953,691.147243,-774.230351,-1,-1,1
30/5/2024,178.588811,-39.334254,326.585734,-405.254242,692.505722,-771.174230,0,0,0
