In [1]:
import pandas as pd
import numpy as np
import vectorbt as vbt

from sklearn.linear_model import LinearRegression

In [2]:
with pd.HDFStore('./large_files/data_20240328.h5', mode='r') as store:
    df1_ = store.get('data/cl')
    df2_ = store.get('data/gc')

In [3]:
def filter_df(df):
    df = df.set_index('datetime', drop=False)
    df = df.loc[df.index.year.isin([2020, 2021, 2022, 2023])]
    df = df.loc[df.index.day_of_week.isin([0, 1, 2])]
    df = df[(df.index.hour >= 9) & (df.index.hour < 16)]

    return df

df1 = filter_df(df1_)
df2 = filter_df(df2_)
dates = pd.Series(index=df1.datetime, dtype=bool)

df_z = []
weekly_groups = dates.groupby(pd.Grouper(freq='W'))
for week_start, group in weekly_groups:
    # get monday / tuesday of pair, week_start = Sunday
    monday = week_start - pd.Timedelta(days=6)
    tuesday = week_start - pd.Timedelta(days=4, minutes=1)
    wednesday = week_start - pd.Timedelta(days=3, minutes=1)

    df1_week = df1.loc[(df1.index >= monday) & (df1.index <= tuesday)]
    df2_week = df2.loc[(df2.index >= monday) & (df2.index <= tuesday)]
    df1_wed = df1.loc[(df1.index > tuesday) & (df1.index <= wednesday)]
    df2_wed = df2.loc[(df2.index > tuesday) & (df2.index <= wednesday)]

    if (df1_week.shape[0] == 0) or (df2_week.shape[0] == 0) or (df1_wed.shape[0] == 0) or (df2_wed.shape[0] == 0):
        continue
    
    if df1_week.shape[0] > df2_week.shape[0]:
        df1_week = df1_week.loc[df2_week.index]
    elif df2_week.shape[0] > df1_week.shape[0]:
        df2_week = df2_week.loc[df1_week.index]

    # compute mean / std for spread
    price1 = df1_week.close.values.reshape(-1, 1)
    price2 = df2_week.close.values.reshape(-1, 1)
    model = LinearRegression()
    model.fit(price1, price2)
    hedge_ratio = model.coef_[0]
    spread = price2 - hedge_ratio * price1
    mean = spread.mean()
    std = np.std(spread)

    # compute z-scores for wednesday
    spread = df2_wed.close.values - hedge_ratio * df1_wed.close.values

    z = pd.DataFrame({'asset1': df1_wed.close.values, 'asset2': df2_wed.close.values, 'spread': spread, 'z': (spread - mean) / std}, index=df1_wed.index)
    df_z.append(z)

df_z = pd.concat(df_z)

In [4]:
def pair_trade_signals(
    dfz,
    short_entry_threshold=2.0, # sell 1 buy 2
    long_entry_threshold=-2.0, # buy 1 sell 2

):  
    dfz = dfz.copy()
    dfz['datetime'] = dfz.index
    positions = []
    total_positions = 0
    total_days = 0

    for day, df_day in dfz.groupby(pd.Grouper(level='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        position = pd.Series(index=df_day.index)
        df_10 = df_day[(df_day.datetime.dt.time >= pd.Timestamp('9:00').time()) & (df_day.datetime.dt.time <= pd.Timestamp('11:00').time())]
        max_idx = df_10.z.abs().idxmax()
        z_score = df_day.loc[max_idx, 'z']

        is_entry = False
        if z_score > short_entry_threshold:
            position[max_idx] = -1
            is_entry = True
        elif z_score < long_entry_threshold:
            position[max_idx] = 1
            is_entry = True

        if is_entry:
            df_later = df_day[df_day.datetime.dt.time > max_idx.time()]
            absolute_diff = (df_later['z'] - 0).abs()
            idx_zero = absolute_diff.idxmin()
            z = df_day.loc[idx_zero, 'z']
            if z >= z_score:
                position[max_idx] = np.nan
            else:
                position[idx_zero] = 0

            total_positions += 1
        total_days += 1
        
        position = position.ffill()
        positions.append(position)
    
    positions = pd.concat(positions)
    df_z = pd.concat([dfz, positions], axis=1).rename({0: 'position'}, axis=1)
    print(f'{total_positions} / {total_days} have position opened')

    return df_z

In [5]:
def filter_df(df):
    df = df.set_index('datetime', drop=False)
    df = df.loc[df.index.year.isin([2020, 2021, 2022, 2023])]
    df = df.loc[df.index.day_of_week.isin([2])]
    df = df[(df.index.hour >= 9) & (df.index.hour < 16)]

    return df

df1 = filter_df(df1_)
df2 = filter_df(df2_)

df_pos = pair_trade_signals(df_z)
df_pos['cl'] = -df_pos['position']
df_pos['gc'] = df_pos['position']

vbt_short_signal = ((df_pos['position'] == -1) & (df_pos['position'].shift(1) != -1))
vbt_long_signal = ((df_pos['position'] == 1) & (df_pos['position'].shift(1) != 1))

vbt_close_short = ((df_pos['position'] == 0) & (df_pos['position'].shift(1) == -1))
vbt_close_long = ((df_pos['position'] == 0) & (df_pos['position'].shift(1) == 1))

157 / 208 have position opened


  position = pd.Series(index=df_day.index)


In [6]:
asset1 = 'cl'
asset2 = 'gc'

tickers_column = pd.Index([asset1, asset2], name='tickers')
vbt_trades = pd.DataFrame(index=df_pos.index, columns=tickers_column)
vbt_trades[asset1] = np.nan
vbt_trades[asset2] = np.nan

vbt_trades.loc[vbt_short_signal, asset1] = 1
vbt_trades.loc[vbt_long_signal, asset1] = -1
vbt_trades.loc[vbt_short_signal, asset2] = -1
vbt_trades.loc[vbt_long_signal, asset2] = 1

vbt_trades.loc[vbt_close_short, asset1] = 0
vbt_trades.loc[vbt_close_long, asset1] = 0
vbt_trades.loc[vbt_close_short, asset2] = 0
vbt_trades.loc[vbt_close_long, asset2] = 0

df_price = pd.DataFrame(index=df_z.index, columns=['cl', 'gc'])
df_price['cl'] = df1.loc[df_z.index, 'close'].values
df_price['gc'] = df2.loc[df_z.index, 'close'].values

In [7]:
portfolio = vbt.Portfolio.from_orders(
    close=df_price,
    size=vbt_trades,
    size_type='targetpercent',
    val_price=df_price.vbt.fshift(1),
    init_cash=100,
    fees=0,
    cash_sharing=True,
    group_by=False,
    call_seq='auto',
    freq='1m'
)
portfolio.stats()

  portfolio.stats()


Start                         2020-01-08 09:00:00-05:00
End                           2023-12-27 15:59:00-05:00
Period                                 60 days 16:00:00
Start Value                                       100.0
End Value                                    132.769465
Total Return [%]                              32.769465
Benchmark Return [%]                          25.950065
Max Gross Exposure [%]                       100.014605
Total Fees Paid                                     0.0
Max Drawdown [%]                               5.142514
Max Drawdown Duration                  16 days 02:32:00
Total Trades                                       85.0
Total Closed Trades                                85.0
Total Open Trades                                   0.0
Open Trade PnL                                      0.0
Win Rate [%]                                  71.764706
Best Trade [%]                                 9.793585
Worst Trade [%]                                -

In [8]:
# default agg_fn is mean
print(portfolio.stats(agg_func=None).round(3).T.to_markdown())

|                            | ('cl', 'cl')              | ('gc', 'gc')              |
|:---------------------------|:--------------------------|:--------------------------|
| Start                      | 2020-01-08 09:00:00-05:00 | 2020-01-08 09:00:00-05:00 |
| End                        | 2023-12-27 15:59:00-05:00 | 2023-12-27 15:59:00-05:00 |
| Period                     | 60 days 16:00:00          | 60 days 16:00:00          |
| Start Value                | 100.0                     | 100.0                     |
| End Value                  | 111.404                   | 154.135                   |
| Total Return [%]           | 11.404                    | 54.135                    |
| Benchmark Return [%]       | 19.444                    | 32.457                    |
| Max Gross Exposure [%]     | 100.0                     | 100.029                   |
| Total Fees Paid            | 0.0                       | 0.0                       |
| Max Drawdown [%]           | 9.257       

In [13]:
df_orders = portfolio.orders.records_readable.sort_values('Timestamp')
df_orders = pd.merge(df_orders, df_z['z'], left_on='Timestamp', right_index=True, how='left')
df_orders

Unnamed: 0,Order Id,Column,Timestamp,Size,Price,Fees,Side,z
0,0,"(cl, cl)",2020-01-08 09:30:00-05:00,1.603335,62.37,0.0,Buy,2.338704
170,170,"(gc, gc)",2020-01-08 09:30:00-05:00,0.063347,1579.40,0.0,Sell,2.338704
171,171,"(gc, gc)",2020-01-08 10:38:00-05:00,0.063347,1573.20,0.0,Buy,0.001508
1,1,"(cl, cl)",2020-01-08 10:38:00-05:00,1.603335,61.30,0.0,Sell,0.001508
2,2,"(cl, cl)",2020-01-15 09:02:00-05:00,1.689607,58.12,0.0,Buy,2.277987
...,...,...,...,...,...,...,...,...
167,167,"(cl, cl)",2023-11-29 15:08:00-05:00,1.447222,78.06,0.0,Sell,0.707242
338,338,"(gc, gc)",2023-12-27 10:40:00-05:00,0.073450,2093.50,0.0,Sell,8.483542
168,168,"(cl, cl)",2023-12-27 10:40:00-05:00,1.506068,75.01,0.0,Buy,8.483542
169,169,"(cl, cl)",2023-12-27 14:21:00-05:00,1.506068,73.97,0.0,Sell,4.298668
