In [1]:
import vectorbt as vbt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import io
import pytz
import datetime

plt.style.use('ggplot')

In [2]:
with pd.HDFStore('./large_files/data2.h5', mode='r') as store:
    df_ = store.get('data/data_imputed_20240226')
df_.info(verbose=3)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2522881 entries, 525 to 138563
Data columns (total 10 columns):
 #   Column    Dtype                           
---  ------    -----                           
 0   date      object                          
 1   time      object                          
 2   open      float64                         
 3   high      float64                         
 4   low       float64                         
 5   close     float64                         
 6   volume    int64                           
 7   datetime  datetime64[ns, America/New_York]
 8   day       object                          
 9   month     int64                           
dtypes: datetime64[ns, America/New_York](1), float64(4), int64(2), object(3)
memory usage: 211.7+ MB


In [3]:
df = df_.set_index('datetime', drop=False)
df = df.loc[df.index.year.isin([2021, 2022, 2023])]
df = df.loc[df.index.day_of_week == 2]
df = df[(df.index.hour >= 9) & (df.index.hour < 12)]

# Hold

In [4]:
long_entry_signals = []
long_exit_signals = []
for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
    if df_day.shape[0] == 0:
        continue
    
    df_day = df_day.copy()

    long_entry = pd.Series(False, index=df_day.index)
    long_exit = pd.Series(False, index=df_day.index)

    for time, row in df_day.iterrows():
        time = time.time()

        if (time == pd.Timestamp('11:59').time()):
            long_exit[time] = True
        elif (time == pd.Timestamp('10:30').time()):
            long_entry[time] = True

    long_entry_signals.append(long_entry)
    long_exit_signals.append(long_exit)

long_entry_signals = pd.concat(long_entry_signals)
long_exit_signals = pd.concat(long_exit_signals)

In [5]:
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    entries=long_entry_signals,
    exits=long_exit_signals,
    freq='1m',
    allow_partial=True
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                 19 days 12:00:00
Start Value                                       100.0
End Value                                     97.249996
Total Return [%]                              -2.750004
Benchmark Return [%]                          48.829769
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                              13.100505
Max Drawdown Duration                  17 days 03:21:00
Total Trades                                        156
Total Closed Trades                                 156
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       50.0
Best Trade [%]                                 2.675946
Worst Trade [%]                               -2

# Mean Reversion 1
1. Open position between 1025 and 1035
1. Go long if z-score less than threshold
1. Close before 1200 or if z-score reverts

In [6]:
window = 30
long_entry_threshold = -3.0
long_exit_threshold = 0

long_entry_signals = []
long_exit_signals = []
for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
    if df_day.shape[0] == 0:
        continue
    
    df_day = df_day.copy()
    df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

    long_entry = pd.Series(False, index=df_day.index)
    long_exit = pd.Series(False, index=df_day.index)

    for time, row in df_day.iterrows():
        time = time.time()

        if long_entry.any():
            if (row.z_score >= long_exit_threshold) or (time == pd.Timestamp('11:59')):
                long_exit[time] = True
                break
        elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
            long_entry[time] = True

    long_entry_signals.append(long_entry)
    long_exit_signals.append(long_exit)

long_entry_signals = pd.concat(long_entry_signals)
long_exit_signals = pd.concat(long_exit_signals)

In [7]:
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    entries=long_entry_signals,
    exits=long_exit_signals,
    freq='1m',
    allow_partial=True
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                 19 days 12:00:00
Start Value                                       100.0
End Value                                    104.306599
Total Return [%]                               4.306599
Benchmark Return [%]                          48.829769
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               2.212927
Max Drawdown Duration                   1 days 02:53:00
Total Trades                                         13
Total Closed Trades                                  13
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  92.307692
Best Trade [%]                                 0.672704
Worst Trade [%]                               -1

In [8]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '04f68e30-efbe-4024-80ca-4a22297eab85',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 27, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 59, tzinfo=<DstTz

# Mean Reversion 2
1. Open position between 1025 and 1035
1. Go long if z-score less than threshold
1. Go short if z-score above threshold
1. Close before 1200 or if z-score reverts

In [18]:
window = 30
long_entry_threshold = -3.0
long_exit_threshold = 0

short_entry_threshold = 2.0
short_exit_threshold = 0

long_entry_signals = []
long_exit_signals = []

short_entry_signals = []
short_exit_signals = []
for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
    if df_day.shape[0] == 0:
        continue
    
    df_day = df_day.copy()
    df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

    long_entry = pd.Series(False, index=df_day.index)
    long_exit = pd.Series(False, index=df_day.index)
    short_entry = pd.Series(False, index=df_day.index)
    short_exit = pd.Series(False, index=df_day.index)

    for time, row in df_day.iterrows():
        time = time.time()

        if long_entry.any():
            if (row.z_score >= long_exit_threshold) or (time == pd.Timestamp('11:59')):
                long_exit[time] = True
                break
        elif short_entry.any():
            if (row.z_score <= short_exit_threshold) or (time == pd.Timestamp('11:59')):
                short_exit[time] = True
                break
        elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
            long_entry[time] = True
        elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score > short_entry_threshold):
            short_entry[time] = True

    long_entry_signals.append(long_entry)
    long_exit_signals.append(long_exit)
    short_entry_signals.append(short_entry)
    short_exit_signals.append(short_exit)

long_entry_signals = pd.concat(long_entry_signals)
long_exit_signals = pd.concat(long_exit_signals)
short_entry_signals = pd.concat(short_entry_signals)
short_exit_signals = pd.concat(short_exit_signals)

In [19]:
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    entries=long_entry_signals,
    exits=long_exit_signals,
    short_entries=short_entry_signals,
    short_exits=short_exit_signals,
    freq='1m',
    allow_partial=True
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                 19 days 12:00:00
Start Value                                       100.0
End Value                                    109.493345
Total Return [%]                               9.493345
Benchmark Return [%]                          48.829769
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                                2.05809
Max Drawdown Duration                   4 days 16:28:00
Total Trades                                         60
Total Closed Trades                                  60
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  66.666667
Best Trade [%]                                 1.013472
Worst Trade [%]                               -1

In [20]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '3974efb2-adb1-4089-8310-01d288aa9015',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 27, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 59, tzinfo=<DstTz