In [1]:
import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import vectorbt as vbt
from pydantic import BaseModel, ConfigDict, Field
from statsmodels.tsa.stattools import adfuller

# Apply matplotlib's built-in 'ggplot' style sheet to every matplotlib figure in this notebook.
plt.style.use('ggplot')

In [2]:
class Position(BaseModel):
    """Record of the single trade currently held by the signal generator.

    Attributes:
        direction: Side of the trade; the strategy in this notebook stores
            ``'long'`` or ``'short'``.
        price: Close price of the bar on which the trade was opened.
        time: Timestamp of the bar on which the trade was opened.
    """
    # pd.Timestamp is not a pydantic-native type, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    direction: str = ''
    price: float = 0.0
    # Use a factory so the default is taken when the instance is created.
    # A plain `= pd.Timestamp.now()` is evaluated only once, when the class
    # body runs, and every default-constructed Position would share that
    # stale value.
    time: pd.Timestamp = Field(default_factory=pd.Timestamp.now)

In [3]:
# Read the 'data/cl' table from the HDF5 store in read-only mode; the context
# manager closes the file handle as soon as the frame is loaded.
with pd.HDFStore('./large_files/data_20240312.h5', mode='r') as store:
    df_ = store.get('data/cl')

# `verbose` is a boolean flag; pass True explicitly instead of a truthy
# integer to request the full per-column summary.
df_.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 2529084 entries, 525 to 5683617
Data columns (total 10 columns):
 #   Column    Dtype                           
---  ------    -----                           
 0   date      object                          
 1   time      object                          
 2   open      float64                         
 3   high      float64                         
 4   low       float64                         
 5   close     float64                         
 6   volume    int64                           
 7   datetime  datetime64[ns, America/New_York]
 8   day       object                          
 9   month     int64                           
dtypes: datetime64[ns, America/New_York](1), float64(4), int64(2), object(3)
memory usage: 212.2+ MB


In [4]:
# Build the in-sample frame: index by bar timestamp (keeping the 'datetime'
# column, which the strategy groups on) and keep only bars that fall on
# day_of_week 2, 3 or 4 (Wednesday-Friday, Monday being 0), in the years
# 2020-2023, with an hour component from 9 up to and including 15.
df = df_.set_index('datetime', drop=False)
df = df.loc[
    df.index.day_of_week.isin([2, 3, 4])
    & df.index.year.isin([2020, 2021, 2022, 2023])
    & (df.index.hour >= 9)
    & (df.index.hour < 16)
]

In [5]:
def mean_reversion5(
    df,
    window=30,
    long_entry_threshold=-3.0,
    long_exit_threshold=0,
    short_entry_threshold=2.0,
    short_exit_threshold=0,
    fast_sma=5,
    slow_sma=15,
    early_exit_minutes=60,
    take_profit=0.05,
    stop_loss=-0.01,
):
    """Generate long/short entry and exit signals for an intraday z-score strategy.

    The frame is processed one calendar day at a time (grouped on the
    ``datetime`` column), and the rolling indicators are recomputed per day, so
    every rolling window restarts at the first bar of each day. At most one
    position is tracked at a time and it is carried across days until an exit
    rule fires.

    Rules, evaluated bar by bar:

    * Entry (only while flat): on a Wednesday bar between 10:25 and 10:35
      inclusive, go long if the z-score is below ``long_entry_threshold``,
      otherwise go short if it is above ``short_entry_threshold``.
    * Early exit (while less than ``early_exit_minutes`` have passed since
      entry): close a long when the z-score is at or above
      ``long_exit_threshold`` on a bar where ``direction`` has just become
      ``'down'``; close a short when the z-score is at or below
      ``short_exit_threshold`` on a bar where it has just become ``'up'``.
    * Forced exit (only after the early-exit window): close on the Friday
      15:59 bar.
    * Otherwise (after the early-exit window): close when the trade's return
      from the entry close is above ``take_profit`` or at or below
      ``stop_loss``.

    After any exit the remaining bars of that day are skipped.

    Args:
        df: Frame with a ``close`` column and a ``datetime`` column, indexed by
            the bar timestamps (the index values are used for time
            arithmetic and weekday checks).
        window: Rolling window, in bars, for the z-score mean and std.
        long_entry_threshold: z-score below which a long is opened.
        long_exit_threshold: z-score at or above which a long may exit early.
        short_entry_threshold: z-score above which a short is opened.
        short_exit_threshold: z-score at or below which a short may exit early.
        fast_sma: Window, in bars, of the fast simple moving average.
        slow_sma: Window, in bars, of the slow simple moving average.
        early_exit_minutes: Length of the early-exit window after entry.
        take_profit: Fractional return above which the position is closed.
        stop_loss: Fractional return at or below which the position is closed.

    Returns:
        dict with keys ``'entries'``, ``'exits'``, ``'short_entries'`` and
        ``'short_exits'``, each a boolean Series built by concatenating the
        per-day signal Series (indexed like the non-empty days of ``df``).
        If ``df`` contains no bars, each value is an empty boolean Series.
    """
    # Loop-invariant time boundaries, built once instead of on every bar.
    entry_window_start = datetime.time(10, 25)
    entry_window_end = datetime.time(10, 35)
    friday_last_bar = datetime.time(15, 59)
    early_exit_window = datetime.timedelta(minutes=early_exit_minutes)

    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []

    # The open position (if any) deliberately lives outside the day loop so a
    # trade opened on Wednesday can be closed on Thursday or Friday.
    position = None
    for _, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue

        df_day = df_day.copy()
        rolling_close = df_day.close.rolling(window)
        df_day['z_score'] = (df_day.close - rolling_close.mean()) / rolling_close.std()

        df_day['sma_fast'] = df_day.close.rolling(fast_sma).mean()
        df_day['sma_slow'] = df_day.close.rolling(slow_sma).mean()
        # NOTE(review): 'down' is raised when the SLOW average crosses below the
        # FAST one (and 'up' when it crosses above). This reproduces the signals
        # of the previous revision, where the two column names were swapped
        # relative to their windows; the labels look inverted versus the usual
        # fast-over-slow convention - confirm this is intended before changing.
        df_day['down'] = df_day.sma_slow.vbt.crossed_below(df_day.sma_fast).replace(True, 'down').replace(False, np.nan)
        df_day['up'] = df_day.sma_slow.vbt.crossed_above(df_day.sma_fast).replace(True, 'up').replace(False, np.nan)

        # Latest crossing label carried forward, plus flags for the bars on
        # which that label has just changed.
        df_day['direction'] = df_day.up.combine_first(df_day.down).ffill()
        df_day['shift_up'] = (df_day['direction'] == 'up') & (df_day['direction'].shift(1) != 'up')
        df_day['shift_down'] = (df_day['direction'] == 'down') & (df_day['direction'].shift(1) != 'down')

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)

        for ts, row in df_day.iterrows():
            if position is not None:
                # Inside the early-exit window: z-score reversion confirmed by
                # a fresh direction flip.
                if (ts - position.time) < early_exit_window:
                    if position.direction == 'long' and row.z_score >= long_exit_threshold and row.shift_down:
                        long_exit[ts] = True
                        position = None
                        break
                    elif position.direction == 'short' and row.z_score <= short_exit_threshold and row.shift_up:
                        short_exit[ts] = True
                        position = None
                        break

                # Last bar of Friday: flatten unconditionally.
                elif ts.day_name() == 'Friday' and ts.time() == friday_last_bar:
                    if position.direction == 'long':
                        long_exit[ts] = True
                    elif position.direction == 'short':
                        short_exit[ts] = True
                    position = None
                    break

                # After the early-exit window: take-profit / stop-loss on the
                # return measured from the entry close (sign-flipped for shorts).
                else:
                    returns = (row.close - position.price) / position.price
                    returns = returns * (-1 if position.direction == 'short' else 1)

                    if returns > take_profit or returns <= stop_loss:
                        if position.direction == 'long':
                            long_exit[ts] = True
                        elif position.direction == 'short':
                            short_exit[ts] = True
                        position = None
                        break

            # Flat: entries are only considered in the Wednesday entry window.
            elif ts.day_name() == 'Wednesday' and entry_window_start <= ts.time() <= entry_window_end:
                if row.z_score < long_entry_threshold:
                    long_entry[ts] = True
                    position = Position(direction='long', price=row.close, time=ts)

                elif row.z_score > short_entry_threshold:
                    short_entry[ts] = True
                    position = Position(direction='short', price=row.close, time=ts)

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)

    # pd.concat raises on an empty list, so return empty signals when the
    # input held no bars at all.
    if not long_entry_signals:
        empty_index = df.index[:0]
        return {
            'entries': pd.Series(False, index=empty_index, dtype=bool),
            'exits': pd.Series(False, index=empty_index, dtype=bool),
            'short_entries': pd.Series(False, index=empty_index, dtype=bool),
            'short_exits': pd.Series(False, index=empty_index, dtype=bool),
        }

    return {
        'entries': pd.concat(long_entry_signals),
        'exits': pd.concat(long_exit_signals),
        'short_entries': pd.concat(short_entry_signals),
        'short_exits': pd.concat(short_exit_signals),
    }

In [6]:
# Run the strategy over the in-sample frame and backtest the resulting
# signals with vectorbt, treating each bar as one minute ('1m').
signals = mean_reversion5(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    entries=signals['entries'],
    exits=signals['exits'],
    short_entries=signals['short_entries'],
    short_exits=signals['short_exits'],
    freq='1m',
    allow_partial=True,
)
portfolio.stats()

Start                         2020-01-02 09:00:00-05:00
End                           2023-12-29 15:59:00-05:00
Period                                177 days 15:00:00
Start Value                                       100.0
End Value                                    122.279088
Total Return [%]                              22.279088
Benchmark Return [%]                          16.748446
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               4.920624
Max Drawdown Duration                  42 days 13:04:00
Total Trades                                         80
Total Closed Trades                                  80
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       65.0
Best Trade [%]                                 5.115274
Worst Trade [%]                               -1

- 64 / 80 trades were exited within 60 minutes

In [7]:
# Plot the portfolio from the in-sample backtest in the previous cell.
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '44fd921b-2e76-4bdf-b3f1-287f869aa2c0',
              'x': array([datetime.datetime(2020, 1, 2, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 29, 15, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 15, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 15, 59, tzinfo=<DstTz

In [8]:
# Out-of-sample run on 2024: same Wednesday-Friday filter (day_of_week 2, 3, 4)
# as the in-sample frame, but only bars with an hour component of 9, 10 or 11.
# NOTE(review): with hours >= 12 removed, the Friday 15:59 forced exit inside
# mean_reversion5 can never match on this frame - confirm the shorter session
# is intended.
df_year = df_.set_index('datetime', drop=False)
df_year = df_year.loc[
    df_year.index.year.isin([2024])
    & df_year.index.day_of_week.isin([2, 3, 4])
    & (df_year.index.hour >= 9)
    & (df_year.index.hour < 12)
]

signals = mean_reversion5(df_year)
portfolio = vbt.Portfolio.from_signals(
    close=df_year['close'],
    entries=signals['entries'],
    exits=signals['exits'],
    short_entries=signals['short_entries'],
    short_exits=signals['short_exits'],
    freq='1m',
    allow_partial=True,
)
portfolio.stats()

Start                         2024-01-03 09:00:00-05:00
End                           2024-03-01 11:59:00-05:00
Period                                  3 days 09:00:00
Start Value                                       100.0
End Value                                     99.407782
Total Return [%]                              -0.592218
Benchmark Return [%]                          14.419263
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               1.841378
Max Drawdown Duration                   1 days 10:09:00
Total Trades                                          6
Total Closed Trades                                   6
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  66.666667
Best Trade [%]                                 0.573813
Worst Trade [%]                               -1

In [9]:
# Plot the portfolio from the 2024 backtest in the previous cell.
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'cdfe30e5-9d9d-4726-a824-9f67e3bac735',
              'x': array([datetime.datetime(2024, 1, 3, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2024, 1, 3, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2024, 1, 3, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2024, 3, 1, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2024, 3, 1, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2024, 3, 1, 11, 59, tzinfo=<DstTzInfo '