In [1]:
import vectorbt as vbt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objs as go

import datetime

from pydantic import BaseModel, ConfigDict
from statsmodels.tsa.stattools import adfuller

plt.style.use('ggplot')

In [2]:
class Position(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    direction: str = ''
    price: float = 0.0
    time: pd.Timestamp = pd.Timestamp.now()

In [70]:
def process_signals(df, signals):
    df_signal = pd.DataFrame(signals)
    df_result = {}
    for day, df_day in df_signal.groupby(pd.Grouper(level='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        if day.day_name() == 'Wednesday':
            entry_long = df_day[df_day.entries == True]
            entry_short = df_day[df_day.short_entries == True]

        if entry_long.shape[0] != 0:
            trade_exit = df_day[df_day.exits == True].index
            trade_entry = entry_long.index
            position = 'long'
        elif entry_short.shape[0] != 0:
            trade_exit = df_day[df_day.short_exits == True].index
            trade_entry = entry_short.index
            position = 'short'
        else:
            continue
        
        df_price = df[df.datetime.dt.date == day.date()].copy()
        df_price['z_score'] = (df_price.close - df_price.close.rolling(30).mean()) / df_price.close.rolling(30).std()

        entry_price = df.loc[trade_entry].close.values[0]
        exit_price = df.loc[trade_exit].close.values[0]
        entry_z = df_price.loc[trade_entry].z_score.values[0]
        exit_z = df_price.loc[trade_exit].z_score.values[0]

        returns = (exit_price - entry_price) / entry_price
        returns = returns * (1 if position == 'long' else -1)

        df_result[trade_exit.date[0]] = {
            'position': position,
            'entry_time': trade_entry.time[0],
            'exit_time': trade_exit.time[0],
            'entry_price': entry_price,
            'exit_price': exit_price,
            'entry_z': entry_z,
            'exit_z': exit_z,
            'return': returns
        }

    df_result = pd.DataFrame(df_result).T

    return df_result

In [4]:
with pd.HDFStore('./large_files/data_20240328.h5', mode='r') as store:
    df_ = store.get('data/cl')
df_.info(verbose=3)

<class 'pandas.core.frame.DataFrame'>
Index: 2538590 entries, 0 to 2540266
Data columns (total 10 columns):
 #   Column    Dtype                           
---  ------    -----                           
 0   date      object                          
 1   time      object                          
 2   open      float64                         
 3   high      float64                         
 4   low       float64                         
 5   close     float64                         
 6   volume    int64                           
 7   datetime  datetime64[ns, America/New_York]
 8   day       object                          
 9   month     int64                           
dtypes: datetime64[ns, America/New_York](1), float64(4), int64(2), object(3)
memory usage: 213.0+ MB


In [57]:
def mean_reversion5(
    df,
    window=30,
    long_entry_threshold=-3.0,
    long_exit_threshold=0, 
    short_entry_threshold=2.0, 
    short_exit_threshold=0,
    fast_sma=5,
    slow_sma=15
):
    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []
    df_test = []
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

        df_day['sma_slow'] = df_day.close.rolling(fast_sma).mean()
        df_day['sma_fast'] = df_day.close.rolling(slow_sma).mean()
        df_day['down'] = df_day.sma_fast.vbt.crossed_below(df_day.sma_slow).replace(True, 'down').replace(False, np.nan)
        df_day['up'] = df_day.sma_fast.vbt.crossed_above(df_day.sma_slow).replace(True, 'up').replace(False, np.nan)

        df_day['direction'] = df_day.up.combine_first(df_day.down).ffill()

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            time = time.time()

            if long_entry.any():
                if ((row.z_score >= long_exit_threshold) and row.direction == 'down') or (time == pd.Timestamp('11:59').time()):
                    long_exit[time] = True
                    break
            elif short_entry.any():
                if ((row.z_score <= short_exit_threshold) and row.direction == 'up') or (time == pd.Timestamp('11:59').time()):
                    short_exit[time] = True
                    break
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
                long_entry[time] = True
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score > short_entry_threshold):
                short_entry[time] = True

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)
    short_entry_signals = pd.concat(short_entry_signals)
    short_exit_signals = pd.concat(short_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals,
        'short_entries': short_entry_signals,
        'short_exits': short_exit_signals
    }

In [58]:
df = df_.set_index('datetime', drop=False)
df = df.loc[df.index.day_of_week.isin([2])]
df = df.loc[df.index.year.isin([2020, 2021, 2022, 2023])]
df = df[(df.index.hour >= 9) & (df.index.hour < 12)]

signals = mean_reversion5(df, fast_sma=5, slow_sma=15)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2020-01-08 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                 26 days 00:00:00
Start Value                                       100.0
End Value                                    117.861594
Total Return [%]                              17.861594
Benchmark Return [%]                          20.349401
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               1.841567
Max Drawdown Duration                   4 days 16:28:00
Total Trades                                         80
Total Closed Trades                                  80
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       67.5
Best Trade [%]                                 2.183956
Worst Trade [%]                               -1

In [71]:
df_result_mr = process_signals(df, signals)
df_result_mr['position'].value_counts()

position
short    63
long     17
Name: count, dtype: int64

In [8]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '8c46fb66-b632-4c51-9f41-8d39797c0a03',
              'x': array([datetime.datetime(2020, 1, 8, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 8, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 8, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 27, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 59, tzinfo=<DstTz

In [9]:
def mean_reversion5_1(
    df,
    window=30,
    long_entry_threshold=-3.0,
    long_exit_threshold=0, 
    short_entry_threshold=2.0, 
    short_exit_threshold=0,
    fast_sma=5,
    slow_sma=15
):
    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []
    position = None
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

        df_day['sma_slow'] = df_day.close.rolling(fast_sma).mean()
        df_day['sma_fast'] = df_day.close.rolling(slow_sma).mean()
        df_day['down'] = df_day.sma_fast.vbt.crossed_below(df_day.sma_slow).replace(True, 'down').replace(False, np.nan)
        df_day['up'] = df_day.sma_fast.vbt.crossed_above(df_day.sma_slow).replace(True, 'up').replace(False, np.nan)

        df_day['direction'] = df_day.up.combine_first(df_day.down).ffill()
        df_day['shift_up'] = (df_day['direction'] == 'up') & (df_day['direction'].shift(1) != 'up')
        df_day['shift_down'] = (df_day['direction'] == 'down') & (df_day['direction'].shift(1) != 'down')

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            if position is not None:
                # within 30 minutes
                if (time - position.time) < datetime.timedelta(minutes=60):
                    # long exit
                    if ((position.direction == 'long') and (row.z_score >= long_exit_threshold) and row.shift_down):
                        long_exit[time] = True
                        position = None
                        break
                    # short exit
                    elif ((position.direction == 'short') and (row.z_score <= short_exit_threshold) and row.shift_up):
                        short_exit[time] = True
                        position = None
                        break
                
                # end of Friday
                elif (time.day_name() == 'Friday') and (time.time() == pd.Timestamp('15:59').time()):
                    if position.direction == 'long': long_exit[time] = True
                    elif position.direction == 'short': short_exit[time] = True
                    position = None
                    break

                # after 30 minutes
                else:
                    returns = (row.close - position.price) / position.price
                    returns = returns * (-1 if position.direction == 'short' else 1)

                    # if position.direction == 'long' and row.shift_down and returns > 0.05:
                    #     long_exit[time] = True
                    #     position = None
                    #     break
                    # elif position.direction == 'short' and row.shift_up and returns <= -0.01:
                    #     short_exit[time] = True
                    #     position = None
                    #     break
                    if returns > 0.05 or returns <= -0.005:
                        if position.direction == 'long': long_exit[time] = True
                        elif position.direction == 'short': short_exit[time] = True
                        position = None
                        break

            # entry signals
            elif (time.day_name() == 'Wednesday') & (time.time() >= pd.Timestamp('10:25').time()) & (time.time() <= pd.Timestamp('10:35').time()):
                if (row.z_score < long_entry_threshold):
                    long_entry[time] = True
                    position = Position(direction='long', price=row.close, time=time)

                elif (row.z_score > short_entry_threshold):
                    short_entry[time] = True
                    position = Position(direction='short', price=row.close, time=time)

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)

        # if day.day_name() == 'Friday':
        #     assert long_entry.sum() == long_exit.sum(), f'Long signals do no match: {day.date()} {long_entry.sum()} {long_exit.sum()}'
        #     assert short_entry.sum() == short_exit.sum(), f'Short signals do no match: {day.date()} {short_entry.sum()} {short_exit.sum()}' 

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)
    short_entry_signals = pd.concat(short_entry_signals)
    short_exit_signals = pd.concat(short_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals,
        'short_entries': short_entry_signals,
        'short_exits': short_exit_signals
    }

In [10]:
df = df_.set_index('datetime', drop=False)
df = df.loc[df.index.day_of_week.isin([2, 3, 4])]
df = df.loc[df.index.year.isin([2020, 2021, 2022, 2023])]
df = df[(df.index.hour >= 9) & (df.index.hour < 16)]

signals = mean_reversion5_1(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2020-01-02 09:00:00-05:00
End                           2023-12-29 15:59:00-05:00
Period                                177 days 15:00:00
Start Value                                       100.0
End Value                                    124.755835
Total Return [%]                              24.755835
Benchmark Return [%]                          16.748446
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               3.358042
Max Drawdown Duration                  42 days 14:16:00
Total Trades                                         80
Total Closed Trades                                  80
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                      63.75
Best Trade [%]                                 5.115274
Worst Trade [%]                               -1

- 64 / 80 trades were exited within 60 minutes

In [11]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '7bcfc744-e7d6-45c5-8661-314205188768',
              'x': array([datetime.datetime(2020, 1, 2, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 29, 15, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 15, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 15, 59, tzinfo=<DstTz

In [12]:
def mean_reversion5_2(
    df,
    window=30,
    long_entry_threshold=-3.0,
    long_exit_threshold=0, 
    short_entry_threshold=2.0, 
    short_exit_threshold=0,
    fast_sma=5,
    slow_sma=15
):
    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []
    position = None
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

        df_day['sma_slow'] = df_day.close.rolling(fast_sma).mean()
        df_day['sma_fast'] = df_day.close.rolling(slow_sma).mean()
        df_day['down'] = df_day.sma_fast.vbt.crossed_below(df_day.sma_slow).replace(True, 'down').replace(False, np.nan)
        df_day['up'] = df_day.sma_fast.vbt.crossed_above(df_day.sma_slow).replace(True, 'up').replace(False, np.nan)

        df_day['direction'] = df_day.up.combine_first(df_day.down).ffill()
        df_day['shift_up'] = (df_day['direction'] == 'up') & (df_day['direction'].shift(1) != 'up')
        df_day['shift_down'] = (df_day['direction'] == 'down') & (df_day['direction'].shift(1) != 'down')

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            if position is not None:
                # within 30 minutes
                if (time - position.time) < datetime.timedelta(minutes=60):
                    # long exit
                    if ((position.direction == 'long') and (row.z_score >= long_exit_threshold) and row.shift_down):
                        long_exit[time] = True
                        position = None
                        break
                    # short exit
                    elif ((position.direction == 'short') and (row.z_score <= short_exit_threshold) and row.shift_up):
                        short_exit[time] = True
                        position = None
                        break
                
                # end of day
                elif (time.time() == pd.Timestamp('15:59').time()):
                    if position.direction == 'long': long_exit[time] = True
                    elif position.direction == 'short': short_exit[time] = True
                    position = None
                    break

                # after 30 minutes
                else:
                    returns = (row.close - position.price) / position.price
                    returns = returns * (-1 if position.direction == 'short' else 1)
                    if returns > 0.05 or returns <= -0.005:
                        if position.direction == 'long': long_exit[time] = True
                        elif position.direction == 'short': short_exit[time] = True
                        position = None
                        break

            # entry signals
            elif (time.time() >= pd.Timestamp('10:25').time()) & (time.time() <= pd.Timestamp('10:35').time()):
                if (row.z_score < long_entry_threshold):
                    long_entry[time] = True
                    position = Position(direction='long', price=row.close, time=time)

                elif (row.z_score > short_entry_threshold):
                    short_entry[time] = True
                    position = Position(direction='short', price=row.close, time=time)

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)
    short_entry_signals = pd.concat(short_entry_signals)
    short_exit_signals = pd.concat(short_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals,
        'short_entries': short_entry_signals,
        'short_exits': short_exit_signals
    }

In [13]:
df = df_.set_index('datetime', drop=False)
df = df.loc[df.index.day_of_week.isin([2])]
df = df.loc[df.index.year.isin([2020, 2021, 2022, 2023])]
df = df[(df.index.hour >= 9) & (df.index.hour < 16)]

signals = mean_reversion5_2(df, fast_sma=5, slow_sma=15)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2020-01-08 09:00:00-05:00
End                           2023-12-27 15:59:00-05:00
Period                                 60 days 16:00:00
Start Value                                       100.0
End Value                                    117.665815
Total Return [%]                              17.665815
Benchmark Return [%]                          19.443546
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                                3.36053
Max Drawdown Duration                  20 days 22:55:00
Total Trades                                         80
Total Closed Trades                                  80
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       67.5
Best Trade [%]                                 2.183956
Worst Trade [%]                               -1

Overall, closing within the day still gives better Sharpe ratio

In [37]:
# cheat strategy
def cheat_strategy(df):
    window = 30
    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)
        
        df_day = df_day.copy()

        # get maximum z score
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()
        df_10 = df_day[(df_day.datetime.dt.time >= pd.Timestamp('10:25').time()) & (df_day.datetime.dt.time <= pd.Timestamp('10:35').time())]

        is_entry = False
        if df_10.loc[df_10.z_score.idxmin(), 'z_score'] < -3:
            is_entry = True
            max_idx = df_10.z_score.idxmin()
            long_entry[max_idx] = True
        elif df_10.loc[df_10.z_score.idxmax(), 'z_score'] > 2:
            is_entry = True
            max_idx = df_10.z_score.idxmax()
            short_entry[max_idx] = True
            
        # exit signal
        if is_entry:
            df_later = df_day[df_day.datetime.dt.time > max_idx.time()]
            if df_day.loc[max_idx, 'z_score'] > 0:
                exit_point = df_later.close.idxmin()
                short_exit[exit_point] = True
            else:
                exit_point = df_later.close.idxmax()
                long_exit[exit_point] = True

        assert long_entry.sum() == long_exit.sum(), f'Long signals do no match: {day}'
        assert short_entry.sum() == short_exit.sum(), f'Short signals do no match: {day}'

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)
        
    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)
    short_entry_signals = pd.concat(short_entry_signals)
    short_exit_signals = pd.concat(short_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals,
        'short_entries': short_entry_signals,
        'short_exits': short_exit_signals
    }

In [72]:
df_cheat = df_.set_index('datetime', drop=False)
df_cheat = df_cheat.loc[df_cheat.index.day_of_week.isin([2])]
df_cheat = df_cheat.loc[df_cheat.index.year.isin([2020, 2021, 2022, 2023])]
df_cheat = df_cheat[(df_cheat.index.hour >= 9) & (df_cheat.index.hour < 12)]

signals = cheat_strategy(df_cheat)
portfolio = vbt.Portfolio.from_signals(
    close=df_cheat['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2020-01-08 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                 26 days 00:00:00
Start Value                                       100.0
End Value                                    211.579309
Total Return [%]                             111.579309
Benchmark Return [%]                          20.349401
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               2.011015
Max Drawdown Duration                   0 days 09:01:00
Total Trades                                         80
Total Closed Trades                                  80
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       97.5
Best Trade [%]                                 4.274265
Worst Trade [%]                               -0

In [73]:
df_result_cheat = process_signals(df, signals)
df_result_cheat['position'].value_counts()

position
short    62
long     18
Name: count, dtype: int64

In [40]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '110aa554-de1e-4ec1-b92d-0dd9a1c62d63',
              'x': array([datetime.datetime(2020, 1, 8, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 8, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 8, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 27, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 59, tzinfo=<DstTz

With lookahead, we are getting same signals as the mean reversion strategy

In [42]:
df_cheat = df_.set_index('datetime', drop=False)
df_cheat = df_cheat.loc[df_cheat.index.day_of_week.isin([0, 1, 3, 4])]
df_cheat = df_cheat.loc[df_cheat.index.year.isin([2020, 2021, 2022, 2023])]
df_cheat = df_cheat[(df_cheat.index.hour >= 9) & (df_cheat.index.hour < 12)]

signals = cheat_strategy(df_cheat)
portfolio = vbt.Portfolio.from_signals(
    close=df_cheat['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2020-01-02 09:00:00-05:00
End                           2023-12-29 11:59:00-05:00
Period                                100 days 03:00:00
Start Value                                       100.0
End Value                                    615.577806
Total Return [%]                             515.577806
Benchmark Return [%]                            17.1737
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               3.565436
Max Drawdown Duration                   1 days 09:07:00
Total Trades                                        218
Total Closed Trades                                 218
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  97.247706
Best Trade [%]                                 7.107496
Worst Trade [%]                               -0

In [43]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '1081763e-a6ac-472e-a33c-9fbaee9ba8c7',
              'x': array([datetime.datetime(2020, 1, 2, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2020, 1, 2, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 29, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 29, 11, 59, tzinfo=<DstTz

Analyze winning trades

In [77]:
print(df_result_mr[~(df_result_cheat.position == df_result_mr.position)].to_markdown())
print(df_result_cheat[~(df_result_cheat.position == df_result_mr.position)].to_markdown())

|            | position   | entry_time   | exit_time   |   entry_price |   exit_price |   entry_z |   exit_z |    return |
|:-----------|:-----------|:-------------|:------------|--------------:|-------------:|----------:|---------:|----------:|
| 2022-12-07 | short      | 10:28:00     | 10:33:00    |         74.75 |        73.96 |   2.88698 | -3.90988 | 0.0105686 |
|            | position   | entry_time   | exit_time   |   entry_price |   exit_price |   entry_z |   exit_z |     return |
|:-----------|:-----------|:-------------|:------------|--------------:|-------------:|----------:|---------:|-----------:|
| 2022-12-07 | long       | 10:33:00     | 10:38:00    |         73.96 |        74.11 |  -3.90988 | -1.25324 | 0.00202812 |


In [74]:
df_mr_win = df_result_mr[df_result_mr['return'] > 0]
df_cheat_win = df_result_cheat.loc[df_mr_win.index]

entry_diff = df_mr_win.entry_price - df_cheat_win.entry_price
exit_diff = df_mr_win.exit_price - df_cheat_win.exit_price

print(f"""
    Entry Difference: {entry_diff.mean():.3f} {entry_diff.std():.3f}
    Exit Difference: {exit_diff.mean():.3f} {exit_diff.std():.3f}
""")

exit_diff.head()


    Entry Difference: 0.002 0.115
    Exit Difference: 0.254 0.543



2020-01-08   -0.02
2020-02-05    0.19
2020-03-04    1.08
2020-03-11    0.03
2020-03-18    0.66
Name: exit_price, dtype: object

- Entry signals seem ok, pretty close to ideal.
- Exit signal difference can go up to $1.