In [1]:
import vectorbt as vbt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import io
import pytz
import datetime

plt.style.use('ggplot')

In [3]:
with pd.HDFStore('./large_files/data_20240312.h5', mode='r') as store:
    df_ = store.get('data/cl')
df_.info(verbose=3)

<class 'pandas.core.frame.DataFrame'>
Index: 2529084 entries, 525 to 5683617
Data columns (total 10 columns):
 #   Column    Dtype                           
---  ------    -----                           
 0   date      object                          
 1   time      object                          
 2   open      float64                         
 3   high      float64                         
 4   low       float64                         
 5   close     float64                         
 6   volume    int64                           
 7   datetime  datetime64[ns, America/New_York]
 8   day       object                          
 9   month     int64                           
dtypes: datetime64[ns, America/New_York](1), float64(4), int64(2), object(3)
memory usage: 212.2+ MB


In [4]:
df = df_.set_index('datetime', drop=False)
df = df.loc[df.index.year.isin([2021, 2022])]
df = df.loc[df.index.day_of_week == 2]
df = df[(df.index.hour >= 9) & (df.index.hour < 12)]

In [5]:
df_2023 = df_.set_index('datetime', drop=False)
df_2023 = df_2023.loc[df_2023.index.year.isin([2023])]
df_2023 = df_2023.loc[df_2023.index.day_of_week == 2]
df_2023 = df_2023[(df_2023.index.hour >= 9) & (df_2023.index.hour < 12)]

# Hold

In [6]:
def hold_strategy(df):
    long_entry_signals = []
    long_exit_signals = []
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            time = time.time()

            if (time == pd.Timestamp('11:59').time()):
                long_exit[time] = True
            elif (time == pd.Timestamp('10:30').time()):
                long_entry[time] = True

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals
    }

In [7]:
signals = hold_strategy(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2022-12-28 11:59:00-05:00
Period                                 13 days 00:00:00
Start Value                                       100.0
End Value                                    104.313894
Total Return [%]                               4.313894
Benchmark Return [%]                          56.271254
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               9.331227
Max Drawdown Duration                  10 days 15:21:00
Total Trades                                        104
Total Closed Trades                                 104
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  52.884615
Best Trade [%]                                 2.675946
Worst Trade [%]                               -2

In [8]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '35d258cb-4cd3-46e6-85e1-79066d0991e7',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2022, 12, 28, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 59, tzinfo=<DstTz

# Mean Reversion 1
1. Open position between 1025 and 1035
1. Go long if z-score less than threshold
1. Close before 1200 or if z-score reverts

In [9]:
def mean_reversion1(df, window=30, long_entry_threshold=-3.0, long_exit_threshold=0):
    long_entry_signals = []
    long_exit_signals = []
    
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            time = time.time()

            if long_entry.any():
                if (row.z_score >= long_exit_threshold) or (time == pd.Timestamp('11:59')):
                    long_exit[time] = True
                    break
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
                long_entry[time] = True
        
        if long_entry.any():
            assert long_exit.any(), f'{day} has no exit signal'

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals
    }

In [10]:
signals = mean_reversion1(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2022-12-28 11:59:00-05:00
Period                                 13 days 00:00:00
Start Value                                       100.0
End Value                                    102.703924
Total Return [%]                               2.703924
Benchmark Return [%]                          56.271254
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               2.212927
Max Drawdown Duration                   0 days 14:57:00
Total Trades                                         10
Total Closed Trades                                  10
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       90.0
Best Trade [%]                                 0.617933
Worst Trade [%]                               -1

In [11]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'af00d068-e5e7-4a88-b646-a69da7b0a7f4',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2022, 12, 28, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 59, tzinfo=<DstTz

# Mean Reversion 2
1. Open position between 1025 and 1035
1. Go long if z-score less than threshold
1. Go short if z-score above threshold
1. Close before 1200 or if z-score reverts

In [12]:
def mean_reversion2(df, window=30, long_entry_threshold=-3.0, long_exit_threshold=0, short_entry_threshold=2.0, short_exit_threshold=0):
    long_entry_signals = []
    long_exit_signals = []

    short_entry_signals = []
    short_exit_signals = []
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)
        short_entry = pd.Series(False, index=df_day.index)
        short_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            time = time.time()

            if long_entry.any():
                if (row.z_score >= long_exit_threshold) or (time == pd.Timestamp('11:59')):
                    long_exit[time] = True
                    break
            elif short_entry.any():
                if (row.z_score <= short_exit_threshold) or (time == pd.Timestamp('11:59')):
                    short_exit[time] = True
                    break
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
                long_entry[time] = True
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score > short_entry_threshold):
                short_entry[time] = True

        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)
        short_entry_signals.append(short_entry)
        short_exit_signals.append(short_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)
    short_entry_signals = pd.concat(short_entry_signals)
    short_exit_signals = pd.concat(short_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals,
        'short_entries': short_entry_signals,
        'short_exits': short_exit_signals
    }

In [13]:
signals = mean_reversion2(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2022-12-28 11:59:00-05:00
Period                                 13 days 00:00:00
Start Value                                       100.0
End Value                                    109.192684
Total Return [%]                               9.192684
Benchmark Return [%]                          56.271254
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               1.809733
Max Drawdown Duration                   3 days 18:03:00
Total Trades                                         42
Total Closed Trades                                  42
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  76.190476
Best Trade [%]                                 1.013472
Worst Trade [%]                               -1

In [14]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '5d696f85-961c-4e07-be9c-44a1f31947cb',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2022, 12, 28, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 59, tzinfo=<DstTz

In [15]:
signals = mean_reversion2(df_2023)
portfolio = vbt.Portfolio.from_signals(
    close=df_2023['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2023-01-04 09:00:00-05:00
End                           2023-12-27 11:59:00-05:00
Period                                  6 days 12:00:00
Start Value                                       100.0
End Value                                    100.275346
Total Return [%]                               0.275346
Benchmark Return [%]                          -0.760304
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                                2.05809
Max Drawdown Duration                   4 days 16:28:00
Total Trades                                         18
Total Closed Trades                                  18
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                  44.444444
Best Trade [%]                                 0.672704
Worst Trade [%]                               -0

In [16]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': '4c4d5b23-0257-4537-9db7-96a569599f89',
              'x': array([datetime.datetime(2023, 1, 4, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 1, 4, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 1, 4, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2023, 12, 27, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2023, 12, 27, 11, 59, tzinfo=<DstTz

# Mean Reversion 3
1. Long only, go long when z-score below threshold.
1. Exit when z-score reverts and macd line cross below macd signal line

In [17]:
def mean_reversion3(
    df, window=30, 
    long_entry_threshold=-3.0, 
    long_exit_threshold=0
):
    long_entry_signals = []
    long_exit_signals = []
    
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()
        df_day['macd_line'] = vbt.MACD.run(df_day.close).macd
        df_day['signal_line'] = vbt.MACD.run(df_day.close).signal
        df_day['up'] = df_day.macd_line.vbt.crossed_above(df_day.signal_line)
        df_day['down'] = df_day.macd_line.vbt.crossed_below(df_day.signal_line)

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)

        for time, row in df_day.iterrows():
            time = time.time()

            if long_entry.any():
                if ((row.z_score >= long_exit_threshold) and row.down) or (time == pd.Timestamp('11:59').time()):
                    long_exit[time] = True
                    break
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
                long_entry[time] = True

        if long_entry.any():
            if not long_exit.any():
                print(df_day.shape)
                raise ValueError(f'{day} has no exit signal')
                
        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals
    }

In [18]:
signals = mean_reversion3(df)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2022-12-28 11:59:00-05:00
Period                                 13 days 00:00:00
Start Value                                       100.0
End Value                                    102.601997
Total Return [%]                               2.601997
Benchmark Return [%]                          56.271254
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               2.436223
Max Drawdown Duration                   3 days 20:57:00
Total Trades                                         10
Total Closed Trades                                  10
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       70.0
Best Trade [%]                                 1.238441
Worst Trade [%]                               -1

In [19]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'd4bd0f28-aef1-41d3-8e88-0a3eed6f8ef1',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2022, 12, 28, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 59, tzinfo=<DstTz

# Mean Reversion 4
1. Long only, go long when z-score below threshold.
1. Exit when z-score reverts and (fast sma cross below slow sma)

In [43]:
def mean_reversion4(
    df, 
    window=30, 
    long_entry_threshold=-3.0, 
    long_exit_threshold=0,
    fast_sma=10,
    slow_sma=30
):
    long_entry_signals = []
    long_exit_signals = []
    
    for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
        if df_day.shape[0] == 0:
            continue
        
        df_day = df_day.copy()
        df_day['z_score'] = (df_day.close - df_day.close.rolling(window).mean()) / df_day.close.rolling(window).std()
        df_day['sma_slow'] = df_day.close.rolling(fast_sma).mean()
        df_day['sma_fast'] = df_day.close.rolling(slow_sma).mean()
        df_day['down'] = df_day.sma_fast.vbt.crossed_below(df_day.sma_slow).replace(True, 'down').replace(False, np.nan)
        df_day['up'] = df_day.sma_fast.vbt.crossed_above(df_day.sma_slow).replace(True, 'up').replace(False, np.nan)

        df_day['direction'] = df_day.up.combine_first(df_day.down).ffill()

        long_entry = pd.Series(False, index=df_day.index)
        long_exit = pd.Series(False, index=df_day.index)

        time_threshold = pd.Timestamp('11:59').time()
        for date_time, row in df_day.iterrows():
            time = date_time.time()

            if long_entry.any():
                if ((row.z_score >= long_exit_threshold) and (row.direction == 'down')) or (time == time_threshold):
                    long_exit[time] = True
                    break
            elif (time >= pd.Timestamp('10:25').time()) & (time <= pd.Timestamp('10:35').time()) & (row.z_score < long_entry_threshold):
                long_entry[time] = True
                # time_threshold = (date_time + datetime.timedelta(minutes=hold_time)).time()

        if long_entry.any():
            if not long_exit.any():
                print(df_day.shape)
                raise ValueError(f'{day} has no exit signal')
                
        long_entry_signals.append(long_entry)
        long_exit_signals.append(long_exit)

    long_entry_signals = pd.concat(long_entry_signals)
    long_exit_signals = pd.concat(long_exit_signals)

    return {
        'entries': long_entry_signals,
        'exits': long_exit_signals
    }

In [45]:
signals = mean_reversion4(df, fast_sma=10, slow_sma=30)
portfolio = vbt.Portfolio.from_signals(
    close=df['close'],
    freq='1m',
    allow_partial=True,
    **signals
)
portfolio.stats()

Start                         2021-01-06 09:00:00-05:00
End                           2022-12-28 11:59:00-05:00
Period                                 13 days 00:00:00
Start Value                                       100.0
End Value                                    102.666483
Total Return [%]                               2.666483
Benchmark Return [%]                          56.271254
Max Gross Exposure [%]                            100.0
Total Fees Paid                                     0.0
Max Drawdown [%]                               2.212927
Max Drawdown Duration                   3 days 20:57:00
Total Trades                                         10
Total Closed Trades                                  10
Total Open Trades                                     0
Open Trade PnL                                      0.0
Win Rate [%]                                       70.0
Best Trade [%]                                    0.944
Worst Trade [%]                               -1

In [46]:
portfolio.plot()

FigureWidget({
    'data': [{'legendgroup': '0',
              'line': {'color': '#1f77b4'},
              'name': 'Close',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'ea9dee4b-119e-433a-b0e2-434ca4e2d355',
              'x': array([datetime.datetime(2021, 1, 6, 9, 0, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 1, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2021, 1, 6, 9, 2, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          ...,
                          datetime.datetime(2022, 12, 28, 11, 57, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 58, tzinfo=<DstTzInfo 'America/New_York' EST-1 day, 19:00:00 STD>),
                          datetime.datetime(2022, 12, 28, 11, 59, tzinfo=<DstTz