### Load Data From Parquet

In [1]:
import pandas as pd

# Read the source frame; later cells rely on its date, open/high/low/close,
# rsi, sto, sto_high and sto_low columns.
# (Plain string literal: the original f-string had no placeholders.)
df = pd.read_parquet('../futures_ta_data/MES_stream_ta_data3.parquet')

In [2]:
# Convert every raw `date` value to a pandas Timestamp and keep it as `time`.
df.loc[:, 'time'] = df['date'].apply(pd.Timestamp)

In [3]:
from datetime import timedelta

# Split each timestamp into its calendar day (midnight) and clock components.
df.loc[:, 'date'] = df.time.dt.floor('d')
df.loc[:, 'hour'] = df.time.dt.hour
df.loc[:, 'minute'] = df.time.dt.minute
# Default both session bounds to midnight of the bar's own day; rows matched by
# neither mask below keep these defaults.
df.loc[:, 'following_open'] = df.loc[:, 'date']
df.loc[:, 'close_tm'] = df.loc[:, 'date']

# before midnight group: bars stamped 16:00 or later
before_midnight = (df.hour >= 16)
# after midnight group: bars stamped earlier than 09:30
after_midnight = (df.hour < 9) | ((df.hour == 9) & (df.minute < 30))

# Evening bars: close_tm is 16:00 on the same day, following_open is 09:30 on
# the next calendar day.
# NOTE(review): days=1 applies no weekend/holiday adjustment - confirm intended.
df.loc[before_midnight, 'close_tm'] = df.loc[before_midnight, 'date'] + pd.Timedelta(hours=16)
df.loc[before_midnight, 'following_open'] = df.loc[before_midnight, 'date'] + pd.Timedelta(days=1, hours=9, minutes=30) 

# Early-morning bars: close_tm is 16:00 on the previous calendar day
# (midnight minus 8 hours), following_open is 09:30 on the same day.
df.loc[after_midnight, 'close_tm'] = df.loc[after_midnight, 'date'] - pd.Timedelta(hours=8)
df.loc[after_midnight, 'following_open'] = df.loc[after_midnight, 'date'] + pd.Timedelta(hours=9, minutes=30)

# Flag bars whose timestamp falls between close_tm and following_open.
# Series.between includes both endpoints by default.
df.loc[:, 'after_hours'] = 0
df.loc[df.time.between(df.close_tm, df.following_open), 'after_hours'] = 1

In [4]:
# Width of the sto_high / sto_low band for each bar.
df.loc[:, 'sto_range'] = df['sto_high'].sub(df['sto_low'])

In [5]:
# Summary statistics of the band width.
df['sto_range'].describe()

count    40950.000000
mean        44.961905
std         19.835110
min          0.000000
25%         31.750000
50%         41.000000
75%         55.000000
max        114.000000
Name: sto_range, dtype: float64

In [6]:
# Show the row with the widest band. idxmax() returns the index *label*, which
# is what .loc expects; argmax() returns a position and only matches the label
# when the index is a default 0..n-1 range.
df.loc[df.sto_range.idxmax()]

date              2020-07-15 00:00:00
open                          3230.75
high                             3233
low                            3230.5
close                         3231.25
volume                           3436
rsi                           73.4704
adx                           21.1637
mean_pr                       3231.25
sma_pr                        3210.57
ewa_pr                        3228.93
pr_diff_ewa                  0.074924
volume_roc                      1.063
sto                           98.4649
sto_high                         3233
sto_low                          3119
time              2020-07-15 08:54:00
hour                                8
minute                             54
following_open    2020-07-15 09:30:00
close_tm          2020-07-14 16:00:00
after_hours                         1
sto_range                         114
Name: 10449, dtype: object

In [7]:
# Keep the first 5000 bars whose timestamp lies inside [close_tm, following_open].
after_hrs_df = df.loc[df['time'].between(df['close_tm'], df['following_open'])].head(5000)

In [None]:
# Put the selected bars in chronological order.
after_hrs_df = after_hrs_df.sort_values(by='time')

In [None]:
# Per-bar hover text. The original concatenation had no separator between the
# 'sto range' value and the 'sto high' label, so the two ran together; a
# newline is inserted to match the separator used after the 'sto' value.
# NOTE(review): plotly hover text may need '<br>' rather than '\n' to render a
# line break - confirm in the rendered figure.
after_hrs_df.loc[:, 'tooltip'] = (
    'sto: ' + after_hrs_df.sto.astype(str)
    + '\nsto range:' + after_hrs_df.sto_range.astype(str)
    + '\nsto high: ' + after_hrs_df.sto_high.astype(str)
    + ' sto low:' + after_hrs_df.sto_low.astype(str)
)

In [None]:
import plotly.graph_objects as go

# Candlestick chart of the selected bars; the tooltip column is attached as
# the per-bar text.
fig = go.Figure()

fig.add_trace(
    go.Candlestick(
        x=after_hrs_df.time, open=after_hrs_df.open, high=after_hrs_df.high,
        low=after_hrs_df.low, close=after_hrs_df.close, name='bars',
        text=after_hrs_df.tooltip
    )
)
# Plain string literal: the original f-string had no placeholders.
fig.update_layout(title='Contract ', xaxis_rangeslider_visible=False)

In [None]:
# One row per (following_open, close_tm) pair: highest high and lowest low.
min_max = (
    after_hrs_df
    .groupby(['following_open', 'close_tm'])[['high', 'low']]
    .agg({'high': 'max', 'low': 'min'})
)

In [None]:
# High-to-low span of each group.
min_max.loc[:, 'range'] = min_max['high'].sub(min_max['low'])

In [None]:
# Distribution of the group spans at each decile from 0% to 90%.
min_max['range'].describe(percentiles=[0.1 * x for x in range(10)])

In [5]:
# Seed the adj_* price columns as straight copies of the raw OHLC columns.
for col in ('open', 'close', 'high', 'low'):
    adj_name = 'adj_' + col
    df.loc[:, adj_name] = df[col]

In [6]:
def get_entrances(df, sto_thresh_low=20, rsi_thresh=55, min_sto_range=35,
                  after_hours_only=False):
    """Flag the bars that qualify as oversold entries.

    A bar qualifies when all of the following hold:
      * ``sto`` is below ``sto_thresh_low``
      * ``rsi`` is below ``rsi_thresh``
      * ``sto_high - sto_low`` is at least ``min_sto_range``
      * (only when ``after_hours_only`` is true) ``after_hours`` equals 1

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``sto``, ``rsi``, ``sto_high`` and ``sto_low``; also
        ``after_hours`` when ``after_hours_only`` is true. Modified in place.
    sto_thresh_low : number, default 20
        Upper bound (exclusive) on ``sto``.
    rsi_thresh : number, default 55
        Upper bound (exclusive) on ``rsi``.
    min_sto_range : number, default 35
        Lower bound (inclusive) on ``sto_high - sto_low``.
    after_hours_only : bool, default False
        Restrict entries to bars flagged ``after_hours == 1``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with 0/1 columns ``sto_oversold_enter`` and
        ``entrances`` added (or overwritten).
    """
    oversold = (
        (df.sto < sto_thresh_low)
        & (df.rsi < rsi_thresh)
        & ((df.sto_high - df.sto_low) >= min_sto_range)
    )
    if after_hours_only:
        oversold &= (df.after_hours == 1)

    df.loc[:, 'sto_oversold_enter'] = 0
    df.loc[oversold, 'sto_oversold_enter'] = 1

    # `entrances` is the OR of every individual entry signal; kept as a list so
    # further signal columns can be added alongside the existing one.
    enter_cols = ['sto_oversold_enter']
    df.loc[:, 'entrances'] = df[enter_cols].sum(axis=1).clip(upper=1)
    return df

In [7]:
def get_exits(df, sto_overbought=101):
    """Flag the bars on which the overbought exit signal fires.

    A bar is flagged when ``sto`` is at or above ``sto_overbought``. The
    original code compared ``rsi`` against this threshold, which did not match
    the parameter name or the ``sto_overbought_exit`` column it fills.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``sto`` column. Modified in place.
    sto_overbought : number, default 101
        Lower bound (inclusive) on ``sto`` for an exit.
        NOTE(review): the default looks chosen so the signal never fires,
        assuming ``sto`` is bounded at 100 - confirm.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with 0/1 columns ``sto_overbought_exit`` and
        ``exits`` added (or overwritten).
    """
    df.loc[:, 'sto_overbought_exit'] = 0
    df.loc[df.sto >= sto_overbought, 'sto_overbought_exit'] = 1

    # `exits` is the OR of every individual exit signal.
    exit_cols = ['sto_overbought_exit']
    df.loc[:, 'exits'] = df[exit_cols].sum(axis=1).clip(upper=1)
    return df

In [8]:
# Add the exit flags first, then the entry flags, to the same frame.
df_enter_exit = df.pipe(get_exits).pipe(get_entrances)

In [9]:
from numba import jit
import numpy as np

def backtest_seq(df, stop_thresh=0.1, run_length=np.inf,
                 prof_avg_offset=30, ewm_prof_offset=100,
                 profit_taker=30):
    """Run the sequential backtest and attach per-bar results to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``entrances``, ``exits``, ``adj_open`` and ``adj_close``.
        Modified in place.
    stop_thresh : number, default 0.1
        Forwarded to ``backtest_numba`` as the stop-loss threshold.
    run_length : number, default ``np.inf``
        Forwarded to ``backtest_numba`` as the maximum number of bars a trade
        may stay open.
    prof_avg_offset : int, default 30
        Window length of the rolling profit mean/std and of ``trade_count``.
    ewm_prof_offset : number, default 100
        First positional argument (``com``) of ``Series.ewm`` used to smooth
        ``avg_profit`` into ``eavg_profit``.
    profit_taker : number, default 30
        Forwarded to ``backtest_numba`` as the take-profit threshold.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with these columns added: ``enter_exit_sig``,
        ``next_open``, ``profit``, ``cum_profit``, ``purch_shares``,
        ``norm_profit``, ``avg_profit``, ``avg_profit_std``, ``eavg_profit``,
        ``actual_enter_exit``, ``actual_enter``, ``actual_exit``,
        ``trade_count``.
    """
    # +1 = entry signal, -1 = exit signal, 0 = neither (or both on one bar).
    df.loc[:, 'enter_exit_sig'] = df.entrances - df.exits
    # Fill price for a signal bar is the following bar's open; the last bar has
    # no following bar, so its next_open is NaN.
    df.loc[:, 'next_open'] = df.adj_open.shift(-1)
    profit, actual_enter_exit, shares_arr = backtest_numba(
        df.enter_exit_sig.values, df.adj_close.values,
        df.next_open.values, stop_thresh, run_length,
        profit_taker=profit_taker
    )
    df.loc[:, 'profit'] = profit
    df.loc[:, 'cum_profit'] = df.profit.fillna(0).cumsum()
    df.loc[:, 'purch_shares'] = shares_arr
    # Profit as a fraction of the entry cost; bars with zero profit are blanked
    # to NaN so they do not dilute the rolling averages.
    df.loc[:, 'norm_profit'] = profit / (df.next_open * shares_arr)
    df.loc[df.profit == 0, 'norm_profit'] = np.nan
    df.loc[:, 'avg_profit'] = df.norm_profit.rolling(prof_avg_offset, min_periods=1).mean()
    df.loc[:, 'avg_profit_std'] = df.norm_profit.rolling(prof_avg_offset, min_periods=1).std()
    # eavg_profit is computed before avg_profit's NaNs are zero-filled.
    df.loc[:, 'eavg_profit'] = df.avg_profit.ewm(ewm_prof_offset, ignore_na=True).mean()
    df.loc[:, 'avg_profit'] = df.avg_profit.fillna(0)
    df.loc[:, 'actual_enter_exit'] = actual_enter_exit
    df.loc[:, 'actual_enter'] = 0
    df.loc[:, 'actual_exit'] = 0
    df.loc[df.actual_enter_exit == 1, 'actual_enter'] = 1
    df.loc[df.actual_enter_exit == -1, 'actual_exit'] = 1
    # Count the trades opened within the window. Summing actual_enter_exit, as
    # the original did, nets every entry (+1) against its exit (-1).
    df.loc[:, 'trade_count'] = df.actual_enter.rolling(prof_avg_offset).sum()
    return df
    

@jit(nopython=True)
def backtest_numba(enter_exit, close_price, open_price, stop_thresh,
                   run_length, profit_taker=30):
    """Simulate a single-position, one-share strategy bar by bar.

    Parameters
    ----------
    enter_exit : 1-D array
        Per-bar signal: 1 requests an entry, -1 requests an exit.
    close_price : 1-D array
        Per-bar close; used as the reference price for the stop-loss and
        take-profit checks.
    open_price : 1-D array
        Per-bar fill price for both entries and exits.
    stop_thresh : number
        Exit once the close has dropped at least this far below the close of
        the entry bar (an absolute price difference, not a fraction).
    run_length : number
        Exit once the trade has been open for at least this many bars.
    profit_taker : number, default 30
        Exit once the close has risen more than this far above the close of
        the entry bar.

    Returns
    -------
    (profit, actual_enter_exit, shares_arr) : tuple of 1-D float arrays
        ``profit`` holds each trade's profit at the index of its *entry* bar;
        ``actual_enter_exit`` is 1 on entry bars and -1 on exit bars;
        ``shares_arr`` is the share count on entry bars. All are 0 elsewhere.

    Notes
    -----
    * A trade still open on the last bar is closed there.
    * If the exit bar's ``open_price`` is NaN, the exit is priced at that bar's
      close instead, so the trade's profit is not NaN.
    * An entry and an exit never happen on the same bar.
    """
    n = len(enter_exit)
    actual_enter_exit = np.zeros(n)
    shares_arr = np.zeros(n)
    profit = np.zeros(n)

    in_trade = False
    # Bound up front so the exit branch never reads a name that was only
    # assigned inside the entry branch.
    enter_index = 0
    enter_price = 0.0
    start_price = 0.0
    shares = 0

    for index in range(n):
        signal = enter_exit[index]

        if not in_trade:
            if signal == 1:
                enter_price = open_price[index]
                start_price = close_price[index]
                shares = 1
                shares_arr[index] = shares
                actual_enter_exit[index] = 1
                in_trade = True
                enter_index = index
            continue

        bar_close = close_price[index]
        # Every exit reason closes the trade the same way, so they are OR-ed.
        should_exit = (
            (signal == -1)
            or ((index - enter_index) >= run_length)
            or ((start_price - bar_close) >= stop_thresh)
            or ((bar_close - start_price) > profit_taker)
            or (index == (n - 1))
        )
        if should_exit:
            exit_price = open_price[index]
            if np.isnan(exit_price):
                # No fill price available for this bar: fall back to its close.
                exit_price = bar_close
            profit[enter_index] = (exit_price - enter_price) * shares
            actual_enter_exit[index] = -1
            in_trade = False
    return profit, actual_enter_exit, shares_arr

In [10]:
# Backtest with the stop-loss and take-profit thresholds both set to 10.
df_profits1 = backtest_seq(df=df_enter_exit, profit_taker=10, stop_thresh=10.0)

In [11]:
import numpy as np

def get_profit_metrics(df_profits):
    """Summarise winning and losing trades into a one-row metrics table.

    Rows of ``df_profits`` with ``profit > 0`` count as wins and rows with
    ``profit < 0`` as losses; every other row is ignored by the win/loss
    figures.

    Parameters
    ----------
    df_profits : pandas.DataFrame
        Must contain ``profit`` and ``norm_profit`` columns.

    Returns
    -------
    pandas.DataFrame
        A single row labelled ``'profit'`` with float columns ``wins``,
        ``losses``, ``ttl_trades``, ``mean_win``, ``mean_loss``,
        ``mean_norm_profit_win``, ``mean_norm_profit_loss``,
        ``mean_norm_profit``, ``ttl_win``, ``ttl_loss``, ``win_loss_rate``,
        ``win_loss_ratio``, ``profit_factor`` and ``net_profit``.
    """
    col_name = 'profit'
    pnl = df_profits[col_name]
    norm_pnl = df_profits['norm_profit']
    is_win = pnl > 0
    is_loss = pnl < 0
    n_wins = is_win.sum()
    n_losses = is_loss.sum()

    stats = {
        'wins': n_wins,
        'losses': n_losses,
        'ttl_trades': n_wins + n_losses,
        'mean_win': pnl[is_win].mean(),
        'mean_loss': pnl[is_loss].mean(),
        'mean_norm_profit_win': norm_pnl[is_win].mean(),
        'mean_norm_profit_loss': norm_pnl[is_loss].mean(),
        'mean_norm_profit': norm_pnl.mean(),
        'ttl_win': pnl[is_win].sum(),
        'ttl_loss': pnl[is_loss].sum(),
    }
    # One float row named after the profit column, one column per statistic.
    df_win_loss = pd.Series(stats, name=col_name, dtype='float64').to_frame().T

    # Ratios derived from the base statistics.
    df_win_loss['win_loss_rate'] = df_win_loss['wins'] / (df_win_loss['losses'] + df_win_loss['wins'])
    df_win_loss['win_loss_ratio'] = df_win_loss['mean_win'] / df_win_loss['mean_loss'].abs()
    df_win_loss['profit_factor'] = df_win_loss['ttl_win'] / df_win_loss['ttl_loss'].abs()
    df_win_loss['net_profit'] = df_win_loss['ttl_win'] + df_win_loss['ttl_loss']
    return df_win_loss

In [12]:
# Summarise the backtest into the one-row win/loss table.
df_win_loss = get_profit_metrics(df_profits=df_profits1)

In [53]:
# Display the win/loss summary table.
# NOTE(review): this cell's saved execution count is out of sequence with the
# cells around it, so its saved output may come from a different parameter
# setting - re-run from a fresh kernel to confirm.
df_win_loss

Unnamed: 0,wins,losses,ttl_trades,mean_win,mean_loss,mean_norm_profit_win,mean_norm_profit_loss,mean_norm_profit,ttl_win,ttl_loss,win_loss_rate,win_loss_ratio,profit_factor,net_profit
profit,21.0,11.0,32.0,11.178571,-10.795455,0.003511,-0.003387,0.00114,234.75,-118.75,0.65625,1.035489,1.976842,116.0


In [24]:
# Display the win/loss summary table.
# NOTE(review): this cell's saved execution count is out of sequence with the
# cells around it, so its saved output may come from a different parameter
# setting - re-run from a fresh kernel to confirm.
df_win_loss

Unnamed: 0,wins,losses,ttl_trades,mean_win,mean_loss,mean_norm_profit_win,mean_norm_profit_loss,mean_norm_profit,ttl_win,ttl_loss,win_loss_rate,win_loss_ratio,profit_factor,net_profit
profit,20.0,11.0,31.0,11.3375,-10.818182,0.003568,-0.003394,0.001097,226.75,-119.0,0.645161,1.048004,1.905462,107.75


In [13]:
# Display the win/loss summary table produced by the get_profit_metrics call.
df_win_loss

Unnamed: 0,wins,losses,ttl_trades,mean_win,mean_loss,mean_norm_profit_win,mean_norm_profit_loss,mean_norm_profit,ttl_win,ttl_loss,win_loss_rate,win_loss_ratio,profit_factor,net_profit
profit,38.0,28.0,66.0,11.236842,-10.839286,0.003533,-0.003394,0.000594,427.0,-303.5,0.575758,1.036677,1.406919,123.5


In [None]:
# Persist the per-bar backtest results.
profits_path = 'profits/STOAtNight.parquet'
df_profits1.to_parquet(profits_path)

In [None]:
from scipy.stats import binom

# P(more than 50 wins in 100 trades) at a 60% win rate. The survival function
# sf(k) = P(X > k) gives the upper tail directly instead of summing 50 pmf terms.
binom.sf(50, 100, 0.60)

In [None]:
# P(more than 50 wins in 100 trades) at a 55% win rate. The survival function
# sf(k) = P(X > k) gives the upper tail directly instead of summing 50 pmf terms.
binom.sf(50, 100, 0.55)

In [None]:
# Probability of 3 successes in 3 trials when each has probability 1 - 0.65.
binom.pmf(k=3, n=3, p=1 - 0.65)