In [16]:
# Contract symbol; interpolated into the input and output parquet paths used by this notebook.
contract = 'MES'

In [17]:
import pandas as pd

# Load the raw bars for the selected contract.
df = pd.read_parquet(f'../futures_data/{contract}.parquet')

# Derive a calendar-date column and expose the raw OHLCV columns under the
# `adj_*` / `Volume` names that the feature functions read. The values are
# plain copies: no price adjustment is applied here.
df = df.assign(
    date=df.time.dt.date,
    adj_close=df.close,
    adj_high=df.high,
    adj_low=df.low,
    adj_open=df.open,
    Volume=df.volume,
)

In [18]:
import ta
import numpy as np

def mean_atr(df, atr_period=14):
    """Compute the per-row true range and its exponentially smoothed average.

    The true range of a row is ``max(adj_high, previous adj_close) -
    min(adj_low, previous adj_close)``, so gaps relative to the previous close
    are included in the range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``adj_close``, ``adj_high`` and ``adj_low`` columns, already
        ordered in time. The frame is not modified.
    atr_period : int, default 14
        ``span`` of the exponential moving average applied to the true range.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        ``(true_range, smoothed_true_range)``, both indexed like ``df``. The first
        row is NaN in both because it has no previous close.
    """
    # Kept local instead of being written back as a 'last_close' column, so the
    # caller's frame is left untouched.
    last_close = df.adj_close.shift(1)
    # Upper bound uses the high, lower bound uses the low; the original read a
    # non-existent `high_adj` column for both.
    atr_high = np.maximum(df.adj_high, last_close)
    atr_low = np.minimum(df.adj_low, last_close)
    atr = atr_high - atr_low
    return atr, atr.ewm(span=atr_period, adjust=False).mean()

def mean_close_diff_norm(close, ma):
    """Return the deviation of ``close`` from ``ma`` as a fraction of ``ma``.

    Works element-wise on anything supporting ``-`` and ``/`` (scalars,
    numpy arrays, pandas Series).
    """
    deviation = close - ma
    return deviation / ma

def manual_mfi(df, period):
    """Compute a volume-weighted, RSI-style oscillator in the range 0..100.

    For each row the absolute close-to-close change, expressed as a fraction of
    the current close, is multiplied by ``Volume``. That flow is attributed to the
    "up" series when the close rose and to the "down" series when it fell. Both
    series are smoothed with an exponential moving average
    (``alpha = 1 / period``, ``adjust=False``) and combined as
    ``100 - 100 / (1 + avg_up / avg_down)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``adj_close``, ``time`` and ``Volume`` columns, already
        ordered in time. The frame is not modified.
    period : int or float
        Smoothing period; must be non-zero.

    Returns
    -------
    numpy.ndarray
        One float per input row. Rows where both smoothed series are zero
        (e.g. the first row) are NaN; rows with zero smoothed down-flow and
        positive up-flow are 100.
    """
    # Fail with the same KeyError as before if a required column is missing.
    cols = df[['adj_close', 'time', 'Volume']]
    close = cols['adj_close']
    prev_close = close.shift(1)

    # Volume-weighted relative move; NaN on the first row (no previous close).
    flow = ((prev_close - close).abs() / close) * cols['Volume']

    # Build the up/down series directly as floats. The original initialised
    # integer zero columns and then wrote floats into them via .loc, which is a
    # deprecated incompatible-dtype assignment on recent pandas versions.
    up_flow = flow.where(close > prev_close, 0.0)
    down_flow = flow.where(close < prev_close, 0.0)

    smoothing = 1.0 / period
    avg_up = up_flow.ewm(alpha=smoothing, adjust=False).mean()
    avg_down = down_flow.ewm(alpha=smoothing, adjust=False).mean()

    mfi = (100.0 - (100.0 / (1 + (avg_up / avg_down)))).values
    return mfi


def generate_ta_features(sym_df,
                         macd_n_slow: int = 26, macd_n_fast: int = 12, macd_n_sign: int = 9,
                         rsi_period=5, roc_period=45, roc_short_period=4, fut_roc_period=5, mfi_period=5,
                         sto_period=14, atr_period=14, volitility_short_period=2, volitility_medium_period=7, bba_period=20,
                         dch_period=20, ma_period=14):
    """Return a time-sorted copy of ``sym_df`` with technical-analysis columns added.

    Parameters
    ----------
    sym_df : pandas.DataFrame
        Bars for a single instrument. Must provide ``time``, ``adj_close``,
        ``adj_high`` and ``adj_low`` columns. The input frame is not modified.
    macd_n_slow, macd_n_fast, macd_n_sign : int
        Slow, fast and signal windows passed to ``ta.trend.MACD``.
    rsi_period : int
        Window for the RSI.
    roc_period, roc_short_period : int
        Windows for the long and short rate-of-change columns.
    fut_roc_period : int
        Window of the forward-looking rate of change (see ``fut_roc`` below).
    mfi_period : int
        Currently unused (the money-flow feature is disabled); kept so existing
        callers that pass it keep working.
    sto_period : int
        Window for the stochastic oscillator.
    atr_period : int
        EWM span for the ``volatility`` and ``directional_strength`` columns.
    volitility_short_period, volitility_medium_period : int
        EWM spans for ``volatility_short`` and ``volatility_medium``.
    bba_period : int
        Window for the Bollinger bands.
    dch_period : int
        Window for the Donchian channel.
    ma_period : int
        Window (in rows) of the simple moving average ``mv_avg``.

    Returns
    -------
    pandas.DataFrame
        The sorted frame plus, in order: ``mv_avg``, ``dc_high``, ``dc_low``,
        ``dc_perc``, ``macd``, ``macd_signal``, ``macd_hist``, ``rsi``, ``roc``,
        ``roc_short``, ``fut_roc``, ``sto``, ``bba``, ``dch``, ``bba_norm``,
        ``dch_norm``, ``ma_diff_norm``, ``volatility``, ``volatility_short``,
        ``volatility_medium`` and ``directional_strength``.

    Notes
    -----
    ``fut_roc`` is shifted backwards by ``fut_roc_period`` rows, so each row holds
    a value computed from *later* closes. It must not be used as a model input;
    its last ``fut_roc_period`` rows are NaN.
    """
    # sort_values returns a new frame, so the column writes below never touch
    # the caller's object.
    sym_df = sym_df.sort_values('time')
    close = sym_df.adj_close
    high = sym_df.adj_high
    low = sym_df.adj_low

    # Simple moving average over rows; min_periods=1 avoids leading NaNs.
    sym_df.loc[:, 'mv_avg'] = close.rolling(ma_period, min_periods=1).mean().to_numpy()

    # NOTE(review): the `n*` keyword names are kept exactly as the notebook had
    # them; newer `ta` releases appear to use `window*` names instead. Confirm
    # against the installed version.
    macd = ta.trend.MACD(close, n_slow=macd_n_slow, n_fast=macd_n_fast, n_sign=macd_n_sign)
    rsi = ta.momentum.RSIIndicator(close=close, n=rsi_period).rsi()
    roc = ta.momentum.ROCIndicator(close, n=roc_period).roc()
    roc_short = ta.momentum.ROCIndicator(close, n=roc_short_period).roc()
    roc_fut = ta.momentum.ROCIndicator(close, n=fut_roc_period).roc()
    sto = ta.momentum.StochasticOscillator(high=high, low=low, close=close,
                                           n=sto_period).stoch_signal()

    # Bollinger band width.
    bb = ta.volatility.BollingerBands(close=close, n=bba_period)
    bba = bb.bollinger_hband() - bb.bollinger_lband()

    # Donchian channel bounds and width.
    dc = ta.volatility.DonchianChannel(close=close, n=dch_period, high=high, low=low)
    dc_high = dc.donchian_channel_hband()
    dc_low = dc.donchian_channel_lband()
    dch = dc_high - dc_low

    sym_df.loc[:, 'dc_high'] = dc_high
    sym_df.loc[:, 'dc_low'] = dc_low
    # Position of the close inside the channel, in percent. Uses this frame's
    # close; the original read the notebook-global `df` here.
    sym_df.loc[:, 'dc_perc'] = ((close - dc_low) / dch) * 100
    sym_df.loc[:, 'macd'] = macd.macd()
    sym_df.loc[:, 'macd_signal'] = macd.macd_signal()
    sym_df.loc[:, 'macd_hist'] = macd.macd_diff()
    sym_df.loc[:, 'rsi'] = rsi
    sym_df.loc[:, 'roc'] = roc
    sym_df.loc[:, 'roc_short'] = roc_short
    # Negative shift pulls future values back onto the current row (look-ahead).
    sym_df.loc[:, 'fut_roc'] = roc_fut.shift(-fut_roc_period)
    sym_df.loc[:, 'sto'] = sto
    sym_df.loc[:, 'bba'] = bba
    sym_df.loc[:, 'dch'] = dch
    sym_df.loc[:, 'bba_norm'] = bba / close
    sym_df.loc[:, 'dch_norm'] = dch / close
    sym_df.loc[:, 'ma_diff_norm'] = mean_close_diff_norm(close, sym_df.mv_avg)

    # Simple one-row returns; their absolute value is the volatility base.
    returns = close.diff() / close.shift(1)
    volatility_base = returns.abs()
    sym_df.loc[:, 'volatility'] = volatility_base.ewm(span=atr_period, adjust=False).mean()
    sym_df.loc[:, 'volatility_short'] = volatility_base.ewm(span=volitility_short_period, adjust=False).mean()
    sym_df.loc[:, 'volatility_medium'] = volatility_base.ewm(span=volitility_medium_period, adjust=False).mean()
    # Sign is inverted: a rising market yields a negative value.
    sym_df.loc[:, 'directional_strength'] = -(returns.ewm(span=atr_period, adjust=False).mean())
    return sym_df

In [19]:
# Build the feature frame with longer look-back windows than the function
# defaults; every period not listed here keeps its default.
# NOTE(review): the 12 / 12*4 / 12*24*3 factors look like multiples of twelve
# bars per hour (5-minute data) — confirm against the bar size of the input.
ta_feats = generate_ta_features(
    df,
    rsi_period=12,
    dch_period=12 * 4,
    ma_period=12 * 24 * 3,
)

In [20]:
# Persist the feature frame as parquet; the file name is derived from the contract symbol.
ta_feats.to_parquet(f'../futures_ta_data/{contract}_ta_data.parquet')