In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the bar data from the feather file and show it.
# In the rendered output the rows are indexed by a timezone-aware `date`
# timestamp and carry open/high/low/close/volume columns; the visible rows
# are one minute apart (presumably 1-minute bars throughout — confirm).
df_nq = pd.read_feather('data/NQ_2017_2023.feather')
display(df_nq)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-02 17:52:00-05:00,4876.75,4890.25,4876.75,4888.00,488
2017-01-02 17:53:00-05:00,4888.00,4888.50,4887.00,4887.00,90
2017-01-02 17:54:00-05:00,4887.25,4888.00,4886.75,4887.75,70
2017-01-02 17:55:00-05:00,4887.75,4888.00,4887.50,4888.00,40
2017-01-02 17:56:00-05:00,4887.50,4890.00,4887.50,4890.00,89
...,...,...,...,...,...
2023-12-29 23:17:00-05:00,17027.50,17029.75,17026.00,17027.50,400
2023-12-29 23:18:00-05:00,17028.00,17033.25,17026.25,17031.25,548
2023-12-29 23:19:00-05:00,17032.00,17035.50,17030.75,17034.25,465
2023-12-29 23:20:00-05:00,17033.75,17039.50,17033.25,17038.00,792


In [3]:

def count_moves(df, tick_size):
    """Derive per-bar tick counts, gap information and time-boundary flags.

    Parameters
    ----------
    df : pandas.DataFrame
        Bars with ``open``, ``high``, ``low``, ``close`` and ``volume``
        columns and a ``DatetimeIndex`` (``df.index.minute`` is read).
    tick_size : float
        Price increment of one tick; must be positive.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with these columns added (the input frame is left
        untouched):

        * ``body_size``, ``top_wick``, ``bottom_wick`` - sizes in ticks
        * ``direction`` - sign of ``close - open`` (-1, 0 or 1)
        * ``gap_size``, ``gap_direction`` - distance in ticks and sign of
          ``open`` minus the previous bar's ``close`` (0 for the first bar)
        * ``hour_start`` - 1 when the bar's minute is 0
        * ``15min_start`` - 1 when the minute is 15, 30 or 45
        * ``5min_start`` - 1 when the minute is a non-zero multiple of 5
        * ``volume_per_tick`` - ``ceil(volume / total ticks travelled)``,
          or 0 when the bar spans no ticks

    Raises
    ------
    ValueError
        If ``tick_size`` is not positive.

    Notes
    -----
    Only the missing previous close of the first bar is replaced by 0;
    missing values in other columns are not silently zero-filled.
    """
    if tick_size <= 0:
        raise ValueError(f"tick_size must be positive, got {tick_size!r}")

    # Work on a copy so the caller's frame is not modified as a side effect.
    df = df.copy()

    def signed_ticks(price_delta):
        # Round to the nearest tick instead of truncating: floating-point
        # noise can turn an exact 3-tick move into 2.9999999999999996, which
        # a plain int cast would count as 2.
        return np.rint(price_delta / tick_size).astype(int)

    body_top = df[['open', 'close']].max(axis=1)
    body_bottom = df[['open', 'close']].min(axis=1)

    body_ticks = signed_ticks(df['close'] - df['open'])
    df['body_size'] = body_ticks.abs()
    df['top_wick'] = signed_ticks(df['high'] - body_top)
    df['bottom_wick'] = signed_ticks(body_bottom - df['low'])
    # Direction is taken from the rounded tick count so that it is 0 exactly
    # when body_size is 0.
    df['direction'] = np.sign(body_ticks).astype(int)

    # The first bar has no previous close; treat that as "no gap".
    gap_ticks = signed_ticks((df['open'] - df['close'].shift(1)).fillna(0))
    df['gap_size'] = gap_ticks.abs()
    df['gap_direction'] = np.sign(gap_ticks).astype(int)

    minute = np.asarray(df.index.minute)
    not_hour_start = minute != 0
    df['hour_start'] = (minute == 0).astype(int)
    # Minute 0 is excluded from the finer flags: it is reported by hour_start.
    df['15min_start'] = ((minute % 15 == 0) & not_hour_start).astype(int)
    df['5min_start'] = ((minute % 5 == 0) & not_hour_start).astype(int)

    # Total path length of the bar in ticks (body plus both wicks).
    total_ticks = df['body_size'] + df['top_wick'] + df['bottom_wick']
    df['volume_per_tick'] = np.where(
        total_ticks > 0,
        np.ceil(df['volume'] / total_ticks),
        0,
    )
    return df


# Add the derived tick/gap/time columns; 0.25 is the tick size used for this
# data throughout the notebook.
df_nq = count_moves(df_nq, tick_size=0.25)

In [15]:
# Smallest volume-per-tick value, formatted in scientific notation.
# count_moves assigns 0 to bars that span zero ticks, so a minimum of 0 is
# expected whenever at least one such bar exists.
f"{df_nq['volume_per_tick'].min():.2e}"

'0.00e+00'

In [6]:
# Inspect the frame after count_moves has added its derived columns.
df_nq

Unnamed: 0_level_0,open,high,low,close,volume,body_size,top_wick,bottom_wick,direction,gap_size,gap_direction,hour_start,15min_start,5min_start,volume_per_tick
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-01-02 17:52:00-05:00,4876.75,4890.25,4876.75,4888.00,488,45,9,0,1,0,0,0,0,0,10.0
2017-01-02 17:53:00-05:00,4888.00,4888.50,4887.00,4887.00,90,4,2,0,-1,0,0,0,0,0,15.0
2017-01-02 17:54:00-05:00,4887.25,4888.00,4886.75,4887.75,70,2,1,2,1,1,1,0,0,0,14.0
2017-01-02 17:55:00-05:00,4887.75,4888.00,4887.50,4888.00,40,1,0,1,1,0,0,0,0,1,20.0
2017-01-02 17:56:00-05:00,4887.50,4890.00,4887.50,4890.00,89,10,0,0,1,2,-1,0,0,0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29 23:17:00-05:00,17027.50,17029.75,17026.00,17027.50,400,0,9,6,0,1,1,0,0,0,27.0
2023-12-29 23:18:00-05:00,17028.00,17033.25,17026.25,17031.25,548,13,8,7,1,2,1,0,0,0,20.0
2023-12-29 23:19:00-05:00,17032.00,17035.50,17030.75,17034.25,465,9,5,5,1,3,1,0,0,0,25.0
2023-12-29 23:20:00-05:00,17033.75,17039.50,17033.25,17038.00,792,17,6,2,1,2,-1,0,0,1,32.0


In [7]:
def create_move(row, tick_size):
    """Encode one bar as a letter-run string (unary encoding, 4 ticks per capital).

    Layout of the result, in order:

    * time marker: ``H`` (hour start), ``Q`` (15-minute start), ``F``
      (5-minute start) or ``M`` (any other minute); the first flag that
      equals 1 wins
    * gap versus the previous close: one ``G`` (gap up) or ``g`` (gap down)
      per tick of ``gap_size``; nothing when ``gap_direction`` is 0
    * price path in three legs, where each leg of ``n`` ticks is written as
      ``n // 4`` upper-case letters followed by ``n % 4`` lower-case letters:
      green bar or doji (``direction`` 1 or 0): down ``bottom_wick``, up the
      full range, down ``top_wick``; otherwise the same legs with up and
      down swapped
    * terminator ``E``

    ``row`` is any mapping-like object indexable by the column names produced
    by ``count_moves``. ``tick_size`` is not used; it is kept so the
    signature stays the same.
    """
    def leg(big, small, ticks):
        # One capital letter per full group of four ticks, one small letter
        # per remaining tick.
        return big * (ticks // 4) + small * (ticks % 4)

    boundary_letters = (('hour_start', 'H'), ('15min_start', 'Q'), ('5min_start', 'F'))
    pieces = [next((letter for flag, letter in boundary_letters if row[flag] == 1), 'M')]

    gap_dir = row['gap_direction']
    if gap_dir == 1:
        pieces.append('G' * int(row['gap_size']))
    elif gap_dir == -1:
        pieces.append('g' * int(row['gap_size']))

    low_leg = int(row['bottom_wick'])
    full_leg = int(row['bottom_wick'] + row['body_size'] + row['top_wick'])
    high_leg = int(row['top_wick'])

    candle_dir = row['direction']
    if candle_dir == 1 or candle_dir == 0:
        # Green bar; a doji is treated as green.
        wick_big, wick_small, body_big, body_small = 'D', 'd', 'U', 'u'
    else:
        # Red bar: mirror image of the green path.
        wick_big, wick_small, body_big, body_small = 'U', 'u', 'D', 'd'

    pieces.append(leg(wick_big, wick_small, low_leg))
    pieces.append(leg(body_big, body_small, full_leg))
    pieces.append(leg(wick_big, wick_small, high_leg))
    pieces.append('E')
    return ''.join(pieces)

def create_move1(row, tick_size):
    """Encode one bar as a compact string of letter + tick-count tokens.

    Layout of the result, in order:

    * time marker: ``H`` (hour start), ``Q`` (15-minute start), ``F``
      (5-minute start) or ``M`` (any other minute); the first flag that
      equals 1 wins
    * gap versus the previous close: ``G<n>`` (gap up) or ``g<n>`` (gap
      down) with ``n = gap_size``; omitted when ``gap_direction`` is 0
    * price path: for a green bar or doji (``direction`` 1 or 0)
      ``D<bottom_wick>U<full range>D<top_wick>``; otherwise the same with
      ``U`` and ``D`` swapped. The full range is bottom wick + body + top
      wick.
    * ``V<volume_per_tick>`` followed by the terminator ``E``

    Example: ``MG1D2U5D1V14E``.

    ``row`` is any mapping-like object indexable by the column names produced
    by ``count_moves``. ``tick_size`` is not used; it is kept so the
    signature stays the same.
    """
    for flag, letter in (('hour_start', 'H'), ('15min_start', 'Q'), ('5min_start', 'F')):
        if row[flag] == 1:
            marker = letter
            break
    else:
        marker = 'M'

    gap_dir = row['gap_direction']
    if gap_dir == 1:
        gap_token = f"G{int(row['gap_size'])}"
    elif gap_dir == -1:
        gap_token = f"g{int(row['gap_size'])}"
    else:
        gap_token = ''

    candle_dir = row['direction']
    # Green bar (a doji counts as green) starts with the down leg; a red bar
    # is the mirror image.
    wick_letter, body_letter = ('D', 'U') if (candle_dir == 1 or candle_dir == 0) else ('U', 'D')

    low_leg = int(row['bottom_wick'])
    full_leg = int(row['bottom_wick'] + row['body_size'] + row['top_wick'])
    high_leg = int(row['top_wick'])
    path_token = f"{wick_letter}{low_leg}{body_letter}{full_leg}{wick_letter}{high_leg}"

    return f"{marker}{gap_token}{path_token}V{int(row['volume_per_tick'])}E"


# Encode every bar with create_move1 (row-wise apply; tick_size is forwarded
# to the function as a keyword argument).
df_nq['move'] = df_nq.apply(create_move1, axis=1, tick_size=0.25)


In [15]:
# Coarse time label per bar built from the index: "W<weekday>H<hour>",
# e.g. "W0H17" (pandas numbers weekdays from Monday = 0).
df_nq['global_time'] = "W" + df_nq.index.weekday.astype(str) + "H" + df_nq.index.hour.astype(str) 

In [16]:
# Inspect the frame including the new `move` and `global_time` columns.
df_nq

Unnamed: 0_level_0,open,high,low,close,volume,body_size,top_wick,bottom_wick,direction,gap_size,gap_direction,hour_start,15min_start,5min_start,volume_per_tick,move,global_time
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-01-02 17:52:00-05:00,4876.75,4890.25,4876.75,4888.00,488,45,9,0,1,0,0,0,0,0,10.0,MD0U54D9V10E,W0H17
2017-01-02 17:53:00-05:00,4888.00,4888.50,4887.00,4887.00,90,4,2,0,-1,0,0,0,0,0,15.0,MU0D6U2V15E,W0H17
2017-01-02 17:54:00-05:00,4887.25,4888.00,4886.75,4887.75,70,2,1,2,1,1,1,0,0,0,14.0,MG1D2U5D1V14E,W0H17
2017-01-02 17:55:00-05:00,4887.75,4888.00,4887.50,4888.00,40,1,0,1,1,0,0,0,0,1,20.0,FD1U2D0V20E,W0H17
2017-01-02 17:56:00-05:00,4887.50,4890.00,4887.50,4890.00,89,10,0,0,1,2,-1,0,0,0,9.0,Mg2D0U10D0V9E,W0H17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-29 23:17:00-05:00,17027.50,17029.75,17026.00,17027.50,400,0,9,6,0,1,1,0,0,0,27.0,MG1D6U15D9V27E,W4H23
2023-12-29 23:18:00-05:00,17028.00,17033.25,17026.25,17031.25,548,13,8,7,1,2,1,0,0,0,20.0,MG2D7U28D8V20E,W4H23
2023-12-29 23:19:00-05:00,17032.00,17035.50,17030.75,17034.25,465,9,5,5,1,3,1,0,0,0,25.0,MG3D5U19D5V25E,W4H23
2023-12-29 23:20:00-05:00,17033.75,17039.50,17033.25,17038.00,792,17,6,2,1,2,-1,0,0,1,32.0,Fg2D2U25D6V32E,W4H23


In [40]:
# Look up bars whose move string contains this token. str.contains treats
# the pattern as a regular expression by default; this pattern has no regex
# metacharacters, so it is matched literally. The stored output below has no
# rows, i.e. no bar matched.
df_nq[df_nq['move'].str.contains('Mg3D0U5D0V0E')]

Unnamed: 0_level_0,open,high,low,close,volume,body_size,top_wick,bottom_wick,direction,gap_size,gap_direction,hour_start,15min_start,5min_start,volume_per_tick,move
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1


In [31]:
# Median length of the encoded move strings.
# NOTE(review): this is the median, not the maximum; use .max() if the
# longest move string is what is needed.
df_nq['move'].str.len().median()

13.0

In [34]:
# Write the move strings to a text file, one per line, with neither a header
# row nor the timestamp index.
df_nq['move'].to_csv('data/NQ_moves_full_vol.txt', header=False, index=False)