In [58]:
import pandas as pd
import numpy as np
import torch

df = pd.read_feather('data/nq17-23_1min.feather')
df.index = df.index - pd.Timedelta(minutes=1)
#df = df[(df.index >= '2022-04-01') & (df.index < '2022-05-01')]


In [59]:
6/0.25//4*4

24.0

In [60]:
tick_size = 0.25

df['bottom_wick'] = (((df[['open', 'close']].min(axis=1)- df['low']) / tick_size).astype(int))
df['top_wick'] = ((df['high'] - df[['open', 'close']].max(axis=1)) / tick_size).astype(int)
df['body'] = ((df['close'] - df['open']).abs() / tick_size).astype(int)
df['open_gap'] = ((df['open'] - df['close'].shift(1)) / tick_size).fillna(0).astype(int)

df['1m_start'] = 0
df['5m_start'] = np.where(df.index.minute % 5 == 0, 1, np.nan)
df['15m_start'] = np.where(df.index.minute % 15 == 0,2, np.nan)
df['1h_start'] = np.where(df.index.minute == 0, 3, np.nan)
df['1d_start'] = np.where((df.index.hour == 0) & (df.index.minute == 0), 4, np.nan)

df['lon_start'] = np.where((df.index.hour == 3) & (df.index.minute == 00), 5, np.nan)
df['ny_start'] = np.where((df.index.hour == 9) & (df.index.minute == 30), 6, np.nan)
df['session_start'] = np.where((df.index.hour == 18) & (df.index.minute == 00) & (df.index.day_of_week.isin([6,0,1,2,3])), 7, np.nan)
df['week_start'] = np.where((df.index.hour == 18) & (df.index.minute == 00) & (df.index.day_of_week == 6), 8, np.nan)



In [61]:
bottom_wick_list = sorted(df['bottom_wick'].unique().tolist())
body_list = sorted(df['body'].unique().tolist())
top_wick_list = sorted(df['top_wick'].unique().tolist())
open_gap_list = sorted(df['open_gap'].unique().tolist())

#convert value to index
num_of_start_features = 9
bottom_wick_to_index = {v: i + num_of_start_features for i, v in enumerate(bottom_wick_list)}
body_to_index = {v: i + num_of_start_features + len(bottom_wick_list) for i, v in enumerate(body_list)}
top_wick_to_index = {v: i + num_of_start_features + len(bottom_wick_list) + len(body_list) for i, v in enumerate(top_wick_list)}
open_gap_to_index = {v: i + num_of_start_features + len(bottom_wick_list) + len(body_list) + len(top_wick_list) for i, v in enumerate(open_gap_list)}

#convert index to value
index_to_bottom_wick = {i + num_of_start_features: v for i, v in enumerate(bottom_wick_list)}
index_to_body = {i + num_of_start_features + len(bottom_wick_list): v for i, v in enumerate(body_list)}
index_to_top_wick = {i + num_of_start_features + len(bottom_wick_list) + len(body_list): v for i, v in enumerate(top_wick_list)}
index_to_open_gap = {i + num_of_start_features + len(bottom_wick_list) + len(body_list) + len(top_wick_list): v for i, v in enumerate(open_gap_list)}


#apply mapping of value to index
df['bottom_wick'] = df['bottom_wick'].map(bottom_wick_to_index)
df['body'] = df['body'].map(body_to_index)
df['top_wick'] = df['top_wick'].map(top_wick_to_index)
df['open_gap'] = df['open_gap'].map(open_gap_to_index)

#define wicks sequence order
df['first_wick']  = np.where(df['close'] >= df['open'], df['bottom_wick'], df['top_wick'])
df['last_wick']  = np.where(df['close'] >= df['open'], df['top_wick'], df['bottom_wick'])

In [65]:
max(bottom_wick_to_index.values()), max(body_to_index.values()), max(top_wick_to_index.values()), max(open_gap_to_index.values())

(206, 586, 779, 1147)

In [28]:
def candle_restore(candles:(np.array,torch.Tensor), start_price:float, number_of_candles = None) -> pd.DataFrame:
    #function receives a dataframe with columns ['first_wick', 'last_wick'] - respectivey columns 0 and 2 in the array) for wicks data and column 'body' (column 1 in the array) for body data 
    # and restores the candles starting from initial price. New open gap should be considered from column 3 in the array
    #use index_to_bottom_wick, index_to_body, index_to_top_wick to convert index to value
    if isinstance(candles, torch.Tensor):
        candles = candles.numpy()
    if number_of_candles is not None:
        candles = candles[:number_of_candles]

    restored_candles = []
    for idx, candle in enumerate(candles):
        restored_candle = {}
        open = start_price if idx == 0 else restored_candles[idx-1]['close'] + index_to_open_gap[candle[3]] * tick_size
        if candle[0] in index_to_bottom_wick:
            low_wick = index_to_bottom_wick[candle[0]]
            high_wick = index_to_top_wick[candle[2]]
            body = index_to_body[candle[1]]
            close = open + body * tick_size
            low = open - low_wick * tick_size
            high = close + high_wick * tick_size
            restored_candle['open'] = open
            restored_candle['high'] = high
            restored_candle['low'] = low
            restored_candle['close'] = close

        elif candle[0] in index_to_top_wick:
            high_wick = index_to_top_wick[candle[0]]
            low_wick = index_to_bottom_wick[candle[2]]
            body = index_to_body[candle[1]]
            close = open - body * tick_size
            low = close - low_wick * tick_size
            high = open + high_wick * tick_size
            restored_candle['open'] = open
            restored_candle['high'] = high
            restored_candle['low'] = low
            restored_candle['close'] = close

            
        restored_candles.append(restored_candle)
    return pd.DataFrame(restored_candles)

#test of restore function

restored_df = candle_restore(df[['first_wick', 'body', 'last_wick','open_gap']].values, 4876.75, 100)

    

In [41]:
sample = torch.tensor([587, 225,   9, 974,   0, 589, 232,  17, 975,   0,  11, 229, 596, 976,
           0,  10, 227, 587, 976,   0,   9, 222, 597, 975,   0,   1,  10, 209,
         599, 974,   0, 588, 229,  14, 978,   0, 594, 209,  14, 975,   0, 590,
         210,  10, 974,   0, 588, 221,  16, 979,   0,   1,   2, 587, 238,  12,
         978,   0, 587, 220,  39, 976,   0, 591])
sample = torch.tensor([  9, 212,   9, 975,   0,   9, 209,   9, 975,   0,   9, 212, 587, 975,
           0,   9, 212, 587, 975,   0,   9, 212, 587, 975,   0,   1,   9, 212,
         587, 975,   0,   9, 209,   9, 975,   0,   9, 209,   9, 975,   0,   9,
         212,   9, 975,   0,   9, 209,   9, 975,   0,   1,   2,   9, 212,   9,
         975,   0,   9, 215,  13, 975,   0,   9])

zero_index = torch.where(sample == 0)[0]
samle_data = torch.empty((len(zero_index) - 1, 4))

for i in range(len(zero_index) - 1):
    samle_data[i] = sample[zero_index[i]+1:zero_index[i+1]][-4:]

restored_df = candle_restore(samle_data, 4876.75)
    

KeyError: 9.0

In [40]:
#plot original and restored candles using plotly
import plotly.graph_objects as go
import plotly.express as px

# original_candles = df[['open', 'high', 'low', 'close']].iloc[:restored_df.shape[0]]

# fig = go.Figure(data=[go.Candlestick(x=original_candles.index,
#                 open=original_candles['open'],
#                 high=original_candles['high'],
#                 low=original_candles['low'],
#                 close=original_candles['close'])])

# fig.update_layout(xaxis_rangeslider_visible=False)
# fig.show()

fig = go.Figure(data=[go.Candlestick(x=restored_df.index,
                open=restored_df['open'],
                high=restored_df['high'],
                low=restored_df['low'],
                close=restored_df['close'])])
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()



In [30]:
df

Unnamed: 0_level_0,open,high,low,close,volume,bottom_wick,top_wick,body,open_gap,1m_start,5m_start,15m_start,1h_start,1d_start,lon_start,ny_start,session_start,week_start,first_wick,last_wick
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2017-01-02 18:00:00-05:00,4876.75,4890.25,4876.75,4888.00,488,9,596,252,975,0,1.0,2.0,3.0,,,,7.0,,9,596
2017-01-02 18:01:00-05:00,4888.00,4888.50,4887.00,4887.00,90,9,589,211,975,0,,,,,,,,,589,9
2017-01-02 18:02:00-05:00,4887.25,4888.00,4886.75,4887.75,70,11,588,209,976,0,,,,,,,,,11,588
2017-01-02 18:03:00-05:00,4887.75,4888.00,4887.50,4888.00,40,10,587,208,975,0,,,,,,,,,10,587
2017-01-02 18:04:00-05:00,4887.50,4890.00,4887.50,4890.00,89,9,587,217,973,0,,,,,,,,,9,587
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-26 14:40:00-04:00,14268.25,14268.25,14256.75,14259.50,1044,20,587,242,985,0,1.0,,,,,,,,587,20
2023-10-26 14:41:00-04:00,14262.00,14282.50,14261.75,14282.00,1376,10,589,287,985,0,,,,,,,,,10,589
2023-10-26 14:42:00-04:00,14282.00,14286.25,14278.75,14281.00,1001,18,604,211,975,0,,,,,,,,,604,18
2023-10-26 14:43:00-04:00,14281.25,14281.25,14281.00,14281.00,5,9,587,208,976,0,,,,,,,,,587,9


In [31]:
#contstrucc the tensor with  sequence of candles.
seq_columns = ['1m_start', '5m_start', '15m_start', '1h_start',
       '1d_start', 'lon_start', 'ny_start', 'session_start', 'week_start','first_wick', 'body', 'last_wick', 'open_gap']


seq_tensor = torch.tensor(df[seq_columns].values).reshape(-1)


In [32]:
seq_tensor.shape

torch.Size([30846868])

In [33]:
seq_tensor = seq_tensor[~torch.isnan(seq_tensor)].to(torch.int64)


In [34]:

# save sequense tensor to hdf5
import h5py
with h5py.File('data/nq17-23_1min_seq.hdf5', 'w') as f:
       dataset = f.create_dataset('data', shape=seq_tensor.shape, dtype='i8')
       dataset[:] = seq_tensor[:]
       #save index_to_value dictionaries
       f.create_dataset('index_to_bottom_wick', data=np.array(list(index_to_bottom_wick.items())))
       f.create_dataset('index_to_body', data=np.array(list(index_to_body.items())))
       f.create_dataset('index_to_top_wick', data=np.array(list(index_to_top_wick.items())))
       f.create_dataset('index_to_open_gap', data=np.array(list(index_to_open_gap.items())))

       




