In [1]:
from tqdm import tqdm


### Filter ES ticks into a single continuous contract

In [3]:
# Raw tick export read by this notebook, and the filtered file it produces.
csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.csv" # input
filtered_csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.filtered.csv" # output

# Only the first line is needed to list the column names next to their indices.
with open(csv, "r") as f:
    header_line = next(f, None)

if header_line is not None:
    indexed_columns = "\n".join(
        str(pair) for pair in enumerate(header_line.split(",")))
    print("Column headers: \n", indexed_columns)

Column headers: 
 (0, 'ts_recv')
(1, 'ts_event')
(2, 'rtype')
(3, 'publisher_id')
(4, 'instrument_id')
(5, 'action')
(6, 'side')
(7, 'depth')
(8, 'price')
(9, 'size')
(10, 'flags')
(11, 'ts_in_delta')
(12, 'sequence')
(13, 'symbol\n')


In [4]:
# From every trade row we keep ts_event (as time), price, size (as volume)
# and symbol, which are column numbers 1, 8, 9 and 13 of the raw file.
# Rows are kept only when their symbol is one of the contracts listed below.
wanted_symbols = ("ESU3", "ESZ3", "ESH4", "ESM4", "ESU4")

data = {"time": [], "price": [], "volume": [], "symbol": []}
with open(csv, "r") as f:
    for row_number, raw_row in enumerate(tqdm(f, total=106615135)):
        if row_number == 0:
            # The first line holds the column headers, not a trade.
            continue
        fields = raw_row.strip().split(",")
        symbol = fields[13]
        if symbol in wanted_symbols:
            data["time"].append(fields[1])
            data["price"].append(fields[8])
            data["volume"].append(fields[9])
            data["symbol"].append(symbol)

  0%|          | 0/106615135 [00:00<?, ?it/s]

100%|██████████| 106615135/106615135 [01:10<00:00, 1503477.46it/s]


In [5]:
# Trades of ESZ3 already show up while ESU3 is still trading.
# To keep a single contract at a time, each contract must run to its end
# before prices from the next contract are admitted.
# That requires the index of the last tick of each contract, found here.
last_index = {"ESU3": 0, "ESZ3": 0, "ESH4": 0, "ESM4": 0}

for position, symbol in enumerate(tqdm(data["symbol"])):
    if symbol in last_index:
        # Later occurrences overwrite earlier ones, leaving the final position.
        last_index[symbol] = position

esu3_end = last_index["ESU3"]
esz3_end = last_index["ESZ3"]
esh4_end = last_index["ESH4"]
esm4_end = last_index["ESM4"]

print(esu3_end, esz3_end, esh4_end, esm4_end)

  0%|          | 0/105736599 [00:00<?, ?it/s]

100%|██████████| 105736599/105736599 [01:09<00:00, 1525311.72it/s]

12304220 41704811 64404499 89984159





In [6]:
# Report the timestamp range covered by each contract. Each range is reported
# as starting at the same index where the previous contract ended.
times = data["time"]
contract_spans = [
    ("ESU3", 0, esu3_end),
    ("ESZ3", esu3_end, esz3_end),
    ("ESH4", esz3_end, esh4_end),
    ("ESM4", esh4_end, esm4_end),
]
report = "\n".join(
    f"{label}: {times[first]} - {times[last]}"
    for label, first, last in contract_spans
)
print(report.strip())

ESU3: 2023-08-07T00:00:00.039623665Z - 2023-09-15T13:29:59.452410495Z
ESZ3: 2023-09-15T13:29:59.452410495Z - 2023-12-15T14:29:59.410358005Z
ESH4: 2023-12-15T14:29:59.410358005Z - 2024-03-15T13:29:59.847946481Z
ESM4: 2024-03-15T13:29:59.847946481Z - 2024-06-21T13:29:59.522321675Z


In [7]:
# For every contract, the index window in which its ticks are admitted:
# (exclusive lower bound, inclusive upper bound). The first contract has no
# lower limit and the last contract has no upper limit, so their outer bounds
# are set just outside the valid index range.
tick_count = len(data["symbol"])
active_window = {
    "ESU3": (-1, esu3_end),
    "ESZ3": (esu3_end, esz3_end),
    "ESH4": (esz3_end, esh4_end),
    "ESM4": (esh4_end, esm4_end),
    "ESU4": (esm4_end, tick_count),
}

filtered_data = {"time": [], "price": [], "volume": [], "symbol": []}
for i in tqdm(range(tick_count)):
    symbol = data["symbol"][i]
    window = active_window.get(symbol)
    if window is None:
        # Symbols without a window are never admitted.
        continue
    lower, upper = window
    if lower < i <= upper:
        filtered_data["time"].append(data["time"][i])
        filtered_data["price"].append(data["price"][i])
        filtered_data["volume"].append(data["volume"][i])
        filtered_data["symbol"].append(symbol)


100%|██████████| 105736599/105736599 [01:20<00:00, 1309709.43it/s]


In [8]:
# Tick counts before and after restricting to one contract at a time.
tuple(len(columns["symbol"]) for columns in (data, filtered_data))

(105736599, 82971690)

In [9]:
# Persist the filtered ticks as a four-column CSV, one tick per line.
with open(filtered_csv, "w") as f:
    f.write("time,price,volume,symbol\n")
    rows = zip(
        filtered_data["time"],
        filtered_data["price"],
        filtered_data["volume"],
        filtered_data["symbol"],
    )
    for tick_time, price, volume, symbol in tqdm(rows, total=len(filtered_data["symbol"])):
        f.write(f"{tick_time},{price},{volume},{symbol}\n")

100%|██████████| 82971690/82971690 [01:26<00:00, 958235.74it/s] 


### Once the above data is filtered, we can read it in on subsequent runs. 

In [1]:
from tqdm import tqdm
import pandas as pd
import sys; sys.path.append("..")
from bars.imbalance_bars import tick_imbalance_bars, volume_imbalance_bars
# from utils.visualizations.plot_bars_on_prices import plot_bars_on_prices
import plotly.graph_objects as go

In [2]:
filtered_csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.filtered.csv" # now input

# Load the filtered ticks back into column lists, converting price and volume
# to floats. Column order in the file is time, price, volume, symbol.
data = {"time" : [], "price" : [], "volume" : [], "symbol" : []}
with open(filtered_csv, "r") as f:
    # Consume the header before handing the file to tqdm. Previously the
    # header line was counted against `total`, so the iteration count ended
    # one above the total and the progress bar fell back to a bare counter.
    next(f, None)
    # 82971690 is the number of data rows (header excluded).
    for line in tqdm(f, total=82971690):
        line = line.strip().split(",")
        data["time"].append(line[0])
        data["price"].append(float(line[1]))
        data["volume"].append(float(line[2]))
        data["symbol"].append(line[3])

82971691it [00:46, 1772621.64it/s]                              


In [4]:
# Build volume imbalance bars from the tick columns held in `data`.
# The result is used below as a mapping with 'start_idx', 'open', 'high',
# 'low' and 'close' entries.
# NOTE(review): verbose=True presumably enables the "Making a new bar from A to B"
# log lines seen in the output — confirm in bars.imbalance_bars.
# NOTE(review): the meaning of alpha and et_init is defined by the project
# helper and is not visible here — confirm before tuning.
bars = volume_imbalance_bars(data, alpha=0.001, et_init=500, verbose=True)

  0%|          | 189866/82971689 [00:00<01:25, 966945.10it/s]

Making a new bar from 0 to 1180
Making a new bar from 1180 to 12307
Making a new bar from 12307 to 24594
Making a new bar from 24594 to 54383
Making a new bar from 54383 to 56698
Making a new bar from 56698 to 59482
Making a new bar from 59482 to 81001
Making a new bar from 81001 to 95110


  1%|          | 503808/82971689 [00:00<00:59, 1375885.15it/s]

Making a new bar from 95110 to 224197
Making a new bar from 224197 to 240711
Making a new bar from 240711 to 315876
Making a new bar from 315876 to 319891
Making a new bar from 319891 to 352007
Making a new bar from 352007 to 368415
Making a new bar from 368415 to 518958
Making a new bar from 518958 to 521209
Making a new bar from 521209 to 521245
Making a new bar from 521245 to 521246
Making a new bar from 521246 to 521249
Making a new bar from 521249 to 521252
Making a new bar from 521252 to 521264
Making a new bar from 521264 to 521265
Making a new bar from 521265 to 521269
Making a new bar from 521269 to 521270
Making a new bar from 521270 to 521278
Making a new bar from 521278 to 521303
Making a new bar from 521303 to 521400
Making a new bar from 521400 to 521547
Making a new bar from 521547 to 521925
Making a new bar from 521925 to 521926
Making a new bar from 521926 to 521929
Making a new bar from 521929 to 521952
Making a new bar from 521952 to 521960
Making a new bar from 5219

  1%|          | 821756/82971689 [00:00<00:59, 1381567.20it/s]

Making a new bar from 522295 to 522665
Making a new bar from 522665 to 523202
Making a new bar from 523202 to 533356
Making a new bar from 533356 to 533379
Making a new bar from 533379 to 533434
Making a new bar from 533434 to 533649
Making a new bar from 533649 to 534168
Making a new bar from 534168 to 536724
Making a new bar from 536724 to 536735
Making a new bar from 536735 to 536736
Making a new bar from 536736 to 536740
Making a new bar from 536740 to 536745
Making a new bar from 536745 to 536753
Making a new bar from 536753 to 536760
Making a new bar from 536760 to 536865
Making a new bar from 536865 to 536868
Making a new bar from 536868 to 536869
Making a new bar from 536869 to 536878
Making a new bar from 536878 to 536888
Making a new bar from 536888 to 536927
Making a new bar from 536927 to 536928
Making a new bar from 536928 to 536931
Making a new bar from 536931 to 536933
Making a new bar from 536933 to 536947
Making a new bar from 536947 to 536963
Making a new bar from 536

  2%|▏         | 1316711/82971689 [00:00<00:57, 1410188.28it/s]

Making a new bar from 881628 to 1031207
Making a new bar from 1031207 to 1057505
Making a new bar from 1057505 to 1058696
Making a new bar from 1058696 to 1058794
Making a new bar from 1058794 to 1058799
Making a new bar from 1058799 to 1058803
Making a new bar from 1058803 to 1058804
Making a new bar from 1058804 to 1058805
Making a new bar from 1058805 to 1058807
Making a new bar from 1058807 to 1058827
Making a new bar from 1058827 to 1058857
Making a new bar from 1058857 to 1058920
Making a new bar from 1058920 to 1059076
Making a new bar from 1059076 to 1059102
Making a new bar from 1059102 to 1059133
Making a new bar from 1059133 to 1059508
Making a new bar from 1059508 to 1059848
Making a new bar from 1059848 to 1061102
Making a new bar from 1061102 to 1061935
Making a new bar from 1061935 to 1064025
Making a new bar from 1064025 to 1143935
Making a new bar from 1143935 to 1145197
Making a new bar from 1145197 to 1145222
Making a new bar from 1145222 to 1145248
Making a new bar 

  2%|▏         | 1630361/82971689 [00:01<00:55, 1457121.01it/s]

Making a new bar from 1285826 to 1507573
Making a new bar from 1507573 to 1512900
Making a new bar from 1512900 to 1512922
Making a new bar from 1512922 to 1512923
Making a new bar from 1512923 to 1512924
Making a new bar from 1512924 to 1512928
Making a new bar from 1512928 to 1512938
Making a new bar from 1512938 to 1512939
Making a new bar from 1512939 to 1512944
Making a new bar from 1512944 to 1512959
Making a new bar from 1512959 to 1512960
Making a new bar from 1512960 to 1512961
Making a new bar from 1512961 to 1512964
Making a new bar from 1512964 to 1512968
Making a new bar from 1512968 to 1512969
Making a new bar from 1512969 to 1512995
Making a new bar from 1512995 to 1513019
Making a new bar from 1513019 to 1513199
Making a new bar from 1513199 to 1514426
Making a new bar from 1514426 to 1515069
Making a new bar from 1515069 to 1521839
Making a new bar from 1521839 to 1552689
Making a new bar from 1552689 to 1552742
Making a new bar from 1552742 to 1552743
Making a new bar

  3%|▎         | 2098767/82971689 [00:01<01:06, 1225327.76it/s]

Making a new bar from 1649343 to 1827842


  3%|▎         | 2846201/82971689 [00:02<01:04, 1251366.26it/s]

Making a new bar from 1827842 to 2702533
Making a new bar from 2702533 to 2833161


  4%|▍         | 3646391/82971689 [00:02<00:52, 1513744.35it/s]

Making a new bar from 2833161 to 3428676


  5%|▍         | 3946298/82971689 [00:02<01:01, 1292480.03it/s]

Making a new bar from 3428676 to 3818177


 15%|█▍        | 12120041/82971689 [00:08<00:43, 1618924.05it/s]

Making a new bar from 3818177 to 12214178


100%|██████████| 82971689/82971689 [01:01<00:00, 1342324.69it/s]


In [5]:
# Number of entries in bars['start_idx']; the candlestick cell uses this same
# length for its x positions.
len(bars['start_idx'])

1570

In [6]:
# Draw the bars as a plotly candlestick chart, placing each bar at its
# ordinal position (0, 1, 2, ...) along the x axis.
bar_positions = list(range(len(bars['start_idx'])))
candles = go.Candlestick(
    x=bar_positions,
    open=bars['open'],
    high=bars['high'],
    low=bars['low'],
    close=bars['close'],
)
fig = go.Figure(data=[candles])

fig.show()