In [None]:
from tqdm import tqdm
import pandas as pd
from datetime import datetime
import pytz
import plotly.graph_objects as go

import sys; sys.path.append("..")
from bars.imbalance_bars import tick_imbalance_bars, volume_imbalance_bars
from utils.visualizations.plot_bars_on_prices import plot_bars_on_prices

### Filter ES ticks into a single continuous contract

In [None]:
csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.csv" # input
filtered_csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.filtered.csv" # output

# Peek at the first line only, to discover the positional index of every
# column; the loading cell further down picks columns by these indices.
with open(csv, "r") as f:
    header = next(f, None)  # None when the file is completely empty
    if header is not None:
        # Drop the line terminator first; otherwise the last column name is
        # shown with a trailing "\n" embedded in it.
        columns = header.rstrip("\r\n").split(",")
        print("Column headers: \n", "\n".join(
            [str(x) for x in enumerate(columns)]))

In [None]:
"""
We want ts_event as time, price, size as volume, symbol
So we want column numbers 1, 8, 9, 13
The symbols we want are ESU3, ESZ3, ESH4, ESM4, ESU4
"""
data = {"time" : [], "price" : [], "volume" : [], "symbol" : []}
with open(csv, "r") as f:
    for i, line in enumerate(tqdm(f, total=106615135)):
        if i == 0:
            continue
        line = line.strip().split(",")
        if line[13] not in ["ESU3", "ESZ3", "ESH4", "ESM4", "ESU4"]:
            continue
        data["time"].append(line[1])
        data["price"].append(line[8])
        data["volume"].append(line[9])
        data["symbol"].append(line[13])

In [None]:
"""
We start to see some trades of ESZ3 appearing during the ESU3 contract
Let's further filter the data so that each contract reaches its end date
prior to admitting prices from the next contract
To do this, we need to identify the end index of each contract
"""
esu3_end = 0
esz3_end = 0
esh4_end = 0
esm4_end = 0

for i, s in enumerate(tqdm(data["symbol"])):
    if s == "ESU3":
        esu3_end = i
    elif s == "ESZ3":
        esz3_end = i
    elif s == "ESH4":
        esh4_end = i
    elif s == "ESM4":
        esm4_end = i

print(esu3_end, esz3_end, esh4_end, esm4_end)

In [None]:
# Report the time span assigned to each contract: every span starts at the
# row where the previous contract ended (row 0 for the first one).
span_edges = [0, esu3_end, esz3_end, esh4_end, esm4_end]
span_labels = ["ESU3", "ESZ3", "ESH4", "ESM4"]

span_lines = [
    f"{label}: {data['time'][first]} - {data['time'][last]}"
    for label, first, last in zip(span_labels, span_edges, span_edges[1:])
]
print("\n".join(span_lines).strip())

In [None]:
# Keep a row only when its symbol is the contract that "owns" that stretch of
# the file: ESU3 up to its last trade, then ESZ3 up to its last trade, and so
# on, with ESU4 taking everything after ESM4's last trade.
filtered_data = {"time" : [], "price" : [], "volume" : [], "symbol" : []}
for row, row_symbol in enumerate(tqdm(data["symbol"])):
    if row_symbol == "ESU3":
        keep = row <= esu3_end
    elif row_symbol == "ESZ3":
        keep = esu3_end < row <= esz3_end
    elif row_symbol == "ESH4":
        keep = esz3_end < row <= esh4_end
    elif row_symbol == "ESM4":
        keep = esh4_end < row <= esm4_end
    elif row_symbol == "ESU4":
        keep = row > esm4_end
    else:
        keep = False

    if keep:
        for column in ("time", "price", "volume", "symbol"):
            filtered_data[column].append(data[column][row])



In [None]:
# Row counts before and after the rollover filter, shown as the cell output.
rows_before = len(data["symbol"])
rows_after = len(filtered_data["symbol"])
rows_before, rows_after

In [None]:
# Persist the filtered ticks as a four-column CSV so later sessions can start
# from the filtered file instead of re-reading the raw one.
output_columns = ("time", "price", "volume", "symbol")
with open(filtered_csv, "w") as f:
    f.write("time,price,volume,symbol\n")
    for row in tqdm(range(len(filtered_data["symbol"]))):
        fields = [str(filtered_data[column][row]) for column in output_columns]
        f.write(",".join(fields) + "\n")

### The data above has been filtered and saved. Proceed from here

In [None]:
from tqdm import tqdm
import pandas as pd
from datetime import datetime
import pytz
import plotly.graph_objects as go

import sys; sys.path.append("..")
from bars.imbalance_bars import tick_imbalance_bars, volume_imbalance_bars
from utils.visualizations.plot_bars_on_prices import plot_bars_on_prices

In [None]:
filtered_csv = "../../../../Historical Data/ES-Futures-Ticks-20230807-20240806.trades.filtered.csv" # now input

# Only the first `max_rows` data rows are loaded; raise the cap to load more.
max_rows = 5000000
# Resolve the target timezone once rather than once per row.
eastern = pytz.timezone("US/Eastern")

data = {"time" : [], "price" : [], "volume" : [], "symbol" : []}
with open(filtered_csv, "r") as f:
    # Size the progress bar to what is actually read (header + capped rows)
    # so it can reach 100%; 82971690 was the original hard-coded total
    # (presumably the file's line count -- confirm).
    for i, line in enumerate(tqdm(f, total=min(82971690, max_rows + 1))):
        if i == 0:
            continue  # header row
        if i > max_rows:
            break
        line = line.strip().split(",")
        # Timestamps carry a trailing "Z"; strip it, mark the value as UTC,
        # then convert to US/Eastern.
        # NOTE(review): datetime.fromisoformat only accepts more than 6
        # fractional-second digits on Python 3.11+ -- confirm the precision
        # of the stored timestamps against the Python version in use.
        utc_time = datetime.fromisoformat(line[0].rstrip("Z")).replace(tzinfo=pytz.utc)
        data["time"].append(utc_time.astimezone(eastern))
        data["price"].append(float(line[1]))
        data["volume"].append(float(line[2]))
        data["symbol"].append(line[3])

In [None]:
# Build volume imbalance bars from the loaded ticks.
# NOTE(review): the meaning of alpha / et_init is defined in
# bars.imbalance_bars (presumably a smoothing factor and the initial expected
# bar length) -- confirm there before tuning.
imbalance_bar_settings = {"alpha": 0.02, "et_init": 15, "verbose": True}
bars = volume_imbalance_bars(data, **imbalance_bar_settings)

In [None]:
# Number of entries under bars['start_idx'] (presumably one per generated
# bar -- confirm against volume_imbalance_bars).
len(bars['start_idx'])

In [None]:
# Overlay the generated bars on the tick price series; the bars are handed to
# the plotting helper as a DataFrame.
bars_frame = pd.DataFrame(bars)
plot_bars_on_prices(data, bars_frame)