In [1]:
import os
import polars as pl

%load_ext autoreload
%autoreload 2


In [18]:
def concat_tick_data(symbol: str, interval: str, years: list = [2024, 2025],
                     save: bool = True, output_dir: str = "tick_data") -> pl.DataFrame:
    """Concatenate the monthly parquet files of one symbol/interval into one frame.

    Files are expected at ``{output_dir}/{symbol}_{interval}_{year}-{month:02d}.parquet``
    and are read in (year, month) order for every year in ``years`` and every
    month 1-12. Months without a file are skipped.

    Args:
        symbol: Symbol used in the file names, e.g. ``"BTCUSDT"``.
        interval: Interval used in the file names, e.g. ``"5m"``.
        years: Years to scan. The default list is only iterated, never mutated.
        save: When True and at least one file was read, write the combined frame
            to ``{output_dir}/{symbol}_{interval}_combined.parquet``.
        output_dir: Directory holding the monthly files (and the combined file).

    Returns:
        The concatenated frame, or an empty ``pl.DataFrame`` when no monthly
        file could be read.
    """
    dataframes = []
    for year in years:
        for month in range(1, 13):
            filename = f"{symbol}_{interval}_{year}-{month:02d}.parquet"
            file = os.path.join(output_dir, filename)
            # A missing month is the expected case (e.g. months not downloaded
            # yet), so skip it quietly instead of relying on an exception.
            if not os.path.isfile(file):
                continue
            try:
                dataframes.append(pl.read_parquet(file))
            except Exception as exc:
                # Stay best-effort, but make the skipped month visible: a
                # silently dropped file would show up later as a hole in the data.
                print(f"Skipping unreadable file {file}: {exc}")

    if not dataframes:
        # pl.DataFrame is the class; pl.dataframe is a submodule and not callable.
        return pl.DataFrame()

    data = pl.concat(dataframes)
    print(f"{symbol}_{interval}: {len(data):,} rows")

    if save:
        output_file = os.path.join(output_dir, f"{symbol}_{interval}_combined.parquet")
        data.write_parquet(output_file)

    return data


# Combined frame per symbol, so later cells can look a symbol up by name
# instead of relying on whatever the loop variable `df` last pointed at.
data = {}
# Per symbol: timestamps of the rows whose distance to the previous row exceeds
# 5 minutes, i.e. the first bar after each hole in the series.
symbols_gaps = {}
SYMBOLS = ['BTCUSDT', 'ETHUSDT', 'TRXUSDT']
for symbol in SYMBOLS:
    df = concat_tick_data(symbol, "5m")
    if df.height == 0:
        # Nothing was loaded: an empty frame has no 'timestamp' column to diff,
        # and recording "no gaps" for it would be misleading, so skip the symbol.
        print(f"{symbol}: no data loaded, skipping gap detection")
        continue
    data[symbol] = df

    # diff() attaches the elapsed time to the later row, so the timestamp that
    # survives the filter marks where the data resumes after a gap.
    gaps = df.select(['timestamp',
                      pl.col('timestamp').diff().alias('time_diff')
                      ]).filter(
                          pl.col('time_diff') > pl.duration(minutes=5)
                          )

    symbols_gaps[symbol] = gaps['timestamp'].to_list()

BTCUSDT_5m: 201,595 rows
ETHUSDT_5m: 201,595 rows
TRXUSDT_5m: 201,595 rows


In [20]:
# Show, per symbol, the timestamps of rows that follow a gap of more than 5 minutes.
symbols_gaps

{'BTCUSDT': [datetime.datetime(2024, 10, 28, 16, 35),
  datetime.datetime(2025, 8, 29, 6, 35)],
 'ETHUSDT': [datetime.datetime(2024, 10, 28, 16, 35),
  datetime.datetime(2025, 8, 29, 6, 35)],
 'TRXUSDT': [datetime.datetime(2024, 10, 28, 16, 35),
  datetime.datetime(2025, 8, 29, 6, 35)]}

In [23]:
from datetime import datetime

# Inspect the bars from 2025-08-29 06:00 onwards, around the gap reported in symbols_gaps.
# NOTE: `df` is not loaded in this cell; it is the frame left behind by the last
# iteration of the SYMBOLS loop (TRXUSDT with the current SYMBOLS order).
df.filter(
    pl.col('timestamp') > datetime(2025, 8, 29, 6)
)

timestamp,open,high,low,close,volume,buy_volume,sell_volume,delta,total_volume,imbalance,cvd
datetime[ms],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2025-08-29 06:05:00,0.34385,0.34392,0.34375,0.34375,255621.0,86257.0,169364.0,-83107.0,255621.0,-0.325118,-1.8065e8
2025-08-29 06:10:00,0.34376,0.34378,0.34347,0.3435,701738.0,98741.0,602997.0,-504256.0,701738.0,-0.718582,-1.8115e8
2025-08-29 06:15:00,0.3435,0.3435,0.34326,0.3433,681287.0,280545.0,400742.0,-120197.0,681287.0,-0.176426,-1.8127e8
2025-08-29 06:35:00,0.3433,0.3433,0.34272,0.34273,656031.0,285775.0,370256.0,-84481.0,656031.0,-0.128776,-1.8136e8
2025-08-29 06:40:00,0.34274,0.34327,0.34262,0.34324,2.900495e6,1.453491e6,1.447004e6,6487.0,2.900495e6,0.002237,-1.8135e8
…,…,…,…,…,…,…,…,…,…,…,…
2025-11-30 23:35:00,0.2816,0.2816,0.28144,0.28144,731196.0,154573.0,576623.0,-422050.0,731196.0,-0.577205,-1.6201439e7
2025-11-30 23:40:00,0.28143,0.28175,0.28142,0.28171,1.068126e6,784223.0,283903.0,500320.0,1.068126e6,0.468409,-1.5701119e7
2025-11-30 23:45:00,0.28171,0.28182,0.28145,0.28145,785528.0,265359.0,520169.0,-254810.0,785528.0,-0.324381,-1.5955929e7
2025-11-30 23:50:00,0.28144,0.28145,0.28136,0.28139,587140.0,239658.0,347482.0,-107824.0,587140.0,-0.183643,-1.6063753e7
