In [None]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional
from scipy.stats import linregress
import numpy as np

In [None]:
def generate_date_range(start_date, end_date):
    """Return every calendar day between two dates as 'YYYY-MM-DD' strings.

    Args:
        start_date: First day, formatted as '%Y-%m-%d'.
        end_date: Last day (inclusive), formatted as '%Y-%m-%d'.

    Returns:
        A list of date strings in ascending order. The list is empty when
        start_date is later than end_date.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    fmt = '%Y-%m-%d'
    first_day = datetime.strptime(start_date, fmt)
    last_day = datetime.strptime(end_date, fmt)

    # Both bounds are parsed at midnight, so the difference is a whole number
    # of days; +1 makes the end date inclusive.
    span_days = (last_day - first_day).days
    return [
        (first_day + timedelta(days=offset)).strftime(fmt)
        for offset in range(span_days + 1)
    ]


# Usage example (assumes raw_df holds the raw data).
# NOTE(review): the date below is presumably an alternative start date — confirm.
# '2019-12-01'
date_list = generate_date_range('2025-01-01', '2025-01-02')
# print(date_list)

# Read one gzipped CSV per day, then stack the daily frames in date order.
daily_frames = []
for date in date_list:
    daily_path = f'/Volumes/Ext-Disk/data/futures/um/tardis/orderbook/ETHUSDT/binance_book_snapshot_5_{date}_ETHUSDT.csv.gz'
    daily_frames.append(pd.read_csv(daily_path))

raw_df = pd.concat(daily_frames)
print(raw_df.head())
# print(raw_df.tail())

In [None]:
def build_dollar_bars(
    trades: pd.DataFrame,
    dollar_threshold: float,
) -> pd.DataFrame:
    """
    Build dollar bars from tick-by-tick trade data.

    Trades are sorted by time and accumulated until their traded notional
    reaches ``dollar_threshold``; the trade that crosses the threshold closes
    the bar and is included in it. Trailing trades that never reach the
    threshold do not form a bar and are dropped.

    Args:
        trades: One row per trade. Required columns:
            - ``timestamp`` (integer microseconds since the epoch) or, when
              ``timestamp`` is absent, ``time`` (datetime-like; numeric values
              are interpreted as microseconds, like ``timestamp``).
            - ``price``, ``qty``.
            - ``is_buyer_maker``: True means the buyer was the maker, i.e. the
              taker sold.
            Optional column ``quote_qty`` (price * qty); missing values, or a
            missing column, are filled with ``price * qty``.
            The input frame is not modified.
        dollar_threshold: Notional value at which a bar is closed.

    Returns:
        A DataFrame with one row per completed bar and the columns
        ``bar_id, start_time, end_time, open, high, low, close, volume,
        dollar_value, buy_volume, sell_volume``. Empty (same columns) when no
        bar reaches the threshold.

    Raises:
        KeyError: If neither ``timestamp`` nor ``time`` is present, or another
            required column is missing.
    """
    bar_columns = [
        'bar_id', 'start_time', 'end_time', 'open', 'high', 'low', 'close',
        'volume', 'dollar_value', 'buy_volume', 'sell_volume',
    ]
    df = trades.copy()

    # `timestamp` keeps priority so existing callers behave as before; `time`
    # is the fallback that the documented schema promises.
    if 'timestamp' in df.columns:
        df['time'] = pd.to_datetime(df['timestamp'], unit='us')
    elif 'time' in df.columns:
        if pd.api.types.is_numeric_dtype(df['time']):
            df['time'] = pd.to_datetime(df['time'], unit='us')
        else:
            df['time'] = pd.to_datetime(df['time'])
    else:
        raise KeyError("trades must contain a 'timestamp' or 'time' column")

    # Stable sort: trades sharing a timestamp keep their input order.
    df = df.sort_values('time', kind='mergesort').reset_index(drop=True)

    # Fill notional per row; a single NaN would otherwise turn the running
    # sum into NaN and silently suppress every later bar.
    notional = df['price'] * df['qty']
    if 'quote_qty' in df.columns:
        df['quote_qty'] = df['quote_qty'].fillna(notional)
    else:
        df['quote_qty'] = notional

    # Trade direction: taker buy = +1, taker sell = -1.
    df['trade_sign'] = np.where(df['is_buyer_maker'], -1, 1)

    # Sequential scan for bar boundaries. The running sum resets at every cut,
    # so this cannot be expressed as one cumsum; iterating plain floats is
    # still far cheaper than DataFrame.iterrows().
    first_pos = []
    last_pos = []
    bar_of_row = np.full(len(df), -1, dtype=np.int64)
    running = 0.0
    bar_start = 0
    for pos, quote in enumerate(df['quote_qty'].to_numpy(dtype=float).tolist()):
        running += quote
        if running >= dollar_threshold:
            bar_of_row[bar_start:pos + 1] = len(first_pos)
            first_pos.append(bar_start)
            last_pos.append(pos)
            bar_start = pos + 1
            running = 0.0

    if not first_pos:
        return pd.DataFrame(columns=bar_columns)

    df['bar_id'] = bar_of_row
    df['buy_qty'] = df['qty'].where(df['trade_sign'] > 0, 0)
    df['sell_qty'] = df['qty'].where(df['trade_sign'] < 0, 0)

    # Rows labelled -1 belong to the unfinished trailing bar and are excluded.
    per_bar = df[df['bar_id'] >= 0].groupby('bar_id')

    first_idx = np.asarray(first_pos, dtype=np.int64)
    last_idx = np.asarray(last_pos, dtype=np.int64)

    def _rows(column, positions):
        # Positional pick of the first/last trade of each bar.
        return df[column].iloc[positions].reset_index(drop=True)

    bars = pd.DataFrame({
        'bar_id': np.arange(len(first_pos), dtype=np.int64),
        'start_time': _rows('time', first_idx),
        'end_time': _rows('time', last_idx),
        'open': _rows('price', first_idx),
        'high': per_bar['price'].max().to_numpy(),
        'low': per_bar['price'].min().to_numpy(),
        'close': _rows('price', last_idx),
        'volume': per_bar['qty'].sum().to_numpy(),
        'dollar_value': per_bar['quote_qty'].sum().to_numpy(),
        'buy_volume': per_bar['buy_qty'].sum().to_numpy(),
        'sell_volume': per_bar['sell_qty'].sum().to_numpy(),
    })
    return bars[bar_columns]

In [None]:
# NOTE(review): build_dollar_bars requires `trades` and `dollar_threshold`;
# called with no arguments as written, this cell raises TypeError. Pass a
# trades DataFrame and a threshold here.
build_dollar_bars()