In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.linalg import block_diag
import warnings

warnings.simplefilter(action='ignore', category=UserWarning)
pd.set_option('mode.chained_assignment', None)
%matplotlib inline

# 指定要讀取的文件
data_dir = '/Volumes/T7/code/microprice/orderbook_data/binance_futures/ETHUSDT/'
target_files = [
  'binance_futures_orderbook_ETHUSDT_2025_07_30_00.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_01.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_02.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_03.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_04.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_05.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_06.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_07.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_08.parquet', 
  'binance_futures_orderbook_ETHUSDT_2025_07_30_09.parquet'
   
]

print(f"將讀取 {len(target_files)} 個指定的 ETH (market 0) 數據文件")

# 讀取並合併指定文件
eth_data_list = []
for filename in target_files:
    file_path = f"{data_dir}/{filename}"
    try:
        df_temp = pd.read_parquet(file_path)
        print(f"讀取文件: {filename}, 數據量: {len(df_temp)}")
        eth_data_list.append(df_temp)
    except Exception as e:
        print(f"讀取文件 {filename} 時出錯: {e}")

# 合併所有數據
if eth_data_list:
    eth_combined = pd.concat(eth_data_list, ignore_index=True)
    
    # 轉換時間戳並排序 - 修復時間戳格式問題
    print("正在處理時間戳格式...")
    eth_combined['timestamp'] = pd.to_datetime(eth_combined['timestamp'], format='mixed')
    eth_combined = eth_combined.sort_values('timestamp').reset_index(drop=True)
    
    print(f"\n聚合完成:")
    print(f"- 總數據量: {len(eth_combined):,}")
    print(f"- 時間範圍: {eth_combined['timestamp'].min()} 到 {eth_combined['timestamp'].max()}")
    print(f"- 數據列: {list(eth_combined.columns)}")
    
    # 顯示基本統計信息
    print(f"\n價格統計:")
    print(f"- 最佳買價範圍: {eth_combined['best_bid'].min():.2f} - {eth_combined['best_bid'].max():.2f}")
    print(f"- 最佳賣價範圍: {eth_combined['best_ask'].min():.2f} - {eth_combined['best_ask'].max():.2f}")
    
    
else:
    print("未能讀取任何數據文件")
    eth_combined = pd.DataFrame()

將讀取 10 個指定的 ETH (market 0) 數據文件
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_00.parquet, 數據量: 14280
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_01.parquet, 數據量: 14259
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_02.parquet, 數據量: 14245
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_03.parquet, 數據量: 14243
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_04.parquet, 數據量: 14265
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_05.parquet, 數據量: 13758
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_06.parquet, 數據量: 14223
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_07.parquet, 數據量: 14209
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_08.parquet, 數據量: 14203
讀取文件: binance_futures_orderbook_ETHUSDT_2025_07_30_09.parquet, 數據量: 8834
正在處理時間戳格式...

聚合完成:
- 總數據量: 136,519
- 時間範圍: 2025-07-29 23:59:59.983369 到 2025-07-30 09:37:07.369918
- 數據列: ['timestamp', 'sequence_id', 'exchange', 'symbol', 'event_type', 'bids', 'asks', 'best_bid', 'best_ask', 'best_bid_size', 

In [2]:
eth_combined.head()

Unnamed: 0,timestamp,sequence_id,exchange,symbol,event_type,bids,asks,best_bid,best_ask,best_bid_size,best_ask_size,spread,spread_percent,mid_price,total_bid_volume,total_ask_volume,volume_imbalance,funding_rate,funding_time
0,2025-07-29 23:59:59.983369,8186389529855,binance_futures,ETHUSDT,depthUpdate,"[[3759.51, 87.059], [3759.5, 2.644], [3759.49,...","[[3759.52, 80.051], [3759.53, 0.97], [3759.54,...",3759.51,3759.52,87.059,80.051,0.01,0.000266,3759.515,91.869,86.415,3.059164,0.0001,1753804800000
1,2025-07-30 00:00:00.236139,8186389546827,binance_futures,ETHUSDT,depthUpdate,"[[3759.51, 115.095], [3759.5, 35.006], [3759.4...","[[3759.52, 75.023], [3759.53, 0.95], [3759.54,...",3759.51,3759.52,115.095,75.023,0.01,0.000266,3759.515,187.52,82.031,39.13508,0.0001,1753804800000
2,2025-07-30 00:00:00.581134,8186389688208,binance_futures,ETHUSDT,depthUpdate,"[[3759.51, 115.095], [3759.5, 35.006], [3759.4...","[[3759.52, 81.273], [3759.53, 0.95], [3759.54,...",3759.51,3759.52,115.095,81.273,0.01,0.000266,3759.515,187.52,88.281,35.982103,0.0001,1753804800000
3,2025-07-30 00:00:00.912872,8186390007670,binance_futures,ETHUSDT,depthUpdate,"[[3759.51, 79.936], [3759.5, 8.161], [3759.49,...","[[3759.52, 81.273], [3759.53, 0.95], [3759.54,...",3759.51,3759.52,79.936,81.273,0.01,0.000266,3759.515,90.516,88.281,1.250021,0.0001,1753804800000
4,2025-07-30 00:00:01.247842,8186390082502,binance_futures,ETHUSDT,depthUpdate,"[[3759.51, 79.936], [3759.5, 8.161], [3759.49,...","[[3759.52, 81.273], [3759.53, 0.95], [3759.54,...",3759.51,3759.52,79.936,81.273,0.01,0.000266,3759.515,90.516,88.281,1.250021,0.0001,1753804800000


In [None]:
print(eth_combined['asks'])

print(eth_combined['asks'][0][1])   # best asks 

print(eth_combined['bids'][0][1])   # best bids 
 

0         [[3759.52, 80.051], [3759.53, 0.97], [3759.54,...
1         [[3759.52, 75.023], [3759.53, 0.95], [3759.54,...
2         [[3759.52, 81.273], [3759.53, 0.95], [3759.54,...
3         [[3759.52, 81.273], [3759.53, 0.95], [3759.54,...
4         [[3759.52, 81.273], [3759.53, 0.95], [3759.54,...
                                ...                        
136514    [[3789.85, 69.174], [3789.86, 0.045], [3789.88...
136515    [[3789.85, 62.542], [3789.86, 0.045], [3789.88...
136516    [[3789.85, 62.542], [3789.86, 0.045], [3789.88...
136517    [[3789.85, 66.951], [3789.86, 0.039], [3789.88...
136518    [[3789.85, 58.358], [3789.86, 0.039], [3789.88...
Name: asks, Length: 136519, dtype: object
[3.75953e+03 9.70000e-01]
[3.7595e+03 2.6440e+00]
