In [67]:
# %% [markdown]
# # 比特币交易数据采集与分析（7x24连续交易版）
# 使用CCXT获取Binance的BTC/USDT数据

# %% [markdown]
# ## 1. 环境初始化
# %%
import ccxt
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path


# 初始化交易所
def init_exchange(exchange_id: str = "binance"):
    """Create a ccxt exchange client by id and preload its markets.

    Args:
        exchange_id: Attribute name looked up on the ``ccxt`` module to find
            the exchange class (defaults to ``"binance"``).

    Returns:
        The instantiated exchange client, after ``load_markets()`` has been
        called on it.
    """
    client_config = {
        "enableRateLimit": True,
        "options": {"adjustForTimeDifference": True},
    }
    exchange_cls = getattr(ccxt, exchange_id)
    client = exchange_cls(client_config)
    # Load market metadata before the client is handed back to the caller.
    client.load_markets()
    return client


# Run the initialization: build the exchange client that the data-fetching
# cell below passes to safe_fetch_ohlcv, then report its name.
crypto_ex = init_exchange()
print(f"交易所 {crypto_ex.name} 已初始化")

交易所 Binance 已初始化


In [66]:
# %% [markdown]
# ## 2. 数据获取与处理
# %%
def safe_fetch_ohlcv(
    exchange,
    symbol: str,
    timeframe: str,
    limit: int = 500,
    max_retries: int = 3,
    retry_delay: int = 5000,
):
    """Fetch OHLCV candles and return them as a datetime-indexed DataFrame.

    ``exchange.fetch_ohlcv`` is attempted up to ``max_retries`` times. Only
    ``ccxt.NetworkError`` triggers a retry; any other exception propagates
    immediately.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, limit=...)``
            and ``sleep(delay)``.
        symbol: Trading pair passed through to the exchange, e.g. ``"BTC/USDT"``.
        timeframe: Candle interval passed through to the exchange, e.g. ``"1h"``.
        limit: Number of candles requested.
        max_retries: Total number of attempts before giving up.
        retry_delay: Value passed unchanged to ``exchange.sleep`` between
            attempts (presumably milliseconds, as the 5000 default suggests —
            confirm against the ccxt documentation).

    Returns:
        DataFrame with columns ``open``, ``high``, ``low``, ``close`` and
        ``volume``, indexed by a UTC ``datetime`` index parsed from the
        millisecond ``timestamp`` column (which is dropped).

    Raises:
        ConnectionError: Every attempt failed with ``ccxt.NetworkError``; the
            last network error is attached as ``__cause__``.
    """
    last_error = None
    for retry in range(max_retries):
        try:
            data = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)
        except ccxt.NetworkError as err:
            last_error = err
            # Announce a retry and wait only when another attempt follows;
            # after the final failure we fail fast instead of sleeping.
            if retry + 1 < max_retries:
                print(f"⚠️ 网络错误，第{retry+1}次重试...")
                exchange.sleep(retry_delay)
            continue

        return (
            pd.DataFrame(
                data,
                columns=["timestamp", "open", "high", "low", "close", "volume"],
            )
            .assign(
                datetime=lambda x: pd.to_datetime(x.timestamp, unit="ms", utc=True)
            )
            .set_index("datetime")
            .drop(columns="timestamp")
        )

    # Chain the last network error so the root cause is visible in tracebacks.
    raise ConnectionError("无法获取数据") from last_error


# Fetch the data: request 1000 hourly candles for the pair below.
# NOTE(review): print() dumps the frame as plain text; ending the cell with
# hourly_df.tail() would use the notebook's rich display instead.
symbol = "BTC/USDT"  # trading pair specified directly (already verified)
hourly_df = safe_fetch_ohlcv(crypto_ex, symbol, "1h", 1000)
print(hourly_df)

                                open       high        low      close  \
datetime                                                                
2024-12-19 04:00:00+00:00  101193.15  101228.31  100629.99  100671.62   
2024-12-19 05:00:00+00:00  100671.62  101415.25  100671.62  101300.01   
2024-12-19 06:00:00+00:00  101300.01  101392.15  101015.44  101200.01   
2024-12-19 07:00:00+00:00  101200.01  101727.28  101124.46  101656.57   
2024-12-19 08:00:00+00:00  101656.57  101820.00  101380.10  101640.40   
...                              ...        ...        ...        ...   
2025-01-29 15:00:00+00:00  102029.94  102531.64  101798.01  101980.78   
2025-01-29 16:00:00+00:00  101980.78  102150.00  101675.62  102092.01   
2025-01-29 17:00:00+00:00  102092.00  102777.78  101995.37  102406.98   
2025-01-29 18:00:00+00:00  102406.97  103000.00  102008.00  102854.15   
2025-01-29 19:00:00+00:00  102854.14  102896.92  101400.01  102305.59   

                               volume  
datetime  

In [55]:
# %% [markdown]
# ## 3. 增强可视化
# %%
def plot_bitcoin_data(df: pd.DataFrame):
    """Show a candlestick price chart with volume bars on a secondary axis.

    ``df`` must provide ``open``, ``high``, ``low``, ``close`` and ``volume``
    columns; its index is used as the x axis of both traces. The figure is
    displayed with ``fig.show()`` and nothing is returned.
    """
    timeline = df.index

    # Price series drawn as candlesticks on the primary y axis.
    candles = go.Candlestick(
        x=timeline,
        open=df["open"],
        high=df["high"],
        low=df["low"],
        close=df["close"],
        name="价格走势",
    )

    # Volume bars bound to the secondary axis "y2" configured in the layout.
    volume_bars = go.Bar(
        x=timeline,
        y=df["volume"],
        name="成交量",
        marker_color="rgba(100, 100, 100, 0.6)",
        yaxis="y2",
    )

    # Layout: y2 overlays the price axis and is labelled on the right side.
    layout_options = {
        "title": "比特币价格与成交量分析",
        "yaxis_title": "价格 (USDT)",
        "yaxis2": {
            "title": "成交量 (BTC)",
            "overlaying": "y",
            "side": "right",
            "showgrid": False,
        },
        "xaxis_rangeslider_visible": False,
        "hovermode": "x unified",
        "template": "plotly_white",
        "height": 600,
    }

    fig = go.Figure()
    for trace in (candles, volume_bars):
        fig.add_trace(trace)
    fig.update_layout(**layout_options)
    fig.show()


# Generate the chart from the hourly candles fetched above.
plot_bitcoin_data(hourly_df)

In [74]:
# %% [markdown]
# ## 4. 数据缓存
# %%
def save_dataset(
    df: pd.DataFrame, name: str, cache_dir: Path = Path("../../data/cache")
):
    """Write a dataset to ``<cache_dir>/<name>.parquet`` and report the path.

    Args:
        df: Frame to persist; written with ``df.to_parquet``.
        name: File stem; ``.parquet`` is appended.
        cache_dir: Directory to write into. Created (with parents) when
            missing. The default is relative, so it resolves against the
            current working directory; pass an explicit directory to make the
            destination independent of where the notebook is run from.
    """
    # Path() also accepts a plain string, so callers may pass either form.
    save_path = Path(cache_dir) / f"{name}.parquet"
    save_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(save_path)
    print(f"✅ 数据已保存至：{save_path.resolve()}")


# Run the save: cache the hourly candles under the name "btc_1h_basic".
save_dataset(hourly_df, "btc_1h_basic")

✅ 数据已保存至：/Users/francis/Project/bitcoin-quant/data/cache/btc_1h_basic.parquet
