In [3]:
from qlib import init
# 0.9.7版本的正确导入路径：Dataset和DataLoader在qlib.data.dataset中
from qlib.data.dataset import Dataset, DataLoader
from qlib.data.filter import ExpressionFilter
# 0.9.7版本中DataHandlerLP的正确路径
from qlib.data.dataset.handler import DataHandlerLP
import pandas as pd
import numpy as np

# --------------------------
# 1. 初始化Qlib（本地模式）
# --------------------------
init(provider_uri="./qlib_metadata", region="cn")  # 元数据存储路径


# --------------------------
# 2. 收益率转换（解决daily_increase是百分比的问题）
# --------------------------
def preprocess_data(df):
    # 转换收益率：百分比→原始值（如3→0.03）
    if "daily_increase" in df.columns:
        df["daily_increase"] = df["daily_increase"] / 100
        df["daily_increase"] = df["daily_increase"].replace([np.inf, -np.inf], np.nan)
    # 基础过滤（与你的逻辑一致）
    df = df[
        (df["pre_close"] > 0)
        & (df["open"] > 0)
        & (df["boll_low"].notna())
        & (df["high_limit"].notna())
    ].copy()
    return df


# --------------------------
# 3. 加载Parquet数据（0.9.7版本兼容写法）
# --------------------------
data_loader = DataLoader(
    config={
        "feature": [
            "auction_volume_ratio_10d",  # 因子列
            "pre_close", "open", "boll_low", "high_limit"  # 筛选用列
        ],
        "label": ["daily_increase"],  # 收益列
        "data_path": r"D:\workspace\xiaoyao\data\stock_daily_auction_with_metrics.parquet",
        "preprocess": preprocess_data,  # 应用收益率转换
        "columns": {
            "trade_date": "datetime",  # 日期列映射
            "code": "instrument"       # 股票ID列映射
        },
        "freq": "day"
    }
)

# 定义数据集（0.9.7版本兼容写法）
dataset = Dataset(
    data_loader=data_loader,
    handler=DataHandlerLP(
        label=["daily_increase"],
        label_delay=0,  # 当日收益
        filter=ExpressionFilter(
            expr="open < boll_low and open < high_limit"  # 核心筛选条件
        )
    )
)


# --------------------------
# 4. 提取数据并计算IC/IR
# --------------------------
# 加载因子和收益数据
df = dataset.prepare("train", col_set=["feature", "label"])
print(f"数据形状：{df.shape}，列名：{df.columns.tolist()}")

# 按日计算IC（复用Qlib的calc_ic函数，0.9.7版本可用）
from qlib.contrib.evaluate import calc_ic
ic_result = calc_ic(
    df=df,
    factor_col="auction_volume_ratio_10d",
    label_col="daily_increase",
    ic_type="pearson",
    return_all=True
)

# 计算每日样本量并过滤无效IC
daily_count = df.groupby(level="datetime").size().rename("stock_count")
valid_ic = pd.merge(ic_result, daily_count, left_index=True, right_index=True)
valid_ic = valid_ic[valid_ic["stock_count"] >= 2]
invalid_days = len(daily_count) - len(valid_ic)


# --------------------------
# 5. 输出结果
# --------------------------
avg_ic = valid_ic["ic"].mean()
ir = (avg_ic * np.sqrt(240)) / (valid_ic["ic"].std() * np.sqrt(240)) if valid_ic["ic"].std() != 0 else 0

print("\n" + "="*70)
print("Qlib 0.9.7 因子评估结果")
print("="*70)
print(f"IC均值：{avg_ic:.4f}")
print(f"IR：{ir:.4f}")
print(f"有效IC交易日数：{len(valid_ic)}")
print("="*70)

ImportError: cannot import name 'DataLoader' from 'qlib.data.dataset' (d:\sdk\Anaconda3\envs\xiaoyao\lib\site-packages\qlib\data\dataset\__init__.py)