In [24]:
import os
import pandas as pd
import baostock as bs
from datetime import datetime


class BaostockDataHandler:
    """Fetch daily OHLCV bars from Baostock and save them as CSV files.

    The listing (IPO) date is looked up by field name instead of by a fixed
    column position, and every lookup failure falls back to
    ``DEFAULT_START_DATE`` so that a missing or short row never causes an
    index error.
    """

    # Start date used when the listing date cannot be determined.
    DEFAULT_START_DATE = "2000-01-01"
    # Columns requested from Baostock, in the order they are written to CSV.
    OHLCV_COLUMNS = ("date", "open", "high", "low", "close", "volume")

    def fetch_ohlcv(self, stock_code: str, start_date: str = None, end_date: str = None) -> pd.DataFrame:
        """Download daily OHLCV rows for ``stock_code``.

        Args:
            stock_code: Baostock security code, e.g. ``"sh.600812"``.
            start_date: First day to fetch (``YYYY-MM-DD``). When omitted, the
                stock's ``ipoDate`` is used, or ``DEFAULT_START_DATE`` if that
                cannot be determined.
            end_date: Last day to fetch (``YYYY-MM-DD``). Defaults to today.

        Returns:
            A DataFrame with the columns in ``OHLCV_COLUMNS``; the price and
            volume columns are numeric and rows containing any missing value
            are dropped.

        Raises:
            Exception: The basic-info query reported an error.
            ValueError: The K-line query reported an error or returned no rows.
        """
        bs.login()
        try:
            end_date = end_date or datetime.now().strftime("%Y-%m-%d")
            if not start_date:
                start_date = self._lookup_ipo_date(stock_code)

            # adjustflag="1": price-adjustment mode; per the Baostock docs this
            # selects backward-adjusted prices -- TODO confirm it is intended.
            rs_kline = bs.query_history_k_data_plus(
                code=stock_code,
                fields=",".join(self.OHLCV_COLUMNS),
                start_date=start_date,
                end_date=end_date,
                frequency="d",
                adjustflag="1",
            )

            data_list = []
            while (rs_kline.error_code == "0") and rs_kline.next():
                data_list.append(rs_kline.get_row_data())

            # Surface API failures instead of reporting them as "no data" or
            # silently returning a partial result.
            if rs_kline.error_code != "0":
                raise ValueError(f"获取 {stock_code} K线数据失败: {rs_kline.error_msg}")
        finally:
            # Always release the session, including on the error paths above.
            bs.logout()

        if not data_list:
            raise ValueError(f"未获取到 {stock_code} 的数据")

        df = pd.DataFrame(data_list, columns=list(self.OHLCV_COLUMNS))
        numeric_cols = [col for col in self.OHLCV_COLUMNS if col != "date"]
        # Baostock returns strings; unparsable or empty cells become NaN and
        # the affected rows are then dropped.
        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")
        df = df.dropna().reset_index(drop=True)

        print(f"成功获取 {stock_code} 数据：{len(df)} 条（{start_date} 至 {end_date}）")
        return df

    def _lookup_ipo_date(self, stock_code: str) -> str:
        """Return the stock's listing date, or ``DEFAULT_START_DATE`` if unavailable.

        Must be called while a Baostock session is open.

        Raises:
            Exception: The basic-info query reported an error.
        """
        rs_basic = bs.query_stock_basic(code=stock_code)
        if rs_basic.error_code != "0":
            raise Exception(f"获取基本信息失败: {rs_basic.error_msg}")

        fields = rs_basic.fields
        # No "ipoDate" column, or no row at all (e.g. unknown code): fall back.
        if "ipoDate" not in fields or not rs_basic.next():
            return self.DEFAULT_START_DATE

        row = rs_basic.get_row_data()
        ipo_date_index = fields.index("ipoDate")
        # Guard against a row shorter than the advertised field list.
        if ipo_date_index >= len(row):
            return self.DEFAULT_START_DATE

        ipo_date = row[ipo_date_index]
        if not ipo_date or ipo_date == "0000-00-00":
            return self.DEFAULT_START_DATE
        return ipo_date

    def save_to_csv(self, df: pd.DataFrame, stock_code: str, save_dir: str = "./stock_data") -> str:
        """Write the OHLCV columns of ``df`` to ``<save_dir>/<stock_code>.csv``.

        The directory is created if it does not exist. Returns the path of the
        written CSV file.
        """
        os.makedirs(save_dir, exist_ok=True)
        csv_path = os.path.join(save_dir, f"{stock_code}.csv")
        df[list(self.OHLCV_COLUMNS)].to_csv(csv_path, index=False)
        print(f"保存至：{csv_path}")
        return csv_path


# Usage example: fetch one stock's full history and write it to CSV.
if __name__ == "__main__":
    # NOTE(review): the original comment named Hundsun Technologies as the
    # target stock, which does not appear to match ticker sh.600812 -- confirm
    # which security is intended.
    stock_code = "sh.600812"
    handler = BaostockDataHandler()
    try:
        # No start date is given, so the handler resolves it from the listing date.
        df = handler.fetch_ohlcv(stock_code)
        handler.save_to_csv(df, stock_code)
    except Exception as exc:
        # Best-effort demo: report the failure instead of raising.
        print(f"处理失败：{str(exc)}")


login success!
logout success!
成功获取 sh.600812 数据：7694 条（1994-01-14 至 2025-09-08）
保存至：./stock_data\sh.600812.csv


In [25]:
# Run this inside the conda environment.
# Invokes dump_bin.py's dump_all command on the CSV directory given by --data_path.
# NOTE(review): --data_path and --qlib_dir are absolute local paths, whereas the CSV
# above was saved to the relative "./stock_data" directory; presumably they refer to
# the same folder -- confirm before running on another machine.
!python ../qlib/scripts/dump_bin.py dump_all --data_path D:/workspace/xiaoyao/qlibusing/stock_data --qlib_dir D:/workspace/xiaoyao/qlib_data --include_fields open,close,high,low,volume --date_field_name "date"

2025-09-08 22:40:17.813 | INFO     | __main__:_get_all_date:307 - start get all date......

  0%|          | 0/2 [00:00<?, ?it/s]
 50%|█████     | 1/2 [00:02<00:02,  2.69s/it]
100%|██████████| 2/2 [00:02<00:00,  1.50s/it]
2025-09-08 22:40:20.813 | INFO     | __main__:_get_all_date:326 - end of get all date.

2025-09-08 22:40:20.814 | INFO     | __main__:_dump_calendars:329 - start dump calendars......
2025-09-08 22:40:20.996 | INFO     | __main__:_dump_calendars:332 - end of calendars dump.

2025-09-08 22:40:20.996 | INFO     | __main__:_dump_instruments:335 - start dump instruments......
2025-09-08 22:40:21.000 | INFO     | __main__:_dump_instruments:337 - end of instruments dump.

2025-09-08 22:40:21.000 | INFO     | __main__:_dump_features:340 - start dump features......

  0%|          | 0/2 [00:00<?, ?it/s]
 50%|█████     | 1/2 [00:02<00:02,  2.21s/it]
100%|██████████| 2/2 [00:02<00:00,  1.24s/it]
2025-09-08 22:40:23.479 | INFO     | __main__:_dump_features:347 - end of features d