In [6]:
import pandas as pd
import subprocess
import qlib

def get_test_df():
    """Build a tiny sample factor table for exercising the Qlib export.

    Returns:
        pd.DataFrame: four rows indexed by ``(datetime, instrument)`` with two
        custom factor columns, ``Alpha_Test_001`` and ``Beta_Volity``.
    """
    # One tuple per observation: (date, stock code, custom factor 1, custom factor 2).
    rows = [
        ('2023-01-03', '600000.SH', 0.12, 0.25),
        ('2023-01-03', '000001.SZ', -0.05, 0.31),
        ('2023-01-04', '600000.SH', 0.18, 0.22),
        ('2023-01-04', '000001.SZ', 0.15, 0.27),
    ]
    column_names = ['datetime', 'instrument', 'Alpha_Test_001', 'Beta_Volity']
    sample = pd.DataFrame(rows, columns=column_names)
    # Move the two key columns into a MultiIndex, leaving only factor columns.
    return sample.set_index(['datetime', 'instrument'])



def save_features_to_qlib(df: pd.DataFrame, csv_path: str, fac_path: str, qlib_dir: str,
                          freq: str = 'day', date_field_name: str = 'datetime', symbol_field_name: str = 'instrument',
                          python_path: str = "D:/anaconda/envs/py10/python.exe",
                          dump_bin_path: str = "C:/Users/tantra/Desktop/data/.venv/Lib/site-packages/qlib/scripts/dump_bin.py"):
    """Export a factor table to CSV files and convert them with ``dump_bin.py``.

    Three things happen, in order:

    1. One CSV per instrument is written to ``csv_path`` (rows are dates,
       columns are factors).
    2. One CSV per factor is written to ``fac_path`` (long format, keeping the
       original row index of ``df``).
    3. ``dump_bin.py dump_all`` is run in a subprocess to convert the
       per-instrument CSVs into ``qlib_dir``.

    Args:
        df: Factor values with a two-level row index whose last level is the
            instrument code, and one column per factor.
        csv_path: Directory for the per-instrument CSV files.
        fac_path: Directory for the per-factor CSV files.
        qlib_dir: Output directory handed to ``dump_bin.py``.
        freq: Value passed to ``--freq``.
        date_field_name: Value passed to ``--date_field_name``.
        symbol_field_name: Value passed to ``--symbol_field_name``.
        python_path: Interpreter used to run ``dump_bin.py``. The default is
            machine-specific; override it on any other machine.
        dump_bin_path: Location of ``dump_bin.py``. The default is
            machine-specific; override it on any other machine.

    Raises:
        OSError: If an output directory cannot be created.
        subprocess.CalledProcessError: If ``dump_bin.py`` exits with a
            non-zero status.
    """
    import os

    # Create the output directories (including missing parents). Existing
    # directories are fine; any other failure is reported instead of hidden.
    for directory in (csv_path, fac_path, qlib_dir):
        os.makedirs(directory, exist_ok=True)

    # Split by instrument and save as CSV.
    # Key step: pivot the last index level into the columns and put it
    # outermost, so selecting one symbol yields a date x factor frame.
    df_csv = df.unstack().swaplevel(axis=1).sort_index(axis=1)
    for symbol in df_csv.columns.get_level_values(0).unique():
        # Drop dates on which this instrument has no factor value at all.
        symbol_df = df_csv[symbol].dropna(how='all')
        symbol_df.to_csv(os.path.join(csv_path, f"{symbol}.csv"))
    print("特征数据已按标的代码存储到CSV中...")

    # Split by factor and save as CSV, for loading factors into a store.
    # NOTE(review): an earlier revision also built a wide (date x instrument)
    # frame here but never wrote it; files were always saved in long format.
    # If the wide layout was the real intent, unstack the instrument level
    # before saving — confirm with the consumer of these files.
    for factor in df.columns.unique():
        factor_series = df[factor].dropna()
        factor_series.to_csv(os.path.join(fac_path, f"{factor}.csv"))
        # Other storage formats (pkl, parquet):
        # factor_series.to_pickle(f"{fac_path}/{factor}.pkl")
        # factor_series.to_frame().to_parquet(f"{fac_path}/{factor}.parquet", engine='pyarrow')
    print("特征数据已按特征名称存储到CSV中...")

    # Convert the per-instrument CSVs with dump_bin.py. An argument list
    # (rather than one command string) works on every platform and survives
    # paths that contain spaces.
    command = [
        str(python_path), str(dump_bin_path), "dump_all",
        "--csv_path", str(csv_path),
        "--qlib_dir", str(qlib_dir),
        "--freq", str(freq),
        "--date_field_name", str(date_field_name),
        "--symbol_field_name", str(symbol_field_name),
    ]
    # check=True: do not announce success when the conversion actually failed.
    subprocess.run(command, check=True)
    print("CSV特征数据转QLib数据格式完成...")

def qlib_test(qlib_dir: str):
    """Initialise Qlib on ``qlib_dir`` and print the two sample factors.

    Args:
        qlib_dir: Directory passed to ``qlib.init`` as ``provider_uri``.
    """
    import qlib
    from qlib.data import D

    qlib.init(provider_uri=qlib_dir, region="cn")

    # Query the two custom factors for both sample instruments over the
    # two sample dates.
    query = dict(
        instruments=["600000.SH", "000001.SZ"],
        fields=["$Alpha_Test_001", "$Beta_Volity"],
        start_time="2023-01-03",
        end_time="2023-01-04",
    )
    print(D.features(**query))

In [9]:
if __name__ == '__main__':
    # Build the sample factor data.
    df = get_test_df()
    # Write the CSV exports and convert them into the format Qlib reads
    # (arguments are csv_path, fac_path, qlib_dir in that order).
    save_features_to_qlib(df, "features_csv", "features_fac", "features_qlib")
    # Read the factors back through Qlib as a check.
    qlib_test("features_qlib")

特征数据已按标的代码存储到CSV中...
特征数据已按特征名称存储到CSV中...


[7480:MainThread](2025-04-29 17:51:51,823) INFO - qlib.Initialization - [config.py:420] - default_conf: client.
[7480:MainThread](2025-04-29 17:51:51,828) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[7480:MainThread](2025-04-29 17:51:51,829) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': WindowsPath('C:/Users/tantra/Desktop/data/features_qlib')}


CSV特征数据转QLib数据格式完成...
                       $Alpha_Test_001  $Beta_Volity
instrument datetime                                 
000001.SZ  2023-01-03            -0.05          0.31
           2023-01-04             0.15          0.27
600000.SH  2023-01-03             0.12          0.25
           2023-01-04             0.18          0.22
