In [2]:
## 自造表达式引擎
## 参考：
## https://blog.csdn.net/itnerd/article/details/136041182
## https://zhuanlan.zhihu.com/p/679186652
## https://blog.csdn.net/weixin_38175458/article/details/139828021
import sys
sys.path.append("..")
from utils.qlib_exp_engine import *
import re
import pandas as pd



def parse_field(field):
    """Rewrite ``$name`` field references into ``Feature("name")`` calls.

    Examples of the rewrite:
      - ``$close``       -> ``Feature("close")``
      - ``$close5``      -> ``Feature("close5")``
      - ``$open+$close`` -> ``Feature("open")+Feature("close")``

    Non-string input is converted with ``str()`` first. Everything that is
    not a ``$``-prefixed word (operators, numbers, parentheses) is left
    untouched.
    """
    text = field if isinstance(field, str) else str(field)
    # A field name is the run of word characters right after the "$".
    return re.sub(r"\$(\w+)", r'Feature("\1")', text)


def compute_feature(df, exp):
    """Evaluate one expression string against ``df`` and return the result.

    The string is first rewritten by ``parse_field`` and then turned into an
    expression object with ``eval``; the names it uses (``Feature`` and any
    operators) must therefore be available in this module's namespace.
    The expression is loaded over the span from the first to the last index
    label of ``df``.

    NOTE: ``eval`` executes arbitrary Python, so ``exp`` must come from a
    trusted source (e.g. a config written in this notebook), never from
    user input.
    NOTE(review): taking ``df.index[0]`` / ``df.index[-1]`` as the range
    presumably expects ``df`` to be non-empty and ordered by its index;
    confirm against the expression engine.
    """
    expression = eval(parse_field(exp))
    load = expression.load
    return load(df, df.index[0], df.index[-1])

def compute_features(df, exps, labels):
    """Evaluate several expressions on ``df`` and collect them in one frame.

    Args:
        df: Input frame handed unchanged to ``compute_feature``.
        exps: Iterable of expression strings.
        labels: Iterable of output names, one per expression, in the same
            order as ``exps``.

    Returns:
        A ``pandas.DataFrame`` keyed by ``labels``. With no expressions an
        empty frame is returned.

    Raises:
        ValueError: If ``exps`` and ``labels`` differ in length. Pairing
            them with a bare ``zip`` would silently drop the surplus
            entries, i.e. lose features without any error.
    """
    exps, labels = list(exps), list(labels)
    if len(exps) != len(labels):
        raise ValueError(
            f"exps and labels must have the same length, "
            f"got {len(exps)} expressions and {len(labels)} labels"
        )

    data = {label: compute_feature(df, exp) for label, exp in zip(labels, exps)}
    if len(data) > 1:
        return pd.concat(data, axis=1)
    return pd.DataFrame(data)

In [12]:
import qlib
import pandas as pd
from qlib.contrib.data.handler import Alpha158
from qlib.data.dataset.handler import DataHandlerLP
from qlib.data.dataset.loader import StaticDataLoader, QlibDataLoader, DLWParser
from qlib.data.dataset import DatasetH

## Feature and label configuration
# feature_config is whatever Alpha158.parse_config_to_fields returns for this
# spec; it is later unpacked as the (exps, labels) arguments of
# compute_features.
feature_config = Alpha158.parse_config_to_fields(
    {
        "price": {"windows": [0], "feature": ["OPEN", "HIGH", "LOW", "VWAP"]},
        "rolling": {},
        "kbar": {},
    }
)
# Label: (expressions, names) pair in the same layout as feature_config.
label_config = (["Ref($close, -1)/$close-1"], ["LABEL0"])


## Load data
# Alternative path through qlib's own provider, currently disabled:
# qlib.init(provider_uri = "../data/raw/qlib_data/cn_data")
# dl = QlibDataLoader(config = {"feature":feature_config, "label":label_config})
df = pd.read_csv("../data/cleaned/csi300_stock_feats.csv", index_col=["datetime"])

# Group the full frame by instrument *before* `df` is narrowed below, so
# `instruments` still covers every instrument in the CSV.
instruments = df.groupby(by="instrument")
# Per-instrument loop, currently disabled:
# for name, instrument in instruments:
#     print(name)
#     df2=compute_features(instrument, *feature_config)

# Narrow `df` to one instrument and compute its features as a quick check.
df = df[df["instrument"] == "SH600005"]
df2 = compute_features(df, *feature_config)

In [None]:
## Build the modelling frame: features + label for every instrument
# The previous version overwrote `df` with the label of a single instrument
# and dropped the features entirely, so the dataset had nothing to train on
# and the cell could not be re-run. StaticDataLoader serves the frame as-is,
# so it has to be in qlib's layout already: rows indexed by
# (datetime, instrument), columns grouped under "feature" / "label".
per_instrument = {
    name: pd.concat(
        {
            "feature": compute_features(inst_df, *feature_config),
            "label": compute_features(inst_df, *label_config),
        },
        axis=1,
    )
    for name, inst_df in instruments
}
# assumes compute_features keeps the single-level datetime index of its
# input frame — TODO confirm against the expression engine
dataset_df = pd.concat(per_instrument, names=["instrument", "datetime"])
# The CSV index was read as plain strings; qlib slices by timestamps, so the
# datetime level is converted and moved to the front.
dataset_df.index = pd.MultiIndex.from_arrays(
    [
        pd.to_datetime(dataset_df.index.get_level_values("datetime")),
        dataset_df.index.get_level_values("instrument"),
    ],
    names=["datetime", "instrument"],
)
dataset_df = dataset_df.sort_index()

dl = StaticDataLoader(config=dataset_df)

## Create the data handler
# instruments=None: StaticDataLoader selects rows by literal instrument code,
# so a market name such as "csi300" would match nothing. The frame already
# contains exactly the instruments we want.
dh = DataHandlerLP(
    instruments=None,
    start_time='20160101',
    end_time='20191231',
    data_loader=dl
)

## Create the dataset
# NOTE(review): the segments overlap (train/valid share 2017, valid/test
# share 2018), which leaks evaluation data into training/early stopping.
# Kept as originally configured — confirm whether disjoint ranges were meant.
ds = DatasetH(
    handler=dh,
    segments={
        "train": ('20160101', '20171231'),
        "valid": ('20170101', '20181231'),
        "test": ('20180101', '20191231'),
    },
)

In [None]:
# Spot check: compute the configured features for one instrument taken from
# the per-instrument groups and print the resulting frame.
test = instruments.get_group("SH600000")
sample_features = compute_features(test, *feature_config)
print(sample_features)

In [None]:
# Inspect what the loader serves. StaticDataLoader looks instruments up as
# literal index keys rather than resolving market names, so "csi300" would
# not select anything; load everything it holds and show only the first rows.
dl.load(instruments=None).head()

In [None]:
from qlib.contrib.model.gbdt import LGBModel
# LightGBM hyper-parameters, kept together so they can be inspected or tuned
# in one place.
# NOTE(review): presumably copied from a reference configuration — confirm
# the source and that num_threads suits the machine running this notebook.
lgb_params = {
    "loss": "mse",
    "colsample_bytree": 0.8879,
    "learning_rate": 0.0421,
    "subsample": 0.8789,
    "lambda_l1": 205.6999,
    "lambda_l2": 580.9768,
    "max_depth": 8,
    "num_leaves": 210,
    "num_threads": 20,
}
model = LGBModel(**lgb_params)

In [None]:
# Train the model on the dataset `ds` inside a qlib recorder session
# (`R.start`), with no explicit tracking URI.
# NOTE(review): the `qlib.init(...)` call earlier in this notebook is
# commented out — confirm the recorder works without an initialised qlib.
from qlib.workflow import R
with R.start(uri=None):
    model.fit(ds)

In [None]:
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy

# Score the dataset once; the same predictions feed the strategy and define
# the backtest window below.
pred_score = model.predict(ds)

# Strategy configuration: the strategy is driven by the predicted scores,
# with topk=50 and n_drop=5.
STRATEGY_CONFIG = {"signal": pred_score, "topk": 50, "n_drop": 5}
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)

# Run the backtest only over dates that actually have predictions. The
# previous hard-coded 2021 window lies outside the 2016-2019 range the data
# handler is configured with, so the strategy had no signal to trade on.
# assumes level 0 of the prediction index holds the dates — TODO confirm
pred_dates = pred_score.index.get_level_values(0)
# NOTE(review): the `qlib.init(...)` call earlier in this notebook is
# commented out — confirm the backtest can run without an initialised provider.
report_normal, positions_normal = backtest_daily(
    start_time=pred_dates.min(), end_time=pred_dates.max(), strategy=strategy_obj
)

# Analyse the backtest: excess return over the benchmark, before and after
# trading cost.
excess_return = report_normal["return"] - report_normal["bench"]
analysis = {
    "excess_return_without_cost": risk_analysis(excess_return),
    "excess_return_with_cost": risk_analysis(excess_return - report_normal["cost"]),
}
analysis_df = pd.concat(analysis)  # type: pd.DataFrame
print(analysis_df)