In [1]:
#  Copyright (c) Microsoft Corporation.
#  Licensed under the MIT License.

# -*- coding: utf-8 -*-
# @Author   : Yi Li (liyi_best@foxmail.com)
# @Time     : 2022/8/24 8:49
# @File     : 快速入门.ipynb
# @Project  : ai_quant_trade
# Copyright (c) Personal 2022 Yi Li
# Function Description: Quick-start tutorial for qlib
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#  本样例基于qlib/examples/workflow_by_code.py，进行了小修改和注释加入
#  本样例仅是一个快速入门样例，详细使用查看后续章节

In [1]:
import sys, site
from pathlib import Path

################################# NOTE #################################
#  Please be aware that if colab installs the latest numpy and pyqlib  #
#  in this cell, users should RESTART the runtime in order to run the  #
#  following cells successfully.                                       #
########################################################################

# try:
#     import qlib
# except ImportError:
#     # install qlib
#     ! pip install --upgrade numpy
#     ! pip install pyqlib
#     # reload
#     site.main()
#
# scripts_dir = Path.cwd().parent.joinpath("scripts")
# if not scripts_dir.joinpath("get_data.py").exists():
#     # download get_data.py script
#     scripts_dir = Path("~/tmp/qlib_code/scripts").expanduser().resolve()
#     scripts_dir.mkdir(parents=True, exist_ok=True)
#     import requests
#     with requests.get("https://raw.githubusercontent.com/microsoft/qlib/main/scripts/get_data.py") as resp:
#         with open(scripts_dir.joinpath("get_data.py"), "wb") as fp:
#             fp.write(resp.content)

In [1]:
import qlib
import pandas as pd
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

# 1. 数据下载
* 通过qlib库获取数据：from qlib.tests.data import GetData
* 数据从微软官网获取：REMOTE_URL = "http://fintech.msra.cn/stock_data/downloads"
* 下载后默认存放在"~/.qlib/qlib_data/cn_data"
* 压缩包命名格式：20220701081835_qlib_data_simple_cn_1d_latest
* 之后自动解压

In [2]:
# Use the default local data directory; it doubles as the download target.
# NOTE: the same data can also be fetched by running the script shipped in the
# qlib source tree:
#   python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
provider_uri = "~/.qlib/qlib_data/cn_data"

data_missing = not exists_qlib_data(provider_uri)
if data_missing:
    print(f"Qlib data is not found in {provider_uri}")
    # sys.path.append(str(scripts_dir))
    # Imported lazily: the downloader is only needed when the data is absent.
    from qlib.tests.data import GetData

    downloader = GetData()
    downloader.qlib_data(target_dir=provider_uri, region=REG_CN)

# Initialise qlib against the (now present) data directory for the CN region.
qlib.init(provider_uri=provider_uri, region=REG_CN)

[10944:MainThread](2022-08-31 13:12:51,918) INFO - qlib.Initialization - [config.py:402] - default_conf: client.
[10944:MainThread](2022-08-31 13:12:51,926) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[10944:MainThread](2022-08-31 13:12:51,926) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': WindowsPath('C:/Users/pc/.qlib/qlib_data/cn_data')}


In [3]:
# Index code handed to the backtest configuration as its benchmark.
benchmark = "SH000300"
# CSI 300 stock-pool code; per the original note, a matching csi300.txt file
# lives under the data set's instruments folder.
market = "csi300"

# 2. 模型训练

In [4]:
# Data handler settings: overall time span, the window the handler is fitted
# on, and the instrument universe.
data_handler_config = dict(
    start_time="2008-01-01",
    end_time="2020-08-01",
    fit_start_time="2008-01-01",
    fit_end_time="2014-12-31",
    instruments=market,
)

# Keyword arguments listed under the LGBModel entry.
# Per the original note, qlib.contrib.model.gbdt builds on qlib/model/base.py.
lgb_kwargs = dict(
    loss="mse",
    colsample_bytree=0.8879,
    learning_rate=0.0421,
    subsample=0.8789,
    lambda_l1=205.6999,
    lambda_l2=580.9768,
    max_depth=8,
    num_leaves=210,
    num_threads=20,
)

# Alpha158 handler entry; it shares the data_handler_config object above.
alpha158_handler = {
    "class": "Alpha158",
    "module_path": "qlib.contrib.data.handler",
    "kwargs": data_handler_config,
}

# Date ranges for the train / valid / test segments of the dataset.
dataset_segments = dict(
    train=("2008-01-01", "2014-12-31"),
    valid=("2015-01-01", "2016-12-31"),
    test=("2017-01-01", "2020-08-01"),
)

# Combined model + dataset configuration.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": lgb_kwargs,
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": alpha158_handler,
            "segments": dataset_segments,
        },
    },
}

# Build the model and the dataset from the class name / module path / kwargs
# entries declared in `task`.
model_config = task["model"]
dataset_config = task["dataset"]
model = init_instance_by_config(model_config)
dataset = init_instance_by_config(dataset_config)

# Train inside a "train_model" experiment run.
with R.start(experiment_name="train_model"):
    # Log the flattened task configuration as run parameters.
    flat_params = flatten_dict(task)
    R.log_params(**flat_params)

    model.fit(dataset)

    # Store the fitted model under the name "trained_model" and remember the
    # recorder id so later cells can load it back.
    R.save_objects(trained_model=model)
    train_recorder = R.get_recorder()
    rid = train_recorder.id


ImportError: cannot import name 'abc' from 'bson.py3compat' (D:\Users\pc\anaconda3\lib\site-packages\bson\py3compat.py)

# prediction, backtest & analysis

In [5]:
###################################
# prediction, backtest & analysis
###################################
# Executor entry: daily steps with portfolio metrics generation switched on.
executor_config = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": dict(time_per_step="day", generate_portfolio_metrics=True),
}

# Strategy entry: receives the model and dataset objects currently in scope.
strategy_config = {
    "class": "TopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": dict(model=model, dataset=dataset, topk=50, n_drop=5),
}

# Exchange settings nested inside the backtest entry.
exchange_kwargs = dict(
    freq="day",
    limit_threshold=0.095,
    deal_price="close",
    open_cost=0.0005,
    close_cost=0.0015,
    min_cost=5,
)

# Backtest entry: time span, starting account value and benchmark.
backtest_config = dict(
    start_time="2017-01-01",
    end_time="2020-08-01",
    account=100000000,
    benchmark=benchmark,
    exchange_kwargs=exchange_kwargs,
)

# Full configuration later handed to PortAnaRecord.
port_analysis_config = dict(
    executor=executor_config,
    strategy=strategy_config,
    backtest=backtest_config,
)

# Backtest and analysis, run inside a "backtest_analysis" experiment.
with R.start(experiment_name="backtest_analysis"):
    # Load the object stored as "trained_model" by training run `rid`.
    model = R.get_recorder(
        recorder_id=rid, experiment_name="train_model"
    ).load_object("trained_model")

    # prediction
    # R.get_recorder() with no arguments is presumably the recorder of the run
    # started above -- confirm against the qlib docs.
    recorder = R.get_recorder()
    ba_rid = recorder.id  # the analysis cell looks this run up again by id
    SignalRecord(model, dataset, recorder).generate()

    # backtest & analysis
    PortAnaRecord(recorder, port_analysis_config, "day").generate()


NameError: name 'model' is not defined

# analyze graphs

In [None]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D

# Look up the backtest run again by the id saved in `ba_rid`.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
print(recorder)

# Bound-method alias to keep the repeated artifact loads short.
load_artifact = recorder.load_object

# Object stored under "pred.pkl".
pred_df = load_artifact("pred.pkl")

# Objects stored under the "portfolio_analysis/" prefix.
report_normal_df = load_artifact("portfolio_analysis/report_normal_1day.pkl")
positions = load_artifact("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = load_artifact("portfolio_analysis/port_analysis_1day.pkl")

## analysis position

### report

In [None]:
# Pass the report frame loaded from the backtest recorder to qlib's
# report_graph helper (expected to render its charts inline -- confirm in docs).
analysis_position.report_graph(report_normal_df)

### risk analysis

In [None]:
# Pass both the analysis frame and the report frame loaded from the backtest
# recorder to qlib's risk_analysis_graph helper.
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

## analysis model

In [None]:
# Fetch the "label" column set for the "test" segment of the dataset.
label_df = dataset.prepare("test", col_set="label")
# Rename the columns in place to a single 'label' column.
# NOTE(review): assumes prepare() returned exactly one column -- confirm.
label_df.columns = ['label']

### score IC

In [None]:
# Put labels and predictions side by side (column-wise, sorted axis), then
# re-align the rows to the label index.
label_and_pred = pd.concat([label_df, pred_df], axis=1, sort=True)
pred_label = label_and_pred.reindex(label_df.index)

# Hand the combined frame to qlib's score IC graph helper.
analysis_position.score_ic_graph(pred_label)

### model performance

In [None]:
# Pass the combined label/prediction frame built in the score IC cell to
# qlib's model_performance_graph helper.
analysis_model.model_performance_graph(pred_label)