<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.utils import init_instance_by_config, flatten_dict
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord, SigAnaRecord
import qlib
import pandas as pd
from qlib.contrib.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
    backtest_daily as normal_backtest,
    risk_analysis,
)
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data.dataset.loader import QlibDataLoader
from qlib.data.dataset.processor import ZScoreNorm, Fillna, CSZScoreNorm, DropnaLabel

In [None]:
# Retrieve the data.
# Run this command only once.
#!python qlib-main/scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

In [None]:
# Qlib has to be initialised before any other qlib call in this notebook.
qlib.init()

# Stock universe and the benchmark code used in the backtest.
market, benchmark = (
    "csi300",  # or csi500
    "SH000300",  # must be one of the codes included in the market
)

# [start, end] date pairs for the three dataset segments.
# NOTE(review): 2015-01-30 falls in neither `train` nor `valid` — confirm the gap is intentional.
train = ["2013-01-01", "2015-01-29"]
valid = ["2015-01-31", "2016-12-31"]
test = ["2017-01-01", "2020-08-01"]

In [None]:
# All alpha operators: https://github.com/microsoft/qlib/blob/main/qlib/data/ops.py
# All processors: https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py

# Feature columns, written as {column name: alpha expression} so every name stays
# next to the expression it labels. Insertion order is the column order.
feature_fields = {
    # NOTE(review): named RESI5 but the expression uses a 15-day window — confirm which is intended.
    "RESI5": "Resi($close, 15)/$close",
    "WVMA5": "Std(Abs($close/Ref($close, 1)-1)*$volume, 5)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, 5)+1e-12)",
    "RSQR5": "Rsquare($close, 5)",
    "KLEN": "($high-$low)/$open",
    "RSQR10": "Rsquare($close, 10)",
    "CORR5": "Corr($close, Log($volume+1), 5)",
    "CORD5": "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 5)",
    "CORR10": "Corr($close, Log($volume+1), 10)",
    "RSQR20": "Rsquare($close, 20)",
    "CORD60": "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 60)",
    "CORD10": "Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), 10)",
    "CORR20": "Corr($close, Log($volume+1), 20)",
    "KLOW": "(Less($open, $close)-$low)/$open",
}

# Label column, in the same {column name: expression} form.
label_fields = {
    "LABEL0": "Ref($close, -2)/Ref($close, -1) - 1",
}

# Keyword arguments for the data handler.
# NOTE(review): the "data_loader" entry only makes sense for a handler that accepts a
# loader from its caller; a handler that builds its own loader would reject it — confirm
# against the handler class this config is passed to.
data_handler_config = {
    "start_time": train[0],
    "end_time": test[1],
    "fit_start_time": train[0],
    "fit_end_time": train[1],
    "instruments": market,
    "data_loader": {
        "class": QlibDataLoader,
        "kwargs": {
            # Each group is an ordered (expressions, names) pair. The original used set
            # literals here, which have no defined order and are not indexable.
            "config": {
                "feature": (list(feature_fields.values()), list(feature_fields.keys())),
                "label": (list(label_fields.values()), list(label_fields.keys())),
            },
            # Loader argument, not a field group: it must sit beside "config", not inside it.
            "freq": "day",
        },
    },
    # Ordered list of processors. The original single dict repeated the "class" key,
    # so DropnaLabel was silently overwritten by CSZScoreNorm.
    "learn_processors": [
        {"class": DropnaLabel},
        {
            "class": CSZScoreNorm,  # cross sectional standardization (z-score)
            "kwargs": {
                "fields_group": "label",
            },
        },
    ],
}

In [None]:
# Keyword arguments for the dataset handler.
# The handler below is DataHandlerLP because data_handler_config supplies its own
# "data_loader"; Alpha158 constructs a loader itself and passes it on, so handing it a
# second one raises "got multiple values for keyword argument 'data_loader'".
# DataHandlerLP has no fit-window parameters, so those two keys are left out here.
handler_kwargs = {
    key: value
    for key, value in data_handler_config.items()
    if key not in ("fit_start_time", "fit_end_time")
}

# Model and dataset configuration, each instantiated later with init_instance_by_config.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "DataHandlerLP",
                "module_path": "qlib.data.dataset.handler",
                "kwargs": handler_kwargs,
            },
            # Segment name -> (start, end) date pair.
            "segments": {
                "train": tuple(train),
                "valid": tuple(valid),
                "test": tuple(test),
            },
        },
    },
}

# Backtest configuration: which executor and strategy to instantiate, and the
# settings of the backtest run itself.
port_analysis_config = {
    # Executor configured to step through the backtest one day at a time.
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": dict(time_per_step="day", generate_portfolio_metrics=True),
    },
    # Strategy class and its parameters; "<PRED>" is passed through as the signal value.
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": dict(signal="<PRED>", topk=50, n_drop=5),
    },
    # The backtest spans the test segment and is measured against `benchmark`.
    "backtest": dict(
        start_time=test[0],
        end_time=test[1],
        account=100000000,
        benchmark=benchmark,
        exchange_kwargs=dict(
            freq="day",
            limit_threshold=0.095,
            deal_price="close",
            open_cost=0.0005,
            close_cost=0.0015,
            min_cost=5,
        ),
    ),
}

In [None]:
# Model and dataset initialisation: build both objects from their `task` entries.
model_config = task["model"]
model = init_instance_by_config(model_config)

dataset_config = task["dataset"]
dataset = init_instance_by_config(dataset_config)

In [None]:
# Quick look at the dataset: prepare the "train" segment and display it as the cell output.
# NOTE(review): despite its name, `train_sampler` holds whatever `dataset.prepare`
# returns for the segment — presumably a DataFrame; confirm.
train_sampler = dataset.prepare('train')
train_sampler

In [None]:
# Train the model and record its predictions.
# The experiment name has to be "exp": the later cells resume and reload this recorder
# with experiment_name='exp', so starting it under a different name ("workflow" before)
# leaves the recorder in one experiment while it is looked up in another.
with R.start(experiment_name="exp"):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id  # recorder id, reused by later cells to resume / reload this run

    # Inference on the dataset and saving the resulting signal to the recorder.
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# Backtest and analysis: resume the recorder identified by `rid` and add the
# evaluation records to it.
with R.start(experiment_name="exp", recorder_id=rid, resume=True):
    rec = R.get_recorder()

    # Signal-based analysis: IC, ICIR, Rank IC, Rank ICIR.
    sar = SigAnaRecord(rec)
    sar.generate()

    # Portfolio-based analysis (backtest): mean, std, annualized_return,
    # information_ratio, max_drawdown.
    par = PortAnaRecord(
        rec,
        port_analysis_config,
        "day",
    )
    par.generate()

In [None]:
# Fetch the recorder that holds the results of the run identified by `rid`.
recorder = R.get_recorder(experiment_name="exp", recorder_id=rid)

# Predictions stored on the recorder.
pred_df = recorder.load_object("pred.pkl")

# Portfolio-analysis artifacts stored on the recorder.
report_normal_df = recorder.load_object(
    "portfolio_analysis/report_normal_1day.pkl"
)
positions = recorder.load_object(
    "portfolio_analysis/positions_normal_1day.pkl"
)
analysis_df = recorder.load_object(
    "portfolio_analysis/port_analysis_1day.pkl"
)

In [None]:
# Draw qlib's report graph from the loaded backtest report frame.
# Reference: https://qlib.readthedocs.io/en/latest/component/report.html#graphical-result
analysis_position.report_graph(report_normal_df)

In [None]:
# Draw qlib's risk-analysis graph from the loaded analysis and report frames.
# Reference: https://qlib.readthedocs.io/en/latest/component/report.html#graphical-result
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

In [None]:
# Labels of the test segment, with the single column renamed to "label"
# (presumably the name the report functions look for — confirm).
label_df = dataset.prepare("test", col_set="label")
label_df.columns = ["label"]

# Put labels and predictions side by side, then align the rows to the label index.
labels_and_scores = pd.concat([label_df, pred_df], axis=1, sort=True)
pred_label = labels_and_scores.reindex(label_df.index)

# https://qlib.readthedocs.io/en/latest/component/report.html#graphical-result
analysis_position.score_ic_graph(pred_label)

In [None]:
# Draw qlib's model-performance graphs from the combined label / prediction frame.
# Reference: https://qlib.readthedocs.io/en/latest/component/report.html#graphical-result
analysis_model.model_performance_graph(pred_label)