<a href="https://colab.research.google.com/github/microsoft/qlib/blob/main/examples/workflow_by_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Qlib workflow by code: TRAModel on Alpha360 (CSI300)

In [1]:
#  Copyright (c) Microsoft Corporation.
#  Licensed under the MIT License.
import sys, site
from pathlib import Path
# Expected location of Qlib's helper scripts when the notebook is run from a
# repository checkout: the parent of the working directory, plus "scripts".
scripts_dir = Path.cwd().parent.joinpath("scripts")
print(scripts_dir)
# Do not assert that get_data.py exists here: a later cell checks for the file
# and downloads it when it is missing (e.g. on Colab). An assertion at this
# point would abort the run before that fallback is ever reached.
if not scripts_dir.joinpath("get_data.py").exists():
    print(f"get_data.py not found in {scripts_dir}; it will be downloaded by a later cell")

/mnt/d/code/quant/qlib/scripts


In [2]:
import sys
# Display the version string of the interpreter running this kernel
# (shown as the cell's result because it is the last expression).
sys.version

'3.8.13 (default, Mar 28 2022, 11:38:47) \n[GCC 7.5.0]'

In [3]:
################################# NOTE #################################
#  Please be aware that if colab installs the latest numpy and pyqlib  #
#  in this cell, users should RESTART the runtime in order to run the  #
#  following cells successfully.                                       #
########################################################################

try:
    import qlib
except ImportError:
    # install qlib
    ! pip install --upgrade numpy
    ! pip install pyqlib
    # reload
    site.main()

# scripts_dir = Path.cwd().parent.joinpath("scripts")
if not scripts_dir.joinpath("get_data.py").exists():
    # download get_data.py script
    scripts_dir = Path("~/tmp/qlib_code/scripts").expanduser().resolve()
    scripts_dir.mkdir(parents=True, exist_ok=True)
    import requests
    with requests.get("https://raw.githubusercontent.com/microsoft/qlib/main/scripts/get_data.py") as resp:
        with open(scripts_dir.joinpath("get_data.py"), "wb") as fp:
            fp.write(resp.content)

In [4]:
import qlib
import pandas as pd
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict


In [5]:
# Location of the local Qlib dataset; the same path is used as download target.
# The data can also be fetched manually with:
#   python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
provider_uri = "~/.qlib/qlib_data/cn_data"

data_is_present = exists_qlib_data(provider_uri)
if not data_is_present:
    print(f"Qlib data is not found in {provider_uri}")
    # get_data.py lives in scripts_dir, so that directory has to be importable
    sys.path.append(str(scripts_dir))
    from get_data import GetData

    downloader = GetData()
    downloader.qlib_data(target_dir=provider_uri, region=REG_CN)

# Initialise Qlib on top of that dataset with the REG_CN region setting.
qlib.init(provider_uri=provider_uri, region=REG_CN)

[18564:MainThread](2022-04-20 08:41:38,030) INFO - qlib.Initialization - [config.py:402] - default_conf: client.
[18564:MainThread](2022-04-20 08:41:38,035) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[18564:MainThread](2022-04-20 08:41:38,036) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/home/jeffye/.qlib/qlib_data/cn_data')}


In [6]:
# `market` is passed as the data handler's "instruments"; `benchmark` is the
# "benchmark" entry of the backtest configuration.
market, benchmark = "csi300", "SH000300"

# train model

In [7]:
###################################
# train model
###################################
# Keyword arguments for the data handler (used as the handler's "kwargs" in `task`).
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2022-04-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
    # Processors listed under "infer_processors", in the order given.
    "infer_processors": [
        {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
    ],
    # Processors listed under "learn_processors".
    "learn_processors": [
        {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
    ],
    # Label expression: ratio of the close two steps ahead to the close one step ahead, minus 1.
    "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
}

# port_analysis_config = {
#     "executor": {
#         "class": "SimulatorExecutor",
#         "module_path": "qlib.backtest.executor",
#         "kwargs": {
#             "time_per_step": "day",
#             "generate_portfolio_metrics": True,
#         },
#     },
#     "strategy": {
#         "class": "TopkDropoutStrategy",
#         "module_path": "qlib.contrib.strategy.signal_strategy",
#         "kwargs": {
#             "model": model,
#             "dataset": dataset,
#             "topk": 50,
#             "n_drop": 5,
#         },
#     },
#     "backtest": {
#         "start_time": "2017-01-01",
#         "end_time": "2020-08-01",
#         "account": 100000000,
#         "benchmark": benchmark,
#         "exchange_kwargs": {
#             "freq": "day",
#             "limit_threshold": 0.095,
#             "deal_price": "close",
#             "open_cost": 0.0005,
#             "close_cost": 0.0015,
#             "min_cost": 5,
#         },
#     },
# }

# Model and dataset specifications; each entry is later handed to
# init_instance_by_config as a {"class", "module_path", "kwargs"} description.
task = {
    "model": {
        "class": "TRAModel",
        "module_path": "qlib.contrib.model.pytorch_tra",
        # NOTE(review): "GPU" sits next to "class"/"module_path" instead of inside
        # "kwargs"; presumably it is not forwarded to the model -- confirm.
        "GPU": 1,
        "kwargs": {
            "tra_config": {
                "num_states": 3,
                "rnn_arch": "LSTM",
                "hidden_size": 32,
                "num_layers": 1,
                "dropout": 0.0,
                "tau": 1.0,
                "src_info": "LR_TPE",
            },
            "model_config": {
                "input_size": 6,
                "hidden_size": 64,
                "num_layers": 2,
                "rnn_arch": "LSTM",
                "use_attn": True,
                "dropout": 0.0,
            },
            "model_type": "RNN",
            "lr": 0.001,
            "n_epochs": 100,
            "max_steps_per_epoch": None,
            "early_stop": 20,
            "logdir": "output/Alpha360",
            "seed": 0,
            "lamb": 1.0,
            "rho": 0.99,
            "alpha": 0.5,
            "transport_method": "router",
            "memory_mode": "sample",
            "eval_train": False,
            "eval_test": True,
            "pretrain": True,
            "init_state": None,
            "freeze_model": False,
            "freeze_predictors": False,
        },
    },
    "dataset": {
        "class": "MTSDatasetH",
        "module_path": "qlib.contrib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha360",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            # (start, end) date pairs for each named segment
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2021-01-01", "2022-03-31"),
            },
            "seq_len": 60,
            "horizon": 2,
            "input_size": 6,
            "num_states": 3,
            "batch_size": 1024,
            "n_samples": None,
            "memory_mode": "sample",
            "drop_last": True,
        },
    },
  # "record": [
  #     {
  #       "class": "SignalRecord",
  #       "module_path": "qlib.workflow.record_temp",
  #       "kwargs": {
  #         "model": model,
  #         "dataset": dataset
  #       }
  #     },
  #     {
  #       "class": "SigAnaRecord",
  #       "module_path": "qlib.workflow.record_temp",
  #       "kwargs": {
  #         "ana_long_short": False,
  #         "ann_scaler": 252
  #       }
  #     },
  #     {
  #       "class": "PortAnaRecord",
  #       "module_path": "qlib.workflow.record_temp",
  #       "kwargs": {
  #         "config": {
  #           "strategy": {
  #             "class": "TopkDropoutStrategy",
  #             "module_path": "qlib.contrib.strategy",
  #             "kwargs": {
  #               "signal": [
  #                 model,
  #                 dataset
  #               ],
  #               "topk": 50,
  #               "n_drop": 5
  #             }
  #           },
  #           "backtest": {
  #             "config": port_analysis_config
  #           }
  #         }
  #       }
  #     }
  #   ]
}

# model initialization: build the model first, then the dataset, each from its
# own entry of `task`
model_spec, dataset_spec = task["model"], task["dataset"]
model = init_instance_by_config(model_spec)
dataset = init_instance_by_config(dataset_spec)

# start exp to train model
# with R.start(experiment_name="TRAModel_train_model"):
#     R.log_params(**flatten_dict(task))
#     model.fit(dataset)
#     R.save_objects(trained_model=model)
#     rid = R.get_recorder().id


Please install necessary libs for CatBoostModel.


  from pandas import MultiIndex, Int64Index
[18564:MainThread](2022-04-20 08:41:41,786) INFO - qlib.TRA - [pytorch_tra.py:140] - init TRAModel...
[18564:MainThread](2022-04-20 08:41:47,052) INFO - qlib.TRA - [pytorch_tra.py:170] - # model params: 55936
[18564:MainThread](2022-04-20 08:41:47,053) INFO - qlib.TRA - [pytorch_tra.py:171] - # tra params: 5606


RNN(
  (rnn): LSTM(6, 64, num_layers=2, batch_first=True)
  (W): Linear(in_features=64, out_features=64, bias=True)
  (u): Linear(in_features=64, out_features=1, bias=False)
)
TRA(
  (predictors): Linear(in_features=128, out_features=3, bias=True)
  (router): LSTM(3, 32, batch_first=True)
  (fc): Linear(in_features=160, out_features=3, bias=True)
)


[18564:MainThread](2022-04-20 08:42:15,427) INFO - qlib.timer - [log.py:113] - Time cost: 28.371s | Loading data Done
  result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
[18564:MainThread](2022-04-20 08:44:14,423) INFO - qlib.timer - [log.py:113] - Time cost: 117.149s | RobustZScoreNorm Done
[18564:MainThread](2022-04-20 08:45:54,785) INFO - qlib.timer - [log.py:113] - Time cost: 100.360s | Fillna Done
[18564:MainThread](2022-04-20 08:45:57,883) INFO - qlib.timer - [log.py:113] - Time cost: 0.806s | CSRankNorm Done
[18564:MainThread](2022-04-20 08:45:57,884) INFO - qlib.timer - [log.py:113] - Time cost: 222.455s | fit & process data Done
[18564:MainThread](2022-04-20 08:45:57,885) INFO - qlib.timer - [log.py:113] - Time cost: 250.829s | Init data Done


In [8]:
import datetime
import numpy as np
# Peek at the dataset's private containers to see what types they hold.
for internal in (dataset._data, dataset._daily_index):
    print(type(internal))
print(type(dataset._daily_index.values), dataset._daily_index.dtype)

from_date = datetime.date(2002, 3, 17)

# to_date=from_date+datetime.timedelta(days=1)
# Element-wise comparison of the daily index against the cut-off date; being the
# last expression of the cell, the boolean array is what gets displayed.
cutoff = pd.Timestamp(from_date)
dataset._daily_index.values > cutoff

<class 'numpy.ndarray'>
<class 'pandas.core.series.Series'>
<class 'numpy.ndarray'> datetime64[ns]


array([ True,  True,  True, ...,  True,  True,  True])

In [9]:
from qlib.data.dataset.handler import DataHandlerLP
# Prepare the "train" and "valid" segments with the dataset's default arguments.
# Optional arguments that were tried and are left disabled:
#     col_set=["label"],
#     data_key=DataHandlerLP.DK_R,
segments_to_prepare = ["train", "valid"]
label_train, label_valid = dataset.prepare(segments_to_prepare)
# self.fit_thresh(label_train)
# df_train, df_valid = dataset.prepare(
#     ["train", "valid"],
#     col_set=["feature", "label"],
#     data_key=DataHandlerLP.DK_L,
# )

scl= ('2008-01-01', '2014-12-31') 2008-01-01
scl= ('2015-01-01', '2016-12-31') 2015-01-01


In [10]:
# print(label_valid.iloc[1])

# prediction, backtest & analysis

In [11]:
###################################
# prediction, backtest & analysis
###################################
# Executor section: class, module and constructor kwargs of the simulator.
executor_spec = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {"time_per_step": "day", "generate_portfolio_metrics": True},
}

# Strategy section: refers to the `model` and `dataset` objects that exist when
# this cell runs.
strategy_spec = {
    "class": "TopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": {"model": model, "dataset": dataset, "topk": 50, "n_drop": 5},
}

# Backtest section: period, starting account, benchmark and exchange settings.
backtest_spec = {
    "start_time": "2021-01-01",
    "end_time": "2022-03-31",
    "account": 100000000,
    "benchmark": benchmark,
    "exchange_kwargs": {
        "freq": "day",
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}

port_analysis_config = {
    "executor": executor_spec,
    "strategy": strategy_spec,
    "backtest": backtest_spec,
}

# # backtest and analysis
# with R.start(experiment_name="backtest_analysis"):
#     recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
#     model = recorder.load_object("trained_model")

#     # prediction
#     recorder = R.get_recorder()
#     ba_rid = recorder.id
#     sr = SignalRecord(model, dataset, recorder)
#     sr.generate()

#     # backtest & analysis
#     par = PortAnaRecord(recorder, port_analysis_config, "day")
#     par.generate()


In [12]:
# backtest and analysis
with R.start(experiment_name="TRAModel_backtest_analysis", resume=True):
    # Recorder id of an earlier training run (hard-coded); replace it with the
    # id of your own run that stored a "trained_model" object.
    rid = "93ce1a9cf5a84ef981d1f7f5baa0d082"
    recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")
    model.fitted = True
    # The strategy config was built with the earlier `model` object; point it at
    # the loaded one.
    port_analysis_config["strategy"]["kwargs"]['model'] = model
    # due to bug of qlib: the loaded model needs its `_writer` attribute set by hand
    print(model.logdir)
    try:
        from torch.utils.tensorboard import SummaryWriter
    except ImportError:
        SummaryWriter = None
        print("SummaryWriter = None")
    print(SummaryWriter)
    # here is the bug in pytorch_tra.py L134
    # Only build a writer when tensorboard could be imported; calling
    # `SummaryWriter(...)` while it is None would raise TypeError.
    # NOTE(review): presumably the model treats `_writer = None` as "no
    # tensorboard logging" -- confirm against pytorch_tra.py.
    model._writer = SummaryWriter(log_dir=model.logdir) if SummaryWriter is not None else None
    ##############################

    # prediction
    # R.get_recorder() without arguments returns the recorder of the run started above.
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    # par = PortAnaRecord(recorder, port_analysis_config, "day")
    # par.generate()

[18564:MainThread](2022-04-20 08:46:07,050) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7fd5c2c92fa0>
[18564:MainThread](2022-04-20 08:46:07,236) INFO - qlib.workflow - [exp.py:257] - Experiment 4 starts running ...
[18564:MainThread](2022-04-20 08:46:07,679) INFO - qlib.workflow - [recorder.py:293] - Recorder c8cd1d9bca3045aca03b2f54d8945b8e starts running under Experiment 4 ...


output/Alpha360
<class 'torch.utils.tensorboard.writer.SummaryWriter'>
scl= ('2021-01-01', '2022-03-31') 2021-01-01


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, s

'The following are prediction results of the TRAModel model.'
                          score     label   score_0   score_1   score_2
datetime   instrument                                                  
2021-01-04 SH600000    0.018230  0.818243  0.049377  0.012907  0.018230
           SH600004   -0.110520 -1.203987 -0.082736 -0.101145 -0.110520
           SH600009   -0.131467  0.151959 -0.076913 -0.096610 -0.131467
           SH600010   -0.018740 -0.005845  0.005471 -0.024033 -0.018740
           SH600011    0.031496  0.385743  0.023085  0.010871  0.031496


[18564:MainThread](2022-04-20 08:46:13,108) INFO - qlib.timer - [log.py:113] - Time cost: 0.000s | waiting `async_log` Done


In [13]:
# The model referenced when the config was first built had model.fitted = False,
# so the config must point at the model that was loaded (and patched) afterwards.
port_analysis_config["strategy"]["kwargs"]['model'] = model

# The backtest previously failed with
# "IndexError: index N is out of bounds for axis 0 with size N": the backtest
# calendar needs the trading day *after* each step to compute the step's end,
# so the period must stop before the last day of the calendar. Clamp the
# requested end to the second-to-last calendar day.
from qlib.data import D

requested_end_time = pd.Timestamp("2022-03-29")
last_usable_day = pd.Timestamp(D.calendar(freq="day")[-2])
backtest_end_time = min(requested_end_time, last_usable_day)
if backtest_end_time < requested_end_time:
    print("backtest end_time clamped to", backtest_end_time.strftime("%Y-%m-%d"))
port_analysis_config['backtest']["end_time"] = backtest_end_time.strftime("%Y-%m-%d")

par = PortAnaRecord(recorder, port_analysis_config, "day")
par.generate()

[18564:MainThread](2022-04-20 08:46:13,157) INFO - qlib.backtest caller - [__init__.py:83] - Create new exchange


scl= ('2021-01-01', '2022-03-31') 2021-01-01


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
  result = _VF.lstm(input, hx, self._flat_weights, s

backtest loop:   0%|          | 0/243 [00:00<?, ?it/s]

  return np.nanmean(self.data)
  return np.nanmean(self.data)


IndexError: index 4132 is out of bounds for axis 0 with size 4132

# Analysis: evaluation and result analysis without the Experiment Manager
Reference: [Qlib report documentation](https://qlib.readthedocs.io/en/latest/component/report.html)

In [14]:
from qlib.backtest import backtest, executor
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy
import qlib.contrib.report as qcr
from qlib.utils.time import Freq
from qlib.utils import flatten_dict

# Load "pred.pkl" through the PortAnaRecord created earlier; it is used as the
# strategy's signal below.
pred_score = par.load("pred.pkl")

CSI300_BENCH = "SH000300"
FREQ = "day"
STRATEGY_CONFIG = {
    "topk": 50,
    "n_drop": 5,
    # pred_score, pd.Series
    "signal": pred_score,
}

EXECUTOR_CONFIG = {
    "time_per_step": "day",
    "generate_portfolio_metrics": True,
}

# The backtest previously failed with
# "IndexError: index N is out of bounds for axis 0 with size N": the backtest
# calendar needs the trading day *after* each step to compute the step's end,
# so the period must stop before the last day of the calendar. Clamp the
# requested end to the second-to-last calendar day.
from qlib.data import D

requested_end_time = pd.Timestamp("2021-12-31")
last_usable_day = pd.Timestamp(D.calendar(freq=FREQ)[-2])
backtest_end_time = min(requested_end_time, last_usable_day)
if backtest_end_time < requested_end_time:
    print("backtest end_time clamped to", backtest_end_time.strftime("%Y-%m-%d"))

backtest_config = {
    "start_time": "2021-01-01",
    "end_time": backtest_end_time.strftime("%Y-%m-%d"),
    "account": 100000000,
    "benchmark": CSI300_BENCH,
    "exchange_kwargs": {
        "freq": FREQ,
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}

# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
# Key under which the metrics for this frequency are stored, built from the
# two parts returned by Freq.parse.
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
# backtest info
report_normal_df, positions_normal = portfolio_metric_dict.get(analysis_freq)

qcr.analysis_position.report_graph(report_normal_df)

[18564:MainThread](2022-04-20 08:47:01,029) INFO - qlib.backtest caller - [__init__.py:83] - Create new exchange


backtest loop:   0%|          | 0/243 [00:00<?, ?it/s]

  return np.nanmean(self.data)
  return np.nanmean(self.data)


IndexError: index 4132 is out of bounds for axis 0 with size 4132

In [15]:
# portfolio_metric_dict, indicator_dict = backtest()
from qlib.backtest import get_strategy_executor, backtest_loop
# Build the strategy/executor pair from the objects and settings of the manual
# backtest, without running the backtest loop itself.
strategy_executor_pair = get_strategy_executor(executor=executor_obj, strategy=strategy_obj, **backtest_config)
trade_strategy, trade_executor = strategy_executor_pair


[18564:MainThread](2022-04-20 08:47:23,420) INFO - qlib.backtest caller - [__init__.py:83] - Create new exchange


In [18]:
# Show the two objects returned by get_strategy_executor.
print(trade_strategy, trade_executor)

<qlib.contrib.strategy.signal_strategy.TopkDropoutStrategy object at 0x7fd5d440a340> <qlib.backtest.executor.SimulatorExecutor object at 0x7fd344163f70>


In [57]:
# portfolio_metrics, indicator = backtest_loop("2021-01-01", "2021-12-31", trade_strategy, trade_executor)

from tqdm import tqdm
from qlib.backtest.utils import TradeCalendarManager
# Diagnostic: walk through every step of a standalone trade calendar and print
# its start/end time together with the step number and absolute calendar index.
# NOTE(review): the recorded run of this cell ended in the same IndexError as
# the backtests; presumably the final step needs a calendar entry after
# `end_time` -- confirm.
start_time, end_time = "2021-01-01", "2021-12-31"
# end_time = "2021-08-01"
# start_time = "2017-01-01"
# end_time = "2020-08-01"
calendar_manager = TradeCalendarManager("day", start_time, end_time)

n_steps = calendar_manager.get_trade_len()
for i in range(n_steps):
    trade_start_time, trade_end_time = calendar_manager.get_step_time(i)
    print(trade_start_time, trade_end_time, i, calendar_manager.start_index + i)


# trade_executor.reset(start_time=start_time, end_time=end_time)
# trade_strategy.reset(level_infra=trade_executor.get_level_infra())
# print(trade_executor.trade_calendar, trade_executor.trade_calendar.get_trade_len())
# print(trade_strategy.trade_calendar, trade_strategy.trade_calendar.get_trade_len())
# print(trade_strategy.trade_calendar.freq, trade_strategy.trade_calendar.start_time, trade_strategy.trade_calendar.end_time)
# print(trade_strategy.trade_calendar.start_index, trade_strategy.trade_calendar.end_index)

# # trade_strategy.trade_calendar.end_index = 4130
# for i in range(trade_strategy.trade_calendar.get_trade_len()):
#     trade_start_time, trade_end_time = trade_strategy.trade_calendar.get_step_time(i)
#     print(trade_start_time, trade_end_time, i, trade_strategy.trade_calendar.start_index + i)


# with tqdm(total=trade_executor.trade_calendar.get_trade_len()-1, desc="backtest loop") as bar:
# with tqdm(total=trade_strategy.trade_calendar.get_trade_len()-1, desc="backtest loop") as bar:
#     print('start one step')
#     _execute_result = None
#     while not trade_executor.finished():
#         print("calendar step", trade_executor.trade_calendar.trade_step, trade_executor.trade_calendar.trade_len, trade_strategy.trade_calendar.get_step_time(trade_executor.trade_calendar.trade_step))
#         _trade_decision = trade_strategy.generate_trade_decision(_execute_result)
#         # print("trade_decision:", _trade_decision)
#         _execute_result = trade_executor.collect_data(_trade_decision, level=0)
#         print("innner:", _execute_result)
#         trade_executor.trade_calendar.step()
#         # for _execute_result in trade_executor.collect_data(_trade_decision, level=0):
#         #     print("innner:", _execute_result)
#         bar.update(1)
            


2021-01-04 00:00:00 2021-01-04 23:59:59 0 3889
2021-01-05 00:00:00 2021-01-05 23:59:59 1 3890
2021-01-06 00:00:00 2021-01-06 23:59:59 2 3891
2021-01-07 00:00:00 2021-01-07 23:59:59 3 3892
2021-01-08 00:00:00 2021-01-10 23:59:59 4 3893
2021-01-11 00:00:00 2021-01-11 23:59:59 5 3894
2021-01-12 00:00:00 2021-01-12 23:59:59 6 3895
2021-01-13 00:00:00 2021-01-13 23:59:59 7 3896
2021-01-14 00:00:00 2021-01-14 23:59:59 8 3897
2021-01-15 00:00:00 2021-01-17 23:59:59 9 3898
2021-01-18 00:00:00 2021-01-18 23:59:59 10 3899
2021-01-19 00:00:00 2021-01-19 23:59:59 11 3900
2021-01-20 00:00:00 2021-01-20 23:59:59 12 3901
2021-01-21 00:00:00 2021-01-21 23:59:59 13 3902
2021-01-22 00:00:00 2021-01-24 23:59:59 14 3903
2021-01-25 00:00:00 2021-01-25 23:59:59 15 3904
2021-01-26 00:00:00 2021-01-26 23:59:59 16 3905
2021-01-27 00:00:00 2021-01-27 23:59:59 17 3906
2021-01-28 00:00:00 2021-01-28 23:59:59 18 3907
2021-01-29 00:00:00 2021-01-31 23:59:59 19 3908
2021-02-01 00:00:00 2021-02-01 23:59:59 20 3909
20

IndexError: index 4132 is out of bounds for axis 0 with size 4132

# analyze graphs

In [None]:
from pprint import pprint

import qlib
import pandas as pd
from qlib.utils.time import Freq
from qlib.utils import flatten_dict
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy

CSI300_BENCH = "SH000300"
STRATEGY_CONFIG = {
    "topk": 50,
    "n_drop": 5,
    # pred_score, pd.Series
    "signal": pred_score,
}


strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)

# The backtests in this notebook previously failed with
# "IndexError: index N is out of bounds for axis 0 with size N": the backtest
# calendar needs the trading day *after* each step to compute the step's end,
# so the period must stop before the last day of the calendar. Clamp the
# requested end to the second-to-last calendar day.
from qlib.data import D

requested_end_time = pd.Timestamp("2022-04-01")
last_usable_day = pd.Timestamp(D.calendar(freq="day")[-2])
backtest_end_time = min(requested_end_time, last_usable_day)
if backtest_end_time < requested_end_time:
    print("backtest end_time clamped to", backtest_end_time.strftime("%Y-%m-%d"))

report_normal, positions_normal = backtest_daily(
    start_time="2021-01-01", end_time=backtest_end_time.strftime("%Y-%m-%d"), strategy=strategy_obj
)
analysis = dict()
# default frequency will be daily (i.e. "day")
# Excess return = strategy return minus benchmark return, without and with
# the trading cost subtracted.
analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])

analysis_df = pd.concat(analysis)  # type: pd.DataFrame
pprint(analysis_df)

In [None]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
# `ba_rid` is the id of the run started under the experiment named
# "TRAModel_backtest_analysis", so the recorder has to be looked up in that
# experiment (the previous name "backtest_analysis" did not match it).
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="TRAModel_backtest_analysis")
print(recorder)
# Artifacts stored in that run: the predictions and the portfolio-analysis results.
pred_df = recorder.load_object("pred.pkl")
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")

## analysis position

### report

In [None]:
# Plot the report graph for the backtest report frame loaded from the recorder.
analysis_position.report_graph(report_normal_df)

### risk analysis

In [None]:
# Plot the risk-analysis graphs from the analysis frame and the backtest report frame.
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

## analysis model

In [None]:
# Fetch the label column set of the "test" segment and give it a single,
# fixed column name.
# NOTE(review): assumes dataset.prepare returns a one-column DataFrame for
# col_set="label" with this dataset class -- confirm.
label_df = dataset.prepare("test", col_set="label")
label_df.columns = ['label']

### score IC

In [None]:
# Put labels and predictions side by side, then keep exactly the rows (and row
# order) of the label frame before plotting the score IC.
combined = pd.concat([label_df, pred_df], axis=1, sort=True)
pred_label = combined.reindex(label_df.index)
analysis_position.score_ic_graph(pred_label)

### model performance

In [None]:
# Plot the model-performance graphs from the combined prediction/label frame.
analysis_model.model_performance_graph(pred_label)