In [1]:
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
    backtest as normal_backtest,
    risk_analysis,
)
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict
import platform

In [6]:
# Use the default local dataset.
# NOTE: the data has to be downloaded first:
#   python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
if platform.system() == "Windows":
    # Raw string: "\q" and "\c" are not valid escape sequences, so a plain
    # literal only works by accident and triggers an invalid-escape warning on
    # newer Python versions. The resulting path text is unchanged.
    provider_uri = r"D:\qlib_data\cn_data"  # target_dir
else:
    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir

print(provider_uri)


# Initialise Qlib with the local data directory and the China-market region.
qlib.init(provider_uri=provider_uri, region=REG_CN)

[15172:MainThread](2021-05-23 14:22:53,334) INFO - qlib.Initialization - [config.py:276] - default_conf: client.
D:\qlib_data\cn_data
[15172:MainThread](2021-05-23 14:22:55,378) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.
[15172:MainThread](2021-05-23 14:22:55,380) INFO - qlib.Initialization - [__init__.py:47] - data_path=D:\qlib_data\cn_data


In [8]:


from qlib.data import D

# Trading calendar at daily frequency for the requested date range.
trade_date = D.calendar(
    start_time="2020-01-01",
    end_time="2020-11-30",
    freq="day",
)

# Show the first 5 trading dates.
print(trade_date[0:5])


# Stock codes of the "csi300" (CSI 300) market over the requested period.
instruments = D.instruments(market="csi300")
sock_list = D.list_instruments(
    instruments=instruments,
    start_time="2020-01-01",
    end_time="2020-12-30",
    as_list=True,
)

# Show the first 5 stock codes (the slice takes the head of the list).
print(sock_list[0:5])


[Timestamp('2020-01-02 00:00:00') Timestamp('2020-01-03 00:00:00')
 Timestamp('2020-01-06 00:00:00') Timestamp('2020-01-07 00:00:00')
 Timestamp('2020-01-08 00:00:00')]
['SH600000', 'SH600004', 'SH600009', 'SH600010', 'SH600011']


In [10]:
# Fetch the selected fields for one stock over the given date range.
features_df = D.features(
    instruments=["SZ002236"],
    fields=["$close", "$volume"],
    start_time="2020-01-01",
    end_time="2020-12-30",
    freq="day",
)

print(features_df.head())

# TODO: open question left by the author -- what can be passed in `fields`?

                          $close     $volume
instrument datetime                         
SZ002236   2020-01-02  32.685493  70629488.0
           2020-01-03  32.853664  37219624.0
           2020-01-06  33.694496  48228080.0
           2020-01-07  33.220570  40416324.0
           2020-01-08  32.028118  55896308.0


In [11]:
# Build a custom stock pool.
from qlib.data.filter import NameDFilter, ExpressionDFilter

# Static filter: driven by a regular expression rule on the instrument name.
nameDFilter = NameDFilter(name_rule_re="SZ[0-9!]")

# Dynamic filter: backward-adjusted close price greater than 1 yuan.
expressoionDFilter = ExpressionDFilter(rule_expression="$close>1")

# Apply both filters on top of the "csi300" market and list the matching stocks.
instruments = D.instruments(
    market="csi300",
    filter_pipe=[nameDFilter, expressoionDFilter],
)
sock_list = D.list_instruments(
    instruments=instruments,
    start_time="2020-01-01",
    end_time="2020-12-30",
    as_list=True,
)

# Show the first 5 stock codes (the slice takes the head of the list).
print(sock_list[0:5])


['SZ000001', 'SZ000002', 'SZ000063', 'SZ000066', 'SZ000069']


In [12]:
# Build Alpha158 features (current-period factors) and labels (next-period return).
from qlib.contrib.data.handler import Alpha158

# Date range, fit window and stock pool passed to the handler.
data_handler_config = dict(
    # Start and end of the full data range.
    start_time="2020-01-01",
    end_time="2020-12-30",
    # Start and end of the fitting range (fit_start_time / fit_end_time).
    fit_start_time="2020-01-01",
    fit_end_time="2020-06-30",
    # Stock pool; `instruments` comes from the filter cell that must run before this one.
    instruments=instruments,
)

h = Alpha158(**data_handler_config)

# Column names (factor names).
print(h.get_cols())

# Labels (returns).
Alpha158_df_label = h.fetch(col_set="label")
print(Alpha158_df_label)

[15172:MainThread](2021-05-23 15:19:27,396) INFO - qlib.timer - [log.py:81] - Time cost: 137.619s | Loading data Done
[15172:MainThread](2021-05-23 15:19:27,504) INFO - qlib.timer - [log.py:81] - Time cost: 0.093s | DropnaLabel Done
[15172:MainThread](2021-05-23 15:19:27,984) INFO - qlib.timer - [log.py:81] - Time cost: 0.479s | CSZScoreNorm Done
[15172:MainThread](2021-05-23 15:19:27,985) INFO - qlib.timer - [log.py:81] - Time cost: 0.577s | fit & process data Done
[15172:MainThread](2021-05-23 15:19:27,986) INFO - qlib.timer - [log.py:81] - Time cost: 138.209s | Init data Done
['KMID', 'KLEN', 'KMID2', 'KUP', 'KUP2', 'KLOW', 'KLOW2', 'KSFT', 'KSFT2', 'OPEN0', 'HIGH0', 'LOW0', 'VWAP0', 'ROC5', 'ROC10', 'ROC20', 'ROC30', 'ROC60', 'MA5', 'MA10', 'MA20', 'MA30', 'MA60', 'STD5', 'STD10', 'STD20', 'STD30', 'STD60', 'BETA5', 'BETA10', 'BETA20', 'BETA30', 'BETA60', 'RSQR5', 'RSQR10', 'RSQR20', 'RSQR30', 'RSQR60', 'RESI5', 'RESI10', 'RESI20', 'RESI30', 'RESI60', 'MAX5', 'MAX10', 'MAX20', 'MAX

In [13]:
# Fetch the label column set (returns) from the handler `h` and print it.
Alpha158_df_label = h.fetch(col_set="label")
print(Alpha158_df_label)

                         LABEL0
datetime   instrument          
2020-01-02 SZ000001   -0.006403
           SZ000002   -0.016849
           SZ000063    0.004104
           SZ000069    0.002581
           SZ000100   -0.008602
...                         ...
2020-09-25 SZ300347         NaN
           SZ300408         NaN
           SZ300413         NaN
           SZ300433         NaN
           SZ300601         NaN

[19230 rows x 1 columns]


In [14]:
# Fetch the feature column set (factors) from the handler `h` and print it.
Alpha158_df_feature = h.fetch(col_set="feature")
print(Alpha158_df_feature)

                           KMID      KLEN     KMID2       KUP      KUP2  \
datetime   instrument                                                     
2020-01-02 SZ000001    0.013213  0.024024  0.549999  0.004805  0.200000   
           SZ000002   -0.007317  0.033232 -0.220181  0.024390  0.733943   
           SZ000063   -0.005889  0.018788 -0.313437  0.003926  0.208952   
           SZ000069   -0.010178  0.024173 -0.421052  0.008906  0.368419   
           SZ000100    0.013304  0.035477  0.374999  0.008869  0.249999   
...                         ...       ...       ...       ...       ...   
2020-09-25 SZ300347    0.009418  0.031660  0.297469  0.022242  0.702531   
           SZ300408    0.011619  0.021787  0.533331  0.008715  0.400002   
           SZ300413    0.014902  0.028392  0.524862  0.010510  0.370167   
           SZ300433   -0.015789  0.031579 -0.499997  0.000929  0.029414   
           SZ300601   -0.021212  0.063011 -0.336641  0.028770  0.456576   

                        

In [None]:
# 导入 Qlib 模块代码
import time;
import numpy as np 
import pandas as pd 

import qlib
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
    backtest as normal_backtest,
    risk_analysis,
)

from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.utils import flatten_dict

In [None]:
# Define the stock pool and the benchmark index code.
# `market` reuses the `instruments` object built in an earlier cell, so that cell must run first.
# NOTE(review): "csi300" looks like a market/pool name rather than an index instrument
# code such as "SH000300" -- confirm what the consumer of `benchmark` expects.
market = instruments
benchmark = "csi300"

In [None]:
# Factor-generation settings (`data_handler_config`) and model-training settings (`task`).
data_handler_config = {
    "start_time": "2020-01-01",
    "end_time": "2020-12-30",
    "fit_start_time": "2020-01-01",
    "fit_end_time": "2020-06-30",
    "instruments": market,
}

task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            # LightGBM spells its regularisation parameters with the letter "l"
            # (lambda_l1 / lambda_l2); the previous keys used the digit "1"
            # ("lambda_11" / "lambda_12") and did not name any LightGBM parameter.
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    "dataset": {
        "class": "DatasetH",
        # Uses the same "module_path" key as the model entry above; the previous
        # "model_path" key was a typo.
        "module_path": "qlib.data.dataset",
        "kwargs": {
            # TODO: still empty -- presumably the handler config (built from
            # `data_handler_config`) and the train/valid/test segments belong here;
            # confirm against the Qlib DatasetH documentation before use.
        },
    },
}