In [1]:
import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
from qlib.contrib.evaluate import (
    backtest as normal_backtest,
    risk_analysis,
)
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict
import platform

In [6]:
# Use the default local data.
# NOTE: the data must be downloaded from remote first:
#   python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
# Choose the data directory (target_dir) according to the operating system.
if platform.system() == "Windows":
    # Raw string: the backslashes are literal path separators, not escape
    # sequences ("\q" and "\c" are invalid escapes in a normal string literal).
    provider_uri = r"D:\qlib_data\cn_data"  # target_dir
else:
    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir

print(provider_uri)


# Initialize qlib with the local data and the China market region.
qlib.init(provider_uri=provider_uri, region=REG_CN)

[15172:MainThread](2021-05-23 14:22:53,334) INFO - qlib.Initialization - [config.py:276] - default_conf: client.
D:\qlib_data\cn_data
[15172:MainThread](2021-05-23 14:22:55,378) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.
[15172:MainThread](2021-05-23 14:22:55,380) INFO - qlib.Initialization - [__init__.py:47] - data_path=D:\qlib_data\cn_data


In [8]:


from qlib.data import D

# Fetch the daily trading calendar for the chosen date range.
trade_date = D.calendar(
    start_time="2020-01-01",
    end_time="2020-11-30",
    freq="day",
)

# Show the first 5 trading dates.
print(trade_date[:5])


# Fetch all stock codes of the CSI 300 market.
instruments = D.instruments(market="csi300")
sock_list = D.list_instruments(
    instruments=instruments,
    start_time="2020-01-01",
    end_time="2020-12-30",
    as_list=True,
)

# Show the first 5 stock codes (the slice takes the head of the list).
print(sock_list[:5])


[Timestamp('2020-01-02 00:00:00') Timestamp('2020-01-03 00:00:00')
 Timestamp('2020-01-06 00:00:00') Timestamp('2020-01-07 00:00:00')
 Timestamp('2020-01-08 00:00:00')]
['SH600000', 'SH600004', 'SH600009', 'SH600010', 'SH600011']


In [10]:
# Fetch the requested fields for one stock over the given date range.
features_df = D.features(
    instruments=["SZ002236"],
    fields=["$close", "$volume"],
    start_time="2020-01-01",
    end_time="2020-12-30",
    freq="day",
)

print(features_df.head())

# TODO: open question left by the author - clarify what may be passed in `fields`.

                          $close     $volume
instrument datetime                         
SZ002236   2020-01-02  32.685493  70629488.0
           2020-01-03  32.853664  37219624.0
           2020-01-06  33.694496  48228080.0
           2020-01-07  33.220570  40416324.0
           2020-01-08  32.028118  55896308.0


In [11]:
# Build a custom stock pool.
from qlib.data.filter import NameDFilter, ExpressionDFilter

# Static filter, configured with a name-rule regular expression.
# NOTE(review): the "!" inside the character class looks unintended - confirm.
nameDFilter = NameDFilter(name_rule_re="SZ[0-9!]")

# Dynamic filter: backward-adjusted close price greater than 1 yuan.
expressoionDFilter = ExpressionDFilter(rule_expression="$close>1")

# Get the CSI 300 stocks that pass both filters.
instruments = D.instruments(
    market="csi300",
    filter_pipe=[nameDFilter, expressoionDFilter],
)
sock_list = D.list_instruments(
    instruments=instruments,
    start_time="2020-01-01",
    end_time="2020-12-30",
    as_list=True,
)

# Show the first 5 stock codes (the slice takes the head of the list).
print(sock_list[:5])


['SZ000001', 'SZ000002', 'SZ000063', 'SZ000066', 'SZ000069']


In [None]:
# 生成 Alpha158 特征（当期因子）和标签（下期收益）
from qlib.contrib.data.handler import Alpha158

# 设置日期、股票池等参数