In [119]:
# coding: utf-8
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import numpy as np
from plutus.research.backtest.backtest import BacktestCS
from plutus.utils.visualization.plot import PlotCS

# Display settings: show pandas floats with five decimals and keep numpy
# from switching to scientific notation when printing arrays.
pd.set_option("display.float_format", "{:.5f}".format)
np.set_printoptions(suppress=True)

In [120]:
def __warm_up(s: pd.Series, num):
    """Return a copy of ``s`` whose first ``num`` entries are set to 0.

    The slice is positional (``.iloc``), so the result does not depend on
    the kind of index the series carries. The input series is left
    untouched, which keeps the helper safe to use inside
    ``groupby(...).apply(...)`` and safe to re-run in a notebook.

    Parameters
    ----------
    s : pd.Series
        Series to blank out at the start.
    num : int
        Number of leading entries (by position) to overwrite with 0.

    Returns
    -------
    pd.Series
        A new series with the same index as ``s``.
    """
    warmed = s.copy()
    warmed.iloc[:num] = 0
    return warmed

In [121]:
# Load the bar data, keeping only rows whose trading_date is after 2022-03-01
# (the filter is handed to pyarrow when the parquet file is read).
datapath = "../../../datahub/raw/cn/stock/md/all_1m.parquet"
table = pq.read_table(
    datapath,
    filters=[("trading_date", ">", pd.to_datetime("2022-03-01"))],
)
# Order rows by code and then by time, and index the frame by (datetime, code).
data_bfq = (
    table.to_pandas()
    .sort_values(["code", "datetime"])
    .set_index(["datetime", "code"])
)
data_bfq

Unnamed: 0_level_0,Unnamed: 1_level_0,trading_date,turnover,trade_num,low,high,open,volume,close
datetime,code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-03-16 09:31:00,000001,2022-03-16,86586275.00000,3760.00000,13.70000,13.90000,13.86000,6255347.00000,13.73000
2022-03-16 09:32:00,000001,2022-03-16,31157300.00000,1487.00000,13.71000,13.78000,13.74000,2267000.00000,13.73000
2022-03-16 09:33:00,000001,2022-03-16,27613091.00000,1400.00000,13.68000,13.72000,13.72000,2015500.00000,13.69000
2022-03-16 09:34:00,000001,2022-03-16,18292642.00000,905.00000,13.69000,13.74000,13.69000,1333700.00000,13.73000
2022-03-16 09:35:00,000001,2022-03-16,23918339.00000,1014.00000,13.70000,13.74000,13.72000,1744620.00000,13.70000
...,...,...,...,...,...,...,...,...,...
2022-03-18 14:56:00,689009,2022-03-18,1005827.00000,35.00000,43.60000,43.70000,43.68000,23051.00000,43.63000
2022-03-18 14:57:00,689009,2022-03-18,218103.00000,18.00000,43.63000,43.72000,43.63000,4992.00000,43.72000
2022-03-18 14:58:00,689009,2022-03-18,0.00000,0.00000,43.72000,43.72000,43.72000,0.00000,43.72000
2022-03-18 14:59:00,689009,2022-03-18,0.00000,0.00000,43.72000,43.72000,43.72000,0.00000,43.72000


In [122]:
# Basic preprocessing: pivot each bar field so that the `code` index level
# becomes the columns (one row per datetime, one column per code).
open_ = data_bfq["open"].unstack()
close = data_bfq["close"].unstack()
high = data_bfq["high"].unstack()
low = data_bfq["low"].unstack()
vol = data_bfq["volume"].unstack()
amount = data_bfq["turnover"].unstack()

# TODO: exclude limit-up / limit-down and suspended stocks (tradeable mask);
# not implemented yet.

# TODO: fetch the benchmark; not implemented yet.

# Per-bar "period" value of each stock: the change in close versus the
# previous bar of the same code.
# The built-in GroupBy.diff keeps the original (datetime, code) index on every
# pandas version; groupby(...).apply(lambda ...) prepends the group key on
# pandas >= 2.0 and is much slower because it calls Python once per code.
# Earlier alternatives for this column were the bar-to-bar diff of `open`
# and the intrabar `close - open`.
meta_data = (
    data_bfq["close"]
    .sort_index(level=["code", "datetime"])  # chronological within each code
    .groupby(level="code")
    .diff()
    .to_frame("period")
)

# Drop rows without a usable value: the first bar of every code (NaN from the
# diff) and any non-finite entries.
meta_data = meta_data[np.isfinite(meta_data["period"])]
meta_data

Unnamed: 0_level_0,Unnamed: 1_level_0,period
datetime,code,Unnamed: 2_level_1
2022-03-16 09:32:00,000001,0.00000
2022-03-16 09:33:00,000001,-0.04000
2022-03-16 09:34:00,000001,0.04000
2022-03-16 09:35:00,000001,-0.03000
2022-03-16 09:36:00,000001,-0.02000
...,...,...
2022-03-18 14:56:00,689009,-0.06000
2022-03-18 14:57:00,689009,0.09000
2022-03-18 14:58:00,689009,0.00000
2022-03-18 14:59:00,689009,0.00000


In [None]:
# If the factor itself is prepared properly, the PnL does not need to be adjusted here.
# merge_data['date']  = meta_data['datetime'].apply(lambda x : x.date())
# merge_data['period'] = meta_data.groupby('date')['period'].apply(lambda x :__warm_up(x,num=2))
# merge_data.set_index(['datetime','code'],inplace  =True)

In [None]:
# Define a factor
def factor_simple(window: int = 5, close=None):
    """Short-horizon reversal factor: minus the ``window``-bar return of close.

    The percentage change is computed per ``code`` with the built-in
    ``GroupBy.pct_change``, which keeps the original (datetime, code) index
    on every pandas version. ``groupby(...).apply(lambda ...)`` prepends the
    group key on pandas >= 2.0, which would make the final ``unstack``
    produce a frame of the wrong shape.

    Parameters
    ----------
    window : int, default 5
        Number of bars over which the percentage change is taken.
    close : pd.Series, optional
        Close prices indexed by a MultiIndex that has a ``code`` level.
        When omitted, the notebook-level ``data_bfq["close"]`` is used.

    Returns
    -------
    pd.DataFrame
        Factor values with the ``code`` level moved to the columns.
    """
    if close is None:
        close = data_bfq["close"]
    factor = -1 * close.groupby(level="code").pct_change(window)
    return factor.unstack(level="code")


# Build the factor once; later cells pass `test_factor` to the backtester.
test_factor = factor_simple()

In [None]:
# Compute the factor rank: hand the `period` frame (meta_data) and the wide
# factor frame (test_factor) to BacktestCS.cal_factor_rank.
# NOTE(review): the layout of the returned frame is defined inside plutus'
# BacktestCS and is not visible in this notebook.
backtest_cs = BacktestCS()
clean_factor_data = backtest_cs.cal_factor_rank(meta_data, test_factor)
clean_factor_data

In [None]:
# Compute the factor quantile with group_num=20 (presumably 20 buckets;
# confirm against BacktestCS.cal_factor_quantile).
# NOTE(review): this overwrites clean_factor_data with a value derived from
# itself, so re-running only this cell feeds its own previous output back in;
# re-run the rank cell first.
clean_factor_data = backtest_cs.cal_factor_quantile(clean_factor_data, group_num=20)
clean_factor_data

In [None]:
# Select the hold portfolio you need: cal_hold_portfolio returns a long and a
# short leg, which are unpacked here.
# NOTE(review): the meaning of hold_num=1 (number of groups vs. number of
# names per side) is defined in BacktestCS; confirm before changing it.
long_portfolio_data, short_portfolio_data = backtest_cs.cal_hold_portfolio(
    clean_factor_data, hold_num=1
)
# Display both legs as the cell output.
long_portfolio_data, short_portfolio_data

In [None]:
# Compute the instrument weights; the weighting scheme can be replaced with
# your own.
# NOTE(review): only the long leg is weighted here, and `portfolio_data` is
# not referenced by any later cell visible in this notebook.
portfolio_data = backtest_cs.cal_portfolio_weight(long_portfolio_data)
portfolio_data

In [None]:
# Compute the backtest metrics from the long and short legs. The call is
# unpacked into four values, named here as the return frame, Sharpe ratio,
# annual return and max drawdown.
ret_df, sharpe_ratio, annual_return, max_down = backtest_cs.describer_01(
    long_portfolio_data, short_portfolio_data
)

In [None]:
# Plot the cumulative PnL of each factor group, grouping by
# (datetime, factor_quantile).
PlotCS.plot_group_cumsum_pnl(
    clean_factor_data, groupby_col=["datetime", "factor_quantile"]
)

In [None]:
# Plot the long-short hedged returns. The index is first replaced by a plain
# 0..n-1 counter, so the x-axis shows the bar number rather than the original
# index values.
ret_df.index = pd.RangeIndex(len(ret_df))
ret_df.plot(title="test", figsize=(16, 9))