# Step By Step 07 - 风险因子模型
-------------------

> 在本篇中我们使用数据供应商提供的因子模型，进行alpha建模

## 一、数据展示

In [44]:
import os

import numpy as np
from alphamind.api import *
from PyFin.api import *
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder

# The database connection string is read from the DB_URI environment variable,
# so no credentials are stored in the notebook. `os` is imported explicitly
# above so that this cell runs on a fresh kernel.
engine = SqlEngine(os.environ['DB_URI'])
engine

<alphamind.data.engines.sqlengine.mysql.SqlEngine at 0x7fce1f1eff10>

### 1.1 因子暴露

这里我们展示某一天的因子截面值，这里我们只使用**风格因子**以及**行业因子**：

In [2]:
# Print the style-factor and industry-factor name lists, one labelled line each.
for group_label, group_factors in (("风格因子", risk_styles), ("行业因子", industry_styles)):
    print(f"# {group_label}: {group_factors}")

# 风格因子: ['BETA', 'MOMENTUM', 'SIZE', 'EARNYILD', 'RESVOL', 'GROWTH', 'BTOP', 'LEVERAGE', 'LIQUIDTY', 'SIZENL']
# 行业因子: ['Bank', 'RealEstate', 'Health', 'Transportation', 'Mining', 'NonFerMetal', 'HouseApp', 'LeiService', 'MachiEquip', 'BuildDeco', 'CommeTrade', 'CONMAT', 'Auto', 'Textile', 'FoodBever', 'Electronics', 'Computer', 'LightIndus', 'Utilities', 'Telecom', 'AgriForest', 'CHEM', 'Media', 'IronSteel', 'NonBankFinan', 'ELECEQP', 'AERODEF', 'Conglomerates']


In [3]:
# Cross-section date and stock universe used by the cells below.
ref_date = "2020-07-30"
universe = Universe("HS300")

# Style factors followed by industry factors; both names refer to the same list.
all_styles = risk_styles + industry_styles
factors = all_styles

# Resolve the universe to codes, then fetch the factor values for those codes.
codes = engine.fetch_codes(ref_date, universe)
factor_data = engine.fetch_factor(ref_date, factors, codes)

exposure_shape = factor_data.shape
print(f"因子暴露矩阵: {exposure_shape}")

因子暴露矩阵: (300, 41)


In [4]:
# Preview the first rows of the factor table fetched in the previous cell.
factor_data.head()

Unnamed: 0,AERODEF,AgriForest,Auto,BETA,BTOP,Bank,BuildDeco,CHEM,CONMAT,CommeTrade,...,RealEstate,SIZE,SIZENL,Telecom,Textile,Transportation,Utilities,code,chgPct,secShortName
0,0,0,0,-0.991,3.521,1,0,0,0,0,...,0,1.646,-0.291,0,0,0,0,2010000001,-1.0466,浦发银行
1,0,0,0,-0.102,0.263,0,0,0,0,0,...,0,-0.465,0.415,0,0,1,0,2010000005,-2.0237,白云机场
2,0,0,0,-0.628,-0.721,0,0,0,0,0,...,0,0.892,-0.396,0,0,1,0,2010000010,-1.7115,上海机场
3,0,0,0,-0.732,1.504,0,0,0,0,0,...,0,0.08,0.063,0,0,0,0,2010000011,-0.8475,包钢股份
4,0,0,0,-0.963,1.569,0,0,0,0,0,...,0,0.373,-0.136,0,0,0,1,2010000012,0.0,华能国际


### 1.2 因子风险模型

类似的，我们也可以获取因子的风险矩阵：

In [5]:
# Fetch the "short" risk model of type "factor" for the selected codes.
# The call returns three values; only the first one is used in this notebook.
factor_model, _, _ = engine.fetch_risk_model(
    ref_date,
    codes,
    risk_model="short",
    model_type="factor",
)

In [6]:
# Pull the three parts of the factor model into notebook-level names:
# factor covariance, factor exposures (loadings) and idiosyncratic risk.
factor_cov, factor_loaing, factor_idsync = (
    factor_model.factor_cov,
    factor_model.risk_exp,
    factor_model.idsync,
)

#### 1.2.1 因子协方差

In [7]:
# Show only the top-left 10 x 10 corner of the factor covariance matrix.
factor_cov.iloc[:10, :10]

Unnamed: 0_level_0,AERODEF,AgriForest,Auto,BETA,BTOP,Bank,BuildDeco,CHEM,CONMAT,COUNTRY
Factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AERODEF,0.062969,0.004594,0.001708,0.001801,0.001031,-0.006454,0.001015,0.001987,0.001412,0.012135
AgriForest,0.004594,0.058825,0.001609,-0.001543,0.000393,-0.009808,0.003832,0.002811,0.004971,-0.005977
Auto,0.001708,0.001609,0.011279,0.000355,2.3e-05,-0.003275,1.6e-05,0.001883,0.00047,0.001611
BETA,0.001801,-0.001543,0.000355,0.003289,0.000323,-6.9e-05,-0.000691,-0.000425,-0.00086,0.012874
BTOP,0.001031,0.000393,2.3e-05,0.000323,0.0007,-0.000368,-2e-05,-0.000229,-0.000435,0.001591
Bank,-0.006454,-0.009808,-0.003275,-6.9e-05,-0.000368,0.017824,-0.003455,-0.004883,-0.006962,-0.007152
BuildDeco,0.001015,0.003832,1.6e-05,-0.000691,-2e-05,-0.003455,0.014501,0.001957,0.012481,-0.003628
CHEM,0.001987,0.002811,0.001883,-0.000425,-0.000229,-0.004883,0.001957,0.007744,0.005092,-0.00032
CONMAT,0.001412,0.004971,0.00047,-0.00086,-0.000435,-0.006962,0.012481,0.005092,0.031932,-0.002775
COUNTRY,0.012135,-0.005977,0.001611,0.012874,0.001591,-0.007152,-0.003628,-0.00032,-0.002775,0.090338


#### 1.2.2 因子载荷

这个值实际上就是之前1.1的**因子暴露**

In [35]:
# Keep the first 10 loading columns, then preview the first rows.
factor_loaing.iloc[:, :10].head()

Unnamed: 0_level_0,AERODEF,AgriForest,Auto,BETA,BTOP,Bank,BuildDeco,CHEM,CONMAT,COUNTRY
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010000001,0,0,0,-0.991,3.521,1,0,0,0,1
2010000005,0,0,0,-0.102,0.263,0,0,0,0,1
2010000010,0,0,0,-0.628,-0.721,0,0,0,0,1
2010000011,0,0,0,-0.732,1.504,0,0,0,0,1
2010000012,0,0,0,-0.963,1.569,0,0,0,0,1


#### 1.2.3 因子特质风险

In [9]:
# Preview the first entries of the idiosyncratic-risk part of the factor model.
factor_idsync.head()

code
2010000001    0.025719
2010000005    0.166317
2010000010    0.066714
2010000011    0.034306
2010000012    0.052153
Name: srisk, dtype: float64

## 二、结合因子模型的alpha信号构造

首先我们基于 **Finance-Python** 提供的功能来构造一个目标因子：

In [10]:
# Target signal: cross-sectional rank of the sum of the latest EMA5D and EMV6D values.
alpha_formula = CSRank(LAST("EMA5D") + LAST("EMV6D"))

# Evaluate the formula for the selected codes and drop rows with missing values.
alpha_fetched = engine.fetch_factor(ref_date, factors={"alpha": alpha_formula}, codes=codes)
alpha_raw = alpha_fetched.dropna()

# Replace the raw signal with its standardized version.
alpha_values = alpha_raw[["alpha"]].values
alpha_raw["alpha"] = standardize(alpha_values)
alpha_raw.head()

Unnamed: 0,alpha,code,chgPct,secShortName
0,-0.586055,2010000001,-1.0466,浦发银行
1,-0.214693,2010000005,-2.0237,白云机场
2,1.317173,2010000010,-1.7115,上海机场
3,-1.711744,2010000011,-0.8475,包钢股份
4,-1.340383,2010000012,0.0,华能国际


我们可以使用因子暴露对原始因子进行 **风格剥离**（中性化），这里我们只选用 **市值以及Beta** 风格因子进行中性化：

In [11]:
# Including COUNTRY is equivalent to adding an intercept term.
neutralize_columns = ["BETA", "SIZE", "COUNTRY"]
specific_factor_loading = factor_loaing.loc[alpha_raw.code, neutralize_columns]

# Neutralize the signal against the selected exposures, then standardize the result.
alpha_residual = neutralize(specific_factor_loading.values, alpha_raw[["alpha"]].values)
alpha_raw["neu."] = standardize(alpha_residual)

我们可以验证这两个因子确实被从因子中剥离了

In [12]:
# For each neutralized style, print the dot product of the signal with that
# style's exposure before and after neutralization.
for style_name in ("BETA", "SIZE"):
    style_exposure = specific_factor_loading[style_name].values
    before = alpha_raw['alpha'].values @ style_exposure
    after = alpha_raw['neu.'].values @ style_exposure
    print(f"{style_name} 中性: 前: {before:.4f} 后： {after:.4f}")

BETA 中性: 前: 68.9051 后： -0.0000
SIZE 中性: 前: 57.8010 后： -0.0000


## 三、组合构建

在本节中，我们使用上一节计算得到的因子作为我们的单期预测，进行组合构建。

在这里我们使用经典的**单期均值方差模型**，但是会引入投资交易中经常需要考虑的各种限制：

1. 杠杆率的要求（不得过低或者过高）；
2. 单票集中度的要求（单票不可持仓过大）；
3. 行业板块集中度的要求（不可过高）；
4. 风格偏移的限制（对某些风格的偏好不可过高或过低）；
5. 在以上的限制下，最大化风险调整后收益。

在这里我们将展示一个例子，涵盖以上各个点；

In [83]:
# The neutralized alpha is used as the expected-return vector.
er = alpha_raw["neu."].values

# Benchmark weights aligned to the codes kept in alpha_raw, rescaled to sum to one.
benchmark_table = engine.fetch_benchmark(ref_date, 300).set_index("code")
benchmark_raw = benchmark_table.loc[alpha_raw.code]["weight"].values
benchmark = benchmark_raw / benchmark_raw.sum()

# Style factors that get an explicit deviation constraint.
constraint_risk = ['SIZE', 'SIZENL', 'BETA']
total_risk_names = constraint_risk + industry_styles + ['total']

我们将做以下控制：

1. 所有个股相对于基准，偏离不超过2%；
2. 所有行业板块，相对于基准偏离不超过3%；
3. 三个风格因子的风格偏离，不高于50%；
4. 杠杆率不低于80%，同时不高于120%

这里我们选用的基准为沪深300指数权重（做归一化）；用户也可以直接使用0作为基准，这样的话，就是一个long-short组合。

下面进入我们整个环节中最复杂的一步，构建风险控制矩阵以及限制边界：

In [96]:
# Allowed deviations from the benchmark (same values as described in the text above).
INDUSTRY_TOL = 0.03   # per-industry exposure deviation
STYLE_TOL = 0.50      # per-style-factor exposure deviation
LEVERAGE_TOL = 0.20   # total-weight deviation, expressed through the COUNTRY column
STOCK_TOL = 0.02      # per-stock weight deviation

# Constraint columns: industries, then the constrained style factors, then COUNTRY.
# `constraint_risk` is the style list defined in the previous code cell, so the
# constrained styles are declared in exactly one place.
risk_con = factor_loaing.loc[alpha_raw.code, industry_styles + constraint_risk + ["COUNTRY"]]
risk_con_matrix = risk_con.values
benchmark_risk = risk_con_matrix.T @ benchmark

# One tolerance per constraint column, in the same order as the columns above.
risk_tol = np.array(
    [INDUSTRY_TOL] * len(industry_styles)
    + [STYLE_TOL] * len(constraint_risk)
    + [LEVERAGE_TOL]
)
clbound = benchmark_risk - risk_tol
cubound = benchmark_risk + risk_tol

# Per-stock weight bounds. Written as plain array arithmetic instead of
# `np.array([tol] * n + benchmark)`, which only works through list + ndarray
# broadcasting and reads like a misplaced parenthesis.
lbound = benchmark - STOCK_TOL
ubound = benchmark + STOCK_TOL

risk_model = factor_model.get_risk_profile(alpha_raw.code.tolist())

In [97]:
# Solve the portfolio construction problem with a constrained optimizer.
risk_target = (clbound, cubound)
status, value, weights = mean_variance_builder(
    er,
    risk_model,
    benchmark,
    lbound,
    ubound,
    risk_con_matrix,
    risk_target,
)

让我们逐个检验限制条件是否被满足：

1. 个股限制

In [98]:
# Active weight of each stock relative to the benchmark, and its extremes.
diff = weights - benchmark
diff_low, diff_high = diff.min(), diff.max()
print(f"个股偏移 MIN: {diff_low:.4f} - MAX: {diff_high:.4f}")

个股偏移 MIN: -0.0200 - MAX: 0.0200


2. 行业偏离

In [99]:
# Project the active weights onto the industry columns to get per-industry deviation.
industry_exposure = risk_con[industry_styles].values
indutry_diff = industry_exposure.T @ diff
industry_low, industry_high = indutry_diff.min(), indutry_diff.max()
print(f"行业偏移 MIN: {industry_low:.4f} - MAX: {industry_high:.4f}")

行业偏移 MIN: -0.0300 - MAX: 0.0300


3. 风格偏移

In [100]:
# Style deviation is measured on the constrained style factors only.
# COUNTRY is deliberately excluded: its deviation is the change in total
# weight (leverage), which has its own bound and is reported in the next
# check, so including it here could misreport leverage drift as style drift.
style_diff = risk_con[constraint_risk].values.T @ diff
print(f"风格偏移 MIN: {style_diff.min():.4f} - MAX: {style_diff.max():.4f}")

风格偏移 MIN: -0.1853 - MAX: 0.2934


4. 杠杆率

In [101]:
# Total portfolio weight, i.e. the sum of all optimized weights.
total_weight = weights.sum()
print(f"持仓 - {total_weight:.4f}")

持仓 - 0.9900


5. 效用

> **注意**: 这里为了节省篇幅，我们简化了效用计算，实际的效用还包括风险的调整。但是简单的直接考虑期望的变化，也能看到优化器的作用。

In [103]:
# Expected-return part of the utility for the benchmark and for the optimized weights.
utility_before = benchmark @ er
utility_after = weights @ er
print(f"效用 优化前: {utility_before:.4f} - 优化后: {utility_after:.4f}")

效用 优化前: 0.2485 - 优化后: 3.8402


## 四、后记

这里我们非常细致地拆解了使用风险因子模型进行因子构造以及组合优化的过程。

实际上，alpha-mind提供了直接使用上面的算法进行回测的功能，用户没有必要自己手写上面每一步。

主要目的是通过这个例子，让用户了解alpha-mind因子建模的原理，鼓励用户自己基于alpha-mind的基础组件进行定制开发。本篇也无法涵盖alpha-mind在因子建模和组合优化上的所有功能。例如：对交易成本的控制等，留待以后的篇章向大家介绍。