alpha-mind 的 data 文件夹提供了对因子数据进行排序和求分位数的工具函数。

### 因子排序： *rank*
- 从小到大排序，返回每个因子值的排序序号（从 0 开始）。
- 可以进行整体排序，也可以分行业(分组)排序。

In [1]:
import numpy as np
import pandas as pd
from alphamind.data.rank import rank

# Suppose there are 10 stocks with 2 factors each, giving a 10 x 2 matrix.
factors = pd.DataFrame(np.random.rand(10, 2), columns=['factor_1', 'factor_2'])

# Rank every factor column on its own, across all 10 stocks at once.
factors['rank_1'] = rank(factors['factor_1'].values)
factors['rank_2'] = rank(factors['factor_2'].values)

# Display the factors next to their ranks.
factors


Unnamed: 0,factor_1,factor_2,rank_1,rank_2
0,0.161516,0.400066,0.0,5.0
1,0.713214,0.742485,9.0,8.0
2,0.61347,0.492425,7.0,6.0
3,0.440124,0.701836,4.0,7.0
4,0.301544,0.180132,2.0,2.0
5,0.505832,0.069305,5.0,1.0
6,0.513519,0.354891,6.0,4.0
7,0.206737,0.061027,1.0,0.0
8,0.437573,0.916369,3.0,9.0
9,0.670278,0.299662,8.0,3.0


In [2]:
# Suppose there are 10 stocks with a single factor each.
factors = pd.DataFrame(np.random.rand(10, 1), columns=['factor_1'])

# Split the 10 stocks into two industries: the first 5 rows are labelled 1.0
# and the last 5 rows are labelled 2.0.
industry = np.repeat([1.0, 2.0], 5)

# Passing `groups` ranks each stock within its own industry.
factors['rank'] = rank(factors['factor_1'].values, groups=industry)
factors

Unnamed: 0,factor_1,rank
0,0.990521,4
1,0.366384,1
2,0.098782,0
3,0.644139,2
4,0.790434,3
5,0.775871,0
6,0.871524,2
7,0.910101,3
8,0.863434,1
9,0.923118,4


### 因子分位数: *quantile*
- 根据给定组数 *(n_bins)*，按从小到大的顺序进行分组，返回每个因子所属的组别。

In [3]:
from alphamind.data.quantile import quantile

# Bucket the factor_1 values into 5 groups and store each row's group label
# in a new 'quantile' column, then display the resulting table.
factors['quantile'] = quantile(factors['factor_1'].values, n_bins=5)
factors

Unnamed: 0,factor_1,rank,quantile
0,0.990521,4,4
1,0.366384,1,0
2,0.098782,0,0
3,0.644139,2,1
4,0.790434,3,2
5,0.775871,0,1
6,0.871524,2,3
7,0.910101,3,3
8,0.863434,1,2
9,0.923118,4,4
