alpha-mind 的 data 文件夹提供了对因子数据进行排序和求分位数的工具函数。

### 因子排序： *rank*
- 按从小到大的顺序排序，返回每个元素的排名序号（从 0 开始）。
- 可以进行整体排序，也可以分行业(分组)排序。

In [1]:
import numpy as np
import pandas as pd
from alphamind.data.rank import rank

# Suppose there are 10 stocks with 2 factors each, laid out as a 10x2 matrix.
factors = pd.DataFrame(
    np.random.rand(10, 2),
    columns=['factor_1', 'factor_2'],
)

# Rank every factor column on its own, across all stocks (no grouping).
factors['rank_1'] = rank(factors['factor_1'].values)
factors['rank_2'] = rank(factors['factor_2'].values)

# Last expression of the cell: display the resulting frame.
factors


Unnamed: 0,factor_1,factor_2,rank_1,rank_2
0,0.769525,0.514083,8.0,5.0
1,0.826507,0.19858,9.0,2.0
2,0.760019,0.106153,7.0,0.0
3,0.408003,0.676058,2.0,8.0
4,0.408346,0.526524,3.0,6.0
5,0.53646,0.526606,5.0,7.0
6,0.524268,0.248475,4.0,3.0
7,0.172635,0.443078,0.0,4.0
8,0.609834,0.115356,6.0,1.0
9,0.397096,0.975337,1.0,9.0


In [2]:
# Suppose there are 10 stocks with a single factor each.
factors = pd.DataFrame(np.random.rand(10, 1), columns=['factor_1'])

# Split the 10 stocks into two industries: the first 5 are labelled 1.0,
# the last 5 are labelled 2.0.
industry = np.repeat([1.0, 2.0], 5)

# Rank the factor separately inside each industry group.
factors['rank'] = rank(factors['factor_1'].values, groups=industry)

# Last expression of the cell: display the resulting frame.
factors

Unnamed: 0,factor_1,rank
0,0.713036,1
1,0.923663,2
2,0.9668,4
3,0.406231,0
4,0.963093,3
5,0.690472,2
6,0.879955,4
7,0.847376,3
8,0.446222,1
9,0.151234,0


### 因子分位数: *quantile*
- 根据给定组数*(n_bins)*，按从小到大的顺序进行分组，返回每个因子值所属的组别（组别编号从 0 开始）。

In [3]:
from alphamind.data.quantile import quantile

# Bucket the factor_1 values into n_bins=5 groups and store the group label
# of each stock in a new 'quantile' column. `factors` is the frame built in
# the previous cell, so it still carries the per-industry 'rank' column.
# NOTE(review): the displayed output suggests labels run from 0 to n_bins - 1
# in ascending factor order — confirm against the quantile implementation.
factors['quantile'] = quantile(factors['factor_1'].values, n_bins=5)
# Last expression of the cell: display the resulting frame.
factors

Unnamed: 0,factor_1,rank,quantile
0,0.713036,1,2
1,0.923663,2,3
2,0.9668,4,4
3,0.406231,0,0
4,0.963093,3,4
5,0.690472,2,1
6,0.879955,4,3
7,0.847376,3,2
8,0.446222,1,1
9,0.151234,0,0
