alpha-mind 的 `data` 模块提供了对因子数据进行排序（rank）和求分位数（quantile）的工具函数。

### 因子排序： *rank*
- 按因子值从小到大排序，返回每个元素的排名序号（从 0 开始，最小值的序号为 0）。
- 可以进行整体排序，也可以分行业(分组)排序。

In [13]:
import numpy as np
import pandas as pd
from alphamind.data.rank import rank

# Suppose there are 10 stocks, each with 2 factors, forming a 10x2 matrix.
factors = pd.DataFrame(np.random.rand(10, 2))
factors.columns = ['factor_1', 'factor_2']

# Rank each factor column across all stocks (ascending, 0-based).
# Pass the underlying ndarray (.values) rather than the Series: `rank`
# reshapes its input internally (`x = x.reshape((-1, 1))`), and calling
# reshape on a pandas Series triggers a warning -- presumably the
# Series.reshape deprecation; confirm against the installed pandas version.
factors['rank_1'] = rank(factors['factor_1'].values)
factors['rank_2'] = rank(factors['factor_2'].values)

factors


  x = x.reshape((-1, 1))


Unnamed: 0,factor_1,factor_2,rank_1,rank_2
0,0.940105,0.328598,9.0,4.0
1,0.473932,0.334819,7.0,5.0
2,0.337995,0.335863,4.0,6.0
3,0.371221,0.286139,5.0,3.0
4,0.462262,0.182403,6.0,0.0
5,0.126732,0.843093,2.0,8.0
6,0.762878,0.472779,8.0,7.0
7,0.075146,0.924889,0.0,9.0
8,0.238197,0.206311,3.0,1.0
9,0.112166,0.240062,1.0,2.0


In [15]:
# Suppose there are 10 stocks, each with a single factor.
factors = pd.DataFrame(np.random.rand(10, 1))
factors.columns = ['factor_1']

# Suppose the 10 stocks belong to two industries: the first 5 share one
# label and the last 5 share another.
industry = np.concatenate([np.array([1.0]*5), np.array([2.0]*5)])

# With `groups` supplied, ranking is done within each industry, so the
# ranks restart at 0 for every group.
# Pass the underlying ndarray (.values) rather than the Series: `rank`
# reshapes its input internally (`x = x.reshape((-1, 1))`), and calling
# reshape on a pandas Series triggers a warning -- presumably the
# Series.reshape deprecation; confirm against the installed pandas version.
factors['rank'] = rank(factors['factor_1'].values, groups=industry)
factors

  x = x.reshape((-1, 1))


Unnamed: 0,factor_1,rank
0,0.765457,3
1,0.162792,0
2,0.431309,1
3,0.633497,2
4,0.943491,4
5,0.477439,0
6,0.742096,2
7,0.561797,1
8,0.974109,4
9,0.921705,3


### 因子分位数: *quantile*
- 根据给定组数 *(n_bins)*，按从小到大的顺序进行分组，返回每个因子值所属的组别（编号从 0 到 n_bins-1）。

In [22]:
from alphamind.data.quantile import quantile

# Bucket factor_1 into 5 groups in ascending order of value and record each
# stock's group label. Unlike the 'rank' column, which was computed per
# industry, the quantile here is taken over all 10 stocks together.
factor_values = factors['factor_1']
factors['quantile'] = quantile(factor_values, n_bins=5)
factors


Unnamed: 0,factor_1,rank,quantile
0,0.765457,3,3
1,0.162792,0,0
2,0.431309,1,0
3,0.633497,2,2
4,0.943491,4,4
5,0.477439,0,1
6,0.742096,2,2
7,0.561797,1,1
8,0.974109,4,4
9,0.921705,3,3
