### 按索引排序
sort_index方法

In [15]:
import pandas as pd
import numpy as np

#### Series

In [16]:
# Sample Series whose index labels are deliberately out of alphabetical
# order, so that sorting by index visibly rearranges the entries.
obj = pd.Series(
    data=range(4),
    index=['d', 'a', 'c', 'b'],
)
obj

d    0
a    1
c    2
b    3
dtype: int64

In [17]:
# Reorder the entries by index label (a, b, c, d); each value stays with its label.
obj.sort_index()

a    1
b    3
c    2
d    0
dtype: int64

#### DataFrame

In [18]:
# 2x4 grid holding 0..7; both the row labels and the column labels are
# unsorted so that sorting along either axis has a visible effect.
frame = pd.DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


默认按照行索引排序(axis=0)

In [19]:
# With no arguments the row labels are sorted (axis=0): 'one' comes before 'three'.
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


按照列索引排序(axis=1)

In [20]:
# axis=1 sorts the column labels (a, b, c, d) instead; the row order is left untouched.
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


设置降序排序

In [21]:
# ascending=False reverses the column ordering: d, c, b, a.
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


### 按值排序

#### 根据一个列的值排序

In [22]:
# Small frame written row by row, as (a, b) pairs. Column 'a' contains a
# tie (3 appears twice), which matters once a second sort key is used.
df2 = pd.DataFrame(
    [(20, 1), (3, -6), (3, 18)],
    columns=['a', 'b'],
)
df2

Unnamed: 0,a,b
0,20,1
1,3,-6
2,3,18


In [23]:
# Order the rows by the values in column 'b' (ascending); the original
# index labels travel with their rows, giving 1, 0, 2.
df2.sort_values(by='b')

Unnamed: 0,a,b
1,3,-6
0,20,1
2,3,18


#### 对多列进行排序

In [24]:
# Sort by 'a' first; the two rows tied at a == 3 are then ordered by 'b'.
df2.sort_values(by=['a', 'b'])

Unnamed: 0,a,b
1,3,-6
2,3,18
0,20,1


In [25]:
# Sort by 'b' first, then 'a'. Column 'b' has no ties here, so the second
# key never comes into play and the result matches sorting by 'b' alone.
df2.sort_values(by=['b', 'a'])

Unnamed: 0,a,b
1,3,-6
0,20,1
2,3,18


### 排名

`DataFrame.rank(axis=0, method='average', numeric_only=NoDefault.no_default, na_option='keep', ascending=True, pct=False)`
Compute numerical data ranks (1 through n) along axis.

By default, equal values are assigned a rank that is the average of the ranks of those values.

Parameters
axis: {0 or ‘index’, 1 or ‘columns’}, default 0
Index to direct ranking.

method: {‘average’, ‘min’, ‘max’, ‘first’, ‘dense’}, default ‘average’
How to rank the group of records that have the same value (i.e. ties):

average: average rank of the group

min: lowest rank in the group

max: highest rank in the group

first: ranks assigned in order they appear in the array

dense: like ‘min’, but rank always increases by 1 between groups.

numeric_only: bool, optional
For DataFrame objects, rank only numeric columns if set to True.

na_option: {‘keep’, ‘top’, ‘bottom’}, default ‘keep’
How to rank NaN values:

keep: assign NaN rank to NaN values

top: assign lowest rank to NaN values

bottom: assign highest rank to NaN values

ascending: bool, default True
Whether or not the elements should be ranked in ascending order.

pct: bool, default False
Whether or not to display the returned rankings in percentile form.

Returns
same type as caller
Return a Series or DataFrame with data ranks as values.

#### 对 Series 排名

In [26]:
# Sample values for the ranking examples; 7 and 4 each appear twice, so
# the different tie-breaking methods produce different ranks.
obj = pd.Series(
    data=[7, -5, 7, 4, 2, 0, 4],
)
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [27]:
# Default method='average': tied values share the mean of the ranks they
# occupy (the two 7s would be ranks 6 and 7, so both get 6.5).
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [28]:
# method='first' breaks ties by order of appearance: entries 0 and 2
# (both 7) get ranks 6 and 7 respectively instead of sharing the average 6.5.
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [29]:
# Rank in descending order; every member of a tied group takes the
# highest rank in that group (the two 7s both get 2).
obj.rank(ascending=False, method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

In [30]:
# Rank in descending order; every member of a tied group takes the
# lowest rank in that group (the two 7s both get 1).
obj.rank(ascending=False, method='min')

0    1.0
1    7.0
2    1.0
3    3.0
4    5.0
5    6.0
6    3.0
dtype: float64

#### 对 DataFrame 排名（axis='columns'：在每一行内跨列排名）

In [35]:
# Three numeric columns (one integer, two float) used to demonstrate
# ranking across the columns of a DataFrame.
frame = pd.DataFrame(data={
    'a': [0, 1, 0, 1],
    'b': [4.3, 7, -3, 2],
    'c': [-2, 5, 8, -2.5],
})
frame

Unnamed: 0,a,b,c
0,0,4.3,-2.0
1,1,7.0,5.0
2,0,-3.0,8.0
3,1,2.0,-2.5


In [36]:
# axis='columns' ranks the values within each row, comparing across the
# columns (row 0: a=0, b=4.3, c=-2.0 -> ranks 2, 3, 1).
frame.rank(axis='columns')

Unnamed: 0,a,b,c
0,2.0,3.0,1.0
1,1.0,3.0,2.0
2,2.0,1.0,3.0
3,2.0,3.0,1.0


In [37]:
# axis=1 is the integer spelling of axis='columns', so this produces the
# same per-row ranking as the previous cell.
frame.rank(axis=1)

Unnamed: 0,a,b,c
0,2.0,3.0,1.0
1,1.0,3.0,2.0
2,2.0,1.0,3.0
3,2.0,3.0,1.0
