In [1]:
import numpy as np
import pandas as pd

## 5.2.1 Reindexing（重新索引）

In [2]:
# Float Series whose labels are intentionally not in alphabetical order.
ser = pd.Series(data=[4.5, 7.2, -5.3, 3.6], index=list('dbac'))
ser

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

##### 调用 reindex() 会按照新传入的索引重新排列数据；如果某个索引值在原对象中不存在，则会引入缺失值（NaN）。reindex() 返回一个新对象，并不会修改原始对象

In [3]:
# Reorder by the new labels; 'e' is not in `ser`, so it appears as NaN.
ser2 = ser.reindex(index=['a', 'b', 'c', 'd', 'e'])
ser2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [4]:
# The original Series is left unchanged by the reindex() call above.
ser

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

### 使用 reindex() 的参数填充缺失数据

In [5]:
# String Series keyed by the odd integers 1, 3, 5 (gaps are filled in later cells).
ser3 = pd.Series({1: 'bule', 3: 'purple', 5: 'yellow'})
ser3

1      bule
3    purple
5    yellow
dtype: object

In [6]:
# Labels missing from `ser3` receive the fill value; existing labels keep their data.
ser3.reindex(index=range(6), fill_value='fff')

0       fff
1      bule
2       fff
3    purple
4       fff
5    yellow
dtype: object

In [7]:
# Fill gaps with a fill method: 'ffill' carries the previous row's value forward.
# The first row has no predecessor, so it stays NaN.
ser3.reindex(index=range(6), method='ffill')

0       NaN
1      bule
2      bule
3    purple
4    purple
5    yellow
dtype: object

In [8]:
# 'pad' is an alias of 'ffill' (forward fill).
ser3.reindex(index=range(6), method='pad')

0       NaN
1      bule
2      bule
3    purple
4    purple
5    yellow
dtype: object

In [9]:
# 'bfill' fills each gap with the next valid value (backward fill).
ser3.reindex(index=range(6), method='bfill')

0      bule
1      bule
2    purple
3    purple
4    yellow
5    yellow
dtype: object

### 对于 DataFrame，reindex 可以更改行索引（row index）、列索引（column index），也可以同时更改两者

In [10]:
# 3x3 integer frame (values 0..8) with row labels a/c/d and city-name columns.
df = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=list('acd'),
    columns=['北京', '上海', '深圳'],
)
df

Unnamed: 0,北京,上海,深圳
a,0,1,2
c,3,4,5
d,6,7,8


In [11]:
# Reindex the rows (the default axis); the new row 'b' is forward-filled from 'a'.
df2 = df.reindex(index=list('abcd'), method='ffill')
df2

Unnamed: 0,北京,上海,深圳
a,0,1,2
b,0,1,2
c,3,4,5
d,6,7,8


更改columns index:

In [12]:
# Reindex along the columns; '广州' is not in `df`, so that column is all NaN.
columnsSort = ['上海', '北京', '广州', '深圳']
df3 = df.reindex(labels=columnsSort, axis='columns')
df3

Unnamed: 0,上海,北京,广州,深圳
a,1,0,,2
c,4,3,,5
d,7,6,,8


In [13]:
# Rows and columns can be reindexed in a single call.
# NOTE(review): the original author states that fill methods only apply
# row-wise — confirm against the pandas version in use.
df4 = df.reindex(index=list('abcd'), columns=columnsSort)
df4

Unnamed: 0,上海,北京,广州,深圳
a,1.0,0.0,,2.0
b,,,,
c,4.0,3.0,,5.0
d,7.0,6.0,,8.0


在旧版本的 pandas 中，还可以使用 loc、ix 实现更简洁的重新索引。注意：ix 已在 pandas 1.0 中被移除，并且自 1.0 起向 loc 传入不存在的标签会抛出 KeyError；在新版本中请使用 reindex()

In [14]:
# Label-based selection via .loc raises KeyError when any requested label is
# missing (pandas >= 1.0), and 'b' / '广州' are not present in `df`.
# reindex() is the supported way to select labels that may be absent: it
# inserts NaN rows/columns for them instead of raising.
df5 = df.reindex(index=['a', 'b', 'c', 'd'], columns=columnsSort)
df5

Unnamed: 0,上海,北京,广州,深圳
a,1.0,0.0,,2.0
b,,,,
c,4.0,3.0,,5.0
d,7.0,6.0,,8.0


In [15]:
# Remove this section's example objects from the namespace.
del ser, ser2, ser3
del df, df2, df3, df4, df5