In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small float Series with string labels in a deliberately unsorted order;
# the mapping's insertion order becomes the index order.
obj1 = pd.Series({'d': 4.5, 'b': 7.2, 'a': -5.3, 'c': 3.6})
obj1

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [3]:
# Conform obj1 to a new list of labels. reindex() returns a new Series and
# leaves obj1 unchanged.
new_index = ['a', 'f', 'b', 'd', 'e']
obj2 = obj1.reindex(new_index)
# What happens to each label:
# 1. Labels present in both indexes ('a', 'b', 'd') keep their values
#    (e.g. obj1['a'] == obj2['a']).
# 2. Labels that exist only in new_index ('f', 'e') are filled with NaN
#    (e.g. obj2['f'] is NaN).
# 3. Labels missing from new_index ('c') are dropped.
#
# Use case: changing the order of a Series index.
obj2

a   -5.3
f    NaN
b    7.2
d    4.5
e    NaN
dtype: float64

In [4]:
# 3x3 integer frame (values 0..8, filled row by row) with string row labels
# and state names as column labels.
frame1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
frame1

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [5]:
# Use case: changing the order of a DataFrame's index / columns.
# Only the row labels are reindexed here; 'b' is new, so its row is all NaN.
# frame1 itself is not modified.
frame1.reindex(index=['a', 'b', 'd', 'c'])

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
d,6.0,7.0,8.0
c,3.0,4.0,5.0


In [6]:
# Notes on two reindex() parameters (summarised from the pandas documentation):
#
# method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
#     How to fill the holes created by reindexing. Only applicable to a
#     DataFrame/Series whose index is monotonically increasing or decreasing.
#       None (default)   : do not fill gaps
#       pad / ffill      : propagate the last valid observation forward
#       backfill / bfill : use the next valid observation to fill the gap
#       nearest          : use the nearest valid observation to fill the gap
#
# fill_value : scalar, default NaN (np.nan)
#     Value used for missing entries; may be any "compatible" value.
#
# Below, 'Utah' is a new column and is filled with 0 instead of NaN, while
# 'California' and 'Texas' keep their data in the requested order. 'Ohio' is
# not requested, so it is dropped from the result.
states = ['Utah', 'California', 'Texas']
frame1.reindex(columns=states, fill_value=0)

Unnamed: 0,Utah,California,Texas
a,0,2,1
c,0,5,4
d,0,8,7


In [7]:
# Six consecutive daily timestamps starting on 1 Jan 2010.
date_index = pd.date_range(start='1/1/2010', periods=6, freq='D')
# Single-column price frame; the third day is intentionally missing.
df = pd.Series(
    [100, 101, np.nan, 100, 89, 88],
    index=date_index,
    name="prices",
).to_frame()
df

Unnamed: 0,prices
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,88.0


In [8]:
# A wider ten-day range that starts three days before df's first date.
date_index2 = pd.date_range(start='12/29/2009', periods=10, freq='D')
# No fill method is given, so dates that df does not contain come back as NaN.
df.reindex(index=date_index2)

Unnamed: 0,prices
2009-12-29,
2009-12-30,
2009-12-31,
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,88.0
2010-01-07,


In [9]:
# Two-level index: every animal (outer level) paired with every measurement
# (inner level). The outer codes repeat each animal three times; the inner
# codes cycle through the three measurements.
midx = pd.MultiIndex(
    levels=[
        ['lama', 'cow', 'falcon'],
        ['speed', 'weight', 'length'],
    ],
    codes=[
        np.repeat(np.arange(3), 3),
        np.tile(np.arange(3), 3),
    ],
)
complex_s = pd.Series(
    [45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
    index=midx,
)
complex_s

lama    speed      45.0
        weight    200.0
        length      1.2
cow     speed      30.0
        weight    250.0
        length      1.5
falcon  speed     320.0
        weight      1.0
        length      0.3
dtype: float64

In [10]:
# Reindexing when the index is hierarchical (a MultiIndex).
# The target index is the full 4x4 combination of animals and measurements;
# it adds the 'new_lama' animal and the 'age' measurement, neither of which
# exists in complex_s.
midx1 = pd.MultiIndex(
    levels=[
        ['lama', 'falcon', 'cow', 'new_lama'],
        ['speed', 'length', 'weight', 'age'],
    ],
    codes=[
        np.repeat(np.arange(4), 4),
        np.tile(np.arange(4), 4),
    ],
)
complex_s.reindex(midx1)

lama      speed      45.0
          length      1.2
          weight    200.0
          age         NaN
falcon    speed     320.0
          length      0.3
          weight      1.0
          age         NaN
cow       speed      30.0
          length      1.5
          weight    250.0
          age         NaN
new_lama  speed       NaN
          length      NaN
          weight      NaN
          age         NaN
dtype: float64