In [1]:
import numpy as np
import  pandas as pd

In [14]:
# Build a 2x3 frame holding 0..5 whose row axis is named 'state' and whose
# column axis is named 'number'; the axis names are what stack/unstack can
# later refer to by name instead of by position.
state_index = pd.Index(['Ohio', 'Colorado'], name='state')
number_columns = pd.Index(['one', 'two', 'three'], name='number')
data = pd.DataFrame(
    np.arange(6).reshape((2, 3)),
    index=state_index,
    columns=number_columns,
)

In [15]:
# Display the frame: rows are labelled by 'state', columns by 'number'.
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [16]:
# stack pivots column labels into the row index. level=-1 (the default) picks
# the innermost column level, so the result is a Series indexed by
# (state, number).
result = data.stack(level=-1)

In [17]:
# Display the stacked Series with its two-level (state, number) row index.
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [18]:
# unstack pivots a row-index level into columns. level=-1 (the default) picks
# the innermost row level, here 'number', which restores the original layout.
result.unstack(level=-1)

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [19]:
# Unstacking the outermost row level (position 0, 'state') instead puts the
# states in the columns, i.e. the result is the transpose of `data`.
result.unstack(0)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [20]:
# A level can also be selected by its name; this is equivalent to
# result.unstack(0).
result.unstack('state')

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [21]:
# Two Series whose labels only partly overlap ('c' and 'd' are shared).
s1 = pd.Series({'a': 0, 'b': 1, 'c': 2, 'd': 3})
s2 = pd.Series({'c': 4, 'd': 5, 'e': 6})

# Concatenate them under the outer keys 'one' and 'two', producing a Series
# with a two-level row index whose inner labels differ per outer key.
data1 = pd.concat({'one': s1, 'two': s2})
data1

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [22]:
'''
fill_value : replace NaN with this value if the unstack produces
    missing values
'''
# fill_value defaults to None. Here the cells for label combinations that do
# not exist in data1 (e.g. ('one', 'e')) are filled with the given string
# instead of NaN.
data1.unstack(level=-1, fill_value='空')

Unnamed: 0,a,b,c,d,e
one,0,1,2,3,空
two,空,空,4,5,6


In [23]:
# stack drops missing values by default, so unstack followed by stack gives
# back the same labels and values. Note the values come back as float64,
# because the intermediate frame had to hold NaN.
data1.unstack(level=-1).stack(level=-1)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

In [24]:
# dropna is a bool that defaults to True; passing False keeps the rows whose
# value is missing, so every (outer, inner) label combination appears.
data1.unstack(level=-1).stack(level=-1, dropna=False)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [25]:
# Three-level column index: experiment (A/B), animal (cat/dog) and hair
# length (long/short). Each animal is paired with exactly one hair length.
columns = pd.MultiIndex.from_tuples(
    [
        ('A', 'cat', 'long'), ('B', 'cat', 'long'),
        ('A', 'dog', 'short'), ('B', 'dog', 'short'),
    ],
    names=['exp', 'animal', 'hair_length'],
)

# Draw the 4x4 standard-normal sample from a locally seeded generator so that
# Restart & Run All reproduces the same values without touching NumPy's global
# random state. Outputs recorded from an earlier, unseeded run will differ
# until the notebook is re-executed.
df = pd.DataFrame(np.random.RandomState(42).randn(4, 4), columns=columns)
df

exp,A,B,A,B
animal,cat,cat,dog,dog
hair_length,long,long,short,short
0,-1.347954,-0.693583,0.397692,-0.072737
1,-1.108119,-1.926243,1.949626,2.456465
2,-2.426339,0.046738,-0.067806,-0.501661
3,0.395709,-1.074723,0.4766,1.032024


In [26]:
# Several column levels can be stacked at once by passing a list. The levels
# are given by name here; df.stack(level=[1, 2]) is the positional equivalent.
# When no level is given, stack uses level=-1 (the innermost column level).
df.stack(['animal', 'hair_length'])

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,-1.347954,-0.693583
0,dog,short,0.397692,-0.072737
1,cat,long,-1.108119,-1.926243
1,dog,short,1.949626,2.456465
2,cat,long,-2.426339,0.046738
2,dog,short,-0.067806,-0.501661
3,cat,long,0.395709,-1.074723
3,dog,short,0.4766,1.032024
