In [77]:
import numpy as np
import pandas as pd

In [78]:
data = pd.DataFrame(np.arange(6).reshape((2, 3)),
                    index=pd.Index(['Ohio', 'Colorado'], name='state'),
                    columns=pd.Index(['one', 'two', 'three'],
                                     name='number'))
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [79]:
result = data.stack()  # 列索引(默认level=-1,最内层)转换为行索引
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [80]:
result.unstack()  # 行索引(默认为level=-1,最内层)引转换为列索引

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [81]:
result.unstack(level=0)  # 实现了行列转换(表格的转置)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [82]:
result.unstack(level='state')  # 也可以按照级别名称进行拆堆(等价于result.unstack(0))

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [83]:
s1 = pd.Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([4, 5, 6], index=['c', 'd', 'e'])
data1 = pd.concat([s1, s2], keys=['one', 'two'])
data1

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [84]:
'''
fill_value : replace NaN with this value if the unstack produces
    missing values
'''
data1.unstack(fill_value='空')  # 默认fill_value=None

Unnamed: 0,a,b,c,d,e
one,0,1,2,3,空
two,空,空,4,5,6


In [85]:
data1.unstack().stack()  # 默认情况下,堆叠会过滤出缺失值,因此堆叠拆堆是可逆的

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

In [86]:
data1.unstack().stack(dropna=False)  # dropna:bool，默认为True,False则不删除缺失值行

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [87]:
columns = pd.MultiIndex.from_tuples([
    ('A', 'cat', 'long'), ('B', 'cat', 'long'),
    ('A', 'dog', 'short'), ('B', 'dog', 'short')],
    names=['exp', 'animal', 'hair_length'])
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
df

exp,A,B,A,B
animal,cat,cat,dog,dog
hair_length,long,long,short,short
0,1.861684,1.563304,0.748866,0.040511
1,2.344696,1.093645,0.820117,-0.157101
2,0.639378,2.310787,2.08576,0.307314
3,-0.339122,-1.284282,0.577264,0.163245


In [88]:
# 等价于:df.stack(level=[1, 2])
df.stack(level=['animal', 'hair_length'])  # 默认为level=-1(最内层)

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,1.861684,1.563304
0,dog,short,0.748866,0.040511
1,cat,long,2.344696,1.093645
1,dog,short,0.820117,-0.157101
2,cat,long,0.639378,2.310787
2,dog,short,2.08576,0.307314
3,cat,long,-0.339122,-1.284282
3,dog,short,0.577264,0.163245
