In [1]:
# 导入包
import numpy as np
import pandas as pd

## 一、重塑层次化索引

In [4]:
# Build a small students-by-subjects table to experiment with.
# Both axes get a name so their levels can be addressed by name later on.
student_index = pd.Index(["小红", "小蓝"], name="学生")
subject_columns = pd.Index(["语文", "数学", "英语"], name="科目")
data = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=student_index,
    columns=subject_columns,
)
data

科目,语文,数学,英语
学生,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
小红,0,1,2
小蓝,3,4,5


### 1.1 stack函数：DataFrame --> Series(层级索引)

In [12]:
# Pivot the column labels into the row index: the DataFrame becomes a Series
# with a two-level index. level=-1 (the default) stacks the innermost
# column level.
stackeData = data.stack(level=-1)
stackeData

学生  科目
小红  语文    0
    数学    1
    英语    2
小蓝  语文    3
    数学    4
    英语    5
dtype: int64

### 1.2 unstack函数：Series(层级索引) --> DataFrame

In [13]:
# Turn the hierarchically indexed Series back into a DataFrame.
# level=-1 (the default) moves the innermost index level into the columns.
stackeData.unstack(level=-1)

科目,语文,数学,英语
学生,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
小红,0,1,2
小蓝,3,4,5


In [14]:
# Turn the hierarchically indexed Series into a DataFrame, this time moving
# the outer index level into the columns, selected by its position (0).
stackeData.unstack(level=0)

学生,小红,小蓝
科目,Unnamed: 1_level_1,Unnamed: 2_level_1
语文,0,3
数学,1,4
英语,2,5


In [15]:
# Same as unstacking level 0, but the outer index level is selected by its
# name instead of its position.
stackeData.unstack(level="学生")

学生,小红,小蓝
科目,Unnamed: 1_level_1,Unnamed: 2_level_1
语文,0,3
数学,1,4
英语,2,5


In [16]:
# Select the level to unstack by name again; "科目" is the inner index level,
# so the result matches the default unstack().
stackeData.unstack(level="科目")

科目,语文,数学,英语
学生,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
小红,0,1,2
小蓝,3,4,5


### 1.3 不规则层级结构的Series对象

In [19]:
# First sample Series: values 0..4 labelled "a" through "e".
s1 = pd.Series(data=np.arange(5), index=["a", "b", "c", "d", "e"])
s1

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [20]:
# Second sample Series: values 0..2 labelled with only a subset of s1's
# labels ("b", "d", "e"), so the two Series do not line up completely.
s2 = pd.Series(data=np.arange(3), index=["b", "d", "e"])
s2

b    0
d    1
e    2
dtype: int64

In [22]:
# Concatenate the two Series end to end; `keys` adds an outer index level
# recording which Series each row came from. The inner labels differ
# between the two groups, so the hierarchy is irregular.
sdata = pd.concat(
    objs=[s1, s2],
    keys=["series1", "series2"],
)
sdata

series1  a    0
         b    1
         c    2
         d    3
         e    4
series2  b    0
         d    1
         e    2
dtype: int64

In [23]:
# Unstack the inner level into columns. Label combinations that do not exist
# in the Series (series2 has no "a" or "c") are filled with NaN, which also
# turns the values into floats.
sdata.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
series1,0.0,1.0,2.0,3.0,4.0
series2,,0.0,,1.0,2.0


In [24]:
# Round trip: stack the unstacked frame back into a Series.
# By default the NaN entries introduced by unstack are dropped again.
sdata.unstack(level=-1).stack(level=-1)

series1  a    0.0
         b    1.0
         c    2.0
         d    3.0
         e    4.0
series2  b    0.0
         d    1.0
         e    2.0
dtype: float64

In [26]:
# Round trip again, but keep the NaN entries by passing dropna=False.
# NOTE(review): newer pandas releases rework stack() and may deprecate the
# dropna argument -- confirm against the installed pandas version.
sdata.unstack(level=-1).stack(level=-1, dropna=False)

series1  a    0.0
         b    1.0
         c    2.0
         d    3.0
         e    4.0
series2  a    NaN
         b    0.0
         c    NaN
         d    1.0
         e    2.0
dtype: float64

## 二、轴向旋转

In [36]:
# Build a long-format stock table: one row per (date, product type) pair
# together with its stock figure. Used below to demonstrate pivoting.
stock_rows = [
    ("2022-05-22", "手机", 123),
    ("2022-05-23", "电脑", 234),
    ("2022-05-23", "配件", 543),
    ("2022-05-24", "厨具", 423),
    ("2022-05-24", "食物", 543),
]
data2 = pd.DataFrame(stock_rows, columns=["date", "type", "stock"])
data2

Unnamed: 0,date,type,stock
0,2022-05-22,手机,123
1,2022-05-23,电脑,234
2,2022-05-23,配件,543
3,2022-05-24,厨具,423
4,2022-05-24,食物,543


In [37]:
# Pivot by hand: index the rows by (date, type), then move the "type" level
# into the columns. Each date becomes one row and each type one column under
# "stock"; combinations that are absent from the data show up as NaN.
data2.set_index(keys=["date", "type"]).unstack(level="type")

Unnamed: 0_level_0,stock,stock,stock,stock,stock
type,厨具,手机,电脑,配件,食物
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2022-05-22,,123.0,,,
2022-05-23,,,234.0,543.0,
2022-05-24,423.0,,,,543.0


In [38]:
# Pivot with DataFrame.pivot: "date" becomes the row index and "type" the
# column labels. No `values` column is given, so every remaining column is
# pivoted and the result has hierarchical columns (here only "stock").
# The arguments are passed by keyword because pandas 2.0 made them
# keyword-only; the positional form raises TypeError there.
data2.pivot(index="date", columns="type")

Unnamed: 0_level_0,stock,stock,stock,stock,stock
type,厨具,手机,电脑,配件,食物
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2022-05-22,,123.0,,,
2022-05-23,,,234.0,543.0,
2022-05-24,423.0,,,,543.0


In [39]:
# Pivot with DataFrame.pivot, restricting the values to the "stock" column,
# which yields a flat (single-level) set of columns, one per type.
# The arguments are passed by keyword because pandas 2.0 made them
# keyword-only; the positional form raises TypeError there.
data2.pivot(index="date", columns="type", values="stock")

type,厨具,手机,电脑,配件,食物
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-05-22,,123.0,,,
2022-05-23,,,234.0,543.0,
2022-05-24,423.0,,,,543.0
