Chapter 21
# 用多个可迭代对象的笛卡尔积构造多层行标签
Book_1《编程不难》 | 鸢尾花书：从加减乘除到机器学习  

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample data: an 8x4 array of random integers drawn from 0..8
# (the upper bound 9 is exclusive). No seed is set, so values differ per run.
data = np.random.randint(low=0, high=9, size=(8, 4))

In [3]:
# Two label lists that are later combined into a multi-level row index
categories = list('ABCD')
types = list('XY')

In [4]:
# Build a two-level row index from the Cartesian product of the two lists:
# `categories` is the outer level ('I'), `types` the inner level ('II').
idx_1 = pd.MultiIndex.from_product(
    iterables=[categories, types],
    names=['I', 'II'],
)
idx_1

MultiIndex([('A', 'X'),
            ('A', 'Y'),
            ('B', 'X'),
            ('B', 'Y'),
            ('C', 'X'),
            ('C', 'Y'),
            ('D', 'X'),
            ('D', 'Y')],
           names=['I', 'II'])

In [5]:
# Wrap the sample data in a DataFrame whose rows are labelled by idx_1
# and whose columns are X1..X4.
df_1 = pd.DataFrame(
    data=data,
    index=idx_1,
    columns=[f'X{k}' for k in range(1, 5)],
)
df_1

Unnamed: 0_level_0,Unnamed: 1_level_0,X1,X2,X3,X4
I,II,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,X,1,2,2,4
A,Y,0,4,1,5
B,X,5,8,8,2
B,Y,6,7,2,7
C,X,1,0,6,6
C,Y,5,1,3,5
D,X,3,4,1,3
D,Y,8,1,1,1


In [6]:
# Build a two-level row index with the order swapped:
# `types` is the outer level ('I'), `categories` the inner level ('II').
idx_2 = pd.MultiIndex.from_product(
    iterables=[types, categories],
    names=['I', 'II'],
)
idx_2

MultiIndex([('X', 'A'),
            ('X', 'B'),
            ('X', 'C'),
            ('X', 'D'),
            ('Y', 'A'),
            ('Y', 'B'),
            ('Y', 'C'),
            ('Y', 'D')],
           names=['I', 'II'])

In [7]:
# Same sample data, but with rows labelled by idx_2 (types first, then categories)
df_2 = pd.DataFrame(
    data=data,
    index=idx_2,
    columns=[f'X{k}' for k in range(1, 5)],
)
df_2

Unnamed: 0_level_0,Unnamed: 1_level_0,X1,X2,X3,X4
I,II,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,A,1,2,2,4
X,B,0,4,1,5
X,C,5,8,8,2
X,D,6,7,2,7
Y,A,1,0,6,6
Y,B,5,1,3,5
Y,C,3,4,1,3
Y,D,8,1,1,1


In [8]:
# Rename index level 0 to 'Level_0_idx'.
# inplace=True mutates the existing index object instead of returning a new one.
df_2.index.set_names(
    names='Level_0_idx',
    level=0,
    inplace=True,
)
df_2

Unnamed: 0_level_0,Unnamed: 1_level_0,X1,X2,X3,X4
Level_0_idx,II,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,A,1,2,2,4
X,B,0,4,1,5
X,C,5,8,8,2
X,D,6,7,2,7
Y,A,1,0,6,6
Y,B,5,1,3,5
Y,C,3,4,1,3
Y,D,8,1,1,1


In [9]:
# Rename index level 1 to 'Level_1_idx'.
# inplace=True mutates the existing index object instead of returning a new one.
df_2.index.set_names(
    names='Level_1_idx',
    level=1,
    inplace=True,
)
df_2

Unnamed: 0_level_0,Unnamed: 1_level_0,X1,X2,X3,X4
Level_0_idx,Level_1_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,A,1,2,2,4
X,B,0,4,1,5
X,C,5,8,8,2
X,D,6,7,2,7
Y,A,1,0,6,6
Y,B,5,1,3,5
Y,C,3,4,1,3
Y,D,8,1,1,1


In [10]:
# Fetch the label of every row at index level 0 of the DataFrame's MultiIndex
df_2.index.get_level_values(level=0)

Index(['X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y'], dtype='object', name='Level_0_idx')

In [11]:
# Fetch the label of every row at index level 1 of the DataFrame's MultiIndex
df_2.index.get_level_values(level=1)

Index(['A', 'B', 'C', 'D', 'A', 'B', 'C', 'D'], dtype='object', name='Level_1_idx')

In [12]:
# Select every row whose Level_0_idx label equals 'X'.
# Shorthand shown as an alternative in the original: df_2.xs('X')
df_2.xs(key='X', level='Level_0_idx')

Unnamed: 0_level_0,X1,X2,X3,X4
Level_1_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,2,2,4
B,0,4,1,5
C,5,8,8,2
D,6,7,2,7


In [13]:
# Select every row whose Level_1_idx label equals 'A'
df_2.xs(key='A', level='Level_1_idx')

Unnamed: 0_level_0,X1,X2,X3,X4
Level_0_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
X,1,2,2,4
Y,1,0,6,6


In [14]:
# Select the rows where Level_0_idx equals 'X' and Level_1_idx equals 'A'.
# Alternative without `level=` shown in the original: df_2.xs(('X', 'A'))
# NOTE(review): that form may return the row as a Series rather than a
# one-row DataFrame -- confirm before treating the two as interchangeable.
df_2.xs(
    key=('X', 'A'),
    level=['Level_0_idx', 'Level_1_idx'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,X1,X2,X3,X4
Level_0_idx,Level_1_idx,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
X,A,1,2,2,4
