In [34]:
import numpy as np
import pandas as pd

In [35]:
data = pd.Series(np.random.randn(9),
                 index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
                        [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data

a  1   -0.553114
   2   -0.162858
   3    0.360178
b  1   -1.072050
   3    0.530795
c  1   -0.511526
   2    0.464624
d  2    2.264810
   3   -1.200569
dtype: float64

In [36]:
data['b']  # 类型是Series

1   -1.072050
3    0.530795
dtype: float64

In [37]:
data[['b', 'd']]

b  1   -1.072050
   3    0.530795
d  2    2.264810
   3   -1.200569
dtype: float64

In [38]:
data.loc[['b', 'd']]  # 与上等价

b  1   -1.072050
   3    0.530795
d  2    2.264810
   3   -1.200569
dtype: float64

In [39]:
data.loc['b':'d']

b  1   -1.072050
   3    0.530795
c  1   -0.511526
   2    0.464624
d  2    2.264810
   3   -1.200569
dtype: float64

In [40]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
                     index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                     columns=[['Ohio', 'Ohio', 'Colorado'],
                              ['Green', 'Red', 'Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [41]:
# 分层的层级可以有名称(可以是字符串或python对象)
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']

In [42]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [43]:
frame.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['key1', 'key2'])

In [44]:
frame.columns

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

In [45]:
# Make a MultiIndex from the cartesian product of multiple iterables.
df = pd.DataFrame(np.random.randint(2, 20, [30, 5]), columns=[*"ABCDE"],
                  index=pd.MultiIndex.from_product([['a1', 'a2', 'a3', 'a4', 'a5'],
                                                    ['b1', 'b2'], ['c1', 'c2', 'c3']], names=[*'abc']))  # 5*2*3
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D,E
a,b,c,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
a1,b1,c1,13,11,10,7,18
a1,b1,c2,17,2,15,18,9
a1,b1,c3,19,12,5,2,16
a1,b2,c1,14,4,10,13,7
a1,b2,c2,4,17,4,19,19
a1,b2,c3,16,5,12,2,12
a2,b1,c1,17,3,17,13,8
a2,b1,c2,10,16,3,8,2
a2,b1,c3,7,18,10,6,8
a2,b2,c1,17,8,5,9,6


In [46]:
idx = pd.IndexSlice  # 多索引切片
df.loc[idx[['a1', 'a3'], 'b1':'b2', ['c1', 'c3']], 'A':'D']
# 与上等价:df.loc[idx[['a1', 'a3'], 'b1':'b2', ['c1', 'c3']], idx['A':'D']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D
a,b,c,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
a1,b1,c1,13,11,10,7
a1,b1,c3,19,12,5,2
a1,b2,c1,14,4,10,13
a1,b2,c3,16,5,12,2
a3,b1,c1,6,9,6,16
a3,b1,c3,15,4,9,5
a3,b2,c1,8,5,7,4
a3,b2,c3,14,10,15,11


In [47]:
# Convert arrays to MultiIndex.
new_columns = pd.MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'],
                                         ['Green', 'Red', 'Green']],
                                        names=['state', 'color'])
new_index = pd.MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b'],
                                       [1, 2, 3, 4, 5]],
                                      names=['key1', 'key2'])
print(new_index)

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 4),
            ('b', 5)],
           names=['key1', 'key2'])


In [48]:
data1 = pd.DataFrame(np.arange(15).reshape((5, 3)),
                     columns=new_columns, index=new_index)

In [49]:
data1

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
a,3,6,7,8
b,4,9,10,11
b,5,12,13,14


In [50]:
data1.loc['a', :]

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,3,4,5
3,6,7,8


In [51]:
data1.loc[:, 'Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
a,3,6,7
b,4,9,10
b,5,12,13


In [52]:
data1.loc[:, 'Ohio'].loc[:, 'Red'].loc['a'].loc[1]  # 分层索引的依次索引

1

In [53]:
# DataFrame和Series中很多描述性/汇总性统计有一个level选项
data1.sum(level='key2', axis=0)

  data1.sum(level='key2', axis=0)


state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,3,4,5
3,6,7,8
4,9,10,11
5,12,13,14


In [54]:
data1.sum(level='key1', axis=0)

  data1.sum(level='key1', axis=0)


state,Ohio,Ohio,Colorado
color,Green,Red,Green
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,9,12,15
b,21,23,25


In [55]:
data1.sum(level='color', axis=1)

  data1.sum(level='color', axis=1)


Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
a,3,14,7
b,4,20,10
b,5,26,13


In [56]:
data1.sum(level='state', axis=1)

  data1.sum(level='state', axis=1)


Unnamed: 0_level_0,state,Ohio,Colorado
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,2
a,2,7,5
a,3,13,8
b,4,19,11
b,5,25,14


In [57]:
"""
i, jint or str
    Levels of the indices to be swapped. Can pass level name as string.
"""
data1.swaplevel('key1', 'key2', axis=0)  # 在特定轴上的MultiIndex中交换i和j级别

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
3,a,6,7,8
4,b,9,10,11
5,b,12,13,14


In [58]:
data1.swaplevel('state', 'color', axis=1)

Unnamed: 0_level_0,color,Green,Red,Green
Unnamed: 0_level_1,state,Ohio,Ohio,Colorado
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
a,3,6,7,8
b,4,9,10,11
b,5,12,13,14


In [59]:
data1.swaplevel(0, 1, axis=1).sort_index(level=0)  # 对外层进行排序

Unnamed: 0_level_0,color,Green,Red,Green
Unnamed: 0_level_1,state,Ohio,Ohio,Colorado
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
a,3,6,7,8
b,4,9,10,11
b,5,12,13,14


In [60]:
data1.columns.__class__.__name__   # MultiIndex

'MultiIndex'

In [61]:
temp = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"))
temp.columns.__class__.__name__  # Index

'Index'