In [1]:
import numpy as np
import pandas as pd

In [2]:
# Passing two equal-length label lists as `index` yields a two-level
# (hierarchical) index: the first list is the outer level, the second the inner.
# NOTE(review): the values are drawn without a seed, so they differ on every run.
data = pd.Series(
    np.random.randn(9),
    index=[
        [*"aaabbccdd"],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ],
)

In [3]:
data  # display the Series together with its two-level index

a  1    0.943749
   2    0.459937
   3    0.195909
b  1    0.584899
   3   -1.817889
c  1    0.067168
   2    0.640400
d  2   -1.622206
   3    0.011178
dtype: float64

In [4]:
data['b']  # the result is a Series, indexed by the remaining inner level

1    0.584899
3   -1.817889
dtype: float64

In [5]:
data[['b', 'd']]  # select several outer-level labels at once

b  1    0.584899
   3   -1.817889
d  2   -1.622206
   3    0.011178
dtype: float64

In [6]:
data.loc[['b', 'd']]  # equivalent to data[['b', 'd']]

b  1    0.584899
   3   -1.817889
d  2   -1.622206
   3    0.011178
dtype: float64

In [7]:
data.loc['b':'d']  # label slice on the outer level; both endpoints are included

b  1    0.584899
   3   -1.817889
c  1    0.067168
   2    0.640400
d  2   -1.622206
   3    0.011178
dtype: float64

In [8]:
# A DataFrame can carry a hierarchical index on both axes:
# two row levels and two column levels, each given as a pair of label lists.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    columns=[
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
    index=[
        ["a", "a", "b", "b"],
        [1, 2, 1, 2],
    ],
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [9]:
# Each level of a hierarchical index can be given a name (a string or any Python object).
# The names are assigned in place on frame's existing row and column indexes.
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']

In [10]:
frame  # display again: the level names now appear in the header

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [11]:
frame.index  # the row MultiIndex, shown as (key1, key2) tuples with its level names

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['key1', 'key2'])

In [12]:
frame.columns  # the column MultiIndex, shown as (state, color) tuples with its level names

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

In [13]:
# Build a MultiIndex from the Cartesian product of several iterables:
# 5 x 2 x 3 label combinations give the 30 row keys, with levels named a, b, c.
# NOTE(review): the integers are drawn without a seed, so they differ on every run.
df = pd.DataFrame(
    np.random.randint(2, 20, size=(30, 5)),
    index=pd.MultiIndex.from_product(
        [
            ["a1", "a2", "a3", "a4", "a5"],
            ["b1", "b2"],
            ["c1", "c2", "c3"],
        ],
        names=list("abc"),
    ),
    columns=list("ABCDE"),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D,E
a,b,c,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
a1,b1,c1,13,13,15,3,8
a1,b1,c2,12,11,10,10,14
a1,b1,c3,4,13,7,15,17
a1,b2,c1,11,10,5,6,9
a1,b2,c2,19,13,17,19,9
a1,b2,c3,19,4,17,10,14
a2,b1,c1,13,16,9,6,6
a2,b1,c2,18,2,8,18,4
a2,b1,c3,10,11,9,2,17
a2,b2,c1,2,2,15,13,12


In [14]:
idx = pd.IndexSlice # helper for writing one selector per MultiIndex level
# Rows: a in {a1, a3}, b from b1 through b2, c in {c1, c3}; columns: A through D.
df.loc[idx[['a1', 'a3'], 'b1':'b2', ['c1', 'c3']], 'A':'D']
# Equivalent: df.loc[idx[['a1', 'a3'], 'b1':'b2', ['c1', 'c3']], idx['A':'D']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,A,B,C,D
a,b,c,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
a1,b1,c1,13,13,15,3
a1,b1,c3,4,13,7,15
a1,b2,c1,11,10,5,6
a1,b2,c3,19,4,17,10
a3,b1,c1,7,10,14,10
a3,b1,c3,14,10,10,17
a3,b2,c1,19,16,8,9
a3,b2,c3,8,13,6,13


In [15]:
# Build MultiIndex objects explicitly from parallel arrays (one array per level),
# naming the levels at construction time.
new_columns = pd.MultiIndex.from_arrays(
    [
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
    names=["state", "color"],
)
new_index = pd.MultiIndex.from_arrays(
    [
        ["a", "a", "a", "b", "b"],
        [1, 2, 3, 4, 5],
    ],
    names=["key1", "key2"],
)
print(new_index)

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 4),
            ('b', 5)],
           names=['key1', 'key2'])


In [16]:
# Lay the integers 0..14 out as a 5x3 frame on the prepared row and column MultiIndexes.
data1 = pd.DataFrame(
    np.arange(15).reshape(5, 3),
    index=new_index,
    columns=new_columns,
)

In [17]:
data1  # display the frame with its named row and column levels

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
a,3,6,7,8
b,4,9,10,11
b,5,12,13,14


In [18]:
data1.loc['a',:]  # all rows under outer key 'a'; the result is indexed by key2 only

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,3,4,5
3,6,7,8


In [19]:
data1.loc[:,'Ohio']  # all columns under state 'Ohio'; the result keeps only the color level

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
a,3,6,7
b,4,9,10
b,5,12,13


In [20]:
data1.loc[:,'Ohio'].loc[:, 'Red'].loc['a'].loc[1]  # step through the hierarchical levels one .loc at a time

1

In [21]:
# Summary statistics per index level. Older pandas exposed this as a `level=`
# keyword on reductions such as `sum`; that keyword was deprecated in pandas 1.3
# and removed in 2.0, so group on the level instead.
# sort=False keeps the groups in first-appearance order, as `level=` did.
data1.groupby(level='key2', sort=False).sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0,1,2
2,3,4,5
3,6,7,8
4,9,10,11
5,12,13,14


In [22]:
# Row-wise totals per key1 label. Replaces `sum(level='key1', axis=0)`, whose
# `level=` keyword was removed in pandas 2.0; sort=False keeps first-appearance order.
data1.groupby(level='key1', sort=False).sum()

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,9,12,15
b,21,23,25


In [23]:
# Totals across columns that share a 'color' label. `sum(level=..., axis=1)` was
# removed in pandas 2.0 and `groupby(axis=1)` is deprecated, so transpose, group
# the (now row) level, and transpose back. sort=False keeps first-appearance order.
data1.T.groupby(level='color', sort=False).sum().T

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
a,3,14,7
b,4,20,10
b,5,26,13


In [24]:
# Totals across columns that share a 'state' label. `sum(level=..., axis=1)` was
# removed in pandas 2.0 and `groupby(axis=1)` is deprecated, so transpose, group
# the (now row) level, and transpose back. sort=False keeps Ohio before Colorado.
data1.T.groupby(level='state', sort=False).sum().T

Unnamed: 0_level_0,state,Ohio,Colorado
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,2
a,2,7,5
a,3,13,8
b,4,19,11
b,5,25,14


In [25]:
data1.swaplevel('key1', 'key2', axis=0)  # swap levels i and j of the MultiIndex on the given axis

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
3,a,6,7,8
4,b,9,10,11
5,b,12,13,14


In [26]:
data1.swaplevel('state', 'color', axis=1)  # same swap on the columns: color becomes the outer level

Unnamed: 0_level_0,color,Green,Red,Green
Unnamed: 0_level_1,state,Ohio,Ohio,Colorado
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
a,3,6,7,8
b,4,9,10,11
b,5,12,13,14


In [27]:
data1.swaplevel(0, 1, axis=0).sort_index(level=0)  # after the swap, sort the rows by the outer level


Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
3,a,6,7,8
4,b,9,10,11
5,b,12,13,14
