In [1]:
import pandas as pd
import numpy as np

### 다중 인덱스
데이터프레임에 여러 계층을 가지는 인덱스를 지정할 수 있음  
데이터프레임 생성 시 `columns` 인수로 다차원 리스트 형태를 지정하면 다중 인덱스로 지정할 수 있음

In [2]:
# Build a 5x4 demo frame whose columns carry two label levels
# (outer level: A/B, inner level: C1/C2).
# A locally seeded generator is used so that Restart & Run All reproduces the
# same values every time instead of drawing fresh random numbers.
rng = np.random.default_rng(0)
df = pd.DataFrame(
    rng.standard_normal((5, 4)).round(2),
    columns=[['A', 'A', 'B', 'B'], ['C1', 'C2', 'C1', 'C2']],
)
df

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,C1,C2,C1,C2
0,0.56,-0.24,0.34,1.61
1,0.53,-0.17,1.04,-0.42
2,-1.67,0.11,1.39,1.23
3,-0.77,-0.86,1.27,-0.63
4,-0.35,-0.41,0.58,0.54


데이터프레임의 `columns` 속성의 `names` 속성으로 각 열 인덱스에 대한 이름을 부여할 수 있음

In [3]:
# Give the outer and inner column levels names so they can be referred to by name.
df.columns = df.columns.set_names(['Cidx1', 'Cidx2'])
df

Cidx1,A,A,B,B
Cidx2,C1,C2,C1,C2
0,0.56,-0.24,0.34,1.61
1,0.53,-0.17,1.04,-0.42
2,-1.67,0.11,1.39,1.23
3,-0.77,-0.86,1.27,-0.63
4,-0.35,-0.41,0.58,0.54


데이터프레임 생성 시 `index` 인수로 다차원 리스트를 지정하면 다차원 형태의 행 인덱스를 지정할 수 있음  
행 인덱스의 이름은 데이터프레임 인스턴스의 `index` 속성의 `names` 속성으로 지정할 수 있음

In [5]:
# Build a 6x4 demo frame whose rows AND columns both carry two label levels.
# Rows: outer level M/F, inner level id_1..id_3. Columns: outer A/B, inner C1/C2.
# A locally seeded generator keeps the values identical across kernel restarts.
rng = np.random.default_rng(1)
row_labels = [['M', 'M', 'M', 'F', 'F', 'F'],
              ['id_1', 'id_2', 'id_3', 'id_1', 'id_2', 'id_3']]
col_labels = [['A', 'A', 'B', 'B'],
              ['C1', 'C2', 'C1', 'C2']]
df2 = pd.DataFrame(
    rng.standard_normal((6, 4)).round(2),
    columns=col_labels,
    index=row_labels,
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C1,C2,C1,C2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35


In [7]:
# Name both row levels and both column levels so later cells can address them by name.
df2.index = df2.index.set_names(['Ridx1', 'Ridx2'])
df2.columns = df2.columns.set_names(['Cidx1', 'Cidx2'])
df2

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35


### 열 인덱스와 행 인덱스 교환
`stack()`, `unstack()` 메서드로 열 인덱스를 행 인덱스로 또는 행 인덱스를 열 인덱스로 바꿀 수 있음  
`stack()` 메서드 : 열 인덱스를 행 인덱스로 변경  
`unstack()` 메서드 : 행 인덱스를 열 인덱스로 변경

In [8]:
# Move the outer column level ('Cidx1') into the row index.
# future_stack=True opts in to the new stack implementation; the legacy one
# emits a FutureWarning when the columns have more than one level.
df2.stack('Cidx1', future_stack=True)

  df2.stack('Cidx1')


Unnamed: 0_level_0,Unnamed: 1_level_0,Cidx2,C1,C2
Ridx1,Ridx2,Cidx1,Unnamed: 3_level_1,Unnamed: 4_level_1
M,id_1,A,-0.64,1.18
M,id_1,B,0.6,-0.39
M,id_2,A,0.21,1.38
M,id_2,B,-1.82,-0.38
M,id_3,A,1.39,-1.01
M,id_3,B,-0.2,0.71
F,id_1,A,0.54,-0.66
F,id_1,B,-1.76,-0.03
F,id_2,A,-0.07,0.55
F,id_2,B,0.67,-0.63


In [10]:
# Move the inner column level (position 1, 'Cidx2') into the row index.
# future_stack=True opts in to the new stack implementation; the legacy one
# emits a FutureWarning when the columns have more than one level.
df3 = df2.stack(level=1, future_stack=True)
df3

  df3 = df2.stack(1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Cidx1,A,B
Ridx1,Ridx2,Cidx2,Unnamed: 3_level_1,Unnamed: 4_level_1
M,id_1,C1,-0.64,0.6
M,id_1,C2,1.18,-0.39
M,id_2,C1,0.21,-1.82
M,id_2,C2,1.38,-0.38
M,id_3,C1,1.39,-0.2
M,id_3,C2,-1.01,0.71
F,id_1,C1,0.54,-1.76
F,id_1,C2,-0.66,-0.03
F,id_2,C1,-0.07,0.67
F,id_2,C2,0.55,-0.63


In [12]:
# Stack both column levels into the row index, leaving a Series indexed by
# (Ridx1, Ridx2, Cidx2, Cidx1).
# The result is derived from df2 in one chain rather than re-stacking df3 in
# place, so running this cell more than once always yields the same object.
df3 = df2.stack(level=1, future_stack=True).stack(level=0, future_stack=True)
df3

Ridx1  Ridx2  Cidx2  Cidx1
M      id_1   C1     A       -0.64
                     B        0.60
              C2     A        1.18
                     B       -0.39
       id_2   C1     A        0.21
                     B       -1.82
              C2     A        1.38
                     B       -0.38
       id_3   C1     A        1.39
                     B       -0.20
              C2     A       -1.01
                     B        0.71
F      id_1   C1     A        0.54
                     B       -1.76
              C2     A       -0.66
                     B       -0.03
       id_2   C1     A       -0.07
                     B        0.67
              C2     A        0.55
                     B       -0.63
       id_3   C1     A       -0.45
                     B       -2.56
              C2     A        0.57
                     B       -0.35
dtype: float64

In [16]:
# Pivot the inner row level (position 1) out of the row index and into the columns.
df4 = df2.unstack(level=1)
df4

Cidx1,A,A,A,A,A,A,B,B,B,B,B,B
Cidx2,C1,C1,C1,C2,C2,C2,C1,C1,C1,C2,C2,C2
Ridx2,id_1,id_2,id_3,id_1,id_2,id_3,id_1,id_2,id_3,id_1,id_2,id_3
Ridx1,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
F,0.54,-0.07,-0.45,-0.66,0.55,0.57,-1.76,0.67,-2.56,-0.03,-0.63,-0.35
M,-0.64,0.21,1.39,1.18,1.38,-1.01,0.6,-1.82,-0.2,-0.39,-0.38,0.71


### 다중 인덱스의 인덱싱
다중 인덱스를 가지고 있는 데이터프레임을 인덱싱할 때는 인덱스 값을 단일 값이 아니라 `()`로 둘러싸인 튜플로 지정해야 함

In [17]:
# Re-display df (two-level columns) as the reference for the indexing examples below.
df

Cidx1,A,A,B,B
Cidx2,C1,C2,C1,C2
0,0.56,-0.24,0.34,1.61
1,0.53,-0.17,1.04,-0.42
2,-1.67,0.11,1.39,1.23
3,-0.77,-0.86,1.27,-0.63
4,-0.35,-0.41,0.58,0.54


In [18]:
# A tuple gives one label per column level (outer 'A', inner 'C2') and selects a single column.
df[('A', 'C2')]

0   -0.24
1   -0.17
2    0.11
3   -0.86
4   -0.41
Name: (A, C2), dtype: float64

In [19]:
# Row label 0 combined with a full column tuple picks out one scalar cell.
df.loc[0, ('A', 'C1')]

0.56

만약 튜플로 지정하지 않고 단일 값으로 지정하면 최상위 레벨의 인덱스를 지정한 것으로 봄

In [20]:
# A single label is matched against the outermost column level, so this returns
# every column under 'A'.
df['A'] # giving a lower-level label on its own as a single value raises an error

Cidx2,C1,C2
0,0.56,-0.24
1,0.53,-0.17
2,-1.67,0.11
3,-0.77,-0.86
4,-0.35,-0.41


단, `iloc` 인덱서는 정수 위치만 받으므로 다중 인덱스의 라벨 튜플로는 접근할 수 없음

In [22]:
# Re-display df2 (two-level rows and columns) as the reference for the .loc examples below.
df2

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35


In [23]:
# A full row tuple (outer 'M', inner 'id_2') selects one row, returned as a Series
# indexed by the two column levels.
df2.loc[('M', 'id_2')]

Cidx1  Cidx2
A      C1       0.21
       C2       1.38
B      C1      -1.82
       C2      -0.38
Name: (M, id_2), dtype: float64

In [24]:
# A full row tuple plus a full column tuple addresses exactly one cell.
df2.loc[('M', 'id_2'), ('B', 'C1')]

-1.82

In [25]:
# All rows, one column identified by its full (outer, inner) tuple.
df2.loc[:, ('A', 'C2')]

Ridx1  Ridx2
M      id_1     1.18
       id_2     1.38
       id_3    -1.01
F      id_1    -0.66
       id_2     0.55
       id_3     0.57
Name: (A, C2), dtype: float64

In [26]:
# Append (or refresh) an ('All', 'All') totals row holding each column's sum.
# Any totals row already present is left out of the sum, so re-running this
# cell does not fold the previous total back in and double the figures.
is_total_row = df2.index.get_level_values(0) == 'All'
df2.loc[('All', 'All'), :] = df2.loc[~is_total_row].sum()
df2

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35
All,All,0.98,2.01,-5.07,-1.07


In [27]:
# A single label is matched against the outer row level; the result keeps only
# the inner row level as its index.
df2.loc['M']

Cidx1,A,A,B,B
Cidx2,C1,C2,C1,C2
Ridx2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
id_1,-0.64,1.18,0.6,-0.39
id_2,0.21,1.38,-1.82,-0.38
id_3,1.39,-1.01,-0.2,0.71


다중 인덱스 인덱싱의 튜플 내에서 슬라이싱을 하고 싶다면 `:` 대신 내장 `slice()` 객체를 사용해야 함  
`slice(마지막인덱스)`, `slice(시작인덱스, 마지막인덱스)`, `slice(시작인덱스, 마지막인덱스, 스텝)`

In [28]:
# slice(None) plays the role of ':' inside the tuple: outer level 'M', every inner label.
df2.loc[('M', slice(None)), :]

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71


In [29]:
# Same idea on the column axis: outer column level 'A', every inner column label.
df2.loc[:, ('A', slice(None))]

Unnamed: 0_level_0,Cidx1,A,A
Unnamed: 0_level_1,Cidx2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2
M,id_1,-0.64,1.18
M,id_2,0.21,1.38
M,id_3,1.39,-1.01
F,id_1,0.54,-0.66
F,id_2,-0.07,0.55
F,id_3,-0.45,0.57
All,All,0.98,2.01


### 다중 인덱스의 인덱스 순서 변경
다중 인덱스의 순서를 변경하고 싶으면 `swaplevel(i, j, axis)` 메서드를 사용함  
`i`, `j` 인자 : 순서를 변경할 인덱스의 이름 혹은 번호  
`axis` 인자 : 0일 경우 행 인덱스, 1일 경우 열 인덱스

In [30]:
# Show df2 before swapping index levels, for comparison with the results below.
df2

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35
All,All,0.98,2.01,-5.07,-1.07


In [31]:
# Exchange the two row-index levels (axis=0); the data itself is not reordered.
df2.swaplevel(i='Ridx1', j='Ridx2', axis=0)

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx2,Ridx1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
id_1,M,-0.64,1.18,0.6,-0.39
id_2,M,0.21,1.38,-1.82,-0.38
id_3,M,1.39,-1.01,-0.2,0.71
id_1,F,0.54,-0.66,-1.76,-0.03
id_2,F,-0.07,0.55,0.67,-0.63
id_3,F,-0.45,0.57,-2.56,-0.35
All,All,0.98,2.01,-5.07,-1.07


In [32]:
# Exchange the two column-index levels (axis=1); the data itself is not reordered.
df2.swaplevel(i='Cidx1', j='Cidx2', axis=1)

Unnamed: 0_level_0,Cidx2,C1,C2,C1,C2
Unnamed: 0_level_1,Cidx1,A,A,B,B
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35
All,All,0.98,2.01,-5.07,-1.07


### 다중 인덱스의 정렬
다중 인덱스를 가지고 있는 데이터프레임에서 `sort_index`로 정렬할 때 `level` 인수로 어떤 레벨의 인덱스를 기준으로 정렬할지 지정할 수 있음 (지정하지 않으면 모든 레벨을 순서대로 사용해 정렬함)

In [33]:
# Show df2 in its current (unsorted) row order, for comparison with the sorted results below.
df2

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35
All,All,0.98,2.01,-5.07,-1.07


In [34]:
# Sort rows using the outer row level (position 0) as the primary key.
df2.sort_index(level=0)

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
All,All,0.98,2.01,-5.07,-1.07
F,id_1,0.54,-0.66,-1.76,-0.03
F,id_2,-0.07,0.55,0.67,-0.63
F,id_3,-0.45,0.57,-2.56,-0.35
M,id_1,-0.64,1.18,0.6,-0.39
M,id_2,0.21,1.38,-1.82,-0.38
M,id_3,1.39,-1.01,-0.2,0.71


In [35]:
# Sort rows using the inner row level (position 1) as the primary key.
df2.sort_index(level=1)

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
All,All,0.98,2.01,-5.07,-1.07
F,id_1,0.54,-0.66,-1.76,-0.03
M,id_1,-0.64,1.18,0.6,-0.39
F,id_2,-0.07,0.55,0.67,-0.63
M,id_2,0.21,1.38,-1.82,-0.38
F,id_3,-0.45,0.57,-2.56,-0.35
M,id_3,1.39,-1.01,-0.2,0.71


In [36]:
# Sort rows by the inner level first, then by the outer level.
df2.sort_index(level=(1, 0))  # a tuple of levels is accepted as well

Unnamed: 0_level_0,Cidx1,A,A,B,B
Unnamed: 0_level_1,Cidx2,C1,C2,C1,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
All,All,0.98,2.01,-5.07,-1.07
F,id_1,0.54,-0.66,-1.76,-0.03
M,id_1,-0.64,1.18,0.6,-0.39
F,id_2,-0.07,0.55,0.67,-0.63
M,id_2,0.21,1.38,-1.82,-0.38
F,id_3,-0.45,0.57,-2.56,-0.35
M,id_3,1.39,-1.01,-0.2,0.71


In [37]:
# Sort the columns (axis=1) using the inner column level (position 1) as the primary key.
df2.sort_index(axis=1, level=1)

Unnamed: 0_level_0,Cidx1,A,B,A,B
Unnamed: 0_level_1,Cidx2,C1,C1,C2,C2
Ridx1,Ridx2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
M,id_1,-0.64,0.6,1.18,-0.39
M,id_2,0.21,-1.82,1.38,-0.38
M,id_3,1.39,-0.2,-1.01,0.71
F,id_1,0.54,-1.76,-0.66,-0.03
F,id_2,-0.07,0.67,0.55,-0.63
F,id_3,-0.45,-2.56,0.57,-0.35
All,All,0.98,-5.07,2.01,-1.07
