In [1]:
# data
my_list = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
           ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
my_list

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

In [3]:
my_tuple = list(zip(*my_list))
my_tuple

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [6]:
import pandas as pd
import numpy as np

In [8]:
my_df = pd.DataFrame(my_tuple)
my_df

Unnamed: 0,0,1
0,bar,one
1,bar,two
2,baz,one
3,baz,two
4,foo,one
5,foo,two
6,qux,one
7,qux,two


In [11]:
my_index = pd.MultiIndex.from_tuples(my_tuple, names=['first', 'second'])
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [12]:
my_ss = pd.Series(np.random.randn(8), index=my_index)
my_ss

first  second
bar    one       0.005460
       two       1.596831
baz    one      -0.114147
       two      -0.301745
foo    one       0.349938
       two       0.030804
qux    one      -0.433177
       two       0.441841
dtype: float64

In [13]:
my_ss = pd.Series(np.random.randn(8))
my_ss.index = my_index
my_ss

first  second
bar    one       1.003028
       two       0.057666
baz    one       0.695518
       two      -0.340159
foo    one       0.584329
       two       0.142919
qux    one      -0.010138
       two       0.590941
dtype: float64

In [16]:
my_df.columns=['first', 'second']
my_df['data'] = np.random.randn(8)
my_df

Unnamed: 0,first,second,data
0,bar,one,-0.72046
1,bar,two,0.654503
2,baz,one,-0.550912
3,baz,two,-1.066085
4,foo,one,-0.245493
5,foo,two,0.466606
6,qux,one,-1.549454
7,qux,two,-2.83898


In [19]:
my_df.set_index(['first', 'second'])

Unnamed: 0_level_0,Unnamed: 1_level_0,data
first,second,Unnamed: 2_level_1
bar,one,-0.72046
bar,two,0.654503
baz,one,-0.550912
baz,two,-1.066085
foo,one,-0.245493
foo,two,0.466606
qux,one,-1.549454
qux,two,-2.83898


In [18]:
my_df.set_index(['first', 'second']).index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [20]:
my_ss

first  second
bar    one       1.003028
       two       0.057666
baz    one       0.695518
       two      -0.340159
foo    one       0.584329
       two       0.142919
qux    one      -0.010138
       two       0.590941
dtype: float64

In [21]:
my_list = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
my_list

[['bar', 'baz', 'foo', 'qux'], ['one', 'two']]

In [22]:
my_index = pd.MultiIndex.from_product(my_list)
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [38]:
my_df1 = my_df.iloc[[0, 2, 3, 4, 7]]
my_df1
# my_df2 = my_df1.set_index(['first', 'second'])
# my_df2

Unnamed: 0,first,second,data
0,bar,one,-0.72046
2,baz,one,-0.550912
3,baz,two,-1.066085
4,foo,one,-0.245493
7,qux,two,-2.83898


In [41]:
my_df1['first']

0    bar
2    baz
3    baz
4    foo
7    qux
Name: first, dtype: object

In [48]:
my_values1 = list(my_df1['first'].unique())
my_values1

['bar', 'baz', 'foo', 'qux']

In [49]:
my_values2 = list(my_df1['second'].unique())
my_values2

['one', 'two']

In [51]:
my_index = pd.MultiIndex.from_product([my_values1, my_values2])
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [54]:
my_df2 = my_df1.set_index(['first', 'second'])
my_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,data
first,second,Unnamed: 2_level_1
bar,one,-0.72046
baz,one,-0.550912
baz,two,-1.066085
foo,one,-0.245493
qux,two,-2.83898


In [55]:
my_df2.reindex(my_index)

Unnamed: 0,Unnamed: 1,data
bar,one,-0.72046
bar,two,
baz,one,-0.550912
baz,two,-1.066085
foo,one,-0.245493
foo,two,
qux,one,
qux,two,-2.83898


## resample
- time series
- 시간의 간격을 조절

In [31]:
my_df3 = pd.DataFrame(my_tuple, columns=['first', 'second'])
my_df3

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,baz,one
3,baz,two
4,foo,one
5,foo,two
6,qux,one
7,qux,two


In [33]:
my_index = pd.MultiIndex.from_frame(my_df3)
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [56]:
my_ss

first  second
bar    one       1.003028
       two       0.057666
baz    one       0.695518
       two      -0.340159
foo    one       0.584329
       two       0.142919
qux    one      -0.010138
       two       0.590941
dtype: float64

In [57]:
my_ss['bar']

second
one    1.003028
two    0.057666
dtype: float64

In [58]:
my_ss['bar']['one']

1.0030275772983641

In [59]:
my_ss['bar']

second
one    1.003028
two    0.057666
dtype: float64

In [60]:
my_ss[:, 'one']

first
bar    1.003028
baz    0.695518
foo    0.584329
qux   -0.010138
dtype: float64

In [None]:
# slice(None) == :

In [63]:
my_ss[(slice(None), 'one')]

first
bar    1.003028
baz    0.695518
foo    0.584329
qux   -0.010138
dtype: float64

In [64]:
my_index.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [69]:
my_index.names = ['first', 'second']
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [66]:
my_index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object')

In [67]:
my_index.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object')

In [70]:
my_index.get_level_values('second')

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

# 멀티인덱스의 인덱싱: 4 방법
- ('first', 'second', ...): 행과 열의 인덱싱 구분
- 문제: (:, 'one') => 파이썬 문법 에러

- 멀티인덱스의 인덱싱:
  - slice(None): **:**
    - 만약 level=4일때, 
      - [slice(None)] == [(slice(None), slice(None), slice(None), slice(None))]
  - xs:
    - my_df.xs("one", level=1)
  
  - pandas.IndexSlice:
    - my_df.loc[pd.IndexSlice[:, 'one'], pd.IndexSlice[:]]
    ```python
    idx = pd.IndexSlice
    my_df.loc[idx[:, 'one'], idx[:]]
    ```
  - my_df.loc(axis='index')[(:, 'one')]

In [71]:
my_df1

Unnamed: 0,first,second,data
0,bar,one,-0.72046
2,baz,one,-0.550912
3,baz,two,-1.066085
4,foo,one,-0.245493
7,qux,two,-2.83898


In [72]:
my_df1['first'].unique()

array(['bar', 'baz', 'foo', 'qux'], dtype=object)

In [74]:
my_df1['second'].unique()

array(['one', 'two'], dtype=object)

In [76]:
my_index = pd.MultiIndex.from_product([my_df1['first'].unique(), my_df1['second'].unique()])
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [77]:
my_df2 = my_df1.set_index(['first', 'second'])
my_df2.reindex(my_index)

Unnamed: 0,Unnamed: 1,data
bar,one,-0.72046
bar,two,
baz,one,-0.550912
baz,two,-1.066085
foo,one,-0.245493
foo,two,
qux,one,
qux,two,-2.83898


In [78]:
def mklbl(prefix, n):
  return [f"{prefix}{i}" for i in range(n)]

In [79]:
my_index1 = pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B",2),mklbl("C", 4), mklbl("D",2)])
my_index1

MultiIndex([('A0', 'B0', 'C0', 'D0'),
            ('A0', 'B0', 'C0', 'D1'),
            ('A0', 'B0', 'C1', 'D0'),
            ('A0', 'B0', 'C1', 'D1'),
            ('A0', 'B0', 'C2', 'D0'),
            ('A0', 'B0', 'C2', 'D1'),
            ('A0', 'B0', 'C3', 'D0'),
            ('A0', 'B0', 'C3', 'D1'),
            ('A0', 'B1', 'C0', 'D0'),
            ('A0', 'B1', 'C0', 'D1'),
            ('A0', 'B1', 'C1', 'D0'),
            ('A0', 'B1', 'C1', 'D1'),
            ('A0', 'B1', 'C2', 'D0'),
            ('A0', 'B1', 'C2', 'D1'),
            ('A0', 'B1', 'C3', 'D0'),
            ('A0', 'B1', 'C3', 'D1'),
            ('A1', 'B0', 'C0', 'D0'),
            ('A1', 'B0', 'C0', 'D1'),
            ('A1', 'B0', 'C1', 'D0'),
            ('A1', 'B0', 'C1', 'D1'),
            ('A1', 'B0', 'C2', 'D0'),
            ('A1', 'B0', 'C2', 'D1'),
            ('A1', 'B0', 'C3', 'D0'),
            ('A1', 'B0', 'C3', 'D1'),
            ('A1', 'B1', 'C0', 'D0'),
            ('A1', 'B1', 'C0', 'D1'),
            

In [87]:
my_tuples = [("a", "foo"),
             ("a", "bar"),
             ("b", "foo"),
             ("b", "bar")]
my_tuples

[('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bar')]

In [90]:
my_columns = pd.MultiIndex.from_tuples(my_tuples)
my_columns.names = ["lvl1", "lvl2"]
my_columns

MultiIndex([('a', 'foo'),
            ('a', 'bar'),
            ('b', 'foo'),
            ('b', 'bar')],
           names=['lvl1', 'lvl2'])

In [91]:
my_df4 = pd.DataFrame(np.random.randn(64,4))
my_df4.index = my_index1
my_df4.columns = my_columns
my_df4

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl2,foo,bar,foo,bar
A0,B0,C0,D0,1.422259,0.590082,-0.825660,0.187919
A0,B0,C0,D1,-0.647367,0.295819,0.132092,-0.900961
A0,B0,C1,D0,-0.304251,0.687993,0.220666,0.346400
A0,B0,C1,D1,-1.339309,0.419649,-2.196751,-2.364795
A0,B0,C2,D0,-0.341583,-1.026412,-0.616736,1.336479
...,...,...,...,...,...,...,...
A3,B1,C1,D1,0.842709,0.284690,-1.369467,-0.106890
A3,B1,C2,D0,2.598058,-0.939699,1.063707,-0.952107
A3,B1,C2,D1,0.017764,-0.393298,-0.258957,-0.403932
A3,B1,C3,D0,-1.917656,0.020950,1.076381,0.686760


In [92]:
my_df4.loc[(slice(None), "B0"), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl2,foo,bar,foo,bar
A0,B0,C0,D0,1.422259,0.590082,-0.82566,0.187919
A0,B0,C0,D1,-0.647367,0.295819,0.132092,-0.900961
A0,B0,C1,D0,-0.304251,0.687993,0.220666,0.3464
A0,B0,C1,D1,-1.339309,0.419649,-2.196751,-2.364795
A0,B0,C2,D0,-0.341583,-1.026412,-0.616736,1.336479
A0,B0,C2,D1,1.520603,0.141936,0.132799,-0.216547
A0,B0,C3,D0,0.185525,0.455075,-0.37124,0.298843
A0,B0,C3,D1,0.113742,2.296014,0.272868,-2.788831
A1,B0,C0,D0,0.555333,-0.798306,-1.135883,-0.528272
A1,B0,C0,D1,1.397261,-0.461011,0.255026,0.434483


## Cross Section (단면)

In [96]:
my_df4.xs("B0", level=1).shape

(32, 4)

In [97]:
my_df4.xs("B0", level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl2,foo,bar,foo,bar
A0,C0,D0,1.422259,0.590082,-0.82566,0.187919
A0,C0,D1,-0.647367,0.295819,0.132092,-0.900961
A0,C1,D0,-0.304251,0.687993,0.220666,0.3464
A0,C1,D1,-1.339309,0.419649,-2.196751,-2.364795
A0,C2,D0,-0.341583,-1.026412,-0.616736,1.336479
A0,C2,D1,1.520603,0.141936,0.132799,-0.216547
A0,C3,D0,0.185525,0.455075,-0.37124,0.298843
A0,C3,D1,0.113742,2.296014,0.272868,-2.788831
A1,C0,D0,0.555333,-0.798306,-1.135883,-0.528272
A1,C0,D1,1.397261,-0.461011,0.255026,0.434483


In [94]:
my_df4.loc[pd.IndexSlice[:, "B0"], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl2,foo,bar,foo,bar
A0,B0,C0,D0,1.422259,0.590082,-0.82566,0.187919
A0,B0,C0,D1,-0.647367,0.295819,0.132092,-0.900961
A0,B0,C1,D0,-0.304251,0.687993,0.220666,0.3464
A0,B0,C1,D1,-1.339309,0.419649,-2.196751,-2.364795
A0,B0,C2,D0,-0.341583,-1.026412,-0.616736,1.336479
A0,B0,C2,D1,1.520603,0.141936,0.132799,-0.216547
A0,B0,C3,D0,0.185525,0.455075,-0.37124,0.298843
A0,B0,C3,D1,0.113742,2.296014,0.272868,-2.788831
A1,B0,C0,D0,0.555333,-0.798306,-1.135883,-0.528272
A1,B0,C0,D1,1.397261,-0.461011,0.255026,0.434483


In [98]:
my_df4.xs(('A2', 'B0'), level=(0, 1))

Unnamed: 0_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,lvl2,foo,bar,foo,bar
C0,D0,-0.496394,-0.386589,-1.549845,0.506384
C0,D1,1.097179,0.919374,0.310395,0.350585
C1,D0,0.362606,1.586947,-1.471473,-0.003251
C1,D1,0.559896,-0.383799,0.516352,2.028165
C2,D0,0.089436,-0.538223,-0.217019,-1.871428
C2,D1,-0.682038,-2.785028,-0.254811,-0.407834
C3,D0,0.768992,-0.090472,-0.074354,-0.424894
C3,D1,0.435417,-0.787503,-0.603149,0.036424


### take 메서드

In [104]:
my_df4.take?

In [102]:
my_df4.take([0, 16, 32, 48], axis="index")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl2,foo,bar,foo,bar
A0,B0,C0,D0,1.422259,0.590082,-0.82566,0.187919
A1,B0,C0,D0,0.555333,-0.798306,-1.135883,-0.528272
A2,B0,C0,D0,-0.496394,-0.386589,-1.549845,0.506384
A3,B0,C0,D0,0.240642,0.700154,0.955237,1.104982


In [103]:
my_df4.iloc[[0, 16, 32, 48]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl1,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl2,foo,bar,foo,bar
A0,B0,C0,D0,1.422259,0.590082,-0.82566,0.187919
A1,B0,C0,D0,0.555333,-0.798306,-1.135883,-0.528272
A2,B0,C0,D0,-0.496394,-0.386589,-1.549845,0.506384
A3,B0,C0,D0,0.240642,0.700154,0.955237,1.104982


In [106]:
my_df1

Unnamed: 0,first,second,data
0,bar,one,-0.72046
2,baz,one,-0.550912
3,baz,two,-1.066085
4,foo,one,-0.245493
7,qux,two,-2.83898


In [108]:
my_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,data
first,second,Unnamed: 2_level_1
bar,one,-0.72046
baz,one,-0.550912
baz,two,-1.066085
foo,one,-0.245493
qux,two,-2.83898


In [111]:
my_df2.loc[("bar","one"):("baz","two")]

Unnamed: 0_level_0,Unnamed: 1_level_0,data
first,second,Unnamed: 2_level_1
bar,one,-0.72046
baz,one,-0.550912
baz,two,-1.066085


### slice
- slice(3) == [:3]
- slice(1,3) == [1:3]
- slice(1,3,2) == [1:3:2]

In [112]:
my_df2.loc[slice(("bar", "one"), ("baz", "two"))]

Unnamed: 0_level_0,Unnamed: 1_level_0,data
first,second,Unnamed: 2_level_1
bar,one,-0.72046
baz,one,-0.550912
baz,two,-1.066085
