In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Nested dict: when passed to pd.DataFrame the outer keys become the columns
# and the inner keys become the row labels.
my_dict = {
    'kor': {'a': 10, 'b': 30, 'c': 50, 'd': 70},
    'eng': {'a': 20, 'b': 40, 'c': 60, 'd': 80},
    'math': {'a': 33, 'b': 44, 'c': 55, 'd': 66},
}

In [4]:
my_df = pd.DataFrame(my_dict)
my_df

Unnamed: 0,kor,eng,math
a,10,20,33
b,30,40,44
c,50,60,55
d,70,80,66


# Index Duplication
- 중복된 레이블을 가진 인덱스는 만들 수 있다 (O)
- 중복된 인덱스를 가진 객체에서 `reindex`로 선택할 수는 없다 — `ValueError` 발생 (X)

In [5]:
my_df.reindex(["a", "aa"])

Unnamed: 0,kor,eng,math
a,10.0,20.0,33.0
aa,,,


In [6]:
my_df.reindex(["a", "a", "b"])

Unnamed: 0,kor,eng,math
a,10,20,33
a,10,20,33
b,30,40,44


In [7]:
# Work on a copy so my_df keeps its own index, then deliberately repeat the
# label "a" to obtain a DataFrame whose index is not unique.
my_df1 = my_df.copy()
my_df1.index = ["a", "b", "a", "c"]
my_df1

Unnamed: 0,kor,eng,math
a,10,20,33
b,30,40,44
a,50,60,55
c,70,80,66


In [9]:
# my_df1 carries the label "a" twice, and reindex() refuses to work on an axis
# with duplicate labels: it raises ValueError. The error is the point of this
# cell, so catch and print it instead of letting it stop a Run All.
try:
    my_df1.reindex(["a", "b"])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [11]:
my_df1.index.is_unique

False

In [12]:
my_df.index.is_unique

True

In [13]:
my_df.index.unique()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [14]:
my_df.index.nunique()

4

In [16]:
my_df.shape[0] == my_df.index.nunique()

True

In [17]:
my_df1.shape[0] == my_df1.index.nunique()

False

In [18]:
my_df1.index.drop_duplicates()

Index(['a', 'b', 'c'], dtype='object')

# 중첩 인덱스 (Multi Index)
- Series
- DataFrame
- index & columns

In [30]:
# Parallel label arrays for a two-level index: element 0 holds the first-level
# labels, element 1 the second-level labels.
# my_list is ordered by the first level; my_list1 holds the same pairs ordered
# by the second level.
my_list = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two'] * 4,
]

my_list1 = [
    ['bar', 'baz', 'foo', 'qux'] * 2,
    ['one'] * 4 + ['two'] * 4,
]
my_list

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

## MultiIndex의 생성

In [19]:
my_df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [21]:
my_tuple = list(zip(*my_list))
my_tuple

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [23]:
my_df1 = pd.DataFrame(my_tuple)
my_df1

Unnamed: 0,0,1
0,bar,one
1,bar,two
2,baz,one
3,baz,two
4,foo,one
5,foo,two
6,qux,one
7,qux,two


In [24]:
my_index = pd.MultiIndex.from_tuples(my_tuple, names=['first', 'second'])
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [25]:
my_df1.index = my_index
my_df1

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,bar,one
bar,two,bar,two
baz,one,baz,one
baz,two,baz,two
foo,one,foo,one
foo,two,foo,two
qux,one,qux,one
qux,two,qux,two


In [32]:
my_tuple1 = list(zip(*my_list1))
my_df2 = my_df1.copy()
my_index1 = pd.MultiIndex.from_tuples(my_tuple1, names=['first', 'second'])
my_df2.index = my_index1
my_df2

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,bar,one
baz,one,bar,two
foo,one,baz,one
qux,one,baz,two
bar,two,foo,one
baz,two,foo,two
foo,two,qux,one
qux,two,qux,two


In [34]:
# One random integer in [0, 100) per entry of my_index1.
# A seeded Generator replaces the unseeded global np.random state so the values
# shown below are the same after every kernel restart.
my_ss = pd.Series(
    np.random.default_rng(34).integers(0, 100, size=len(my_index1)),
    index=my_index1,
)
my_ss

first  second
bar    one       20
baz    one       56
foo    one       19
qux    one       15
bar    two       98
baz    two       19
foo    two       37
qux    two       34
dtype: int64

In [38]:
# Three columns of random integers in [0, 100), one row per entry of my_index1.
# The row count is derived from the index instead of being hard-coded, and the
# seeded Generator keeps the displayed values reproducible.
my_df = pd.DataFrame(
    np.random.default_rng(38).integers(0, 100, size=(len(my_index1), 3)),
    index=my_index1,
)
my_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,8,97,92
baz,one,4,41,2
foo,one,48,11,71
qux,one,50,82,14
bar,two,1,2,54
baz,two,84,33,23
foo,two,84,76,91
qux,two,28,47,99


In [39]:
my_tuple1

[('bar', 'one'),
 ('baz', 'one'),
 ('foo', 'one'),
 ('qux', 'one'),
 ('bar', 'two'),
 ('baz', 'two'),
 ('foo', 'two'),
 ('qux', 'two')]

In [42]:
# from_product builds the Cartesian product of the label lists: every
# first-level label is paired with every second-level label.
my_list2 = [
    ['bar', 'baz', 'foo', 'qux'],
    ['one', 'two'],
]
my_index2 = pd.MultiIndex.from_product(my_list2)
my_index2

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [43]:
my_df1 = pd.DataFrame(my_df.values, index=my_index2)
my_df1

Unnamed: 0,Unnamed: 1,0,1,2
bar,one,8,97,92
bar,two,4,41,2
baz,one,48,11,71
baz,two,50,82,14
foo,one,1,2,54
foo,two,84,33,23
qux,one,84,76,91
qux,two,28,47,99


In [46]:
my_df3 = pd.DataFrame(my_tuple)
my_df3

Unnamed: 0,0,1
0,bar,one
1,bar,two
2,baz,one
3,baz,two
4,foo,one
5,foo,two
6,qux,one
7,qux,two


In [47]:
my_index3 = pd.MultiIndex.from_frame(my_df3)
my_index3

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=[0, 1])

In [48]:
my_ss

first  second
bar    one       20
baz    one       56
foo    one       19
qux    one       15
bar    two       98
baz    two       19
foo    two       37
qux    two       34
dtype: int64

In [51]:
my_ss1 = my_ss.copy()
my_ss1.index = ['bar', 'bar', 'baz', 'barz', 'foo', 'foo', 'qux', 'qux']
my_ss1

bar     20
bar     56
baz     19
barz    15
foo     98
foo     19
qux     37
qux     34
dtype: int64

In [49]:
my_ss.bar

second
one    20
two    98
dtype: int64

In [56]:
my_ss['bar']

second
one    20
two    98
dtype: int64

In [57]:
my_ss['bar']['one']

20

In [54]:
my_ss1['bar'].bar

bar    20
bar    56
dtype: int64

In [59]:
my_ss.bar.one

20

In [60]:
# Attribute access works for first-level labels (my_ss.bar), but 'one' is a
# second-level label, so this raises AttributeError. The error is the point of
# the cell: catch and print it so the notebook still runs top to bottom.
try:
    my_ss.one
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: ignored

In [61]:
my_ss.loc['bar']

second
one    20
two    98
dtype: int64

In [62]:
my_ss.loc[('bar','one')] 

20

In [73]:
my_df.values[:, :2]

array([[ 8, 97],
       [ 4, 41],
       [48, 11],
       [50, 82],
       [ 1,  2],
       [84, 33],
       [84, 76],
       [28, 47]])

In [74]:
my_df2 = pd.DataFrame(my_df.values[:, :2])
my_df2.index = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
my_df2.columns = ['one', 'two']
my_df2

Unnamed: 0,one,two
bar,8,97
bar,4,41
baz,48,11
baz,50,82
foo,1,2
foo,84,33
qux,84,76
qux,28,47


In [77]:
my_ss

first  second
bar    one       20
baz    one       56
foo    one       19
qux    one       15
bar    two       98
baz    two       19
foo    two       37
qux    two       34
dtype: int64

In [75]:
my_ss.loc['bar', 'one'] # ('bar', 'one')

20

In [78]:
my_df2

Unnamed: 0,one,two
bar,8,97
bar,4,41
baz,48,11
baz,50,82
foo,1,2
foo,84,33
qux,84,76
qux,28,47


In [76]:
my_df2.loc['bar', 'one']

bar    8
bar    4
Name: one, dtype: int64

In [79]:
my_ss.loc[('bar', 'one')]

20

In [80]:
# 'one' is a second-level label of my_ss's MultiIndex, so attribute access
# raises AttributeError (use my_ss.loc[:, 'one'] to select on the second level).
# Catch and print the error so the notebook still runs top to bottom.
try:
    my_ss.one
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: ignored

In [85]:
my_ss.loc[:, 'one']

first
bar    20
baz    56
foo    19
qux    15
dtype: int64

In [86]:
my_index = pd.MultiIndex.from_tuples(my_tuple, names=['first', 'second'])
my_index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [87]:
my_ss

first  second
bar    one       20
baz    one       56
foo    one       19
qux    one       15
bar    two       98
baz    two       19
foo    two       37
qux    two       34
dtype: int64

In [88]:
my_ss.index.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [89]:
my_ss.index.get_level_values('first')

Index(['bar', 'baz', 'foo', 'qux', 'bar', 'baz', 'foo', 'qux'], dtype='object', name='first')

In [90]:
my_ss.index.get_level_values(0)

Index(['bar', 'baz', 'foo', 'qux', 'bar', 'baz', 'foo', 'qux'], dtype='object', name='first')

In [91]:
my_ss.index.get_level_values('second')

Index(['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'], dtype='object', name='second')

In [92]:
my_ss.index.get_level_values(1)

Index(['one', 'one', 'one', 'one', 'two', 'two', 'two', 'two'], dtype='object', name='second')

In [109]:
# The integers 0..19. slice(0, 10) is the object form of the [0:10] syntax.
my_list10 = [*range(20)]
my_list10[slice(0, 10)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [110]:
my_list10[slice(None)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [111]:
my_list10[slice(0, 10, 2)]

[0, 2, 4, 6, 8]

In [96]:
my_ss.loc[(slice(None), 'one')]

first
bar    20
baz    56
foo    19
qux    15
dtype: int64

In [98]:
# Load seaborn's "titanic" example dataset as a DataFrame (seaborn is imported
# as `sns` in the notebook's import cell).
# NOTE(review): the original note here read "csv, excel" — presumably a
# reminder that such a table could equally be read from a CSV or Excel file.
my_titanic = sns.load_dataset('titanic')
my_titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [114]:
my_titanic1 = my_titanic.set_index(['sex', 'class']).sort_index()
my_titanic1

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,pclass,age,sibsp,parch,fare,embarked,who,adult_male,deck,embark_town,alive,alone
sex,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
female,First,1,1,38.0,1,0,71.2833,C,woman,False,C,Cherbourg,yes,False
female,First,1,1,35.0,1,0,53.1000,S,woman,False,C,Southampton,yes,False
female,First,1,1,58.0,0,0,26.5500,S,woman,False,C,Southampton,yes,True
female,First,1,1,,1,0,146.5208,C,woman,False,B,Cherbourg,yes,False
female,First,1,1,49.0,1,0,76.7292,C,woman,False,D,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
male,Third,0,3,19.0,0,0,7.8958,S,man,True,,Southampton,no,True
male,Third,0,3,,0,0,7.8958,S,man,True,,Southampton,no,True
male,Third,0,3,33.0,0,0,7.8958,S,man,True,,Southampton,no,True
male,Third,0,3,25.0,0,0,7.0500,S,man,True,,Southampton,no,True


In [119]:
my_titanic1.loc['female']

Unnamed: 0_level_0,survived,pclass,age,sibsp,parch,fare,embarked,who,adult_male,deck,embark_town,alive,alone
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
First,1,1,38.0,1,0,71.2833,C,woman,False,C,Cherbourg,yes,False
First,1,1,35.0,1,0,53.1000,S,woman,False,C,Southampton,yes,False
First,1,1,58.0,0,0,26.5500,S,woman,False,C,Southampton,yes,True
First,1,1,,1,0,146.5208,C,woman,False,B,Cherbourg,yes,False
First,1,1,49.0,1,0,76.7292,C,woman,False,D,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Third,0,3,,8,2,69.5500,S,woman,False,,Southampton,no,False
Third,1,3,15.0,0,0,7.2250,C,child,False,,Cherbourg,yes,True
Third,0,3,22.0,0,0,10.5167,S,woman,False,,Southampton,no,True
Third,0,3,39.0,0,5,29.1250,Q,woman,False,,Queenstown,no,False


In [122]:
my_titanic1.loc[('male', 'First')].age.mean()

41.28138613861386

In [123]:
my_titanic1.loc[('male', 'Second')].age.mean()

30.74070707070707

In [124]:
my_titanic1.loc[('male', 'Third')].age.mean()

26.507588932806325

In [125]:
my_titanic1.loc[('female', 'First')].age.mean()

34.61176470588235

In [126]:
my_titanic1.loc['female', 'Second'].age.mean()

28.722972972972972

In [127]:
my_titanic1.loc['female', 'Third'].age.mean()

21.75

In [101]:
my_iris = sns.load_dataset('iris')
my_iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [105]:
my_iris1 = my_iris.set_index(['species']).stack().sort_index()
my_iris1.name = 'length'
my_iris1

species                
setosa     petal_length    1.4
           petal_length    1.4
           petal_length    1.3
           petal_length    1.5
           petal_length    1.4
                          ... 
virginica  sepal_width     3.0
           sepal_width     2.5
           sepal_width     3.0
           sepal_width     3.4
           sepal_width     3.0
Name: length, Length: 600, dtype: float64

In [107]:
my_iris1.index.get_level_values(0).unique()

Index(['setosa', 'versicolor', 'virginica'], dtype='object', name='species')

In [128]:
my_iris1['setosa']

petal_length    1.4
petal_length    1.4
petal_length    1.3
petal_length    1.5
petal_length    1.4
               ... 
sepal_width     3.0
sepal_width     3.8
sepal_width     3.2
sepal_width     3.7
sepal_width     3.3
Name: length, Length: 200, dtype: float64

### slicer

In [130]:
my_list10 

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [129]:
my_list10[1:3] # 1:3 => slicer

[1, 2]

In [131]:
my_list10[slice(1,3)]

[1, 2]

In [132]:
my_list10[:]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [133]:
my_list10[slice(None)]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [136]:
my_ss

first  second
bar    one       20
baz    one       56
foo    one       19
qux    one       15
bar    two       98
baz    two       19
foo    two       37
qux    two       34
dtype: int64

In [135]:
my_ss.loc[('bar', 'one')]

20

In [137]:
my_ss.loc['bar']

second
one    20
two    98
dtype: int64

In [140]:
# A bare ':' is slice syntax only directly inside [...]; inside a parenthesised
# tuple it is a SyntaxError, which would make this whole cell unrunnable.
# Compile the expression from a string so the error can be shown without
# stopping the notebook. Inside a tuple, write slice(None) instead of ':'.
try:
    compile("my_ss.loc[(:, 'one')]", "<demo>", "eval")
except SyntaxError as err:
    print(f"SyntaxError: {err.msg}")

SyntaxError: ignored

In [141]:
my_ss.loc[(slice(None), 'one')]

first
bar    20
baz    56
foo    19
qux    15
dtype: int64

In [142]:
# Two columns of standard-normal draws, one row per entry of my_index.
# A seeded Generator replaces the unseeded global np.random state so the values
# shown below are reproducible.
rng = np.random.default_rng(142)
my_dict = {
  'A': rng.standard_normal(len(my_index)),
  'B': rng.standard_normal(len(my_index))
}
my_df4 = pd.DataFrame(my_dict, index=my_index)
my_df4

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.988888,-1.399319
bar,two,-2.599131,0.990346
baz,one,-1.359675,0.488584
baz,two,-0.566705,-1.168578
foo,one,-1.497872,0.172147
foo,two,-0.888719,0.152825
qux,one,-0.255619,-0.079363
qux,two,-0.75279,0.671324


In [145]:
my_df4['A'].loc[('bar', 'one')]

-0.9888884800672966

In [147]:
my_df4.loc['bar', 'one'] # avoid: ambiguous, the second key could be taken for a column label; pass the full row key as one tuple: .loc[('bar', 'one')]

A   -0.988888
B   -1.399319
Name: (bar, one), dtype: float64

In [148]:
my_df4.loc[('bar', 'one')]

A   -0.988888
B   -1.399319
Name: (bar, one), dtype: float64

In [150]:
my_df4.loc['bar', 'A'] # avoid (author's note): spell the partial row key out as a tuple instead: .loc[('bar',), 'A']

second
one   -0.988888
two   -2.599131
Name: A, dtype: float64

In [151]:
my_df4.loc[('bar', ), 'A']

second
one   -0.988888
two   -2.599131
Name: A, dtype: float64

In [152]:
# Three separate keys on a two-axis DataFrame: .loc rejects this with an
# IndexingError. Group the row labels into one tuple instead:
# .loc[('bar', 'one'), 'A'].
# The error is the point of the cell, so catch and print it rather than letting
# it stop a Run All.
# NOTE(review): Exception is caught (and its type name printed) because the
# import location of IndexingError appears to differ between pandas versions.
try:
    my_df4.loc['bar', 'one', 'A']
except Exception as err:
    print(f"{type(err).__name__}: {err}")

IndexingError: ignored

In [156]:
my_df4.loc[('bar', 'one'), 'A']

-0.9888884800672966

In [157]:
# Values 1..6 on a two-level index: outer labels A/B crossed with inner labels c/d/e.
my_ss2 = pd.Series(
    data=list(range(1, 7)),
    index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]),
)
my_ss2

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int64

In [159]:
my_ss2.loc[("A", "c")]

1

In [160]:
my_ss2.loc[("B", "d")]

5

In [161]:
my_ss2.loc[[("A", "c"), ("B", "d")]]

A  c    1
B  d    5
dtype: int64

In [162]:
my_ss2.loc[(["A", "B"], "d")]

A  d    2
B  d    5
dtype: int64

In [163]:
my_ss2.loc[(["A", "B"], ["d", "c"])]

A  d    2
   c    1
B  d    5
   c    4
dtype: int64

In [164]:
def mklbl(prefix, n):
    """Build the labels ``<prefix>0`` through ``<prefix>{n-1}``.

    Args:
        prefix: Value placed in front of each running number.
        n: How many labels to generate.

    Returns:
        A list of ``n`` strings, in increasing order of the running number.
    """
    labels = []
    for number in range(n):
        labels.append(f"{prefix}{number}")
    return labels

In [165]:
my_index = pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)])
my_index

MultiIndex([('A0', 'B0', 'C0', 'D0'),
            ('A0', 'B0', 'C0', 'D1'),
            ('A0', 'B0', 'C1', 'D0'),
            ('A0', 'B0', 'C1', 'D1'),
            ('A0', 'B0', 'C2', 'D0'),
            ('A0', 'B0', 'C2', 'D1'),
            ('A0', 'B0', 'C3', 'D0'),
            ('A0', 'B0', 'C3', 'D1'),
            ('A0', 'B1', 'C0', 'D0'),
            ('A0', 'B1', 'C0', 'D1'),
            ('A0', 'B1', 'C1', 'D0'),
            ('A0', 'B1', 'C1', 'D1'),
            ('A0', 'B1', 'C2', 'D0'),
            ('A0', 'B1', 'C2', 'D1'),
            ('A0', 'B1', 'C3', 'D0'),
            ('A0', 'B1', 'C3', 'D1'),
            ('A1', 'B0', 'C0', 'D0'),
            ('A1', 'B0', 'C0', 'D1'),
            ('A1', 'B0', 'C1', 'D0'),
            ('A1', 'B0', 'C1', 'D1'),
            ('A1', 'B0', 'C2', 'D0'),
            ('A1', 'B0', 'C2', 'D1'),
            ('A1', 'B0', 'C3', 'D0'),
            ('A1', 'B0', 'C3', 'D1'),
            ('A1', 'B1', 'C0', 'D0'),
            ('A1', 'B1', 'C0', 'D1'),
            

In [167]:
# Two-level column index: lvl0 takes 'a' / 'b', lvl1 takes 'foo' / 'bar',
# listed as (a, foo), (a, bar), (b, foo), (b, bar).
my_columns = pd.MultiIndex.from_tuples(
    [(outer, inner) for outer in ("a", "b") for inner in ("foo", "bar")],
    names=["lvl0", "lvl1"],
)
my_columns

MultiIndex([('a', 'foo'),
            ('a', 'bar'),
            ('b', 'foo'),
            ('b', 'bar')],
           names=['lvl0', 'lvl1'])

In [169]:
my_titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [168]:
# Random integers in [0, 100] on the multi-level row index and two-level columns.
# The shape is derived from the index and columns rather than hard-coded, and
# the seeded Generator keeps the displayed values reproducible.
my_df5 = pd.DataFrame(
    np.random.default_rng(168).integers(0, 101, size=(len(my_index), len(my_columns))),
    index=my_index,
    columns=my_columns,
)
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [172]:
# my_df5.loc[(slice("A1","A3"), slice(None), slice(None), slice(None)), :]
my_df5.loc[(slice("A1","A3"), slice(None)), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
A1,B0,C0,D0,20,73,70,80
A1,B0,C0,D1,84,9,49,93
A1,B0,C1,D0,26,79,75,45
A1,B0,C1,D1,100,80,33,5
A1,B0,C2,D0,32,8,57,23
A1,B0,C2,D1,63,62,42,62
A1,B0,C3,D0,30,19,28,70
A1,B0,C3,D1,86,74,72,70
A1,B1,C0,D0,42,0,89,20
A1,B1,C0,D1,2,31,53,95


In [173]:
pd.IndexSlice

<pandas.core.indexing._IndexSlice at 0x7f77f48d4f10>

In [175]:
my_df4

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.988888,-1.399319
bar,two,-2.599131,0.990346
baz,one,-1.359675,0.488584
baz,two,-0.566705,-1.168578
foo,one,-1.497872,0.172147
foo,two,-0.888719,0.152825
qux,one,-0.255619,-0.079363
qux,two,-0.75279,0.671324


In [None]:
my_df5.loc[pd.IndexSlice[:, :, ["C1", "C3"]],  pd.IndexSlice[:, "foo"]]

In [177]:
idx = pd.IndexSlice

In [None]:
my_df5.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]]

In [None]:
# slice

In [181]:
my_df5[("a", "foo")] > 50 # boolean Series with one True/False entry per row (64 rows)

A0  B0  C0  D0     True
            D1    False
        C1  D0     True
            D1    False
        C2  D0    False
                  ...  
A3  B1  C1  D1     True
        C2  D0     True
            D1     True
        C3  D0     True
            D1    False
Name: (a, foo), Length: 64, dtype: bool

In [183]:
my_mask = my_df5[("a", "foo")] > 50

In [184]:
my_df5.loc[idx[my_mask, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,89,23
A0,B1,C1,D0,64,2
A0,B1,C3,D1,83,12
A1,B0,C1,D1,100,33
A1,B0,C3,D1,86,72
A1,B1,C1,D0,100,20
A1,B1,C1,D1,62,22
A2,B0,C1,D0,57,42
A2,B1,C1,D0,81,31
A2,B1,C3,D0,51,26


In [None]:
my_df5.loc[:, :, ["C1", "C3"]] # my_df5.loc[(slice(None), slice(None), ["C1", "C3"])]

In [None]:
my_df5.loc(axis=0)[:, :, ["C1", "C3"]]

In [None]:
my_df5.loc(axis='index')[:, :, ["C1", "C3"]]

In [191]:
# [(), ()] == [MultiIndex, MultiColumn]
# [slice(None)] == [:] (slicer)
my_df5.loc(axis='columns')['a']

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,foo,bar
A0,B0,C0,D0,71,19
A0,B0,C0,D1,13,67
A0,B0,C1,D0,89,24
A0,B0,C1,D1,18,81
A0,B0,C2,D0,5,2
...,...,...,...,...,...
A3,B1,C1,D1,54,16
A3,B1,C2,D0,69,98
A3,B1,C2,D1,81,13
A3,B1,C3,D0,83,14


In [None]:
# MultiIndex Indexing
# [(ROW), (COLUMNS)]

In [193]:
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [197]:
my_df5.loc[(slice(None),), 'a']

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,foo,bar
A0,B0,C0,D0,71,19
A0,B0,C0,D1,13,67
A0,B0,C1,D0,89,24
A0,B0,C1,D1,18,81
A0,B0,C2,D0,5,2
...,...,...,...,...,...
A3,B1,C1,D1,54,16
A3,B1,C2,D0,69,98
A3,B1,C2,D1,81,13
A3,B1,C3,D0,83,14


In [199]:
my_df5.loc[pd.IndexSlice[:, :, :, :], 'a']

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,foo,bar
A0,B0,C0,D0,71,19
A0,B0,C0,D1,13,67
A0,B0,C1,D0,89,24
A0,B0,C1,D1,18,81
A0,B0,C2,D0,5,2
...,...,...,...,...,...
A3,B1,C1,D1,54,16
A3,B1,C2,D0,69,98
A3,B1,C2,D1,81,13
A3,B1,C3,D0,83,14


In [198]:
my_df5.loc(axis='columns')['a']

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,foo,bar
A0,B0,C0,D0,71,19
A0,B0,C0,D1,13,67
A0,B0,C1,D0,89,24
A0,B0,C1,D1,18,81
A0,B0,C2,D0,5,2
...,...,...,...,...,...
A3,B1,C1,D1,54,16
A3,B1,C2,D0,69,98
A3,B1,C2,D1,81,13
A3,B1,C3,D0,83,14


In [200]:
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [201]:
my_df5.xs("C0", level=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,bar,foo,bar
A0,B0,D0,71,19,85,1
A0,B0,D1,13,67,24,7
A0,B1,D0,41,14,49,19
A0,B1,D1,37,83,25,36
A1,B0,D0,20,73,70,80
A1,B0,D1,84,9,49,93
A1,B1,D0,42,0,89,20
A1,B1,D1,2,31,53,95
A2,B0,D0,52,89,69,24
A2,B0,D1,92,41,97,99


In [202]:
my_df5.index.names=["first", "second", "third", "forth"]
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [203]:
my_df5.xs("C0", level="third")

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,bar,foo,bar
first,second,forth,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A0,B0,D0,71,19,85,1
A0,B0,D1,13,67,24,7
A0,B1,D0,41,14,49,19
A0,B1,D1,37,83,25,36
A1,B0,D0,20,73,70,80
A1,B0,D1,84,9,49,93
A1,B1,D0,42,0,89,20
A1,B1,D1,2,31,53,95
A2,B0,D0,52,89,69,24
A2,B0,D1,92,41,97,99


In [204]:
my_df5.loc[(slice(None), slice(None), "C0", slice(None))]

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,bar,foo,bar
first,second,forth,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A0,B0,D0,71,19,85,1
A0,B0,D1,13,67,24,7
A0,B1,D0,41,14,49,19
A0,B1,D1,37,83,25,36
A1,B0,D0,20,73,70,80
A1,B0,D1,84,9,49,93
A1,B1,D0,42,0,89,20
A1,B1,D1,2,31,53,95
A2,B0,D0,52,89,69,24
A2,B0,D1,92,41,97,99


In [206]:
my_df5.xs("foo", level="lvl1", axis="columns")

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
first,second,third,forth,Unnamed: 4_level_1,Unnamed: 5_level_1
A0,B0,C0,D0,71,85
A0,B0,C0,D1,13,24
A0,B0,C1,D0,89,23
A0,B0,C1,D1,18,25
A0,B0,C2,D0,5,86
...,...,...,...,...,...
A3,B1,C1,D1,54,3
A3,B1,C2,D0,69,21
A3,B1,C2,D1,81,52
A3,B1,C3,D0,83,4


In [207]:
my_df5.xs(("A0", "C0"), level=("first", "third"))

Unnamed: 0_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,lvl1,foo,bar,foo,bar
second,forth,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
B0,D0,71,19,85,1
B0,D1,13,67,24,7
B1,D0,41,14,49,19
B1,D1,37,83,25,36


## 위치를 이용한 인덱싱

In [212]:
# 20 random integer labels in [0, 1000).
# A seeded Generator replaces the unseeded global np.random state so the values
# shown below are the same after every kernel restart.
my_index = pd.Index(np.random.default_rng(212).integers(0, 1000, 20))
my_index

Int64Index([229, 556, 262, 557, 274, 127,  78, 538, 730, 623, 299, 217, 156,
            493,  38, 215, 488, 704, 903, 115],
           dtype='int64')

In [213]:
my_pos = [0, 9, 15]
my_pos

[0, 9, 15]

In [214]:
my_index[my_pos]

Int64Index([229, 623, 215], dtype='int64')

In [215]:
my_index.take(my_pos)

Int64Index([229, 623, 215], dtype='int64')

In [218]:
# 20 standard-normal draws on the default 0..19 integer index; the seeded
# Generator keeps the values reproducible across kernel restarts.
my_ss = pd.Series(np.random.default_rng(218).standard_normal(20))

In [219]:
my_ss.take(my_pos)

0    -2.227059
9    -0.847094
15   -0.207173
dtype: float64

In [220]:
my_ss[my_pos]

0    -2.227059
9    -0.847094
15   -0.207173
dtype: float64

In [221]:
# 40 x 40 standard-normal values with default integer row and column labels;
# the seeded Generator keeps the values reproducible across kernel restarts.
my_df6 = pd.DataFrame(np.random.default_rng(221).standard_normal((40, 40)))

In [222]:
my_df6[my_pos]

Unnamed: 0,0,9,15
0,-0.200126,0.732973,-0.819133
1,0.706521,1.602799,1.728629
2,0.776728,-1.628379,0.349539
3,-0.672618,0.616867,-0.68557
4,0.350702,-0.079827,0.375356
5,0.512806,0.336438,-0.078533
6,1.062431,-1.113904,0.651794
7,-0.946399,-0.422387,-1.945282
8,-0.465562,-0.180167,-0.585662
9,-0.021858,1.984988,-0.432541


In [223]:
my_df6.take(my_pos)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
0,-0.200126,-0.974093,1.1605,1.22547,-0.711536,-1.02424,-0.108999,0.513907,-1.95947,0.732973,0.370257,1.204507,1.134517,-1.239458,-0.202047,-0.819133,-0.148289,-1.377529,-1.211657,0.821734,-0.525755,-0.002866,0.599213,0.325663,0.097219,1.196197,1.049244,0.008057,-0.136709,1.250278,0.702559,-0.513503,1.187388,0.275638,-1.365975,0.285815,1.367491,-0.06765,-1.607598,-1.192195
9,-0.021858,-0.563515,0.519088,-0.304716,-0.336805,-0.808766,-1.320519,0.41525,1.73906,1.984988,-0.235194,-0.633196,1.472011,0.94351,-0.170293,-0.432541,0.762987,0.755109,0.218518,0.838904,-0.34561,-1.652549,0.009758,1.066074,-1.412008,-2.421256,-2.46281,0.955056,-1.147456,-2.075944,0.3125,1.061241,0.215941,-1.053974,-0.991882,1.657926,-1.211552,1.510837,-0.279851,2.179542
15,-0.090585,-0.711995,-0.149218,1.924439,-1.215193,1.32991,-1.168614,-1.067312,0.103343,-0.021968,1.569121,0.26864,-0.309254,0.031884,0.901793,-0.086835,-1.32949,0.567854,-1.117715,0.598191,1.889195,1.769346,1.759878,-0.500116,0.003557,-1.024864,1.004467,2.424924,0.061505,-2.494563,-0.878984,0.568104,0.928451,0.001736,-0.022271,-0.468265,1.344204,0.417005,-0.988551,-0.402887


In [224]:
my_df6.take(my_pos, axis='columns')

Unnamed: 0,0,9,15
0,-0.200126,0.732973,-0.819133
1,0.706521,1.602799,1.728629
2,0.776728,-1.628379,0.349539
3,-0.672618,0.616867,-0.68557
4,0.350702,-0.079827,0.375356
5,0.512806,0.336438,-0.078533
6,1.062431,-1.113904,0.651794
7,-0.946399,-0.422387,-1.945282
8,-0.465562,-0.180167,-0.585662
9,-0.021858,1.984988,-0.432541


In [225]:
my_ss

0    -2.227059
1    -1.305594
2    -0.455842
3     0.010087
4     0.158844
5    -0.462258
6    -0.364765
7    -0.007782
8    -2.675969
9    -0.847094
10   -1.344618
11    0.216590
12   -2.133762
13    0.543665
14   -1.180038
15   -0.207173
16    1.915575
17    0.836828
18   -1.044256
19   -0.294995
dtype: float64

In [226]:
# Five standard-normal draws on the default 0..4 index; the seeded Generator
# keeps the values reproducible across kernel restarts.
my_ss = pd.Series(np.random.default_rng(226).standard_normal(5))
my_ss

0   -0.529141
1   -0.795178
2    1.121975
3    1.452748
4    1.128347
dtype: float64

In [227]:
my_ss[[False, True, False, True, False]]

1   -0.795178
3    1.452748
dtype: float64

In [229]:
# take() is positional, not a boolean mask: False/True are read as the
# positions 0/1, so this returns rows 0, 1, 0, 1, 0 instead of the two rows
# marked True (compare with my_ss[[False, True, False, True, False]]).
my_ss.take([False, True, False, True, False])

0   -0.529141
1   -0.795178
0   -0.529141
1   -0.795178
0   -0.529141
dtype: float64

In [228]:
my_ss.take([0, 1, 0, 1, 0])

0   -0.529141
1   -0.795178
0   -0.529141
1   -0.795178
0   -0.529141
dtype: float64

In [230]:
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [234]:
my_df5.loc["A0":"A3"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [237]:
my_df5.loc[("A0","B0"):("A3","B1")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [238]:
my_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [241]:
my_df5.loc[(slice(None),), ('a', 'foo')]

first  second  third  forth
A0     B0      C0     D0       71
                      D1       13
               C1     D0       89
                      D1       18
               C2     D0        5
                               ..
A3     B1      C1     D1       54
               C2     D0       69
                      D1       81
               C3     D0       83
                      D1       48
Name: (a, foo), Length: 64, dtype: int64

In [244]:
my_df5.loc[(pd.IndexSlice[:],), ('a', 'foo')]

first  second  third  forth
A0     B0      C0     D0       71
                      D1       13
               C1     D0       89
                      D1       18
               C2     D0        5
                               ..
A3     B1      C1     D1       54
               C2     D0       69
                      D1       81
               C3     D0       83
                      D1       48
Name: (a, foo), Length: 64, dtype: int64

In [245]:
my_df5.loc(axis='columns')[('a', 'foo')]

first  second  third  forth
A0     B0      C0     D0       71
                      D1       13
               C1     D0       89
                      D1       18
               C2     D0        5
                               ..
A3     B1      C1     D1       54
               C2     D0       69
                      D1       81
               C3     D0       83
                      D1       48
Name: (a, foo), Length: 64, dtype: int64

In [246]:
# Cross-section on the columns: lvl0 == 'a' and lvl1 == 'foo'.
# In the output shown here the result is a one-column DataFrame that still carries
# both column levels, whereas the .loc selections of ('a', 'foo') return a Series.
my_df5.xs(('a', 'foo'), level=['lvl0','lvl1'], axis='columns')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo
first,second,third,forth,Unnamed: 4_level_2
A0,B0,C0,D0,71
A0,B0,C0,D1,13
A0,B0,C1,D0,89
A0,B0,C1,D1,18
A0,B0,C2,D0,5
...,...,...,...,...
A3,B1,C1,D1,54
A3,B1,C2,D0,69
A3,B1,C2,D1,81
A3,B1,C3,D0,83


In [248]:
# Select rows whose first level is 'A0' or 'A3' and whose second level is 'B0';
# slice(None) is the programmatic equivalent of ':' (every value of the third level).
# The trailing ', :' names the column axis explicitly, so the tuple can only be read
# as a row (MultiIndex) indexer and never as a combined rows/columns key.
my_df5.loc[(['A0', 'A3'], ['B0'], slice(None)), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
A0,B0,C2,D1,18,19,60,57
A0,B0,C3,D0,4,47,71,14
A0,B0,C3,D1,27,35,30,36
A3,B0,C0,D0,38,62,91,83
A3,B0,C0,D1,40,88,54,22


In [256]:
# Invalid (SyntaxError): ':' slice notation cannot be written inside a parenthesized tuple.
# Use slice("A0", "A3") / pd.IndexSlice, or slice between whole tuples instead.
# my_df5.loc[("A0":"A3", "B0":"B1")] ( X )

In [255]:
# Row range selection: slice from the key ("A0", "B0") through ("A3", "B1").
# Label-based slices in .loc include both endpoints.
my_df5.loc[("A0", "B0"):("A3", "B1")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,bar,foo,bar
first,second,third,forth,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,71,19,85,1
A0,B0,C0,D1,13,67,24,7
A0,B0,C1,D0,89,24,23,9
A0,B0,C1,D1,18,81,25,62
A0,B0,C2,D0,5,2,86,34
...,...,...,...,...,...,...,...
A3,B1,C1,D1,54,16,3,75
A3,B1,C2,D0,69,98,21,29
A3,B1,C2,D1,81,13,52,79
A3,B1,C3,D0,83,14,4,55


In [257]:
# Load the "titanic" example dataset through sns.load_dataset and display it.
my_titanic = sns.load_dataset("titanic")
my_titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [258]:
# Build a (sex, pclass) MultiIndex from existing columns. The two columns are moved
# into the index, and the result is only displayed (not assigned), so my_titanic
# itself is left unchanged.
my_titanic.set_index(["sex", "pclass"])

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
male,3,0,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
female,1,1,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
female,3,1,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
female,1,1,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
male,3,0,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
male,...,...,...,...,...,...,...,...,...,...,...,...,...,...
male,2,0,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
female,1,1,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
female,3,0,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
male,1,1,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [259]:
# Alternative: build the MultiIndex explicitly from a two-column frame.
# The level names come from the column names (see names=[...] in the output).
my_index = pd.MultiIndex.from_frame(my_titanic[["sex", "pclass"]])
my_index

MultiIndex([(  'male', 3),
            ('female', 1),
            ('female', 3),
            ('female', 1),
            (  'male', 3),
            (  'male', 3),
            (  'male', 1),
            (  'male', 3),
            ('female', 3),
            ('female', 2),
            ...
            (  'male', 3),
            ('female', 3),
            (  'male', 2),
            (  'male', 3),
            ('female', 3),
            (  'male', 2),
            ('female', 1),
            ('female', 3),
            (  'male', 1),
            (  'male', 3)],
           names=['sex', 'pclass'], length=891)

In [260]:
# Replace the index of my_titanic in place. Unlike set_index, this keeps the
# "sex" and "pclass" columns in the frame as well.
my_titanic.index = my_index

In [261]:
# Display the frame: rows are now labelled by (sex, pclass), and both columns are still present.
my_titanic

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
male,3,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
female,1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
female,3,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
female,1,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
male,3,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
male,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
male,2,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
female,1,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
female,3,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
male,1,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [262]:
# Sort the rows by the index levels (sex, then pclass). sort_index returns a new
# frame; it is not assigned here, so my_titanic keeps its original row order.
my_titanic.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
sex,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
female,1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
female,1,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
female,1,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
female,1,1,1,female,,1,0,146.5208,C,First,woman,False,B,Cherbourg,yes,False
female,1,1,1,female,49.0,1,0,76.7292,C,First,woman,False,D,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
male,3,0,3,male,19.0,0,0,7.8958,S,Third,man,True,,Southampton,no,True
male,3,0,3,male,,0,0,7.8958,S,Third,man,True,,Southampton,no,True
male,3,0,3,male,33.0,0,0,7.8958,S,Third,man,True,,Southampton,no,True
male,3,0,3,male,25.0,0,0,7.0500,S,Third,man,True,,Southampton,no,True


In [263]:
# List the names of the example datasets that sns.load_dataset can fetch.
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'exercise',
 'flights',
 'fmri',
 'gammas',
 'geyser',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'tips',
 'titanic']

In [266]:
# Load the "diamonds" example dataset; the result is displayed only, not assigned.
# NOTE(review): the trailing note presumably marks this dataset as material for
# MultiIndex / multi-level column practice — the frame as loaded has a flat index. Confirm intent.
sns.load_dataset("diamonds") # MultiIndex, MultiColumn

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74


In [267]:
# Inspect the index of my_titanic: it is still in the original (unsorted) row order,
# because the earlier sort_index() result was not assigned back.
my_titanic.index

MultiIndex([(  'male', 3),
            ('female', 1),
            ('female', 3),
            ('female', 1),
            (  'male', 3),
            (  'male', 3),
            (  'male', 1),
            (  'male', 3),
            ('female', 3),
            ('female', 2),
            ...
            (  'male', 3),
            ('female', 3),
            (  'male', 2),
            (  'male', 3),
            ('female', 3),
            (  'male', 2),
            ('female', 1),
            ('female', 3),
            (  'male', 1),
            (  'male', 3)],
           names=['sex', 'pclass'], length=891)

# DataType
- String
- Categorical
- TimeSeries
  - Timestamp
  - Datetime

- Missing Data

- Reshape
  - merge, concat, join
  - pivot / pivot table
  - GroupBy -> aggregation

- Visualization
- matplotlib