In [3]:
import pandas as pd
import numpy as np
%config Completer.use_jedi=False

In [5]:
pd.__version__

'1.1.5'

## Creating a MultiIndex (hierarchical index) object
---

In [13]:
# create multiple index from array
arrays = [
         ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
         ["one", "two", "one", "two", "one", "two", "one", "two"],
     ]
tuples = list(zip(*arrays))
print(tuples)
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'seconds'])
index

[('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ('baz', 'two'), ('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')]


MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'seconds'])

In [21]:
df = pd.DataFrame(np.random.randint(10, size=(8,4)), index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3
first,seconds,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bar,one,1,8,1,7
bar,two,3,4,4,2
baz,one,5,6,8,7
baz,two,1,4,0,3
foo,one,5,4,1,2
foo,two,2,7,5,1
qux,one,8,7,9,7
qux,two,5,1,1,2


In [24]:
# every pairing of the elements in two iterables
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
index = pd.MultiIndex.from_product(iterables, names=['first', 'second'])
df = pd.DataFrame(np.random.randint(10, size=(8,4)), index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bar,one,1,8,5,1
bar,two,5,6,6,4
baz,one,9,0,0,9
baz,two,2,8,3,4
foo,one,1,1,6,6
foo,two,2,1,3,6
qux,one,8,6,8,0
qux,two,1,4,8,9


In [26]:
df.index.names

FrozenList(['first', 'second'])

In [35]:
df = pd.DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.500733,-0.032032,-1.502214,0.402773,-0.363645,0.543302,-0.713799,1.399546
B,-0.183809,0.505289,-0.833484,1.148732,-0.131585,1.031603,-1.305976,0.237982
C,-1.749014,-0.229779,-2.131623,1.084446,-0.384441,1.424387,-1.010909,1.967506


In [37]:
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [40]:
index.get_level_values('second')

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

## Basic indexing on axis with MultiIndex
---

In [43]:
print(df)
df['bar']

first        bar                 baz                 foo                 qux  \
second       one       two       one       two       one       two       one   
A      -0.500733 -0.032032 -1.502214  0.402773 -0.363645  0.543302 -0.713799   
B      -0.183809  0.505289 -0.833484  1.148732 -0.131585  1.031603 -1.305976   
C      -1.749014 -0.229779 -2.131623  1.084446 -0.384441  1.424387 -1.010909   

first             
second       two  
A       1.399546  
B       0.237982  
C       1.967506  


second,one,two
A,-0.500733,-0.032032
B,-0.183809,0.505289
C,-1.749014,-0.229779


In [45]:
df['bar']['one']

A   -0.500733
B   -0.183809
C   -1.749014
Name: one, dtype: float64

## Advanced indexing with hierarchical index
---

In [55]:
df = df.T
print(df)
df.loc[('baz', 'two')]

                     A         B         C
first second                              
bar   one    -0.500733 -0.183809 -1.749014
      two    -0.032032  0.505289 -0.229779
baz   one    -1.502214 -0.833484 -2.131623
      two     0.402773  1.148732  1.084446
foo   one    -0.363645 -0.131585 -0.384441
      two     0.543302  1.031603  1.424387
qux   one    -0.713799 -1.305976 -1.010909
      two     1.399546  0.237982  1.967506


A    0.402773
B    1.148732
C    1.084446
Name: (baz, two), dtype: float64

In [57]:
df.loc[('baz', 'two'), 'A']

0.4027725432724726

In [59]:
df.loc["bar"]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,-0.500733,-0.183809,-1.749014
two,-0.032032,0.505289,-0.229779


## Using slicers
---

In [63]:
df.T.loc[('A', 'C'),:]

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.500733,-0.032032,-1.502214,0.402773,-0.363645,0.543302,-0.713799,1.399546
C,-1.749014,-0.229779,-2.131623,1.084446,-0.384441,1.424387,-1.010909,1.967506


In [68]:
def mklbl(prefix, n):
    return ["%s%s" % (prefix, i) for i in range(n)]

miindex = pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
)
micolumns = pd.MultiIndex.from_tuples(
    [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
)
dfmi = (
     pd.DataFrame(
         np.arange(len(miindex) * len(micolumns)).reshape(
                (len(miindex), len(micolumns))
            ),
        index=miindex,
        columns=micolumns,
        )
    .sort_index()
    .sort_index(axis=1)
    )
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [71]:
dfmi.loc[(slice("A1", "A3"), slice("B0"), ["C1", "C3"]), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142
A2,B0,C3,D0,153,152,155,154
A2,B0,C3,D1,157,156,159,158
A3,B0,C1,D0,201,200,203,202
A3,B0,C1,D1,205,204,207,206


## Cross-section
---

In [74]:
print(df)
df.xs('one', level='second')

                     A         B         C
first second                              
bar   one    -0.500733 -0.183809 -1.749014
      two    -0.032032  0.505289 -0.229779
baz   one    -1.502214 -0.833484 -2.131623
      two     0.402773  1.148732  1.084446
foo   one    -0.363645 -0.131585 -0.384441
      two     0.543302  1.031603  1.424387
qux   one    -0.713799 -1.305976 -1.010909
      two     1.399546  0.237982  1.967506


Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,-0.500733,-0.183809,-1.749014
baz,-1.502214,-0.833484,-2.131623
foo,-0.363645,-0.131585,-0.384441
qux,-0.713799,-1.305976,-1.010909


In [79]:
# using slicers
df.loc[slice(None), slice('one'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-0.500733,-0.183809,-1.749014
baz,one,-1.502214,-0.833484,-2.131623
foo,one,-0.363645,-0.131585,-0.384441
qux,one,-0.713799,-1.305976,-1.010909


In [83]:
print(df.T)
df.T.xs('one', level='second', axis=1 )

first        bar                 baz                 foo                 qux  \
second       one       two       one       two       one       two       one   
A      -0.500733 -0.032032 -1.502214  0.402773 -0.363645  0.543302 -0.713799   
B      -0.183809  0.505289 -0.833484  1.148732 -0.131585  1.031603 -1.305976   
C      -1.749014 -0.229779 -2.131623  1.084446 -0.384441  1.424387 -1.010909   

first             
second       two  
A       1.399546  
B       0.237982  
C       1.967506  


first,bar,baz,foo,qux
A,-0.500733,-1.502214,-0.363645,-0.713799
B,-0.183809,-0.833484,-0.131585,-1.305976
C,-1.749014,-2.131623,-0.384441,-1.010909


## Advanced reindexing and alignment
---

## Sorting a MultiIndex
---

In [85]:
df.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-0.500733,-0.183809,-1.749014
bar,two,-0.032032,0.505289,-0.229779
baz,one,-1.502214,-0.833484,-2.131623
baz,two,0.402773,1.148732,1.084446
foo,one,-0.363645,-0.131585,-0.384441
foo,two,0.543302,1.031603,1.424387
qux,one,-0.713799,-1.305976,-1.010909
qux,two,1.399546,0.237982,1.967506


In [87]:
df.sort_index(level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-0.500733,-0.183809,-1.749014
bar,two,-0.032032,0.505289,-0.229779
baz,one,-1.502214,-0.833484,-2.131623
baz,two,0.402773,1.148732,1.084446
foo,one,-0.363645,-0.131585,-0.384441
foo,two,0.543302,1.031603,1.424387
qux,one,-0.713799,-1.305976,-1.010909
qux,two,1.399546,0.237982,1.967506
