In [1]:
import pandas as pd
import numpy as np

# MultiIndex

In [2]:
 arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    
tuples=list(zip(*arrays))

In [3]:
index=pd.MultiIndex.from_tuples(tuples,names=['first','second'])

In [4]:
s=pd.Series(np.random.randn(8),index=index)
s

first  second
bar    one       1.107267
       two      -0.337880
baz    one       0.797592
       two       0.058503
foo    one       2.037453
       two       0.843734
qux    one       0.387345
       two       0.545007
dtype: float64

In [5]:
iterables=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
pd.MultiIndex.from_product(iterables=iterables,names=['first','second'])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

###### Exploration

In [6]:
df=pd.DataFrame(np.random.randint(3,100,(3,8)),index=list('ABC'),columns=index)
df.rename_axis('letters',axis=0)

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
letters,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
A,17,53,46,59,63,35,73,55
B,45,68,85,93,24,16,42,85
C,48,41,53,50,82,96,56,59


In [7]:
df.index.get_level_values(0)
df.columns.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

# Selection and Slicing

In [8]:
df=df.T
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,17,45,48
bar,two,53,68,41
baz,one,46,85,53
baz,two,59,93,50
foo,one,63,24,82


###### df.loc[():()]

In [9]:
df.loc[('bar',):('foo','two'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,17,45,48
bar,two,53,68,41
baz,one,46,85,53
baz,two,59,93,50
foo,one,63,24,82
foo,two,35,16,96


In [10]:
df.loc[('bar',)]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,17,45,48
two,53,68,41


In [11]:
df.loc[('bar','two'),'A']

53

In [12]:
df.loc['bar','two'] #not suggested

A    53
B    68
C    41
Name: (bar, two), dtype: int32

In [13]:
df.loc[('bar',):('baz','two'),'A']

first  second
bar    one       17
       two       53
baz    one       46
       two       59
Name: A, dtype: int32

In [14]:
df.loc['bar':'baz']

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,17,45,48
bar,two,53,68,41
baz,one,46,85,53
baz,two,59,93,50


In [15]:
df.loc[[('bar','one'),('baz','two')]]
df.loc[[('bar','one'),('baz','two')],['A','B']]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,17,45
baz,two,59,93


In [16]:
s=pd.Series(np.arange(1,7),index=pd.MultiIndex.from_product(
[list('AB'),list('cde')]))
s

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int32

In [17]:
s.loc[[('A','c'),('B','e')]] #list of tuples grabs those values

A  c    1
B  e    6
dtype: int32

In [18]:
s.loc[(['A','B'],['c','d'])] #tuples of lists

A  c    1
   d    2
B  c    4
   d    5
dtype: int32

In [19]:
def mklbl(prefix, n):
    return ["%s%s" % (prefix, i) for i in range(n)]

miindex = pd.MultiIndex.from_product([mklbl('A', 4),
mklbl('B', 2),
mklbl('C', 4),
mklbl('D', 2)])

micolumns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'),
('b', 'foo'), ('b', 'bah')],
names=['lvl0', 'lvl1'])

dfmi = pd.DataFrame(np.arange(len(miindex) * len(micolumns))
.reshape((len(miindex), len(micolumns))),
index=miindex,
columns=micolumns).sort_index().sort_index(axis=1)

dfmi.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [20]:
dfmi.columns.levels
dfmi.index.levels

FrozenList([['A0', 'A1', 'A2', 'A3'], ['B0', 'B1'], ['C0', 'C1', 'C2', 'C3'], ['D0', 'D1']])

In [21]:
dfmi.loc[('A0','B1','C2'):('A3',),('a')].head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,bar,foo
A0,B1,C2,D0,49,48
A0,B1,C2,D1,53,52
A0,B1,C3,D0,57,56
A0,B1,C3,D1,61,60
A1,B0,C0,D0,65,64


###### df.loc(axis=)[]

In [22]:
dfmi.loc(axis=0)[:,:,['C1','C3']].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42


In [23]:
print(dfmi.columns.levels)
print(dfmi.index.levels[0])

[['a', 'b'], ['bah', 'bar', 'foo']]
Index(['A0', 'A1', 'A2', 'A3'], dtype='object')


In [24]:
dfmi.loc(axis=1)['a',:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo
A0,B0,C0,D0,1,0
A0,B0,C0,D1,5,4
A0,B0,C1,D0,9,8
A0,B0,C1,D1,13,12
A0,B0,C2,D0,17,16


###### df.xs

In [25]:
dfmi.xs('B0',level=1).head()
dfmi.loc(axis=0)[:,'B0',:,:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


###### df.loc[slice()]

In [26]:
dfmi.loc[(slice(None),'B0',slice('C1','C3')),'b'].head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,bah,foo
A0,B0,C1,D0,11,10
A0,B0,C1,D1,15,14
A0,B0,C2,D0,19,18
A0,B0,C2,D1,23,22
A0,B0,C3,D0,27,26


In [27]:
dfmi.loc[(slice(None),'B0',['C1','C3']),'b'].head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,lvl1,bah,foo
A0,B0,C1,D0,11,10
A0,B0,C1,D1,15,14
A0,B0,C3,D0,27,26
A0,B0,C3,D1,31,30
A1,B0,C1,D0,75,74


In [28]:
dfmi.xs('a',axis=1,level=0)
dfmi.loc(axis=0)['A0','B1',:,::2]
dfmi.loc[(slice('A0','A3'),slice(None),),(slice(None),'bar')].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar
A0,B0,C0,D0,1
A0,B0,C0,D1,5
A0,B0,C1,D0,9
A0,B0,C1,D1,13
A0,B0,C2,D0,17


In [29]:
dfmi.index.levels[1][0]
dfmi.index.get_level_values(1)[0]

'B0'

In [30]:
dfmi.loc[:,(['a','b'],['bar','foo'])].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,foo
A0,B0,C0,D0,1,0,2
A0,B0,C0,D1,5,4,6
A0,B0,C1,D0,9,8,10
A0,B0,C1,D1,13,12,14
A0,B0,C2,D0,17,16,18


In [31]:
dfmi.loc[:,[('a','bar'),('b','foo')]].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo
A0,B0,C0,D0,1,2
A0,B0,C0,D1,5,6
A0,B0,C1,D0,9,10
A0,B0,C1,D1,13,14
A0,B0,C2,D0,17,18


### Recap

In [32]:
print(dfmi.index.levels)
print()
print(dfmi.columns.levels)

[['A0', 'A1', 'A2', 'A3'], ['B0', 'B1'], ['C0', 'C1', 'C2', 'C3'], ['D0', 'D1']]

[['a', 'b'], ['bah', 'bar', 'foo']]


###### Select from A0 to A2

In [33]:
dfmi.loc['A0':'A2'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [34]:
dfmi.loc[(slice('A0','A2')),:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [35]:
dfmi.loc(axis=0)['A0':'A2'].head()
#or dfmi.loc(axis=0)['A0':'A2',:,:,:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


###### Select B0

In [36]:
dfmi.loc[(dfmi.index.get_level_values(0).unique(),['B0']),:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [37]:
dfmi.loc(axis=0)[:,'B0'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [38]:
dfmi.loc[(slice(None),'B0'),:].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [39]:
dfmi.xs('B0',axis=0,level=1).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,bar,foo,bah,foo
A0,C0,D0,1,0,3,2
A0,C0,D1,5,4,7,6
A0,C1,D0,9,8,11,10
A0,C1,D1,13,12,15,14
A0,C2,D0,17,16,19,18


###### From dfmi select A1:A3, B0, C1&C3 col: a,foo

In [40]:
dfmi.loc[(slice('A1','A3'),'B0',['C1','C3']),('a','foo')]

A1  B0  C1  D0     72
            D1     76
        C3  D0     88
            D1     92
A2  B0  C1  D0    136
            D1    140
        C3  D0    152
            D1    156
A3  B0  C1  D0    200
            D1    204
        C3  D0    216
            D1    220
Name: (a, foo), dtype: int32

###### from dmfi select the following:
index: every other A, B0, C0-C2
col: every other columns

In [41]:
dfmi.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [42]:
dfmi.rename(columns={'bah':'bar'},level=1,inplace=True)

In [43]:
dfmi.columns.levels

FrozenList([['a', 'b'], ['bar', 'foo']])

In [44]:
dfmi.loc[(slice('A0',None,2),'B0',slice('C0','C2')),
         (slice(None),slice(None,None,2))].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,bar
A0,B0,C0,D0,1,3
A0,B0,C0,D1,5,7
A0,B0,C1,D0,9,11
A0,B0,C1,D1,13,15
A0,B0,C2,D0,17,19


In [45]:
idx=pd.IndexSlice
dfmi.loc[idx[::2,'B0','C0':'C2'],idx[::2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,bar
A0,B0,C0,D0,1,3
A0,B0,C0,D1,5,7
A0,B0,C1,D0,9,11
A0,B0,C1,D1,13,15
A0,B0,C2,D0,17,19
A0,B0,C2,D1,21,23
A2,B0,C0,D0,129,131
A2,B0,C0,D1,133,135
A2,B0,C1,D0,137,139
A2,B0,C1,D1,141,143


# Indexing

In [46]:
df=df.T
df['bar']

second,one,two
A,17,53
B,45,68
C,48,41


In [47]:
df['bar','one']
df['bar']['one']

A    17
B    45
C    48
Name: one, dtype: int32

In [48]:
df.columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [49]:
df[['bar','foo']].columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [50]:
df[['bar','foo']].columns.get_level_values(1)

Index(['one', 'two', 'one', 'two'], dtype='object', name='second')

In [51]:
df[['bar','foo']].columns.remove_unused_levels()

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [52]:
s+s[:-2]
s+s[::2]

A  c     2.0
   d     NaN
   e     6.0
B  c     NaN
   d    10.0
   e     NaN
dtype: float64

In [53]:
s

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int32

In [54]:
s.reindex([('foo','two'),('bar','one')])

foo  two   NaN
bar  one   NaN
dtype: float64

In [55]:
s.reindex([('qux','one')])

qux  one   NaN
dtype: float64

###### reindexing and alignment

In [56]:
midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']],
codes=[[1, 1, 0, 0], [1, 0, 1, 0]])
df=pd.DataFrame(np.random.randn(4,2),index=midx)
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.826139,0.348684
one,x,-1.004535,-1.350939
zero,y,-0.697014,-0.393423
zero,x,-0.704517,-0.614802


In [57]:
df2=df.mean(level=0)
df2

Unnamed: 0,0,1
one,-0.915337,-0.501127
zero,-0.700766,-0.504113


In [58]:
df2.reindex(df.index,level=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.915337,-0.501127
one,x,-0.915337,-0.501127
zero,y,-0.700766,-0.504113
zero,x,-0.700766,-0.504113


In [59]:
df_align, df2_align = df.align(df2, level=0)

In [60]:
df2_align

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.915337,-0.501127
one,x,-0.915337,-0.501127
zero,y,-0.700766,-0.504113
zero,x,-0.700766,-0.504113


In [61]:
df_align

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.826139,0.348684
one,x,-1.004535,-1.350939
zero,y,-0.697014,-0.393423
zero,x,-0.704517,-0.614802


In [62]:
dfmi.index.levels
dfmi.swaplevel(0,1,axis=0).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bar,foo
B0,A0,C0,D0,1,0,3,2
B0,A0,C0,D1,5,4,7,6
B0,A0,C1,D0,9,8,11,10
B0,A0,C1,D1,13,12,15,14
B0,A0,C2,D0,17,16,19,18


In [63]:
dfmi.index.levels
dfmi.rename({'a':'c','b':'d'},axis=1,level=0).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,c,c,d,d
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bar,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18


In [64]:
dfmi.index.levels
dfmi.rename({'D0':'D15'},axis=0,level=3).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bar,foo
A0,B0,C0,D15,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D15,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D15,17,16,19,18


In [65]:
dfmi.rename_axis(index=['yep','nope','maybe','certainly']).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bar,foo
yep,nope,maybe,certainly,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
