In [25]:
import numpy as np
import pandas as pd

In [6]:
# Series with a two-level index: outer labels 1/2, inner labels a/b/c.
ser1 = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=[[1] * 3 + [2] * 3, ['a', 'b', 'c'] * 2],
)
ser1

1  a    1
   b    2
   c    3
2  a    4
   b    5
   c    6
dtype: int64

In [7]:
# Give names to the two index levels: outer level -> 'indx_0', inner level -> 'indx_1'.
# The assignment is made on ser1's existing index, so ser1 itself is modified.
ser1.index.names = ['indx_0','indx_1']
ser1

indx_0  indx_1
1       a         1
        b         2
        c         3
2       a         4
        b         5
        c         6
dtype: int64

In [8]:
# Select every row whose first-level label is 2; the result is indexed by the second level.
ser1.loc[2]

indx_1
a    4
b    5
c    6
dtype: int64

In [9]:
# Cross-section on the second level: keep the rows labelled 'b' there,
# leaving a result indexed by the first level.
ser1.xs('b', level=1)

indx_0
1    2
2    5
dtype: int64

In [11]:
# Pivot the innermost index level into columns, turning the Series into a DataFrame.
df1 = ser1.unstack(level=-1)
df1

indx_1,a,b,c
indx_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,2,3
2,4,5,6


In [12]:
# Exchange the two index levels; the values keep their original order.
ser2 = ser1.swaplevel(0, 1)
ser2

indx_1  indx_0
a       1         1
b       1         2
c       1         3
a       2         4
b       2         5
c       2         6
dtype: int64

In [15]:
# Order the rows by the 'indx_0' level, which sits at position 1 in ser2's index.
ser2.sort_index(level='indx_0')

indx_1  indx_0
a       1         1
b       1         2
c       1         3
a       2         4
b       2         5
c       2         6
dtype: int64

In [16]:
# Sum the values that share the same first-level (indx_1) label.
# Series.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the index level is the supported spelling and yields the same totals.
ser2.groupby(level=0).sum()

indx_1
a    5
b    7
c    9
dtype: int64

In [17]:
#documentation practice

In [18]:
#create a multiindex object/hierarchical index object

In [20]:
# Two parallel label lists: position i of each list forms one (first, second) pair.
arrays = [
    [name for name in ('bar', 'baz', 'foo', 'qux') for _ in range(2)],
    ['one', 'two'] * 4,
]
arrays

[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

In [22]:
# Transpose the parallel label lists into a list of per-row label tuples.
tuples = [label_pair for label_pair in zip(*arrays)]
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [23]:
# Build the hierarchical index from the label tuples and name its two levels.
index = pd.MultiIndex.from_tuples(
    tuples,
    names=['first', 'second'],
)
index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [26]:
# One standard-normal draw per entry of the hierarchical index.
s = pd.Series(np.random.randn(len(index)), index=index)
s

first  second
bar    one       1.062978
       two       0.392678
baz    one       0.729432
       two      -0.653065
foo    one       0.474418
       two       0.390492
qux    one       0.980406
       two       1.367457
dtype: float64

In [27]:
# When you want every pairing of the elements in two iterables, it can be easier to use the MultiIndex.from_product function.

In [28]:
# from_product pairs every label of the first list with every label of the second.
iterables = [
    ['bar', 'baz', 'foo', 'qux'],
    ['one', 'two'],
]
pd.MultiIndex.from_product(iterables, names=['first', 'second'])

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [30]:
# as a convenience, you can pass a list of arrays directly into Series or DataFrame to construct a MultiIndex automatically

In [32]:
# Same label layout as before, but held as NumPy string arrays:
# each first-level label twice in a row, and 'one'/'two' alternating four times.
arrays2 = [
    np.repeat(np.array(['bar', 'baz', 'foo', 'qux']), 2),
    np.tile(np.array(['one', 'two']), 4),
]
arrays2

[array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
       dtype='<U3'),
 array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
       dtype='<U3')]

In [34]:
# Passing the list of arrays as the index builds the two-level index automatically.
s2 = pd.Series(np.random.randn(len(arrays2[0])), index=arrays2)
s2

bar  one    0.882767
     two   -0.719687
baz  one   -0.326042
     two    0.245634
foo  one    2.390919
     two   -1.359706
qux  one   -0.340451
     two    0.323132
dtype: float64

In [37]:
# Four columns of standard-normal draws, one row per label pair;
# the list of label lists becomes a two-level row index.
df = pd.DataFrame(np.random.randn(len(arrays[0]), 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,1.654252,1.561003,1.37519,-0.607563
bar,two,-0.431884,-0.04377,0.719518,1.306263
baz,one,-1.009813,1.690677,0.238754,0.719005
baz,two,-0.051324,0.085624,-1.146399,-2.375576
foo,one,0.788803,-1.581009,-0.132827,0.985994
foo,two,1.397707,0.575415,-0.396853,1.999711
qux,one,-0.449804,-1.67122,0.550144,0.376116
qux,two,0.243971,-0.497937,-0.353006,1.037626


In [40]:
# All MultiIndex constructors accept a names argument; df was built without one,
# so both of its index levels are unnamed (None).
df.index.names

FrozenList([None, None])

In [42]:
# Bind a second name to the MultiIndex built earlier (plain assignment, so no copy is made).
myindex = index
myindex

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])

In [47]:
# The index can back any axis of a pandas object, and the number of levels of the index is up to you.
# Here the MultiIndex labels the columns while the rows use a flat A/B/C index.
df2 = pd.DataFrame(
    np.random.rand(3, 8),
    index=['A', 'B', 'C'],
    columns=myindex,
)
df2

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.329359,0.310132,0.676117,0.642419,0.612827,0.447766,0.781977,0.0087
B,0.479223,0.969563,0.532211,0.232376,0.570149,0.132221,0.440721,0.196496
C,0.083846,0.378106,0.465778,0.199056,0.481449,0.037925,0.280699,0.385155


In [48]:
# The first six MultiIndex entries label both the rows and the columns.
df3 = pd.DataFrame(
    data=np.random.rand(6, 6),
    index=myindex[:6],
    columns=myindex[:6],
)
df3

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,0.013064,0.670857,0.655333,0.580942,0.140543,0.541575
bar,two,0.549939,0.782956,0.558696,0.458235,0.525633,0.601901
baz,one,0.141638,0.492958,0.248358,0.436475,0.563497,0.016653
baz,two,0.886926,0.540217,0.478328,0.377767,0.102218,0.124334
foo,one,0.049952,0.296752,0.727032,0.535139,0.222899,0.791794
foo,two,0.2967,0.293899,0.38572,0.943861,0.263966,0.005518


In [49]:
# Index display can be controlled using the multi_sparse option.
# A bare `df` inside the with-block is not the cell's last top-level expression,
# so the notebook renders nothing for it. display() (made available by IPython in
# notebook sessions) renders the frame while the option is still in effect.
with pd.option_context('display.multi_sparse', False):
    display(df)

In [50]:
# The list of label tuples is passed directly as the index, one draw per tuple.
pd.Series(np.random.randn(len(tuples)), index=tuples)

(bar, one)    0.741868
(bar, two)    0.208719
(baz, one)   -0.667570
(baz, two)   -0.382724
(foo, one)   -1.202890
(foo, two)   -0.434948
(qux, one)    0.315438
(qux, two)   -0.112831
dtype: float64

In [53]:
# Reconstructing the level labels: get_level_values returns the label of one
# level for every entry of the MultiIndex.
myindex.get_level_values(0) # select the level by position (0 is the 'first' level)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [54]:
myindex.get_level_values('second') # select the level by its name instead of its position

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')