In [2]:
#***4.1 Hierarchical indexing ***
#***4.1.1 Creating multiple index ***

import pandas as pd

# Two-level row index: each outer key ('a', 'b') is paired with each of the
# four inner keys, outer-major, giving eight (outer, inner) labels in total.
data = pd.Series(
    data=[10, 20, 30, 40, 15, 25, 35, 25],
    index=pd.MultiIndex.from_product(
        [['a', 'b'], ['obj1', 'obj2', 'obj3', 'obj4']]
    ),
)
data

a  obj1    10
   obj2    20
   obj3    30
   obj4    40
b  obj1    15
   obj2    25
   obj3    35
   obj4    25
dtype: int64

In [3]:
# inspect the two-level MultiIndex that labels the series
data.index

MultiIndex(levels=[['a', 'b'], ['obj1', 'obj2', 'obj3', 'obj4']],
           codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]])

In [4]:
#***4.1.2 Partial indexing***
# select by outer-level label only; the result is indexed by the inner level
data['b']

obj1    15
obj2    25
obj3    35
obj4    25
dtype: int64

In [5]:
# select inner-level label 'obj2' across every outer-level key
data[:, 'obj2']

a    20
b    25
dtype: int64

In [6]:
# giving a label for both levels returns a single scalar value
data['b','obj2']

25

In [7]:
#***4.1.3 Unstack the data***
# unstack based on the first level, i.e. a, b
# note that a and b become the column labels; the rows are obj1..obj4
data.unstack(0)

Unnamed: 0,a,b
obj1,10,15
obj2,20,25
obj3,30,35
obj4,40,25


In [8]:
# unstack based on the second level, i.e. the 'obj*' labels become columns
data.unstack(1)

Unnamed: 0,obj1,obj2,obj3,obj4
a,10,20,30,40
b,15,25,35,25


In [9]:
# with no argument, unstack() uses the innermost (last) index level,
# so the result matches data.unstack(1)
d = data.unstack()
d

Unnamed: 0,obj1,obj2,obj3,obj4
a,10,20,30,40
b,15,25,35,25


In [10]:
#***4.1.4 Column indexing***
import numpy as np

# 4x3 frame holding 0..11 with two-level labels on both axes:
# rows are (group, position) pairs, columns are (number, color) pairs.
df = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=pd.MultiIndex.from_arrays(
        [['a', 'a', 'b', 'b'], ['one', 'two', 'three', 'four']]
    ),
    columns=pd.MultiIndex.from_arrays(
        [['num1', 'num2', 'num3'], ['red', 'green', 'red']]
    ),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,num1,num2,num3
Unnamed: 0_level_1,Unnamed: 1_level_1,red,green,red
a,one,0,1,2
a,two,3,4,5
b,three,6,7,8
b,four,9,10,11


In [11]:
# display the row index (a two-level MultiIndex)
df.index

MultiIndex(levels=[['a', 'b'], ['four', 'one', 'three', 'two']],
           codes=[[0, 0, 1, 1], [1, 3, 2, 0]])

In [12]:
# display the column index (also a two-level MultiIndex)
df.columns

MultiIndex(levels=[['num1', 'num2', 'num3'], ['green', 'red']],
           codes=[[0, 1, 2], [1, 0, 1]])

In [13]:
# name the levels of both axes so later cells can refer to them by name
# (note: this modifies df in place)
df.index.names=['key1', 'key2']
df.columns.names=['n','color']
df

Unnamed: 0_level_0,n,num1,num2,num3
Unnamed: 0_level_1,color,red,green,red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,one,0,1,2
a,two,3,4,5
b,three,6,7,8
b,four,9,10,11


In [14]:
# accessing the column(s) under the top-level label num1
df['num1'] # equivalent to df.loc[:, 'num1'] (the old .ix indexer has been removed from pandas)

Unnamed: 0_level_0,color,red
key1,key2,Unnamed: 2_level_1
a,one,0
a,two,3
b,three,6
b,four,9


In [16]:
# accessing the rows whose key1 label is 'a' (the result is indexed by key2)
df.loc['a']

n,num1,num2,num3
color,red,green,red
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
one,0,1,2
two,3,4,5


In [17]:
# accessing row 0 only, by integer position; the result is a Series
# indexed by the (n, color) column labels
df.iloc[0]

n     color
num1  red      0
num2  green    1
num3  red      2
Name: (a, one), dtype: int32

In [18]:
#***4.1.5 Swap and sort level***
# exchange the positions of the two row-index levels; row order is unchanged
df.swaplevel('key1','key2')

Unnamed: 0_level_0,n,num1,num2,num3
Unnamed: 0_level_1,color,red,green,red
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,a,0,1,2
two,a,3,4,5
three,b,6,7,8
four,b,9,10,11


In [19]:
# order the rows by the labels of the key2 level (four, one, three, two)
df.sort_index(level='key2')

Unnamed: 0_level_0,n,num1,num2,num3
Unnamed: 0_level_1,color,red,green,red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
b,four,9,10,11
a,one,0,1,2
b,three,6,7,8
a,two,3,4,5


In [20]:
#***4.1.6 Summary statistic by level***
# add all rows that share the same key1 label
# (DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
#  grouping on the index level is the supported equivalent)
df.groupby(level='key1').sum()

n,num1,num2,num3
color,red,green,red
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [21]:
# add all the columns that share the same color label
# (DataFrame.sum(level=..., axis=1) was removed in pandas 2.0, so transpose,
#  group the former columns by their 'color' level, sum, and transpose back;
#  sort=False keeps the colors in order of first appearance: red, green)
df.T.groupby(level='color', sort=False).sum().T

Unnamed: 0_level_0,color,red,green
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,2,1
a,two,8,4
b,three,14,7
b,four,20,10
