# DataFrames: Hierarchical Indexing with a Multi-Level Index

In [13]:
import numpy as np
import pandas as pd

In [14]:
from numpy.random import randn

In [15]:
# Seed NumPy's global random generator so the randn(...) draws made later
# start from a known state.
np.random.seed(seed=101)

In [16]:
# Index levels: `outside` holds the outer (group) label of every row and
# `inside` the inner (number) label; the two lists are aligned by position.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the labels element-wise and turn the (outer, inner) tuples into a
# two-level MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [17]:
# Outer-level labels: one entry per index row
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [18]:
# Inner-level labels, aligned position-by-position with `outside`
inside

[1, 2, 3, 1, 2, 3]

In [19]:
# zip pairs the two lists element-wise; each (outer, inner) tuple is one index entry
list(zip(outside, inside))

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [20]:
# The MultiIndex built from those (outer, inner) tuples
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [24]:
# Build a 6x2 frame of random draws: rows are labelled by the two-level
# index, columns by 'A' and 'B'.
# NOTE(review): randn draws from NumPy's global RNG, so re-running this cell
# without re-running the seed cell first yields different values.
df = pd.DataFrame(
    data=randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [25]:
# Display the frame; the first two columns of the output are the index levels
df


Unnamed: 0,Unnamed: 1,A,B
G1,1,0.188695,-0.758872
G1,2,-0.933237,0.955057
G1,3,0.190794,1.978757
G2,1,2.605967,0.683509
G2,2,0.302665,1.693723
G2,3,-1.706086,-1.159119


In [26]:
# Select 'G1': an outer-level label returns that group's rows as a
# sub-frame indexed by the inner level only.
df.loc['G1']

Unnamed: 0,A,B
1,0.188695,-0.758872
2,-0.933237,0.955057
3,0.190794,1.978757


In [27]:
# Select 'G2': same lookup for the second outer-level group.
df.loc['G2']

Unnamed: 0,A,B
1,2.605967,0.683509
2,0.302665,1.693723
3,-1.706086,-1.159119


In [30]:
# Chained lookup: pick group 'G1', then inner label 2 within it.
# The single matching row comes back as a Series keyed by column name.
df.loc['G1'].loc[2]

A   -0.933237
B    0.955057
Name: 2, dtype: float64

In [32]:
# Chained lookup: pick group 'G2', then inner label 3 within it.
# The single matching row comes back as a Series keyed by column name.
df.loc['G2'].loc[3]

A   -1.706086
B   -1.159119
Name: 3, dtype: float64

In [34]:
# Name the index levels [outer, inner] so later cells can refer to them by
# name (e.g. level='Num').
# rename_axis returns a new frame rather than mutating the existing Index
# object in place, so `hier_index` keeps its unnamed levels.
df = df.rename_axis(index=['Groups', 'Num'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.188695,-0.758872
G1,2,-0.933237,0.955057
G1,3,0.190794,1.978757
G2,1,2.605967,0.683509
G2,2,0.302665,1.693723
G2,3,-1.706086,-1.159119


In [35]:
# Select one scalar: column 'A' of row ('G2', 2) -- 0.302665 in the frame above.
# A single .loc call with an (outer, inner) tuple plus a column label replaces
# the three chained lookups (.loc['G2'].loc[2]['A']) and avoids building two
# intermediate objects.
df.loc[('G2', 2), 'A']

0.3026654485851825

In [38]:
# Cross section on the outer level: all rows of group 'G2',
# indexed by the remaining inner level.
df.xs(key='G2')


Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.605967,0.683509
2,0.302665,1.693723
3,-1.706086,-1.159119


In [41]:
# Cross section on the inner level: the row numbered 3 from every group,
# indexed by the remaining outer level.
df.xs(key=3, level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.190794,1.978757
G2,-1.706086,-1.159119


In [43]:

# Cross section on the inner level: the row numbered 1 from every group,
# indexed by the remaining outer level.
df.xs(key=1, level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.188695,-0.758872
G2,2.605967,0.683509
