In [1]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

from numpy.random import randn

In [2]:
# Index hierarchy: pandas lets a single axis carry several index levels.
# Here the outer level is a number (1 or 2) and the inner level is a letter (a, b, c).
outer_labels = [1, 1, 1, 2, 2, 2]
inner_labels = ['a', 'b', 'c', 'a', 'b', 'c']

# Passing a list of label lists as the index builds a two-level MultiIndex
ser = Series(np.random.randn(6), index=[outer_labels, inner_labels])

In [6]:
# Show the Series with its two index levels: each outer label (1, 2) is printed
# once, with the inner labels (a, b, c) listed beneath it
ser

1  a    0.907737
   b   -0.098681
   c   -0.319739
2  a    0.750118
   b   -0.153430
   c    0.200578
dtype: float64

In [7]:
# Inspect the index itself: it is a MultiIndex holding one (outer, inner) tuple per row
ser.index

MultiIndex([(1, 'a'),
            (1, 'b'),
            (1, 'c'),
            (2, 'a'),
            (2, 'b'),
            (2, 'c')],
           )

In [10]:
# Now we can select specific subsets.
# Use .loc so that 2 is unambiguously a *label* of the outer index level
# (not a position); the result is the inner-level sub-Series for outer label 2.
ser.loc[2]

a    0.750118
b   -0.153430
c    0.200578
dtype: float64

In [13]:
# We can also select on the inner index level: take every outer label (:)
# and only inner label "a". .loc keeps the lookup explicitly label-based.
ser.loc[:, "a"]

1    0.907737
2    0.750118
dtype: float64

In [14]:
# A multi-level Series can be reshaped into a DataFrame:
# unstacking the innermost level (-1) turns its labels into columns
dframe = ser.unstack(level=-1)

# Display the resulting frame
dframe

Unnamed: 0,a,b,c
1,0.907737,-0.098681,-0.319739
2,0.750118,-0.15343,0.200578


In [17]:
# Going back to a Series: unstack() on the DataFrame pivots its columns (a, b, c)
# into the outer index level, so the two levels come back swapped relative to `ser`.
# (dframe.stack() is the exact inverse, restoring the original number-then-letter order.)
dframe.unstack()

a  1    0.907737
   2    0.750118
b  1   -0.098681
   2   -0.153430
c  1   -0.319739
   2    0.200578
dtype: float64

In [18]:
# Hierarchical indexing also works on DataFrames, on both axes at once:
# rows are keyed by (letter, number) and columns by (city, temperature).
row_labels = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
column_labels = [['NY', 'NY', 'LA', 'SF'], ['cold', 'hot', 'hot', 'cold']]

dframe2 = DataFrame(np.arange(16).reshape(4, 4), index=row_labels, columns=column_labels)

dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,NY,NY,LA,SF
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,hot,cold
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [19]:
# The levels of each axis can be given names, which then appear in the display
# and can be used to refer to a level instead of its position.
row_level_names = ['INDEX_1', 'INDEX_2']
column_level_names = ['Cities', 'Temp']

# Label the row-index levels
dframe2.index.names = row_level_names

# Label the column-index levels
dframe2.columns.names = column_level_names

dframe2

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [20]:
# Level order can be exchanged; axis='columns' targets the column levels
# (the default axis would swap the row-index levels instead)
dframe2.swaplevel(i='Cities', j='Temp', axis='columns')

Unnamed: 0_level_0,Temp,cold,hot,hot,cold
Unnamed: 0_level_1,Cities,NY,NY,LA,SF
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11
b,2,12,13,14,15


In [25]:
# We can also sort by a particular index level.
# sort_index(level=...) is the dedicated API for ordering rows by an index level;
# ties on INDEX_2 are broken by the remaining level (INDEX_1), so the row order
# is deterministic rather than depending on an unstable sort of equal keys.
dframe2.sort_index(level='INDEX_2')

Unnamed: 0_level_0,Cities,NY,NY,LA,SF
Unnamed: 0_level_1,Temp,cold,hot,hot,cold
INDEX_1,INDEX_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
b,1,8,9,10,11
a,2,4,5,6,7
b,2,12,13,14,15


In [26]:
#Note the change in ordering: the DataFrame rows are now sorted by the INDEX_2 level

In [28]:
# Aggregations can be applied per index level: group the rows by the
# second row-index level (position 1), then total each column within a group
grouped_by_inner_level = dframe2.groupby(level=1)
grouped_by_inner_level.sum()

Cities,NY,NY,LA,SF
Temp,cold,hot,hot,cold
INDEX_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,8,10,12,14
2,16,18,20,22


In [29]:
#That's the end of this section! Next up, Section 5: Working with Data, Part 1!