In [None]:
# Third-party imports used throughout the notebook.
import numpy as np
import pandas as pd
from numpy.random import randn

# Seed NumPy's legacy global generator so the randn() draws below are reproducible.
np.random.seed(101)

print('Hello, Advanced Pandas: Hierarchical Index & Cross-section!')

Hello, Advanced Pandas: Hierarchical Index & Cross-section!


## Pandas - MultiIndex & Advanced Index

As a convenience, we can zip two lists of labels into (outer, inner) tuples and pass them to `pd.MultiIndex.from_tuples` to construct a MultiIndex automatically:

In [None]:
# Outer (group) labels and inner (number) labels, one entry per row.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]
# Pair each outer label with its inner label and build the two-level index in one step.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [None]:
# 6x2 block of standard-normal draws, rows labelled by the two-level index, columns 'A' and 'B'.
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [None]:
# Display the frame; its rows are labelled by the two-level index built above.
df


Unnamed: 0,Unnamed: 1,A,B
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


The MultiIndex matters because it enables grouping, selection, and reshaping operations, such as the ones shown below.
## Calling Data

In [None]:
# To get everything under the outer label 'G1', index with df.loc[...]:
# The result is the 'G1' subset of the DataFrame, indexed by the inner level only; check it out:
df.loc['G1']

Unnamed: 0,A,B
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [None]:
# We can keep indexing off that result, going one level deeper:
# .loc[1] on the 'G1' subset selects the row with inner label 1.
df.loc['G1'].loc[1]

A    2.706850
B    0.628133
Name: 1, dtype: float64

In [None]:
# ...and deeper, and deeper...
# The basic idea: starting from the outer index level, we can keep indexing into the inner levels.
# Use .iloc[0] for the first element: a plain [0] on a Series labelled 'A'/'B' relies on
# positional fallback for integer keys, which recent pandas versions deprecate.
df.loc['G1'].loc[1].iloc[0]

2.706849839399938

## Naming Index Levels

In [None]:
# The index levels do not have any names yet:
df.index.names

FrozenList([None, None])

In [None]:
# But you can assign a list of names, one per index level (outer first, then inner), for instance:
df.index.names = ['Groups', 'Nums']

In [None]:
# Now when we display the frame, the outer level is labelled 'Groups' and the inner level 'Nums'
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [None]:
  # Locate and grab the value for group 'G2', Nums 2, column 'B' like this:
  df.loc['G2'].loc[2]['B']

0.7401220570561068

## Cross Section - Multi Level Index

In [None]:
# Let's say we want to grab every row where 'Nums' is 1, across both groups;
# What differentiates xs from the loc method is that we can skip the outer level and select on an inner level of the multi-level index
# This is to say: grab the cross-section where the key is 1 on the level named 'Nums'
df.xs(1, level='Nums')



Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,2.70685,0.628133
G2,-0.848077,0.605965


In [None]:
# TODO: add the link to the accompanying web page here.
print("Thank you for Reading this Post! See you in the next PySeries Episode o/")

Thank you for Reading this Post! See you in the next PySeries Episode o/
