In [None]:
# Name: Chaithra Kopparam Cheluvaiah
# Nov 27,2021

# PANDAS DATAFRAME - PART 3

Multi Index and Index Hierarchy

In [None]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [None]:
# Index labels: `outside` holds the group label of every row,
# `inside` holds the row number within its group.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2

# Pair the labels up row by row -> ('G1', 1), ('G1', 2), ..., ('G2', 3)
paired_indexes = [(group, num) for group, num in zip(outside, inside)]

# Build the two-level (hierarchical) index from those pairs
hier_index = pd.MultiIndex.from_tuples(paired_indexes)
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [None]:
# creating sample dataframe
# A locally seeded generator keeps the random values identical on every
# Restart & Run All (so numbers quoted in comments stay valid) and does not
# touch NumPy's global random state.
# The frame has one row per (outside, inside) pair in hier_index -> 6 rows,
# and two columns, A and B.
df = pd.DataFrame(
    np.random.default_rng(101).standard_normal((6, 2)),
    index=hier_index,
    columns=['A', 'B'],
)
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.360963,-0.656333
G1,2,1.339084,-0.876578
G1,3,-0.710697,1.54853
G2,1,0.490378,-0.72826
G2,2,-0.567499,0.03615
G2,3,0.353656,0.454126


### SELECTING ROWS IN MULTILEVEL INDEX

In [None]:
df.loc['G1'] # selecting an outer-level label returns a DataFrame indexed by the inner level

Unnamed: 0,A,B
1,0.360963,-0.656333
2,1.339084,-0.876578
3,-0.710697,1.54853


In [None]:
df.loc['G1'].loc[1] # selecting an inner label from that sub-frame returns a Series (one row, values for A and B)

A    0.360963
B   -0.656333
Name: 1, dtype: float64

**Note: Select from the outermost index level first, then keep chaining `.loc` calls to go deeper into the inner levels.**

In [None]:
df # notice that the two index levels do not have names yet

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.360963,-0.656333
G1,2,1.339084,-0.876578
G1,3,-0.710697,1.54853
G2,1,0.490378,-0.72826
G2,2,-0.567499,0.03615
G2,3,0.353656,0.454126


In [None]:
df.index.names

# it returns a FrozenList: an immutable, list-like pandas container holding one name per index level
# (not to be confused with Python's built-in frozenset(), which builds an immutable set)
# [None, None] means neither of the two index levels has been given a name yet

FrozenList([None, None])

In [None]:
df.index.names = ['Groups','Num'] # names the index levels in place; only the level names change, so no data can be lost accidentally
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.360963,-0.656333
G1,2,1.339084,-0.876578
G1,3,-0.710697,1.54853
G2,1,0.490378,-0.72826
G2,2,-0.567499,0.03615
G2,3,0.353656,0.454126


In [None]:
# let's grab the value in column 'B' for group 'G2', row 2
df.loc['G2'].loc[2]['B']

0.03614995469182296

### CROSS SECTION

In [None]:
# xs() (cross section) is useful when we have a multi-level index
# here it returns the rows under the outer label 'G1'
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.360963,-0.656333
2,1.339084,-0.876578
3,-0.710697,1.54853


In [None]:
# xs() can skip the outer level and select directly on an inner level

# let's get row 1 from both groups, G1 and G2
df.xs(1, level='Num')

# this is the main use of xs()

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.360963,-0.656333
G2,0.490378,-0.72826


### QUICK REVIEW:
1. creating hierarchical indices - `pd.MultiIndex.from_tuples(list_of_tuples)`
2. traversing a multi index, outermost level first - `df.loc['outer_label'].loc['inner_label']`
3. assigning names to the index levels - `df.index.names = list_of_names`
4. cross section - `df.xs('row_label', level='level_name')`