In [None]:
import numpy as np
import pandas as pd

The Pandas MultiIndex

In [None]:
# One (state, year) key per observation: every state paired with both census years.
index = pd.MultiIndex.from_tuples(
    [(state, year)
     for state in ('California', 'New York', 'Texas')
     for year in (2010, 2020)]
)

In [None]:
# Population counts, listed state by state with the 2010 value before the 2020 value.
populations = [
    37_253_956,  # California, 2010
    39_538_223,  # California, 2020
    19_378_102,  # New York, 2010
    20_201_249,  # New York, 2020
    25_145_561,  # Texas, 2010
    29_145_505,  # Texas, 2020
]

In [None]:
# Attach the hierarchical (state, year) index to the raw population counts.
pop = pd.Series(data=populations, index=index)
pop

In [None]:
# Every entry on the first level, restricted to 2020 on the second level.
pop.loc[:, 2020]

The Pandas MultiIndex as Extra Dimension

In [None]:
# Pivot the innermost index level into columns, giving a conventional 2-D table.
pop_df = pop.unstack(level=-1)
pop_df

In [None]:
# Inverse of unstack: fold the columns back into the innermost index level.
pop_df.stack(level=-1)

In [None]:
# Add a second column alongside the totals; the list is aligned positionally
# with the rows of `pop`.
pop_df = pd.DataFrame(
    {
        'total': pop,
        'under18': [
            9_284_094, 8_898_092,
            4_318_033, 4_181_528,
            6_879_014, 7_432_474,
        ],
    }
)
pop_df

In [None]:
# Fraction of each row's total population that is under 18.
f_u18 = pop_df['under18'].div(pop_df['total'])
f_u18

In [None]:
# Spread the innermost index level across columns for a side-by-side view.
f_u18.unstack(level=-1)

Methods of MultiIndex Creation

In [None]:
# Passing a list of index arrays makes pandas build a MultiIndex implicitly.
# The values come from a seeded generator so that Restart & Run All reproduces
# the same table; the particular seed is arbitrary.
rng = np.random.default_rng(1)
df = pd.DataFrame(
    rng.random((4, 2)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=['data1', 'data2'],
)
df

In [None]:
# A dict keyed by tuples is turned into a MultiIndex-ed Series automatically.
data = {
    ('California', 2010): 37_253_956,
    ('California', 2020): 39_538_223,
    ('New York', 2010): 19_378_102,
    ('New York', 2020): 20_201_249,
    ('Texas', 2010): 25_145_561,
    ('Texas', 2020): 29_145_505,
}
pd.Series(data)

MultiIndex Constructors

In [None]:
# From parallel arrays: one array per level, matched up position by position.
pd.MultiIndex.from_arrays(arrays=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]])

In [None]:
# From a list of tuples: each tuple spells out the full key of one entry.
pd.MultiIndex.from_tuples(tuples=[('a', 1), ('a', 2), ('b', 1), ('b', 2)])

In [None]:
# From a Cartesian product: every value of the first list paired with every
# value of the second.
pd.MultiIndex.from_product(iterables=[['a', 'b'], [1, 2]])

In [None]:
# Directly from the internal encoding: `levels` holds the distinct values of
# each level, `codes` holds, per level, the position in `levels` for each entry.
pd.MultiIndex(
    levels=[['a', 'b'], [1, 2]],
    codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
)

MultiIndex Level Names

In [None]:
# Name the two index levels. rename_axis returns a new Series instead of
# mutating the existing Index object in place, so any other object that may
# share that Index is left untouched and the cell stays idempotent on re-run.
pop = pop.rename_axis(['states', 'year'])
pop

MultiIndex for Columns

In [None]:
# Hierarchical row index (year x visit) and hierarchical columns (subject x type).
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]], names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']], names=['subject', 'type'])

# Mock some data. A seeded generator keeps the table identical across
# Restart & Run All; the particular seed is arbitrary.
rng = np.random.default_rng(0)
data = np.round(rng.standard_normal((4, 6)), 1)
data[:, ::2] *= 10  # columns 0, 2, 4 are the 'HR' columns: give them a 10x wider spread
data += 37          # shift every value up by 37

# Create the DataFrame.
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

In [None]:
# Select by the top column level: all of Guido's measurements.
health_data.loc[:, 'Guido']

Indexing and Slicing a MultiIndex

Multiply Indexed Series

In [None]:
# Re-display the multiply indexed population Series used by the indexing cells below.
pop

In [None]:
# A full key (one label per level) picks out a single element.
pop.loc[('California', 2010)]

In [None]:
# Partial indexing: give only the outer label; the result keeps the
# remaining lower-level index.
pop.loc['Texas']

In [None]:
# Label slices over the outer level work as long as the MultiIndex is sorted.
pop.loc[slice('California', 'New York')]

In [None]:
# On a sorted index, a lower level can be selected by passing an empty slice
# for the level(s) above it.
pop.loc[:, 2010]

In [None]:
# Boolean-mask selection: keep only the entries above 22 million.
pop.loc[pop.gt(22_000_000)]

In [None]:
# Fancy indexing: a list of outer-level labels selects several groups at once.
pop.loc[['California', 'Texas']]

Multiply Indexed DataFrames

In [None]:
# Re-display the multiply indexed DataFrame used by the indexing cells below.
health_data

In [None]:
# A full column key (subject, type) returns a single column: Guido's HR readings.
health_data[('Guido', 'HR')]

In [None]:
# Purely positional selection: the first two rows and the first two columns.
health_data.iloc[0:2, 0:2]

In [None]:
# Label-based selection: every row, and the single column keyed by ('Bob', 'HR').
health_data.loc[slice(None), ('Bob', 'HR')]

In [None]:
# pd.IndexSlice lets slice syntax be written inside the per-level keys passed to .loc.
idx = pd.IndexSlice
row_selector = idx[:, 1]        # any value on the first row level, 1 on the second
column_selector = idx[:, 'HR']  # any value on the first column level, 'HR' on the second
health_data.loc[row_selector, column_selector]

Re-arranging MultiIndexes

In [None]:
# Deliberately unsorted outer level ('a', 'c', 'b') to set up the sorting demo.
# Level names are given at construction rather than assigned to the index afterwards.
index = pd.MultiIndex.from_product([['a', 'c', 'b'], [1, 2]], names=['char', 'int'])

# Seeded generator so the values are identical on every Restart & Run All;
# the particular seed is arbitrary.
rng = np.random.default_rng(2)
data = pd.Series(rng.random(6), index=index)
data

In [None]:
# Label slicing needs a sorted MultiIndex. `data` is still unsorted at this
# point, so run the slice and display the error instead of leaving the call
# commented out. The error pandas raises here is a KeyError subclass.
try:
    data['a':'b']
except KeyError as err:
    print(type(err).__name__, err)

In [None]:
# Sort the index labels in ascending order so that label slicing becomes possible.
data = data.sort_index(ascending=True)
data

In [None]:
# With the index sorted, the outer-level label slice now succeeds.
data.loc['a':'b']

Stacking and Unstacking Indices

In [None]:
# Re-display the population Series before reshaping it with unstack below.
pop

In [None]:
# Default behaviour: the innermost level (-1) becomes the columns.
pop.unstack(level=-1)

In [None]:
# Move the outermost index level (position 0) into the columns.
pop.unstack(0)

In [None]:
# Move the index level at position 1 into the columns.
pop.unstack(1)

Index Setting and Resetting

In [None]:
# Turn the index levels into ordinary columns; the values land in a column
# called 'population'.
pop_flat = pop.rename('population').reset_index()
pop_flat

In [None]:
# Rebuild a MultiIndex from the two label columns of the flat table.
pop_flat.set_index(keys=['states', 'year'])