# MULTIINDEXING

In [1]:
import pandas as pd
# Read the medal counts from the CSV file into a DataFrame.
df = pd.read_csv('../data/medals.csv')
# A bare trailing expression makes the notebook render the frame.
df

Unnamed: 0,Year,Medal Type,US,Canada,England,Australia
0,2001,Gold,278,188,39,44
1,2001,Silver,324,235,82,66
2,2001,Bronze,446,399,100,15
3,2002,Gold,301,298,42,66
4,2002,Silver,378,222,228,88
5,2002,Bronze,502,245,165,173
6,2003,Gold,321,276,86,163
7,2003,Silver,322,263,76,184
8,2003,Bronze,423,165,97,136
9,2004,Gold,298,146,43,152


In [3]:
# Move the 'Year' and 'Medal Type' columns into a two-level row index.
dfIndexed = df.set_index(['Year', 'Medal Type'])
# Keep a handle on the resulting index object so later cells can inspect it.
index = dfIndexed.index
index

MultiIndex(levels=[[2001, 2002, 2003, 2004, 2005, 2006], ['Bronze', 'Gold', 'Silver']],
           labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5], [1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0]],
           names=['Year', 'Medal Type'])

In [4]:
# Show the frame with 'Year' and 'Medal Type' as its row index.
dfIndexed

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Year,Medal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,Gold,278,188,39,44
2001,Silver,324,235,82,66
2001,Bronze,446,399,100,15
2002,Gold,301,298,42,66
2002,Silver,378,222,228,88
2002,Bronze,502,245,165,173
2003,Gold,321,276,86,163
2003,Silver,322,263,76,184
2003,Bronze,423,165,97,136
2004,Gold,298,146,43,152


Calling the get_level_values method with a level number (or level name) returns the labels of that level, one for each row of the index:

In [5]:
# Level 0 is the first index level ('Year').
index.get_level_values(0)

Int64Index([2001, 2001, 2001, 2002, 2002, 2002, 2003, 2003, 2003, 2004, 2004,
            2004, 2005, 2005, 2005, 2006, 2006, 2006],
           dtype='int64', name='Year')

An IndexError is raised if the level number passed to get_level_values() is
out of range:

In [16]:
# The index has only two levels (0 and 1), so asking for level 2 fails.
# Catch the error and print it so the demonstration does not abort a
# "Restart & Run All" of the notebook.
try:
    index.get_level_values(2)
except IndexError as err:
    print('IndexError: {}'.format(err))

IndexError: Too many levels: Index has only 2 levels, not 3

In [7]:
# Display the indexed frame again for reference before selecting from it.
dfIndexed

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Year,Medal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001,Gold,278,188,39,44
2001,Silver,324,235,82,66
2001,Bronze,446,399,100,15
2002,Gold,301,298,42,66
2002,Silver,378,222,228,88
2002,Bronze,502,245,165,173
2003,Gold,321,276,86,163
2003,Silver,322,263,76,184
2003,Bronze,423,165,97,136
2004,Gold,298,146,43,152


You can achieve hierarchical indexing with a MultiIndexed DataFrame:

In [8]:
# Select every row whose first index level ('Year') equals 2004.
# .loc is the label-based indexer; the older .ix indexer was deprecated in
# pandas 0.20 and removed in pandas 1.0.
dfIndexed.loc[2004]

Unnamed: 0_level_0,US,Canada,England,Australia
Medal Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Gold,298,146,43,152
Silver,256,184,173,148
Bronze,422,226,75,152


In [9]:
# Select the single row for (Year=2005, Medal Type='Silver').
# The row key is written as an explicit tuple followed by ':' for "all
# columns", so 'Silver' cannot be mistaken for a column label.
# (.ix was removed in pandas 1.0; .loc is the label-based replacement.)
dfIndexed.loc[(2005, 'Silver'), :]

US           257
Canada       251
England       61
Australia     83
Name: (2005, Silver), dtype: int64

We can slice using a MultiIndex:

In [17]:
# Label-based slice on the first index level; both endpoints (2003 and 2004)
# are included. (.ix was removed in pandas 1.0; .loc is the replacement.)
dfIndexed.loc[2003:2004]

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Year,Medal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003,Gold,321,276,86,163
2003,Silver,322,263,76,184
2003,Bronze,423,165,97,136
2004,Gold,298,146,43,152
2004,Silver,256,184,173,148
2004,Bronze,422,226,75,152


We can try slicing at a lower level:

To be on the safe side, sort the index before slicing a MultiIndex with
tuples, because slicing across levels requires the index to be sorted:

In [10]:
# Sort the rows by the index (level 0 first, then the remaining level) and
# slice between two full (Year, Medal Type) keys; both endpoints are included.
# sort_index(level=0) replaces the removed DataFrame.sortlevel(0), and .loc
# replaces the removed .ix indexer.
dfIndexed.sort_index(level=0).loc[(2002, 'Silver'):(2006, 'Silver')]

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Year,Medal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002,Silver,378,222,228,88
2003,Bronze,423,165,97,136
2003,Gold,321,276,86,163
2003,Silver,322,263,76,184
2004,Bronze,422,226,75,152
2004,Gold,298,146,43,152
2004,Silver,256,184,173,148
2005,Bronze,419,295,51,57
2005,Gold,311,248,83,73
2005,Silver,257,251,61,83


We can also pass a list of tuples:

In [11]:
# Pick specific rows by passing a list of full (Year, Medal Type) keys.
# (.ix was removed in pandas 1.0; .loc is the label-based replacement.)
dfIndexed.loc[[(2003, 'Gold'), (2005, 'Silver'), (2006, 'Bronze')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Year,Medal Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003,Gold,321,276,86,163
2005,Silver,257,251,61,83
2006,Bronze,345,265,93,72


## Swapping and reordering levels
The swaplevel function enables levels within the MultiIndex to be swapped:

In [12]:
# Exchange the two row-index levels, referring to them by name instead of
# by position; the row axis is the default, so it is not spelled out.
dfIndexed.swaplevel('Year', 'Medal Type')

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Medal Type,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gold,2001,278,188,39,44
Silver,2001,324,235,82,66
Bronze,2001,446,399,100,15
Gold,2002,301,298,42,66
Silver,2002,378,222,228,88
Bronze,2002,502,245,165,173
Gold,2003,321,276,86,163
Silver,2003,322,263,76,184
Bronze,2003,423,165,97,136
Gold,2004,298,146,43,152


The reorder_levels function is more general, allowing you to specify the order of
the levels:

In [13]:
# Rearrange the row-index levels into the listed order
# (row axis is the default).
dfIndexed.reorder_levels(order=['Medal Type', 'Year'])

Unnamed: 0_level_0,Unnamed: 1_level_0,US,Canada,England,Australia
Medal Type,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Gold,2001,278,188,39,44
Silver,2001,324,235,82,66
Bronze,2001,446,399,100,15
Gold,2002,301,298,42,66
Silver,2002,378,222,228,88
Bronze,2002,502,245,165,173
Gold,2003,321,276,86,163
Silver,2003,322,263,76,184
Bronze,2003,423,165,97,136
Gold,2004,298,146,43,152


## Cross section
The xs method provides a shortcut means of selecting data based on a particular
index level value:

In [14]:
# Cross-section: keep the rows whose 'Medal Type' level equals 'Gold'.
# The result is indexed by the remaining level ('Year').
dfIndexed.xs(key='Gold', axis=0, level='Medal Type')

Unnamed: 0_level_0,US,Canada,England,Australia
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001,278,188,39,44
2002,301,298,42,66
2003,321,276,86,163
2004,298,146,43,152
2005,311,248,83,73
2006,378,176,83,47


The more long-winded alternative to the preceding command would be to use
swaplevel to switch the Year and Medal Type levels and then perform the
selection as follows:

In [18]:
# Bring 'Medal Type' to the outermost level, then select the 'Gold' rows by
# label. (.ix was removed in pandas 1.0; .loc is the label-based replacement.)
dfIndexed.swaplevel(0, 1, axis=0).loc['Gold']

Unnamed: 0_level_0,US,Canada,England,Australia
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001,278,188,39,44
2002,301,298,42,66
2003,321,276,86,163
2004,298,146,43,152
2005,311,248,83,73
2006,378,176,83,47
