In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Changing index of a DataFrame

In [8]:
# Read the sales table and use its 'month' column as the row index
# instead of the default integer positions.
sales_csv = 'data/sales.csv'
df = pd.read_csv(sales_csv, index_col='month')
df.head()

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52


In [9]:
# Upper-case every existing row label; the result is a plain Python list,
# not a pandas Index.
new_idx = [label.upper() for label in df.index]
new_idx

['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN']

In [10]:
# Replace all row labels at once with the upper-cased list `new_idx`.
# This assigns to the existing frame rather than creating a new one, so any
# earlier display of `df` no longer reflects its current labels.
df.index = new_idx
df.head()

Unnamed: 0,eggs,salt,spam
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52


# Changing index name labels

In [11]:
# Give each axis a name: the column axis becomes 'PRODUCT' and the row
# axis becomes 'MONTHS'. The names appear as header labels in the display.
df.columns.name = 'PRODUCT'
df.index.name = 'MONTHS'

df.head()

PRODUCT,eggs,salt,spam
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52


# Building an index, then a DataFrame

In [12]:
# Define the month labels first, then pass them to the constructor as the
# row index, so the frame is created already labelled.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
df = pd.DataFrame(range(6), index=month_labels)

df.head()

Unnamed: 0,0
Jan,0
Feb,1
Mar,2
Apr,3
May,4


# Extracting data with a MultiIndex

In [58]:
# Small demo table: six values, each tagged with a month and a state.
# All three columns are supplied to the constructor in one mapping.
df = pd.DataFrame({
    'value': range(6),
    'month': ['Jan', 'Jan', 'Mar', 'Mar', 'Jun', 'Jun'],
    'state': ['a', 'b', 'c', 'd', 'e', 'f'],
})
df

Unnamed: 0,value,month,state
0,0,Jan,a
1,1,Jan,b
2,2,Mar,c
3,3,Mar,d
4,4,Jun,e
5,5,Jun,f


In [59]:
# Move 'month' and 'state' out of the columns and into a two-level row index
# (month is the outer level, state the inner one).
index_columns = ['month', 'state']
df = df.set_index(index_columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
month,state,Unnamed: 2_level_1
Jan,a,0
Jan,b,1
Mar,c,2
Mar,d,3
Jun,e,4
Jun,f,5


In [60]:
# Select the single row whose (month, state) key is ('Jan', 'b').
# The explicit ':' column indexer makes it unambiguous that the tuple is a
# MultiIndex row key rather than a (row, column) pair.
df.loc[('Jan', 'b'), :]

value    1
Name: (Jan, b), dtype: int64

In [63]:
# Look up one cell: the row is addressed by its full (month, state) key and
# the column by name, so the result is a scalar.
row_key = ('Mar', 'd')
df.loc[row_key, 'value']

3

# Setting & Sorting a MultiIndex

In [46]:
# Rebuild the demo table from scratch so this section starts from a flat
# frame with a default integer index.
df = pd.DataFrame(range(6), columns=['value']).assign(
    month=['Jan', 'Jan', 'Mar', 'Mar', 'Jun', 'Jun'],
    state=['a', 'b', 'c', 'd', 'e', 'f'],
)
df

Unnamed: 0,value,month,state
0,0,Jan,a
1,1,Jan,b
2,2,Mar,c
3,3,Mar,d
4,4,Jun,e
5,5,Jun,f


In [47]:
# Build the (month, state) MultiIndex. The rows keep their original order;
# nothing is sorted at this point.
df = df.set_index(keys=['month', 'state'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,value
month,state,Unnamed: 2_level_1
Jan,a,0
Jan,b,1
Mar,c,2
Mar,d,3
Jun,e,4
Jun,f,5


In [48]:
# Display the frame with its index labels sorted. The result is not assigned
# back, so `df` itself is left unchanged by this cell.
df.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,value
month,state,Unnamed: 2_level_1
Jan,a,0
Jan,b,1
Jun,e,4
Jun,f,5
Mar,c,2
Mar,d,3


# Using .loc[] with nonunique indexes

In [49]:
# Load the sales table with the default integer index; 'month' stays a
# regular column for now.
sales_csv = 'data/sales.csv'
df = pd.read_csv(sales_csv)
df.head()

Unnamed: 0,month,eggs,salt,spam
0,Jan,47,12.0,17
1,Feb,110,50.0,31
2,Mar,221,89.0,72
3,Apr,77,87.0,20
4,May,132,,52


In [50]:
# Use the 'month' column as the row index so rows can be looked up by
# month label.
df = df.set_index(keys='month')
df.head()

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52


In [51]:
# Label-based row lookup. In the recorded output 'Mar' matches exactly one
# row, so the result is a Series.
# NOTE(review): with a repeated label .loc would presumably return a
# DataFrame of all matching rows — confirm against the data being used.
df.loc['Mar']

eggs    221.0
salt     89.0
spam     72.0
Name: Mar, dtype: float64

# Indexing multiple levels of a MultiIndex

In [92]:
# Load the sales table, index it by (state, month) and sort the index,
# all in a single chain.
df = (
    pd.read_csv('data/sales.csv')
    .set_index(['state', 'month'])
    .sort_index()
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,Jan,47,12.0,17
CA,May,132,,52
NE,Apr,77,87.0,20
NE,Feb,77,87.0,20
NE,Jun,205,60.0,55
NY,Apr,205,60.0,55
NY,Feb,110,50.0,31
NY,Mar,221,89.0,72


In [93]:
# Partial indexing on the outer (state) level: returns every 'NY' row.
# The matched level is dropped, so the result is indexed by month only.
df.loc['NY']

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Apr,205,60.0,55
Feb,110,50.0,31
Mar,221,89.0,72


In [94]:
# Passing a list of outer-level labels selects all rows for those states
# and keeps both index levels in the result.
# NOTE(review): the recorded output lists NE before NY (index order, not
# the order requested here); other pandas versions may return the rows
# in the requested order instead — confirm.
df.loc[['NY', 'NE']]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NE,Apr,77,87.0,20
NE,Feb,77,87.0,20
NE,Jun,205,60.0,55
NY,Apr,205,60.0,55
NY,Feb,110,50.0,31
NY,Mar,221,89.0,72


In [95]:
# A full (state, month) tuple identifies one row; ':' selects every column.
# The result is a Series named after the key.
df.loc[('NY', 'Apr'), :]

eggs    205.0
salt     60.0
spam     55.0
Name: (NY, Apr), dtype: float64

In [96]:
# A list on the outer level combined with a single inner label:
# the 'Apr' row for each of the listed states.
states = ['NE', 'NY']
df.loc[(states, 'Apr'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NE,Apr,77,87.0,20
NY,Apr,205,60.0,55


In [97]:
# Lists on both levels: every (state, month) combination of the two lists
# that exists in the index.
states = ['NE', 'NY']
months = ['Apr', 'Feb']
df.loc[(states, months), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NE,Apr,77,87.0,20
NE,Feb,77,87.0,20
NY,Apr,205,60.0,55
NY,Feb,110,50.0,31


In [100]:
# Every state, 'Apr' only. pd.IndexSlice lets the "all labels" selector on
# the outer level be written as ':' instead of slice(None).
idx = pd.IndexSlice
df.loc[idx[:, 'Apr'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NE,Apr,77,87.0,20
NY,Apr,205,60.0,55


In [102]:
# Every state, restricted to the listed months. pd.IndexSlice builds the
# same (slice(None), [...]) row key using ':' notation.
idx = pd.IndexSlice
df.loc[idx[:, ['Apr', 'Feb', 'Mar']], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NE,Apr,77,87.0,20
NE,Feb,77,87.0,20
NY,Apr,205,60.0,55
NY,Feb,110,50.0,31
NY,Mar,221,89.0,72
