In [1]:
import numpy as np 
import pandas as pd

In [4]:
# Load the monthly sales table and use its 'month' column as the row index.
sales = pd.read_csv(
    filepath_or_buffer="./datasets/sales.csv",
    index_col="month",
)

In [5]:
sales

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


In [6]:
sales.index

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'], dtype='object', name='month')

In [7]:
# Confirm that the row labels are stored in a pandas Index object.
type(sales.index )

pandas.core.indexes.base.Index

In [10]:
# sales.index[0] = 'Dec'  # raises a TypeError: Index objects are immutable, so a single label cannot be reassigned

# Changing the index of a DataFrame

In [12]:
# list comprehension

In [14]:
# Warm-up: square every element of a small list with a comprehension.
lst = [1, 2, 3]
lst1 = [value * value for value in lst]

In [15]:
lst1 

[1, 4, 9]

In [16]:
sales.index

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'], dtype='object', name='month')

In [19]:
# Upper-case every index label with the vectorised string accessor instead of
# a Python-level loop. .tolist() keeps new_idx a plain list of strings, and a
# missing label is passed through rather than raising AttributeError.
new_idx = sales.index.str.upper().tolist()

In [20]:
new_idx

['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN']

In [21]:
# Swap in the whole index at once (single labels cannot be edited in place).
# Assigning a plain list also drops the index name: the frame shown next no
# longer carries the 'month' label.
sales.index = new_idx

In [22]:
sales

Unnamed: 0,eggs,salt,spam
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


# Changing Index Name Labels

In [35]:
# Reload without index_col: 'month' stays an ordinary column and the frame
# gets the default integer RangeIndex.
sales = pd.read_csv("./datasets/sales.csv")
sales.index

RangeIndex(start=0, stop=6, step=1)

In [36]:
# The default RangeIndex carries no name, so this prints None.
print(sales.index.name)

None


In [37]:
# The column axis of a freshly loaded frame has no name either; prints None.
print(sales.columns.name)

None


In [38]:
# Label the row axis 'MONTHS' (same effect as setting sales.index.name)
sales = sales.rename_axis(index='MONTHS')

# Show the frame: the row-axis label is printed on its own line above the index
print(sales)

# Label the column axis 'PRODUCTS' (same effect as setting sales.columns.name)
sales = sales.rename_axis(columns='PRODUCTS')

# Show the frame again: both axis labels now appear in the header
print(sales)

       month  eggs  salt  spam
MONTHS                        
0        Jan    47  12.0    17
1        Feb   110  50.0    31
2        Mar   221  89.0    72
3        Apr    77  87.0    20
4        May   132   NaN    52
5        Jun   205  60.0    55
PRODUCTS month  eggs  salt  spam
MONTHS                          
0          Jan    47  12.0    17
1          Feb   110  50.0    31
2          Mar   221  89.0    72
3          Apr    77  87.0    20
4          May   132   NaN    52
5          Jun   205  60.0    55


# Building an Index, then a dataframe

In [44]:
# Load only the three product columns; 'month' is not read, so the frame
# starts out with the default integer index.
sales = pd.read_csv(
    "./datasets/sales.csv",
    usecols=["eggs", "salt", "spam"],
)

In [45]:
sales

Unnamed: 0,eggs,salt,spam
0,47,12.0,17
1,110,50.0,31
2,221,89.0,72
3,77,87.0,20
4,132,,52
5,205,60.0,55


In [47]:
# Row labels to attach to the sales table, one per row, in calendar order.
months = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
]

In [48]:
# Use the month labels as the row index; the list must have one label per row.
sales.index = months 

In [50]:
print(sales)

     eggs  salt  spam
Jan    47  12.0    17
Feb   110  50.0    31
Mar   221  89.0    72
Apr    77  87.0    20
May   132   NaN    52
Jun   205  60.0    55


In [52]:
# Name the row index so 'MONTHS' is shown as the header of the label column.
sales.index.name = 'MONTHS'

In [53]:
sales

Unnamed: 0_level_0,eggs,salt,spam
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


# Hierarchical Indexes

In [91]:
# Load the per-state table (state, month, eggs, salt, spam). The file name has
# no extension but is parsed as CSV.
# NOTE(review): later cells spell this path "./datasets/sales_new"; both forms
# resolve relative to the working directory.
sales = pd.read_csv("datasets/sales_new")

In [92]:
sales

Unnamed: 0,state,month,eggs,salt,spam
0,CA,1,47,12.0,17
1,CA,2,110,50.0,31
2,NY,1,221,89.0,72
3,NY,2,77,87.0,20
4,TX,1,132,,52
5,TX,2,205,60.0,55


In [93]:
sales.index

RangeIndex(start=0, stop=6, step=1)

# Setting Indexes

In [96]:
# Build a two-level (state, month) row index. The result is rebound instead of
# mutated with inplace=True: same resulting frame, but the call stays chainable
# and no previously displayed object is changed behind the reader's back.
sales = sales.set_index(['state', 'month'])

In [97]:
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [108]:
# A list of outer-level labels returns every row for those states (CA and TX).
sales.loc[['CA', 'TX']]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
TX,1,132,,52
TX,2,205,60.0,55


In [109]:
# Label-slice the outer index level from 'CA' through 'TX'; both endpoints are
# included, so NY rows come back too. .loc makes the row-label lookup explicit,
# whereas bare [] slicing is easy to misread as column selection.
# Label slices on a MultiIndex need the index to be sorted.
sales.loc['CA':'TX']

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


# Setting and Sorting a multiindex

In [111]:
# Reload the raw table so this section starts from an unindexed frame.
sales = pd.read_csv("./datasets/sales_new")

In [112]:
# Index the rows by (state, month). Rebinding the returned frame replaces
# inplace=True: the outcome is the same, without mutating the object in place.
sales = sales.set_index(['state', 'month'])

In [114]:
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [116]:
# sort_index() returns a new frame and leaves `sales` untouched, so keep the
# sorted result instead of discarding it, then display it.
sales = sales.sort_index()
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


# Using .loc[] with nonunique indexes

In [117]:
# Reload the raw table; this section indexes it by state only.
sales = pd.read_csv("./datasets/sales_new")

In [119]:
# Index the rows by state alone. Each state appears twice, so the resulting
# index is non-unique. The frame is rebound rather than mutated with
# inplace=True.
sales = sales.set_index('state')

In [121]:
sales.index

Index(['CA', 'CA', 'NY', 'NY', 'TX', 'TX'], dtype='object', name='state')

In [123]:
# With a repeated label, .loc returns every matching row as a DataFrame
# (both NY rows here), not a single row.
sales.loc['NY']

Unnamed: 0_level_0,month,eggs,salt,spam
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NY,1,221,89.0,72
NY,2,77,87.0,20


Unnamed: 0_level_0,month,eggs,salt,spam
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


# Indexing multiple levels of a MultiIndex

In [125]:
# Reload the raw table so the MultiIndex is rebuilt from scratch below.
sales = pd.read_csv("./datasets/sales_new")

In [126]:
sales

Unnamed: 0,state,month,eggs,salt,spam
0,CA,1,47,12.0,17
1,CA,2,110,50.0,31
2,NY,1,221,89.0,72
3,NY,2,77,87.0,20
4,TX,1,132,,52
5,TX,2,205,60.0,55


In [128]:
# Two-level (state, month) row index for the lookups that follow. Rebinding
# the result avoids inplace=True while producing the same frame.
sales = sales.set_index(['state', 'month'])

In [129]:
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [146]:
# A complete (state, month) tuple pins down one row, returned as a Series.
# Its values come back as float64 because the row mixes integer columns with
# the float 'salt' column.
sales.loc[('NY', 1)]

eggs    221.0
salt     89.0
spam     72.0
Name: (NY, 1), dtype: float64

In [147]:
# NY in month 1: both index levels are fixed, so this selects a single row
NY_month1 = sales.loc[pd.IndexSlice['NY', 1], :]

# CA and TX in month 2: a list on the outer level, a scalar on the inner one
CA_TX_month2 = sales.loc[pd.IndexSlice[['CA', 'TX'], 2], :]

# Every state in month 2: ':' on the outer level stands for slice(None)
all_month2 = sales.loc[pd.IndexSlice[:, 2], :]

In [148]:
all_month2

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,110,50.0,31
NY,2,77,87.0,20
TX,2,205,60.0,55
