In [1]:
import pandas as pd

## Explicit Indexes

In [3]:
# Sample data so that every cell below survives Restart Kernel -> Run All.
# An empty DataFrame has none of the columns used later, so the very first
# set_index('name') would raise KeyError. The rows are illustrative
# placeholders that cover the labels referenced in this notebook -- replace
# them with the real dataset if you have it.
dogs = pd.DataFrame(
    {
        'name': ['Bella', 'Charlie', 'Lucy', 'Cooper', 'Max', 'Stella', 'Bernie'],
        'breed': ['Labrador', 'Poodle', 'Chow Chow', 'Schnauzer',
                  'Labrador', 'Chihuahua', 'St. Bernard'],
        'color': ['Brown', 'Black', 'Brown', 'Grey', 'Black', 'Tan', 'White'],
        'height_cm': [56, 43, 46, 49, 59, 18, 77],
        # Real datetimes (not strings) so that partial-date slicing such as
        # .loc['2014':'2016'] selects whole calendar years.
        'date_of_birth': pd.to_datetime(
            ['2013-07-01', '2016-09-16', '2014-08-25', '2011-12-11',
             '2017-01-20', '2015-04-20', '2018-02-27']
        ),
    }
)

In [None]:
# Promote the 'name' column to the row index
dogs_ind = dogs.set_index(keys='name')

# Undo it: the index values go back to being an ordinary column
dogs_ind.reset_index(drop=False)

# Undo it and discard the index values altogether
dogs_ind.reset_index(drop=True)

In [None]:
# An index makes subsetting shorter.
# Without one, build a boolean mask on the column...
dogs.loc[dogs['name'].isin(['Bella', 'Stella'])]

# ...with names as the index, pass the labels straight to .loc
dogs_ind.loc[['Bella', 'Stella'], :]

In [None]:
# An index may hold repeated values
dogs_ind2 = dogs.set_index(keys='breed')

# Selecting a repeated label selects the rows carrying that label
dogs_ind2.loc['Labrador', :]

In [None]:
# Multi-level (hierarchical) index: breed is the outer level, color the inner
dogs_ind3 = dogs.set_index(keys=['breed', 'color'])

# A plain list of labels subsets on the outer level
dogs_ind3.loc[['Labrador', 'Chihuahua'], :]

In [None]:
# Subsetting on inner levels too: pass (breed, color) tuples.
# Both parts of a tuple must match for a row to be returned,
# e.g. a dog that is a Labrador *and* Brown.
dogs_ind3.loc[
    [
        ('Labrador', 'Brown'),
        ('Chihuahua', 'Tan'),
    ]
]

In [None]:
# Controlling sort_index: choose the levels to sort by and a direction for each
# (color ascending, breed descending)
dogs_ind3.sort_index(
    level=['color', 'breed'],
    ascending=[True, False],
)

In [None]:
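# Downsides of indexes
# - Index values are just data, but they are stored differently from column data
# - Indexes violate "tidy data" principles (index values do not get their own column)
# - You need to learn two syntaxes: one for columns and one for indexes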


## Slicing Lists

In [1]:
# Plain Python list used to demonstrate slicing before moving on to DataFrames
breeds = [
    'Labrador',
    'Poodle',
    'Chow Chow',
    'Schnauzer',
    'Labrador',
    'Chihuahua',
    'St. Bernard',
]

In [2]:
# The stop position (5) is excluded, so this returns positions 2, 3 and 4
breeds[2:5]

['Chow Chow', 'Schnauzer', 'Labrador']

In [3]:
# Omitting the start position slices from the beginning of the list
breeds[:3]

['Labrador', 'Poodle', 'Chow Chow']

In [4]:
# Omitting both start and stop returns the whole list
breeds[:]

['Labrador',
 'Poodle',
 'Chow Chow',
 'Schnauzer',
 'Labrador',
 'Chihuahua',
 'St. Bernard']

In [None]:
# DataFrames can be sliced too, but the index has to be sorted first
dogs_srt = (
    dogs
    .set_index(['breed', 'color'])
    .sort_index()
)

In [None]:
# Slicing the outer level of the index.
# Unlike list slicing, the end label ('Poodle') is included.
# NOTE: this syntax does not work on inner index levels, and pandas will not
# raise an error to tell you so.
dogs_srt.loc['Chow Chow':'Poodle', :]

In [None]:
# Slicing on inner index levels: give the start and stop as (breed, color) tuples
dogs_srt.loc[
    ('Labrador', 'Brown'):('Schnauzer', 'Grey')
]

In [None]:
# Slicing columns: the bare ':' keeps every row, the second slice picks the
# columns from 'name' through 'height_cm'
dogs_srt.loc[:, 'name':'height_cm']

In [None]:
# Slicing rows and columns in a single .loc call
dogs_srt.loc[
    ('Labrador', 'Brown'):('Schnauzer', 'Grey'),  # rows
    'name':'height_cm'                            # columns
]

In [None]:
# Slicing by dates.
# The set_index call is guarded so this cell can be re-run: once
# 'date_of_birth' has become the index it is no longer a column, and calling
# set_index a second time would raise KeyError.
if dogs.index.name != "date_of_birth":
    dogs = dogs.set_index("date_of_birth")

# The index has to be sorted before it can be sliced
dogs = dogs.sort_index()

dogs.loc['2014-08-25':'2016-09-16']

In [None]:
# Slicing by partial dates: only the years are given
dogs.loc['2014':'2016', :]

In [None]:
# Subsetting by row/column position -- works like slicing a list.
# First slice selects rows, second slice selects columns.
dogs.iloc[2:5, 1:4]

## Working with Pivot Tables

In [None]:
# Sample data so the pivot-table cells below run on a fresh kernel. An empty
# DataFrame has no 'height_cm', 'breed' or 'color' column, so pivot_table
# would raise KeyError. The rows are illustrative placeholders -- replace them
# with the real dataset if you have it.
dog_pack = pd.DataFrame(
    {
        'breed': ['Labrador', 'Labrador', 'Labrador', 'Poodle', 'Poodle',
                  'Chow Chow', 'Chow Chow', 'Schnauzer', 'Chihuahua', 'St. Bernard'],
        'color': ['Brown', 'Brown', 'Black', 'Black', 'White',
                  'Brown', 'Brown', 'Grey', 'Tan', 'White'],
        'height_cm': [56, 58, 59, 43, 45, 46, 48, 49, 18, 77],
    }
)

In [None]:
# Creating a pivot table of height_cm with one row per breed and one column
# per color (pivot_table aggregates with the mean unless aggfunc is given)
dogs_height_by_bread_vs_color = dog_pack.pivot_table(
    values='height_cm',
    index='breed',
    columns='color',
)

# End the cell on the bare expression so Jupyter renders the table with its
# rich display instead of print()'s plain-text dump
dogs_height_by_bread_vs_color

In [None]:
# .loc combined with slicing is a powerful combination on pivot tables
dogs_height_by_bread_vs_color.loc['Chow Chow':'Poodle', :]

In [None]:
# The axis argument
# axis='index' is the default: the mean is calculated across the rows,
# giving one value per column
dogs_height_by_bread_vs_color.mean(axis='index')

# Calculating across the columns instead, giving one value per row
dogs_height_by_bread_vs_color.mean(axis='columns')