# Groupby Operations and Multi-level Index

In [1]:
import numpy as np
import pandas as pd

## Data

In [2]:
# Load the dataset from 'mpg.csv' (path is relative to the working directory).
df = pd.read_csv('mpg.csv')

In [None]:
# Display the DataFrame to inspect its columns and rows.
df

## groupby() method

In [None]:
# groupby() on its own only builds a GroupBy object keyed on 'model_year';
# it is waiting for an aggregate method to be called on it.
df.groupby('model_year')

#### Adding an aggregate method call. To use a grouped object, you need to tell pandas how you want to aggregate the data.

In [None]:
# model_year becomes the index! It is NOT a column; it is now the name of the index.
# numeric_only=True restricts the mean to numeric columns, so text columns in
# the data (TODO confirm mpg.csv has some, e.g. a car name) do not raise a
# TypeError on pandas >= 2.0.
df.groupby('model_year').mean(numeric_only=True)

In [6]:
# Keep the per-year averages for reuse in later cells.
# numeric_only=True skips non-numeric columns instead of raising a TypeError
# on pandas >= 2.0.
avg_year = df.groupby('model_year').mean(numeric_only=True)

In [None]:
# The index holds the group keys: one entry per model_year.
avg_year.index

In [None]:
# The aggregated columns; 'model_year' is not among them because it became the index.
avg_year.columns

In [None]:
# Select the average mpg per model year.
avg_year['mpg']

In [None]:
# Select the column before aggregating: only 'mpg' is averaged, instead of
# computing the mean of every column and then discarding all but one.
df.groupby('model_year')['mpg'].mean()

In [None]:
# Summary statistics for each column within each model_year group.
df.groupby('model_year').describe()

In [None]:
# Transpose so the model years become the columns and the
# (column, statistic) pairs become the rows.
df.groupby('model_year').describe().transpose()

## Groupby Multiple Columns
Let's explore average mpg per year per cylinder count

In [None]:
# A list of keys groups by every (model_year, cylinders) combination.
# numeric_only=True skips non-numeric columns instead of raising a TypeError
# on pandas >= 2.0.
df.groupby(['model_year','cylinders']).mean(numeric_only=True)

In [None]:
# Grouping by two keys produces a two-level index on the result.
# numeric_only=True skips non-numeric columns instead of raising a TypeError
# on pandas >= 2.0.
df.groupby(['model_year','cylinders']).mean(numeric_only=True).index

# MultiIndex

## The MultiIndex Object

In [15]:
# Averages per (model_year, cylinders) pair, kept for the indexing examples in
# later cells. numeric_only=True skips non-numeric columns instead of raising a
# TypeError on pandas >= 2.0.
year_cyl = df.groupby(['model_year','cylinders']).mean(numeric_only=True)

In [None]:
# Display the grouped averages with their two-level index.
year_cyl

In [None]:
# The index built from the (model_year, cylinders) group keys.
year_cyl.index

In [None]:
# The distinct labels stored for each level of the index.
year_cyl.index.levels

In [None]:
# The level names, taken from the groupby keys.
year_cyl.index.names

# Indexing with the Hierarchical Index


In [None]:
# First few rows, as a reminder of the two-level index layout.
year_cyl.head()

## Grab Based on Outside Index

In [None]:
# A single label selects on the first (outer) index level: all rows for model_year 70.
year_cyl.loc[70]

In [None]:
# A list of labels selects several outer-level values at once (model years 70 and 72).
year_cyl.loc[[70,72]]

## Grab a Single Row

In [None]:
# A tuple supplies one label per level: model_year 70, cylinders 8.
year_cyl.loc[(70,8)]

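### Careful note!

If you only need some values of an inner level (for example, certain cylinder counts), it is often simpler to filter the DataFrame first and then group, rather than indexing into the MultiIndex afterwards: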


In [None]:
# Filter the rows first (keep only 6- and 8-cylinder cars), then group.
# numeric_only=True skips non-numeric columns instead of raising a TypeError
# on pandas >= 2.0.
df[df['cylinders'].isin([6,8])].groupby(['model_year','cylinders']).mean(numeric_only=True)

## Sorting MultiIndex

In [None]:
# Sort on the 'model_year' level in descending order.
year_cyl.sort_index(level='model_year',ascending=False)

In [None]:
# Sort on the 'cylinders' level in descending order.
year_cyl.sort_index(level='cylinders',ascending=False)

# Advanced: agg() method

The agg() method lets you choose which aggregate functions to apply, optionally with a different set of functions for each column.

In [None]:
# Show the original (ungrouped) DataFrame again before aggregating it directly.
df

## agg() on a DataFrame

In [None]:
# These strings need to match up with built-in method names.
# Restrict to numeric columns first: 'median' and 'mean' are not defined for
# text columns and raise a TypeError on pandas >= 2.0
# (TODO confirm mpg.csv contains non-numeric columns).
df.select_dtypes(include='number').agg(['median','mean'])

In [None]:
# Select the two columns of interest before aggregating, so 'sum' and 'mean'
# are only computed for 'mpg' and 'weight' rather than for every column.
df[['mpg','weight']].agg(['sum','mean'])

### Specify aggregate methods per column


In [None]:
# A dict maps each column name to its own list of aggregate functions.
# Column/function pairs that were not requested appear as NaN in the result.
df.agg({'mpg':['median','mean'],'weight':['mean','std']})

## agg() with groupby()

In [None]:
# The same per-column dict, applied within each model_year group.
# The result's columns are two-level: (column name, aggregate function).
df.groupby('model_year').agg({'mpg':['median','mean'],'weight':['mean','std']})