# Pandas Idioms

### Vectorization & Method chaining (Pandorable)

In [None]:
import pandas as pd
# Load 'census.csv' (path relative to the working directory) into the
# DataFrame `df` that the following cells operate on.
df = pd.read_csv('census.csv')

In [None]:
# Preview the first few rows to check the columns that were loaded.
df.head()

#### Method Chaining

In [None]:
# Method-chaining version: every step returns a new DataFrame, so the whole
# transformation reads top to bottom and `df` itself is left untouched.
#   where()     keeps the frame's shape and fills rows whose SUMLEV is not 50
#               with NaN
#   dropna()    removes those masked rows (and any other row containing NaN)
#   set_index() indexes the remaining rows by (STNAME, CTYNAME)
#   rename()    relabels the column. The key must match the existing column
#               name exactly ('ESTIMATESBASE2010'), because rename() silently
#               ignores labels it cannot find.
(df.where(df['SUMLEV'] == 50)
   .dropna()
   .set_index(['STNAME', 'CTYNAME'])
   .rename(columns={'ESTIMATESBASE2010': 'ESTIMATES BASE 2010'}))

#### Traditional Way (This is faster than the method chaining above)

In [None]:
# Step-by-step version of the same transformation.
# Boolean-mask selection keeps only the rows whose SUMLEV equals 50.
df2 = df.loc[df['SUMLEV'] == 50]
# inplace=True modifies df2 directly instead of returning a new frame.
df2.set_index(keys=['STNAME', 'CTYNAME'], inplace=True)
# rename() returns a new frame; the result is only displayed here, so df2
# keeps its original column name.
df2.rename(columns={'ESTIMATESBASE2010': 'ESTIMATES BASE 2010'})

#### Apply

In [None]:
import numpy as np

def min_max(row):
    """Return the minimum and maximum of a row's POPESTIMATE2010-2015 values.

    Meant to be passed to ``DataFrame.apply(..., axis=1)``.

    Args:
        row: A Series containing the labels 'POPESTIMATE2010' through
            'POPESTIMATE2015'.

    Returns:
        A new two-element Series labelled 'min' and 'max'; ``row`` itself is
        not modified.
    """
    estimate_columns = [
        'POPESTIMATE2010',
        'POPESTIMATE2011',
        'POPESTIMATE2012',
        'POPESTIMATE2013',
        'POPESTIMATE2014',
        'POPESTIMATE2015',
    ]
    estimates = row[estimate_columns]
    lowest = np.min(estimates)
    highest = np.max(estimates)
    # The keys of this Series become the column names of apply()'s result.
    return pd.Series({'min': lowest, 'max': highest})

# DataFrame.apply(func, axis) calls `func` on one slice of the frame at a time.
# axis selects which slices: axis=1 passes each row to `func` as a Series.
# Because min_max returns a Series, the per-row results are combined into a
# new DataFrame with 'min' and 'max' columns.
df2.apply(min_max, axis=1)

#### Function (passed to the apply method) that adds new columns to each row

In [None]:
def min_max(row):
    """Add 'max' and 'min' entries to a row from its POPESTIMATE2010-2015 values.

    Meant to be passed to ``DataFrame.apply(..., axis=1)``.

    Args:
        row: A Series containing the labels 'POPESTIMATE2010' through
            'POPESTIMATE2015'. It is modified in place.

    Returns:
        The same ``row`` object, with 'max' and 'min' set (in that order).
    """
    estimate_columns = [
        'POPESTIMATE2010',
        'POPESTIMATE2011',
        'POPESTIMATE2012',
        'POPESTIMATE2013',
        'POPESTIMATE2014',
        'POPESTIMATE2015',
    ]
    estimates = row[estimate_columns]
    highest = np.max(estimates)
    lowest = np.min(estimates)
    # Assign 'max' before 'min' so the new labels are appended in that order.
    row['max'] = highest
    row['min'] = lowest
    return row

# axis=1 passes each row to min_max, which returns the row with 'max' and
# 'min' added. The result is only displayed here; it is not assigned back
# to df2.
df2.apply(min_max, axis=1)

#### Using lambda 