In [1]:
# import necessary modules
import pandas as pd, numpy as np,matplotlib.pyplot as plt

In [10]:
# build a small DataFrame of made-up start/end years to experiment with
df = pd.DataFrame(
    {
        'start_year': [2001, 2002, 2005, 2005, 2006],
        'end_year': [2002, 2010, 2008, 2006, 2014],
    }
)
df

Unnamed: 0,start_year,end_year
0,2001,2002
1,2002,2010
2,2005,2008
3,2005,2006
4,2006,2014


# .map() applies a function element-wise on a Series

In [3]:
# create a function
def get_new_year(x):
    """Return ``x + 10`` when ``x`` is greater than 2004, otherwise return ``x`` unchanged."""
    return x + 10 if x > 2004 else x

# then map it to the series: .map() calls get_new_year once per value and returns a new Series
# (the result is only displayed here, it is not stored anywhere)
df['start_year'].map(get_new_year)

0    2001
1    2002
2    2015
3    2015
4    2016
Name: start_year, dtype: int64

In [4]:
# the same mapping as a one-liner: hand .map() an inline lambda instead of a named function... a pattern you see a lot in pandas
df['start_year'].map(lambda year: year + 10 if year > 2004 else year)

0    2001
1    2002
2    2015
3    2015
4    2016
Name: start_year, dtype: int64

A lambda function is a simple, one-off, anonymous function. You can't call it again later because it doesn't have a name. It just lets you repeatedly perform some operation across a series of values (in our case, a column in our dataframe) using a minimal amount of code. Also notice that the if-else statement inside the lambda above is written all on one line, as a conditional expression (`x + 10 if x > 2004 else x`).

In [5]:
# you can easily create a new column to contain the results of the function mapping
# (assigning to df['new_year'] adds the column to df itself; start_year is left unchanged)
df['new_year'] = df['start_year'].map(get_new_year)
# show the first rows to confirm the new column is there
df.head()

Unnamed: 0,start_year,end_year,new_year
0,2001,2002,2001
1,2002,2010,2002
2,2005,2008,2015
3,2005,2006,2015
4,2006,2014,2016


# .apply() is like .map(), but it works on a row or column basis on an entire DataFrame (specify the axis)

In [6]:
# define a function that returns the difference between the max and min values of a vector, then apply it to each column (axis=0 passes one column at a time, ie, it works down the rows)
def get_difference(vector):
    """Return the spread of ``vector``: its largest value minus its smallest value."""
    return vector.max() - vector.min()

# axis=0 passes each column to get_difference in turn, so the result has one value per column
df.apply(get_difference, axis=0)

start_year     5
end_year      12
new_year      15
dtype: int64

In [7]:
# the same per-column calculation, this time written as an inline lambda
df.apply(lambda column: column.max() - column.min(), axis=0)

start_year     5
end_year      12
new_year      15
dtype: int64

In [8]:
# here .apply() finds the difference between the min and max values across each row
# (axis=1 passes one row at a time) and saves the result to a new column.
# Note that every column present in the row takes part, including new_year.
# Any existing 'difference' column is dropped from the input first (errors='ignore' makes
# that a no-op on the first run), so re-running this cell gives the same result instead of
# feeding the previously computed differences back into the max/min calculation.
df['difference'] = df.drop(columns='difference', errors='ignore').apply(get_difference, axis=1)
df

Unnamed: 0,start_year,end_year,new_year,difference
0,2001,2002,2001,1
1,2002,2010,2002,8
2,2005,2008,2015,10
3,2005,2006,2015,10
4,2006,2014,2016,10


# .applymap() works element-wise on an entire DataFrame
This is like doing a .map() to each column in the DataFrame

In [9]:
# halve every value in the dataframe, one element at a time (dividing by the float 2. rather than
# the int 2 so you don't do rounded integer division)
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour of DataFrame.map —
# confirm the pandas version in use before switching
df.applymap(lambda value: value / 2.)

Unnamed: 0,start_year,end_year,new_year,difference
0,1000.5,1001.0,1000.5,0.5
1,1001.0,1005.0,1001.0,4.0
2,1002.5,1004.0,1007.5,5.0
3,1002.5,1003.0,1007.5,5.0
4,1003.0,1007.0,1008.0,5.0
