Apply a function column-wise, row-wise, or element-wise

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [3]:
# Example data: a 4x3 frame of standard-normal draws, one row per state.
# NOTE(review): no random seed is set, so the values change on every run.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,0.059148,0.046613,0.67659
Ohio,-0.489985,-0.42152,-0.946906
Texas,0.743818,1.185287,-1.554709
Oregon,1.938135,1.102398,0.667813


In [4]:
# NumPy element-wise functions such as np.abs can be applied to a pandas DataFrame directly;
# the result is still a DataFrame with the same row and column labels
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.059148,0.046613,0.67659
Ohio,0.489985,0.42152,0.946906
Texas,0.743818,1.185287,1.554709
Oregon,1.938135,1.102398,0.667813


In [5]:
# Apply a function to a column or row
def f(x):
    """Return the spread (maximum minus minimum) of the values in ``x``.

    ``x`` is whatever ``DataFrame.apply`` passes in: one column by default,
    or one row when ``axis=1``.
    """
    return x.max() - x.min()
# Apply f to each column: with the default axis, f receives one column at a time
# and the results are collected into a Series indexed by column label
frame.apply(f)

b    2.428119
d    1.606807
e    2.231299
dtype: float64

In [7]:
# Apply f to each row: axis=1 hands f one row at a time, so the result is
# indexed by the row labels
frame.apply(f, axis=1)

Utah      0.629977
Ohio      0.525386
Texas     2.739996
Oregon    1.270321
dtype: float64

In [11]:
# Apply f(x)=x+5
def func_increase_5(x):
    """Return ``x`` increased by 5.0.

    Works for anything that supports ``+ 5.0``: a scalar, or a pandas
    Series/DataFrame, where the addition is applied to every element.
    """
    increased = x + 5.0
    return increased
# Each column is passed to func_increase_5 as a Series; the shifted columns
# are reassembled into a DataFrame with the original labels
frame.apply(func_increase_5)

Unnamed: 0,b,d,e
Utah,5.059148,5.046613,5.67659
Ohio,4.510015,4.57848,4.053094
Texas,5.743818,6.185287,3.445291
Oregon,6.938135,6.102398,5.667813


In [14]:
# Apply f(x)=x+5 to every entry, reusing func_increase_5 defined in the earlier cell
# (redefining it here would only shadow the identical earlier definition).
# Note that although the results are the same, 'apply' calls the function once per
# column (passing a Series), whereas 'applymap' calls it once per entry.
frame.applymap(func_increase_5)

Unnamed: 0,b,d,e
Utah,5.059148,5.046613,5.67659
Ohio,4.510015,4.57848,4.053094
Texas,5.743818,6.185287,3.445291
Oregon,6.938135,6.102398,5.667813


In [9]:
# Apply a function that returns multiple values
def func(x):
    """Summarise ``x`` as a two-element Series labelled 'min' and 'max'.

    ``x`` only needs ``.min()`` and ``.max()`` methods, so a numpy array or a
    pandas Series works; with ``DataFrame.apply`` it is one column (or row).
    """
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])

# func is called once per column; each returned Series becomes a column of the
# result, whose rows are labelled 'min' and 'max'
frame.apply(func)

Unnamed: 0,b,d,e
min,-0.489985,-0.42152,-1.554709
max,1.938135,1.185287,0.67659


In [16]:
# Format a dataframe
def fmt(x):
    """Format a single number as a string with two decimal places.

    Uses printf-style formatting, so it is meant to be called element-wise
    (e.g. via ``DataFrame.applymap``, ``Series.apply`` or ``Series.map``).
    """
    return '%.2f' % x
# We cannot use 'apply' here: it would pass a whole column (a Series) to fmt,
# but '%.2f' % x needs a single number, so the element-wise 'applymap' is used
frame.applymap(fmt)

Unnamed: 0,b,d,e
Utah,0.06,0.05,0.68
Ohio,-0.49,-0.42,-0.95
Texas,0.74,1.19,-1.55
Oregon,1.94,1.1,0.67


In [17]:
# For a Series, apply() calls fmt on each element; the resulting strings are
# stored with dtype object
frame['b'].apply(fmt)

Utah       0.06
Ohio      -0.49
Texas      0.74
Oregon     1.94
Name: b, dtype: object

In [19]:
# Use map() instead: for a Series and an element-wise function such as fmt,
# map() and apply() give the same result.
frame['b'].map(fmt)

Utah       0.06
Ohio      -0.49
Texas      0.74
Oregon     1.94
Name: b, dtype: object

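map() is a method of Series, applymap() is a method of DataFrame, and apply() is defined in both DataFrame and Series. (Note: since pandas 2.1, DataFrame.applymap() is deprecated in favor of the equivalent element-wise DataFrame.map().)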