# How do I apply a function to a pandas Series or DataFrame?

In [1]:
import pandas as pd

In [2]:
# Load the Kaggle Titanic training set from its short URL into a DataFrame.
train = pd.read_csv("http://bit.ly/kaggletrain")

In [3]:
# Preview the first five rows to see which columns are available.
train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [11]:
# Series.apply(func) calls func once per element and returns a new Series of the results.
# Here the built-in len() measures each passenger name, giving its length in characters.
train['Name_length'] = train['Name'].apply(len)

In [18]:
# Put the derived column next to its source column to check it.
# .loc slices by label and includes the end label, so :4 returns the rows labelled 0 through 4.
train.loc[:4, ['Name_length', 'Name']]

Unnamed: 0,Name_length,Name
0,23,"Braund, Mr. Owen Harris"
1,51,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,22,"Heikkinen, Miss. Laina"
3,44,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,24,"Allen, Mr. William Henry"


In [13]:
import numpy as np

In [16]:
# Series.apply(func) also accepts NumPy functions.
# np.ceil rounds each fare up to the nearest whole number; the result is still a float.
train['Fare_ceil'] = train['Fare'].apply(np.ceil)

In [17]:
# Put the rounded column next to the original fares (rows labelled 0 through 4).
# NOTE(review): Fare should still hold the unrounded values here; if both columns look the same,
# Fare was presumably overwritten in an earlier run -- restart the kernel and run all cells.
train.loc[:4, ['Fare_ceil', 'Fare']]

Unnamed: 0,Fare_ceil,Fare
0,8.0,8.0
1,72.0,72.0
2,8.0,8.0
3,54.0,54.0
4,9.0,9.0


In [20]:
# Series.apply also takes a lambda: .str.split(',') turns every name into a list of strings,
# then the lambda keeps the first item of each list (the text before the first comma).
(
    train['Name']
    .str.split(',')
    .apply(lambda parts: parts[0])
    .head()
)

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [22]:
# Load the drinks-by-country dataset and preview its first five rows.
drinks = pd.read_csv("http://bit.ly/drinksbycountry")
drinks.head(n=5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [27]:
# DataFrame.apply(func) calls func once per column (axis=0, the default) or once per row (axis=1).
# With axis=0 the built-in max() receives each servings column and returns that column's largest value.
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis=0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [32]:
# DataFrame.applymap(func) calls func on every individual element of a DataFrame.
# (pandas 2.1 renamed this method to DataFrame.map; applymap remains as a deprecated alias.)
serving_cols = drinks.loc[:, 'beer_servings':'wine_servings'].columns
# Assign with drinks[cols] rather than drinks.loc[:, cols]: since pandas 2.0 the .loc form writes
# into the existing integer columns in place, so they would keep their int64 dtype.
drinks[serving_cols] = drinks[serving_cols].applymap(float)  # convert every value to float

In [33]:
# Preview the frame again: the three servings columns now display as floats.
drinks.head(n=5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,Albania,89.0,132.0,54.0,4.9,Europe
2,Algeria,25.0,0.0,14.0,0.7,Africa
3,Andorra,245.0,138.0,312.0,12.4,Europe
4,Angola,217.0,57.0,45.0,5.9,Africa
