In [None]:
import pandas as pd
import numpy as np

## Applying functions to a dataframe or series

#### The map method

In [None]:
# load the titanic dataset from a CSV file (path is relative to this notebook) into a DataFrame
titanic_df = pd.read_csv('../Data/titanic.csv')

In [None]:
# encode sex numerically: 'female' -> 1, 'male' -> 0
sex_dic = {'female': 1, 'male': 0}

# Series.map looks up every value of the column in the dictionary
titanic_df['Sex'].map(sex_dic)

In [None]:
# function to map sex column
def map_func(sex):
    """Return 1 when ``sex`` equals 'female', and 0 for any other value."""
    return 1 if sex == 'female' else 0

In [None]:
# map the sex column using the function: map calls map_func once per value
titanic_df['Sex'].map(map_func)

#### The apply method (as a Series method)

In [None]:
# Example 1: find the length of the strings in the name column (titanic)
# apply calls len on each value of the column
titanic_df['Name'].apply(len) # note: map gives the same result as apply in this case

In [None]:
# store the name lengths as a new column
titanic_df['name_length'] = titanic_df['Name'].apply(len)
# show the first rows of the original column next to the derived one
titanic_df[['Name', 'name_length']].head()

In [None]:
# Example 2: extract the last name of each person
titanic_df['Name'][0].split(',')[0]  # same operation on a single name (not displayed: only a cell's last expression is shown)
titanic_df['Name'].apply(lambda full_name: full_name.split(',')[0])
# The lambda above is a small anonymous function (it is not bound to a name).
# It receives each value of the name column as full_name, splits it on commas,
# and returns the first element of the resulting list, which is the last name.

In [None]:
titanic_df['last_name'] = titanic_df['Name'].apply(get_last_name)
# can also use a regular function instead of a lambda function

In [None]:
def get_last_name(name):
    return name.split(',')[0]

#### The apply method (as a DataFrame method)

In [46]:
# load alcohol consumption dataset, index it by country, and discard the continent column
alcohol_df = (
    pd.read_csv('../Data/drinks.csv')
    .set_index('country')
    .drop(columns=['continent'])
)
alcohol_df

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,0,0,0,0.0
Albania,89,132,54,4.9
Algeria,25,0,14,0.7
Andorra,245,138,312,12.4
Angola,217,57,45,5.9
...,...,...,...,...
Venezuela,333,100,3,7.7
Vietnam,111,2,1,2.0
Yemen,6,0,0,0.1
Zambia,32,19,4,2.5


In [None]:
# Example 1: find the maximum value of each column (using numpy)
alcohol_df.apply(np.max, axis=0) # axis=0: the function is given each column in turn (it aggregates down the rows)
# returns the maximum value of each column

In [None]:
alcohol_df.max() # built-in pandas method that finds the maximum value of each column

In [None]:
# Example 2: find the maximum value of each row (using numpy)
alcohol_df.apply(np.max, axis=1) # axis=1: the function is given each row in turn (it aggregates across the columns)
# returns the maximum value of each row

In [None]:
alcohol_df.max(axis=1) # built-in pandas method that finds the maximum value of each row

In [40]:
alcohol_df.max(axis=1)['USA'] # largest value in the USA row, but this does not say which column (drink) it comes from

249.0

In [47]:
# np.argmax gives the *position* of each row's maximum; translate positions back to
# column names using the frame's own column order rather than a hand-typed dictionary,
# so the lookup stays correct if columns are added, dropped, or reordered
position_to_column = dict(enumerate(alcohol_df.columns))
alcohol_df.apply(np.argmax, axis=1).map(position_to_column)
# returns the column name of the maximum value of each row

country
Afghanistan      beer_servings
Albania        spirit_servings
Algeria          beer_servings
Andorra          wine_servings
Angola           beer_servings
                    ...       
Venezuela        beer_servings
Vietnam          beer_servings
Yemen            beer_servings
Zambia           beer_servings
Zimbabwe         beer_servings
Length: 193, dtype: object

In [41]:
alcohol_df.loc['USA'].idxmax() # built-in pandas method: label (here, the column name) of the maximum value in the USA row

'beer_servings'

In [50]:
# idxmax gives the column name of each row's maximum value; because country is the index,
# the result can then be narrowed to specific countries by label
alcohol_df.apply(pd.Series.idxmax, axis=1).loc[['USA', 'Spain']]

country
USA      beer_servings
Spain    beer_servings
dtype: object

#### The applymap method - applies a function to EVERY element of the DF

In [51]:
# transform everything to strings: applymap calls str on each individual element
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of DataFrame.map — confirm against the pandas version in use
alcohol_silly = alcohol_df.applymap(str)

In [58]:
# inspect the top-left element of the string-converted frame
# NOTE(review): after applymap(str) this is expected to be the string '0' rather than the number 0 — re-run to confirm
alcohol_silly.iloc[0, 0]

0