In [1]:
import pandas as pd
import numpy as np

### to figure out the differences between apply, map, and applymap

In [3]:
# Download the Titanic training set and preview its first five rows.
train = pd.read_csv(filepath_or_buffer='http://bit.ly/kaggletrain')
train.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## map method
- it is a Series method.
- it allows you to map the existing values of a Series to a different set of values.

In [6]:
# Translate each Sex value through a dict lookup: 'male' -> 1, 'female' -> 0.
train['Sex_num'] = train['Sex'].map({'male': 1, 'female': 0})
# Show the original and encoded columns side by side for row labels 0 through 6.
train.loc[0:6, ['Sex', 'Sex_num']]

Unnamed: 0,Sex,Sex_num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1
5,male,1
6,male,1


## apply method
- is both a series and dataframe method
- it applies a function to each element in a series.

### apply to series

In [11]:
# Pass the function object itself (len, not len()); apply calls it once per value.
train['Name_length'] = train['Name'].apply(len)
train[['Name', 'Name_length']].head(10)

Unnamed: 0,Name,Name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24
5,"Moran, Mr. James",16
6,"McCarthy, Mr. Timothy J",23
7,"Palsson, Master. Gosta Leonard",30
8,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",49
9,"Nasser, Mrs. Nicholas (Adele Achem)",35


It's relatively common to use apply with NumPy functions.

In [14]:
# Round every fare up to the next whole number by applying NumPy's ceil.
train['Fare_ceil'] = train['Fare'].apply(np.ceil)
train[['Fare', 'Fare_ceil']].head(7)

Unnamed: 0,Fare,Fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0
4,8.05,9.0
5,8.4583,9.0
6,51.8625,52.0


##### Q) Extract the last name of each person into its own column, i.e., the part of the name before the comma

In [15]:
# Preview the raw Name strings before splitting them.
train['Name'].head(n=5)

0                              Braund, Mr. Owen Harris
1    Cumings, Mrs. John Bradley (Florence Briggs Th...
2                               Heikkinen, Miss. Laina
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                             Allen, Mr. William Henry
Name: Name, dtype: object

In [17]:
# Split each name on the comma; every value becomes a list of string pieces.
train['Name'].str.split(',').head(n=5)

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [22]:
# Inspect the split result stored under index label 1.
train['Name'].str.split(',')[1]

['Cumings', ' Mrs. John Bradley (Florence Briggs Thayer)']

In [21]:
def get_element(my_list, position):
    """Return the item of ``my_list`` located at index ``position``.

    Parameters
    ----------
    my_list : any object supporting ``[]`` indexing (e.g. a list or string)
    position : index or key forwarded unchanged to ``my_list[...]``

    Any error raised by the indexing operation itself propagates to the caller.
    """
    item = my_list[position]
    return item

In [23]:
# Extra keyword arguments given to apply are forwarded to get_element on each call.
train['Name'].str.split(',').apply(get_element, position=0).head(n=5)

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

You can also do this with a lambda function, so there is no need to define a separate function.

In [24]:
# Same extraction with an inline lambda: keep the first piece of every split name.
train['Name'].str.split(',').apply(lambda parts: parts[0]).head(n=5)

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

### apply to dataframe

- it applies a function along either axis of a dataframe (to each column or to each row)

In [25]:
# Download the drinks-by-country data set and preview its first five rows.
drinks = pd.read_csv(filepath_or_buffer='http://bit.ly/drinksbycountry')
drinks.head(n=5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [27]:
# Select every column from 'beer_servings' through 'wine_servings';
# a label slice passed to .loc includes both endpoints.
df = drinks.loc[:, slice('beer_servings', 'wine_servings')]
df.head(n=5)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0,0,0
1,89,132,54
2,25,0,14
3,245,138,312
4,217,57,45


In [28]:
# Largest value in each column: apply hands every column to max in turn (axis=0 is the default).
df.apply(max, axis=0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [31]:
# Largest value in each row: axis='columns' (the same as axis=1) hands every row to max.
df.apply(max, axis='columns').head(n=11)

0       0
1     132
2      25
3     312
4     217
5     128
6     221
7     179
8     261
9     279
10     46
dtype: int64

In [33]:
# Find, for every row, the label of the column that holds the largest value.
# DataFrame.idxmax returns the column labels directly. np.argmax passed to
# apply yields an integer position instead of a label on current pandas
# releases, so it no longer produces the label output this cell is meant to show.
df.idxmax(axis=1).head(11)

0       beer_servings
1     spirit_servings
2       beer_servings
3       wine_servings
4       beer_servings
5     spirit_servings
6       wine_servings
7     spirit_servings
8       beer_servings
9       beer_servings
10    spirit_servings
dtype: object

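## applymap
- is a dataframe method.
- applies a function to every element of a dataframe.
- note: pandas 2.1 deprecated `DataFrame.applymap` in favor of `DataFrame.map`, which does the same thing.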

In [34]:
# Preview the first ten rows of the servings-only frame.
df.head(n=10)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0,0,0
1,89,132,54
2,25,0,14
3,245,138,312
4,217,57,45
5,102,128,45
6,193,25,221
7,21,179,11
8,261,72,212
9,279,75,191


In [36]:
# Example: convert every single element of df to float.
# This returns a new frame; df itself is left untouched.
df.applymap(func=float).head(n=5)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0


In [37]:
# Convert the three servings columns of `drinks` to float and store them back.
# Assigning through drinks[...] replaces the columns, so the float dtype is kept.
# A full-slice `.loc[:, ...] = ...` assignment writes into the existing integer
# columns on pandas >= 2.0, which would leave the values as int64.
serving_cols = drinks.loc[:, 'beer_servings':'wine_servings'].columns
drinks[serving_cols] = drinks[serving_cols].applymap(float)
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,Albania,89.0,132.0,54.0,4.9,Europe
2,Algeria,25.0,0.0,14.0,0.7,Africa
3,Andorra,245.0,138.0,312.0,12.4,Europe
4,Angola,217.0,57.0,45.0,5.9,Africa
