# Apply( ), Map( ), & ApplyMap( )

In [1]:
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
titanic = pd.read_csv("/content/drive/MyDrive/DEPI/Data/titanic.csv")
titanic["age"] = titanic["age"].replace(["?"], [None]).astype("float")
titanic["fare"] = titanic["fare"].replace(["?"], [None]).astype("float")

## The Series apply( ) method

In [4]:
def years_to_days(yrs):
    return yrs*365


In [6]:
titanic["age"].apply(years_to_days)

Unnamed: 0,age
0,10585.0000
1,334.5955
2,730.0000
3,10950.0000
4,9125.0000
...,...
1304,5292.5000
1305,
1306,9672.5000
1307,9855.0000


In [7]:
titanic["age"] * 365

Unnamed: 0,age
0,10585.0000
1,334.5955
2,730.0000
3,10950.0000
4,9125.0000
...,...
1304,5292.5000
1305,
1306,9672.5000
1307,9855.0000


In [8]:
def get_age_group(age):
    if age < 2:
        return "infant"
    elif age < 12:
        return "child"
    elif age < 18:
        return "teen"
    elif age < 50:
        return "adult"
    else:
        return "senior"

In [9]:
titanic["age_group"]=titanic["age"].apply(get_age_group)

In [10]:
titanic["age_group"]

Unnamed: 0,age_group
0,adult
1,infant
2,child
3,adult
4,adult
...,...
1304,teen
1305,senior
1306,adult
1307,adult


In [11]:
titanic.age_group.value_counts()

Unnamed: 0_level_0,count
age_group,Unnamed: 1_level_1
adult,782
senior,373
child,69
teen,63
infant,22


In [12]:
titanic.groupby("age_group").survived.mean()

Unnamed: 0_level_0,survived
age_group,Unnamed: 1_level_1
adult,0.386189
child,0.492754
infant,0.772727
senior,0.313673
teen,0.47619


In [13]:
titanic.groupby(["age_group", "sex"]).survived.mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
age_group,sex,Unnamed: 2_level_1
adult,female,0.748201
adult,male,0.186508
child,female,0.515152
child,male,0.472222
infant,female,0.888889
infant,male,0.692308
senior,female,0.698276
senior,male,0.140078
teen,female,0.833333
teen,male,0.151515


In [15]:
titanic["fare"] * 50

Unnamed: 0,fare
0,10566.875
1,7577.500
2,7577.500
3,7577.500
4,7577.500
...,...
1304,722.710
1305,722.710
1306,361.250
1307,361.250


In [16]:
titanic["fare"].apply(lambda x: x*50)

Unnamed: 0,fare
0,10566.875
1,7577.500
2,7577.500
3,7577.500
4,7577.500
...,...
1304,722.710
1305,722.710
1306,361.250
1307,361.250


In [18]:
titanic["fare"].apply(lambda x: f"${x*50}")

Unnamed: 0,fare
0,$10566.875
1,$7577.500000000001
2,$7577.500000000001
3,$7577.500000000001
4,$7577.500000000001
...,...
1304,$722.71
1305,$722.71
1306,$361.25
1307,$361.25


In [19]:
def convert_currency(num, multiplier):
    return f"${num*multiplier}"

In [20]:
titanic["fare"].apply(convert_currency, args=(24,))

Unnamed: 0,fare
0,$5072.1
1,$3637.2000000000003
2,$3637.2000000000003
3,$3637.2000000000003
4,$3637.2000000000003
...,...
1304,$346.9008
1305,$346.9008
1306,$173.39999999999998
1307,$173.39999999999998


## The DataFrame apply( ) method

In [21]:
df = titanic[["pclass", "survived", "age", "fare"]]

In [22]:
df

Unnamed: 0,pclass,survived,age,fare
0,1,1,29.0000,211.3375
1,1,1,0.9167,151.5500
2,1,0,2.0000,151.5500
3,1,0,30.0000,151.5500
4,1,0,25.0000,151.5500
...,...,...,...,...
1304,3,0,14.5000,14.4542
1305,3,0,,14.4542
1306,3,0,26.5000,7.2250
1307,3,0,27.0000,7.2250


In [23]:
def get_range(s):
    return s.max() - s.min()

In [24]:
df.apply(get_range)

Unnamed: 0,0
pclass,2.0
survived,1.0
age,79.8333
fare,512.3292


In [25]:
df.apply(get_range, axis=0)

Unnamed: 0,0
pclass,2.0
survived,1.0
age,79.8333
fare,512.3292


In [27]:
df

Unnamed: 0,pclass,survived,age,fare
0,1,1,29.0000,211.3375
1,1,1,0.9167,151.5500
2,1,0,2.0000,151.5500
3,1,0,30.0000,151.5500
4,1,0,25.0000,151.5500
...,...,...,...,...
1304,3,0,14.5000,14.4542
1305,3,0,,14.4542
1306,3,0,26.5000,7.2250
1307,3,0,27.0000,7.2250


In [26]:
df.apply(get_range, axis=1)

Unnamed: 0,0
0,210.3375
1,150.6333
2,151.5500
3,151.5500
4,151.5500
...,...
1304,14.5000
1305,14.4542
1306,26.5000
1307,27.0000


In [28]:
def get_fam_size(s):
    fam_size = s.sibsp + s.parch
    if fam_size == 0:
        return "solo"
    elif fam_size < 5:
        return "average"
    else:
        return "large"

In [29]:
titanic.apply(get_fam_size, axis=1)

Unnamed: 0,0
0,solo
1,average
2,average
3,average
4,average
...,...
1304,average
1305,average
1306,solo
1307,solo


In [30]:
titanic["fam_size"] = titanic.apply(get_fam_size, axis=1)

In [31]:
titanic["fam_size"].value_counts()

Unnamed: 0_level_0,count
fam_size,Unnamed: 1_level_1
solo,790
average,459
large,60


In [32]:
titanic.groupby("fam_size").survived.mean()

Unnamed: 0_level_0,survived
fam_size,Unnamed: 1_level_1
average,0.54902
large,0.15
solo,0.302532


In [33]:
titanic.groupby(["fam_size", "sex"]).survived.mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
fam_size,sex,Unnamed: 2_level_1
average,female,0.771429
average,male,0.294393
large,female,0.296296
large,male,0.030303
solo,female,0.731959
solo,male,0.162752


## The map( ) method

In [34]:
titanic["pclass"]

Unnamed: 0,pclass
0,1
1,1
2,1
3,1
4,1
...,...
1304,3
1305,3
1306,3
1307,3


In [35]:
titanic["pclass"].map({1:"1st", 2:"2nd", 3:"3rd"})

Unnamed: 0,pclass
0,1st
1,1st
2,1st
3,1st
4,1st
...,...
1304,3rd
1305,3rd
1306,3rd
1307,3rd


In [36]:
titanic["age"].map(lambda a: a < 18)

Unnamed: 0,age
0,False
1,True
2,True
3,False
4,False
...,...
1304,True
1305,False
1306,False
1307,False


## The applymap( ) method

In [37]:
titanic[["name", "sex", "age_group"]].applymap(str.upper)

  titanic[["name", "sex", "age_group"]].applymap(str.upper)


Unnamed: 0,name,sex,age_group
0,"ALLEN, MISS. ELISABETH WALTON",FEMALE,ADULT
1,"ALLISON, MASTER. HUDSON TREVOR",MALE,INFANT
2,"ALLISON, MISS. HELEN LORAINE",FEMALE,CHILD
3,"ALLISON, MR. HUDSON JOSHUA CREIGHTON",MALE,ADULT
4,"ALLISON, MRS. HUDSON J C (BESSIE WALDO DANIELS)",FEMALE,ADULT
...,...,...,...
1304,"ZABOUR, MISS. HILENI",FEMALE,TEEN
1305,"ZABOUR, MISS. THAMINE",FEMALE,SENIOR
1306,"ZAKARIAN, MR. MAPRIEDEDER",MALE,ADULT
1307,"ZAKARIAN, MR. ORTIN",MALE,ADULT


In [38]:
df

Unnamed: 0,pclass,survived,age,fare
0,1,1,29.0000,211.3375
1,1,1,0.9167,151.5500
2,1,0,2.0000,151.5500
3,1,0,30.0000,151.5500
4,1,0,25.0000,151.5500
...,...,...,...,...
1304,3,0,14.5000,14.4542
1305,3,0,,14.4542
1306,3,0,26.5000,7.2250
1307,3,0,27.0000,7.2250


In [39]:
df.applymap(lambda el: el * 7)

  df.applymap(lambda el: el * 7)


Unnamed: 0,pclass,survived,age,fare
0,7,7,203.0000,1479.3625
1,7,7,6.4169,1060.8500
2,7,0,14.0000,1060.8500
3,7,0,210.0000,1060.8500
4,7,0,175.0000,1060.8500
...,...,...,...,...
1304,21,0,101.5000,101.1794
1305,21,0,,101.1794
1306,21,0,185.5000,50.5750
1307,21,0,189.0000,50.5750


In [40]:
df * 7

Unnamed: 0,pclass,survived,age,fare
0,7,7,203.0000,1479.3625
1,7,7,6.4169,1060.8500
2,7,0,14.0000,1060.8500
3,7,0,210.0000,1060.8500
4,7,0,175.0000,1060.8500
...,...,...,...,...
1304,21,0,101.5000,101.1794
1305,21,0,,101.1794
1306,21,0,185.5000,50.5750
1307,21,0,189.0000,50.5750


In [41]:
titanic[["name", "sex", "age_group"]].applymap(len)

  titanic[["name", "sex", "age_group"]].applymap(len)


Unnamed: 0,name,sex,age_group
0,29,6,5
1,30,4,6
2,28,6,5
3,36,4,5
4,47,6,5
...,...,...,...
1304,20,6,4
1305,21,6,6
1306,25,4,5
1307,19,4,5
