## Day 30: How do I apply a function to a pandas Series or DataFrame?
https://courses.dataschool.io/view/courses/pandas-in-30-days/2341096-course-videos/7587910-day-30-how-do-i-apply-a-function-to-a-pandas-series-or-dataframe

In [1]:
import pandas as pd

In [2]:
# Load the Titanic training set from the local data directory.
train = pd.read_csv(filepath_or_buffer='./data/titanic_train.csv')

In [3]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
# Encode Sex as a 0/1 dummy variable: female -> 0, male -> 1.
train['Sex_num'] = train['Sex'].map(dict(female=0, male=1))

In [5]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_num
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,1


In [6]:
train.loc[0:10, ['Sex', 'Sex_num']]

Unnamed: 0,Sex,Sex_num
0,male,1
1,female,0
2,female,0
3,female,0
4,male,1
5,male,1
6,male,1
7,male,1
8,female,0
9,female,0


In [7]:
# Apply the built-in len to every name to get its character count.
train['Name_length'] = train['Name'].apply(len)

In [8]:
train.loc[0:10, ['Name', 'Name_length']]

Unnamed: 0,Name,Name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44
4,"Allen, Mr. William Henry",24
5,"Moran, Mr. James",16
6,"McCarthy, Mr. Timothy J",23
7,"Palsson, Master. Gosta Leonard",30
8,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",49
9,"Nasser, Mrs. Nicholas (Adele Achem)",35


In [11]:
import numpy as np

In [12]:
# Round each fare up to the next whole number with NumPy's ceil.
train['Fare_ceil'] = train['Fare'].apply(np.ceil)

In [13]:
train.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_num,Name_length,Fare_ceil
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1,23,8.0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,51,72.0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,22,8.0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,44,54.0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,1,24,9.0
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,1,16,9.0
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S,1,23,52.0
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S,1,30,22.0
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S,0,49,12.0
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C,0,35,31.0


In [16]:
# Split each name at the comma; the last name is the first item of each resulting list.
train['Name'].str.split(pat=',').head()

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [17]:
def last_name(s):
    """Return the portion of ``s`` that comes before its first comma.

    When ``s`` contains no comma, the whole string is returned.
    """
    surname, _, _ = s.partition(',')
    return surname

In [18]:
train.Name.apply(last_name)

0         Braund
1        Cumings
2      Heikkinen
3       Futrelle
4          Allen
         ...    
886     Montvila
887       Graham
888     Johnston
889         Behr
890       Dooley
Name: Name, Length: 891, dtype: object

In [19]:
# Store the last name produced by the last_name helper as a new column.
train['Last_name'] = train['Name'].apply(last_name)

In [20]:
train.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_num,Name_length,Fare_ceil,Last_name
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1,23,8.0,Braund
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,51,72.0,Cumings
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,22,8.0,Heikkinen
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,44,54.0,Futrelle
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,1,24,9.0,Allen
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,1,16,9.0,Moran
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S,1,23,52.0,McCarthy
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S,1,30,22.0,Palsson
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S,0,49,12.0,Johnson
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C,0,35,31.0,Nasser


In [21]:
# Same last-name extraction, written inline as an anonymous function.
train['Lambda_name'] = train['Name'].apply(lambda full_name: full_name.split(',')[0])

In [22]:
train.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_num,Name_length,Fare_ceil,Last_name,Lambda_name
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,1,23,8.0,Braund,Braund
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,51,72.0,Cumings,Cumings
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,22,8.0,Heikkinen,Heikkinen
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,44,54.0,Futrelle,Futrelle
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,1,24,9.0,Allen,Allen
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q,1,16,9.0,Moran,Moran
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S,1,23,52.0,McCarthy,McCarthy
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S,1,30,22.0,Palsson,Palsson
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S,0,49,12.0,Johnson,Johnson
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C,0,35,31.0,Nasser,Nasser


In [23]:
def get_element(my_list, position):
    """Return the item found at index ``position`` of ``my_list``."""
    item = my_list[position]
    return item

In [24]:
# Extra keyword arguments given to apply are forwarded to the function on every call.
train['Name'].str.split(',').apply(get_element, position=0)

0         Braund
1        Cumings
2      Heikkinen
3       Futrelle
4          Allen
         ...    
886     Montvila
887       Graham
888     Johnston
889         Behr
890       Dooley
Name: Name, Length: 891, dtype: object

In [26]:
# Take the first piece of each split name with an inline lambda.
train['Name'].str.split(',').apply(lambda parts: parts[0]).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

In [27]:
# Load the per-country drinks data from the local data directory.
drinks = pd.read_csv(filepath_or_buffer='./data/drinks.csv')

In [28]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [29]:
# Apply the built-in max down each column (one result per servings column).
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis='index')

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [30]:
# Apply the built-in max across each row (one result per country).
drinks.loc[:, 'beer_servings':'wine_servings'].apply(max, axis='columns')

0        0
1      132
2       25
3      312
4      217
      ... 
188    333
189    111
190      6
191     32
192     64
Length: 193, dtype: int64

In [35]:
# For each row, the name of the column holding that row's largest value.
drinks.loc[:, 'beer_servings':'wine_servings'].idxmax(axis='columns').head()

0      beer_servings
1    spirit_servings
2      beer_servings
3      wine_servings
4      beer_servings
dtype: object

In [36]:
# For each column, the row label at which that column reaches its maximum.
drinks.loc[:, 'beer_servings':'wine_servings'].idxmax(axis='index').head()

beer_servings      117
spirit_servings     68
wine_servings       61
dtype: int64

In [37]:
# Element-wise conversion with the legacy spelling. DataFrame.applymap is
# deprecated since pandas 2.1 in favour of DataFrame.map, which is why running
# this cell emits a warning.
drinks.loc[:, 'beer_servings': 'wine_servings'].applymap(float)

  drinks.loc[:, 'beer_servings': 'wine_servings'].applymap(float)


Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
...,...,...,...
188,333.0,100.0,3.0
189,111.0,2.0,1.0
190,6.0,0.0,0.0
191,32.0,19.0,4.0


In [38]:
# Call float on every element of the selected columns with DataFrame.map.
drinks.loc[:, 'beer_servings':'wine_servings'].map(float)

Unnamed: 0,beer_servings,spirit_servings,wine_servings
0,0.0,0.0,0.0
1,89.0,132.0,54.0
2,25.0,0.0,14.0
3,245.0,138.0,312.0
4,217.0,57.0,45.0
...,...,...,...
188,333.0,100.0,3.0
189,111.0,2.0,1.0
190,6.0,0.0,0.0
191,32.0,19.0,4.0


In [43]:
# Try to overwrite the three servings columns with their float versions.
# NOTE: the `drinks.dtypes` output further down still reports int64. In pandas 2.x,
# `.loc[:, cols] = values` writes into the existing columns in place and keeps
# their dtype when the new values fit, so this statement does not convert them.
# Changing the dtype requires replacing the columns (`drinks[cols] = ...`) or `astype`.
drinks.loc[:, 'beer_servings': 'wine_servings'] = drinks.loc[:, 'beer_servings': 'wine_servings'].map(float)

In [44]:
drinks.head(10)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa
5,Antigua & Barbuda,102,128,45,4.9,North America
6,Argentina,193,25,221,8.3,South America
7,Armenia,21,179,11,3.8,Europe
8,Australia,261,72,212,10.4,Oceania
9,Austria,279,75,191,9.7,Europe


In [45]:
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [42]:
# Confirm that the frame returned by map(float) really has float64 columns.
drinks.loc[:, 'beer_servings':'wine_servings'].map(float).dtypes

beer_servings      float64
spirit_servings    float64
wine_servings      float64
dtype: object

In [46]:
# Persist the float conversion of the servings columns.
# Assigning through `.loc[:, cols] = ...` writes into the existing int64 columns
# in place (pandas 2.x), which is why the dtypes reported earlier stayed int64.
# Assigning by column label replaces the columns instead, so float64 is kept.
serving_cols = drinks.loc[:, 'beer_servings':'wine_servings'].columns
# astype(float) is the vectorised equivalent of map(float) for these integer columns.
drinks[serving_cols] = drinks[serving_cols].astype(float)

In [75]:
# Small 2x3 integer frame for experimenting with column assignment.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6]],
    index=['a', 'b'],
    columns=['x', 'y', 'z'],
)

In [76]:
df

Unnamed: 0,x,y,z
a,1,2,3
b,4,5,6


In [77]:
# Attempt to write the float-converted values back through a .loc label slice.
# The frame displayed next still shows integers: in pandas 2.x a full-column
# `.loc[:, cols] = values` assignment sets values into the existing int64 columns
# in place, so whole-number floats are stored without changing the dtype.
# NOTE(review): to change the dtype, replace the columns instead, e.g.
# df[['x', 'y', 'z']] = df[['x', 'y', 'z']].astype(float).
df.loc[:, 'x':'z'] = df.loc[:, 'x':'z'].map(float)

In [78]:
df

Unnamed: 0,x,y,z
a,1,2,3
b,4,5,6


In [79]:
# Same write-back, but naming the target columns with a list instead of a slice.
# The assignment still goes through .loc, and the frame displayed next still
# shows integers, so selecting the columns by list does not change the outcome.
df.loc[:, ['x', 'y','z']] = df.loc[:, 'x':'z'].map(float)

In [80]:
df

Unnamed: 0,x,y,z
a,1,2,3
b,4,5,6


In [81]:
df.loc[:, 'x':'z']

Unnamed: 0,x,y,z
a,1,2,3
b,4,5,6


In [69]:
df.loc[:, 'x':'z'].map(float)

Unnamed: 0,x,y,z
a,1.0,2.0,3.0
b,4.0,5.0,6.0


In [70]:
# Broadcast a single integer to every cell in columns x through z.
df.loc[:, 'x':'z'] = 100

In [60]:
df

Unnamed: 0,x,y,z
a,100,100,100
b,100,100,100


In [71]:
# Write-back attempt using the legacy applymap spelling.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# hence the warning this cell emits. The frame displayed next still shows
# integers, because the `.loc[:, cols] = values` assignment keeps the existing
# int64 columns; the choice between map and applymap is not the cause.
df.loc[:, 'x':'z'] = df.loc[:, 'x':'z'].applymap(float)

  df.loc[:, 'x':'z'] = df.loc[:, 'x':'z'].applymap(float)


In [72]:
df

Unnamed: 0,x,y,z
a,100,100,100
b,100,100,100
