In [1]:
import pandas as pd

ufos = pd.read_csv('../data/nuforc_reports.csv')
titanic = pd.read_csv('../data/titanic.csv')
titanic['age'] = titanic['age'].replace(['?'], [None]).astype(float)
titanic['fare'] = titanic['fare'].replace(['?'], [None]).astype(float)

In [2]:
def years_to_days(yrs):
    return yrs*365

titanic['age'].apply(years_to_days)

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

In [3]:
titanic['age'] * 365

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

In [6]:
def get_age_group(age):
    if age < 2:
        return 'infant'
    elif age < 12:
        return 'child'
    elif age < 18:
        return 'teen'
    elif age < 50:
        return 'adult'
    else:
        return 'senior'

In [7]:
titanic['age'].apply(get_age_group)

0        adult
1       infant
2        child
3        adult
4        adult
         ...  
1304      teen
1305    senior
1306     adult
1307     adult
1308     adult
Name: age, Length: 1309, dtype: object

In [8]:
titanic['age_group'] = titanic['age'].apply(get_age_group)

In [9]:
titanic['age_group'].value_counts()

adult     782
senior    373
child      69
teen       63
infant     22
Name: age_group, dtype: int64

In [10]:
titanic.groupby('age_group').survived.mean()

age_group
adult     0.386189
child     0.492754
infant    0.772727
senior    0.313673
teen      0.476190
Name: survived, dtype: float64

In [16]:
titanic.groupby(['sex', 'age_group']).survived.mean()

sex     age_group
female  adult        0.748201
        child        0.515152
        infant       0.888889
        senior       0.698276
        teen         0.833333
male    adult        0.186508
        child        0.472222
        infant       0.692308
        senior       0.140078
        teen         0.151515
Name: survived, dtype: float64

In [18]:
titanic['fare'].apply(lambda x: f'${x*24}')

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

In [19]:
def get_range(s):
    return s.max() - s.min()

In [20]:
df = titanic[['pclass', 'survived', 'age', 'fare']]

In [21]:
df.apply(get_range)

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

In [22]:
df.apply(get_range, axis=0) #Uses columns

pclass        2.0000
survived      1.0000
age          79.8333
fare        512.3292
dtype: float64

In [23]:
df.apply(get_range, axis=1) #Uses rows

0       210.3375
1       150.6333
2       151.5500
3       151.5500
4       151.5500
          ...   
1304     14.5000
1305     14.4542
1306     26.5000
1307     27.0000
1308     29.0000
Length: 1309, dtype: float64

In [26]:
titanic['pclass']

0       1
1       1
2       1
3       1
4       1
       ..
1304    3
1305    3
1306    3
1307    3
1308    3
Name: pclass, Length: 1309, dtype: int64

In [25]:
titanic['pclass'].map({1: '1st', 2: '2nd', 3: '3rd'})

0       1st
1       1st
2       1st
3       1st
4       1st
       ... 
1304    3rd
1305    3rd
1306    3rd
1307    3rd
1308    3rd
Name: pclass, Length: 1309, dtype: object

In [27]:
titanic[['name', 'sex', 'age_group']].applymap(len)

Unnamed: 0,name,sex,age_group
0,29,6,5
1,30,4,6
2,28,6,5
3,36,4,5
4,47,6,5
...,...,...,...
1304,20,6,4
1305,21,6,6
1306,25,4,5
1307,19,4,5
