# Working with Titanic Survival data

* pclass -- The passenger's cabin class from 1 to 3 where 1 was the highest class
* survived -- 1 if the passenger survived, and 0 if they did not.
* sex -- The passenger's gender
* age -- The passenger's age
* fare -- The amount the passenger paid for their ticket
* embarked -- Either C, Q, or S, to indicate which port the passenger boarded the ship from.

In [1]:
import pandas as pd
import numpy as np

titanic_survival = pd.read_csv("data/titanic_survival.csv")

In [2]:
age_is_null = pd.isnull(titanic_survival["age"])

age_isnt_null = [not x for x in age_is_null]

age_without_null = titanic_survival["age"][age_isnt_null]

correct_mean_age = sum(age_without_null)/len(age_without_null)

In [3]:
print(correct_mean_age)

29.8811345124283


In [4]:
correct_mean_age = titanic_survival["age"].mean()

correct_mean_fare = titanic_survival["fare"].mean()

is_fare_null = titanic_survival["fare"].isnull()

fares = titanic_survival["fare"][is_fare_null == False]

correct_mean_fare2 = sum(fares)/len(fares)

In [5]:
print(titanic_survival.loc[0:10])
print(titanic_survival.iloc[0:10])

    pclass  survived                                             name     sex  \
0      1.0       1.0                    Allen, Miss. Elisabeth Walton  female   
1      1.0       1.0                   Allison, Master. Hudson Trevor    male   
2      1.0       0.0                     Allison, Miss. Helen Loraine  female   
3      1.0       0.0             Allison, Mr. Hudson Joshua Creighton    male   
4      1.0       0.0  Allison, Mrs. Hudson J C (Bessie Waldo Daniels)  female   
5      1.0       1.0                              Anderson, Mr. Harry    male   
6      1.0       1.0                Andrews, Miss. Kornelia Theodosia  female   
7      1.0       0.0                           Andrews, Mr. Thomas Jr    male   
8      1.0       1.0    Appleton, Mrs. Edward Dale (Charlotte Lamson)  female   
9      1.0       0.0                          Artagaveytia, Mr. Ramon    male   
10     1.0       0.0                           Astor, Col. John Jacob    male   

        age  sibsp  parch  

In [6]:
def which_class(row):
    pclass = row['pclass']
    if pd.isnull(pclass):
        return "Unknown"
    elif pclass == 1:
        return "First Class"
    elif pclass == 2:
        return "Second Class"
    else:
        return "Third Class"

classes = titanic_survival.apply(which_class, axis=1)

In [7]:
print(type(classes))

<class 'pandas.core.series.Series'>


In [8]:
def is_minor(row):
    age = row["age"]
    if pd.isnull(age):
        return "unknown"
    elif age < 18:
        return "minor"
    else:
        return "adult"
    
    
    
age_labels = titanic_survival.apply(is_minor, axis="columns")



In [9]:
titanic_survival["age_labels"] = age_labels

In [10]:
print(titanic_survival.iloc[:10])

   pclass  survived                                             name     sex  \
0     1.0       1.0                    Allen, Miss. Elisabeth Walton  female   
1     1.0       1.0                   Allison, Master. Hudson Trevor    male   
2     1.0       0.0                     Allison, Miss. Helen Loraine  female   
3     1.0       0.0             Allison, Mr. Hudson Joshua Creighton    male   
4     1.0       0.0  Allison, Mrs. Hudson J C (Bessie Waldo Daniels)  female   
5     1.0       1.0                              Anderson, Mr. Harry    male   
6     1.0       1.0                Andrews, Miss. Kornelia Theodosia  female   
7     1.0       0.0                           Andrews, Mr. Thomas Jr    male   
8     1.0       1.0    Appleton, Mrs. Edward Dale (Charlotte Lamson)  female   
9     1.0       0.0                          Artagaveytia, Mr. Ramon    male   

       age  sibsp  parch    ticket      fare    cabin embarked boat   body  \
0  29.0000    0.0    0.0     24160  211.3

In [20]:
pt = titanic_survival.pivot_table()

ValueError: No group keys passed!

In [19]:
print(pt)

                  age  survived
age_labels                     
adult       33.468610  0.387892
minor        9.101732  0.525974
unknown           NaN  0.277567
