In [1]:
import pandas as pd

# Load the passenger records; the relative path is resolved against the
# kernel's current working directory.
titanic_survival = pd.read_csv("titanic-survival.csv")
# Preview the first rows as a sanity check on the parsed columns.
titanic_survival.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1.0,1.0,"Allen, Miss. Elisabeth Walton",female,29.0,0.0,0.0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1.0,1.0,"Allison, Master. Hudson Trevor",male,0.9167,1.0,2.0,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1.0,0.0,"Allison, Miss. Helen Loraine",female,2.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1.0,0.0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1.0,2.0,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1.0,0.0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


#### Prix moyen des billets par classe

In [2]:
import numpy as np
# Mean ticket fare per passenger class. The aggregation is named by string:
# pandas >= 2.1 emits a FutureWarning when a NumPy callable such as np.mean
# is passed as aggfunc, and "mean" yields the same result.
passenger_class_fares = titanic_survival.pivot_table(index="pclass", values="fare", aggfunc="mean")

In [3]:
# Display the per-class fare table built in the previous cell.
passenger_class_fares

Unnamed: 0_level_0,fare
pclass,Unnamed: 1_level_1
1.0,87.508992
2.0,21.179196
3.0,13.302889


#### Calcul de la moyenne d'âge par classe

In [5]:
# Mean age per passenger class. "mean" is passed as a string because
# pandas >= 2.1 emits a FutureWarning for NumPy callables such as np.mean;
# the computed values are the same.
passenger_age = titanic_survival.pivot_table(index="pclass", values="age", aggfunc="mean")
print(passenger_age)

              age
pclass           
1.0     39.159918
2.0     29.506705
3.0     24.816367


#### Taux de survie par classe

In [6]:
# Survival rate per passenger class: `survived` shows up as 0.0/1.0 in the
# preview above, so its mean is the share of survivors in each class.
passenger_survive = titanic_survival.pivot_table(
    values="survived",
    index="pclass",
    aggfunc="mean",  # pivot_table's default aggregation, spelled out
)
passenger_survive

Unnamed: 0_level_0,survived
pclass,Unnamed: 1_level_1
1.0,0.619195
2.0,0.429603
3.0,0.255289


#### Nombre de survivants et total des tarifs payés par port d'embarquement

In [8]:
# Per embarkation port: total fare paid and number of survivors (summing the
# 0/1 `survived` flag counts survivors). "sum" is passed as a string because
# pandas >= 2.1 emits a FutureWarning for NumPy callables such as np.sum.
port_stats = titanic_survival.pivot_table(index="embarked", values=["fare", "survived"], aggfunc="sum")
print(port_stats)

                fare  survived
embarked                      
C         16830.7922     150.0
Q          1526.3085      44.0
S         25033.3862     304.0


#### Suppression des lignes avec valeurs manquantes dans les colonnes 'age' et 'sex'

In [14]:
# Keep only the rows where both `age` and `sex` are present; dropna returns a
# new frame, so titanic_survival itself is left untouched.
new_titanic_survival = titanic_survival.dropna(subset=["age", "sex"], how="any")
# (rows, columns) remaining after the filter.
new_titanic_survival.shape

(1046, 14)

#### Réindexation du DataFrame new_titanic_survival

In [15]:
# Renumber the rows 0..n-1 after the drop; drop=True discards the old index
# instead of keeping it as an extra column.
titanic_reindexed = new_titanic_survival.reset_index(drop=True)
# First five rows, first three columns.
titanic_reindexed.iloc[:5, :3]

Unnamed: 0,pclass,survived,name
0,1.0,1.0,"Allen, Miss. Elisabeth Walton"
1,1.0,1.0,"Allison, Master. Hudson Trevor"
2,1.0,0.0,"Allison, Miss. Helen Loraine"
3,1.0,0.0,"Allison, Mr. Hudson Joshua Creighton"
4,1.0,0.0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)"


#### Ajout de la colonne age_labels

In [16]:
def age_label(row, adult_age=18):
    """Classify one passenger row by age group.

    Parameters
    ----------
    row : mapping or pandas.Series
        Any object indexable with ``"age"``, e.g. the row handed over by
        ``DataFrame.apply(age_label, axis=1)``.
    adult_age : int or float, default 18
        Ages strictly below this threshold are labelled ``"Minor"``.

    Returns
    -------
    str
        ``"Unknow"`` when the age is missing, ``"Minor"`` when it is below
        ``adult_age``, ``"Adult"`` otherwise.
    """
    age = row["age"]
    # Missing ages are handled first: a NaN compared with a number is always
    # False, so it would otherwise fall through to "Adult".
    if pd.isnull(age):
        # Spelling kept as-is; the label is a runtime value.
        return "Unknow"
    if age < adult_age:
        return "Minor"
    return "Adult"

# Label every passenger of the frame that receives the column. Computing the
# labels on new_titanic_survival (null ages already dropped) made age_label's
# missing-age branch unreachable and left NaN, rather than "Unknow", in the
# rows of titanic_survival whose age is missing.
age_labels = titanic_survival.apply(age_label, axis=1)
titanic_survival["age_labels"] = age_labels
titanic_survival.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,age_labels
0,1.0,1.0,"Allen, Miss. Elisabeth Walton",female,29.0,0.0,0.0,24160,211.3375,B5,S,2.0,,"St Louis, MO",Adult
1,1.0,1.0,"Allison, Master. Hudson Trevor",male,0.9167,1.0,2.0,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON",Minor
2,1.0,0.0,"Allison, Miss. Helen Loraine",female,2.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",Minor
3,1.0,0.0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1.0,2.0,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON",Adult
4,1.0,0.0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",Adult


#### Taux de survie par groupe d'âge

In [18]:
# new_titanic_survival was created by dropna before any age_labels column
# existed, so it does not carry that column and pivoting on it directly raises
# KeyError on a fresh kernel. The labels are attached here, aligned on the
# row index, without mutating either frame.
age_group_survival = (
    new_titanic_survival
    .assign(age_labels=age_labels)
    .pivot_table(index="age_labels", values="survived")
)
age_group_survival

Unnamed: 0_level_0,survived
age_labels,Unnamed: 1_level_1
Adult,0.387892
Minor,0.525974


#### Taux de survie par sexe

In [20]:
# Survival rate per sex, computed on the rows with known age and sex; the mean
# of the 0/1 `survived` flag is the share of survivors.
sex_group_survival = new_titanic_survival.pivot_table(
    values="survived",
    index="sex",
    aggfunc="mean",  # pivot_table's default aggregation, spelled out
)
sex_group_survival

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.752577
male,0.205167
