# Prédiction de la Survie des Passagers du Titanic

## Introduction

Ce notebook utilise PyCaret pour entraîner et comparer plusieurs modèles de classification afin de prédire la survie des passagers du Titanic.


## Importation des bibliothèques

In [2]:
import pandas as pd
from pycaret.classification import *

## Chargement et prétraitement des données


In [3]:
# Path of the Titanic passenger CSV, relative to the notebook's working directory.
file_path = "data/titanic.csv"
# Read the whole file into a DataFrame; the following cells clean it up.
data = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Preprocessing: replace missing ages with the median age.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the `data['Age']` selection: that chained in-place
# form triggers a warning in pandas 2.x and, with Copy-on-Write enabled
# (the default from pandas 3.0), updates a temporary object and leaves
# `data` unchanged.
median_age = data['Age'].median()
data['Age'] = data['Age'].fillna(median_age)
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,28.0,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [5]:
# Remove the Name, Ticket and Cabin columns in place; none of them is used
# by the later cells.
unused_columns = ['Name', 'Ticket', 'Cabin']
data.drop(columns=unused_columns, inplace=True)
data

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.2500,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.9250,S
3,4,1,1,female,35.0,1,0,53.1000,S
4,5,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
886,887,0,2,male,27.0,0,0,13.0000,S
887,888,1,1,female,19.0,0,0,30.0000,S
888,889,0,3,female,28.0,1,2,23.4500,S
889,890,1,1,male,26.0,0,0,30.0000,C


In [6]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each, which is why the result only has Sex_male, Embarked_Q and
# Embarked_S indicator columns.
categorical_columns = ['Sex', 'Embarked']
data = pd.get_dummies(
    data,
    columns=categorical_columns,
    drop_first=True,
)
data

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,1,0,3,22.0,1,0,7.2500,True,False,True
1,2,1,1,38.0,1,0,71.2833,False,False,False
2,3,1,3,26.0,0,0,7.9250,False,False,True
3,4,1,1,35.0,1,0,53.1000,False,False,True
4,5,0,3,35.0,0,0,8.0500,True,False,True
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,27.0,0,0,13.0000,True,False,True
887,888,1,1,19.0,0,0,30.0000,False,False,True
888,889,0,3,28.0,1,2,23.4500,False,False,True
889,890,1,1,26.0,0,0,30.0000,True,False,False


## Configuration de l'environnement PyCaret

In [8]:
# Initialise the PyCaret classification experiment with 'Survived' as the
# target; session_id fixes the random seed so the run is reproducible.
# NOTE(review): PassengerId is still a column of `data` at this point, so it
# is passed to the models as a numeric feature. Consider excluding it (e.g.
# via setup's ignore_features) -- confirm before changing, as it will alter
# the reported scores.
clf1 = setup(
    data=data,
    target='Survived',
    session_id=123,
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Survived
2,Target type,Binary
3,Original data shape,"(891, 10)"
4,Transformed data shape,"(891, 10)"
5,Transformed train set shape,"(623, 10)"
6,Transformed test set shape,"(268, 10)"
7,Numeric features,6
8,Preprocess,True
9,Imputation type,simple


## Comparaison des modèles


In [9]:
# Cross-validate the available classifiers and keep the top-ranked one.
# The ranking metric is spelled out explicitly; 'Accuracy' is the value
# compare_models uses when `sort` is not given.
best_model = compare_models(sort='Accuracy')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8169,0.8345,0.7071,0.7943,0.7444,0.6034,0.6088,0.058
rf,Random Forest Classifier,0.8105,0.8419,0.6986,0.7917,0.7367,0.5903,0.5977,0.051
ada,Ada Boost Classifier,0.7993,0.8285,0.7408,0.7429,0.7371,0.5756,0.5801,0.047
lr,Logistic Regression,0.7976,0.8514,0.7114,0.7518,0.7264,0.5669,0.5716,0.641
lightgbm,Light Gradient Boosting Machine,0.796,0.8232,0.6779,0.7747,0.7177,0.56,0.5673,0.141
et,Extra Trees Classifier,0.7945,0.8337,0.6904,0.7549,0.7181,0.5575,0.5613,0.063
ridge,Ridge Classifier,0.7912,0.8511,0.6946,0.746,0.7163,0.552,0.5555,0.008
qda,Quadratic Discriminant Analysis,0.7896,0.8399,0.7111,0.7349,0.718,0.5511,0.5559,0.026
nb,Naive Bayes,0.7864,0.822,0.7237,0.7223,0.7189,0.5472,0.5515,0.008
lda,Linear Discriminant Analysis,0.7864,0.8508,0.6904,0.7383,0.7106,0.5422,0.5454,0.011


## Conclusion

In [14]:
# Report the class name of the estimator selected by compare_models().
best_model_name = type(best_model).__name__
print(f"Le modèle le plus performant pour prédire la survie des passagers du Titanic est {best_model_name}.")

Le modèle le plus performant pour prédire la survie des passagers du Titanic est GradientBoostingClassifier.
