# Titanic Model

**Objective**: use machine learning to create a model that predicts which passengers survived the Titanic 

**Results**:

- using Ada Boost Classifier model we found Accuracy = 0.7966; F1 = 0.7459

In [30]:
#import librarys
from pycaret.classification import setup, compare_models, create_model, tune_model, predict_model
import pandas as pd

In [6]:
#getting data
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [20]:
#ML COM PYCARET
model_setup = setup(data = train,
                  target = 'Survived',
                  numeric_features = ['SibSp','Parch'],
                  train_size = 0.75,
                  normalize = True,
                  ignore_low_variance = True,
                  combine_rare_levels = True,
                  remove_multicollinearity = True,
                  feature_selection = True,
                  fold_strategy = 'stratifiedkfold',
                  fold = 10)

Unnamed: 0,Description,Value
0,session_id,4665
1,Target,Survived
2,Target Type,Binary
3,Label Encoded,
4,Original Data,"(891, 12)"
5,Missing Values,1
6,Numeric Features,5
7,Categorical Features,6
8,Ordinal Features,0
9,High Cardinality Features,0


In [23]:
#choosing the best ML model
model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.81,0.8516,0.6988,0.7826,0.7352,0.5884,0.5935,0.028
ada,Ada Boost Classifier,0.8041,0.8289,0.7577,0.7396,0.7459,0.5871,0.5899,0.053
gbc,Gradient Boosting Classifier,0.8039,0.846,0.6666,0.7908,0.7199,0.5713,0.5793,0.04
lr,Logistic Regression,0.8025,0.8477,0.6955,0.7717,0.726,0.5737,0.5797,0.419
ridge,Ridge Classifier,0.7965,0.0,0.7068,0.7489,0.7241,0.564,0.567,0.017
lda,Linear Discriminant Analysis,0.795,0.8473,0.7068,0.7471,0.7234,0.5615,0.5645,0.016
rf,Random Forest Classifier,0.7935,0.8446,0.6909,0.7534,0.718,0.5561,0.5597,0.17
knn,K Neighbors Classifier,0.7919,0.8214,0.6828,0.7523,0.7131,0.5509,0.5549,0.019
et,Extra Trees Classifier,0.7874,0.8209,0.6906,0.7401,0.7124,0.5445,0.5472,0.105
dt,Decision Tree Classifier,0.7651,0.7524,0.7063,0.6909,0.6966,0.5054,0.5074,0.018


In [26]:
#training the model
ada = create_model('ada')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8955,0.8657,0.84,0.875,0.8571,0.7748,0.7752
1,0.7313,0.839,0.72,0.6207,0.6667,0.4437,0.4472
2,0.7761,0.8152,0.76,0.6786,0.717,0.5328,0.5351
3,0.806,0.8644,0.8077,0.7241,0.7636,0.5999,0.6025
4,0.806,0.8007,0.6923,0.7826,0.7347,0.5827,0.5854
5,0.7463,0.7674,0.6538,0.68,0.6667,0.462,0.4622
6,0.8209,0.8738,0.8846,0.7188,0.7931,0.6382,0.6489
7,0.7164,0.7467,0.5385,0.6667,0.5957,0.3811,0.3863
8,0.9091,0.9249,0.88,0.88,0.88,0.8068,0.8068
9,0.8333,0.7907,0.8,0.7692,0.7843,0.6486,0.6489


In [28]:
#tunning model
ada_tunned = tune_model(ada, optimize = 'F1', n_iter = 15)
ada_tunned

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8657,0.8581,0.8,0.8333,0.8163,0.7105,0.7109
1,0.6866,0.8395,0.6,0.5769,0.5882,0.3354,0.3355
2,0.806,0.8224,0.76,0.7308,0.7451,0.5886,0.5889
3,0.8358,0.8846,0.8077,0.7778,0.7925,0.6567,0.6571
4,0.806,0.8302,0.6154,0.8421,0.7111,0.5703,0.5862
5,0.7313,0.7974,0.6154,0.6667,0.64,0.4263,0.4271
6,0.8209,0.848,0.8462,0.7333,0.7857,0.6332,0.638
7,0.7164,0.7631,0.5,0.6842,0.5778,0.372,0.3824
8,0.8788,0.9127,0.8,0.8696,0.8333,0.7384,0.7399
9,0.8182,0.8278,0.72,0.7826,0.75,0.6075,0.6088


AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.1,
                   n_estimators=240, random_state=4665)

In [31]:
#make predicitons
predictions = predict_model(ada_tunned, data=test)
predictions

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Label,Score
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,0.5199
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S,0,0.5148
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,0.5223
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0.5215
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,1,0.5035
...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S,0,0.5215
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C,1,0.5340
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S,0,0.5221
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S,0,0.5215
