# Titanic in PyCaret (Classification)

In [None]:
import pandas as pd
from pycaret.classification import *

## Data to be used

In [2]:
# Read the train and test CSV files from the local data/ directory.
train, test = pd.read_csv('data/train.csv'), pd.read_csv('data/test.csv')
# Bare last expression: the training frame is rendered as the cell output.
train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## Data setup

In [3]:
# Column names handed to setup() as `ignore_features`.
ignores = [
    'PassengerId',
    'Name',
    'Ticket',
    'Cabin',
]

In [4]:
# Initialise the PyCaret classification experiment on the training frame:
# 'Survived' is the target and the columns listed in `ignores` are passed
# as features to ignore.
# session_id pins the experiment's random seed so that a fresh
# Restart & Run All gives the same results every time. Without it PyCaret
# draws a new seed per run; 6250 is the seed reported in the setup summary
# of the run recorded in this notebook.
exp = setup(
    data=train,
    target='Survived',
    ignore_features=ignores,
    session_id=6250,
)

Unnamed: 0,Description,Value
0,session_id,6250
1,Target,Survived
2,Target Type,Binary
3,Label Encoded,
4,Original Data,"(891, 12)"
5,Missing Values,True
6,Numeric Features,2
7,Categorical Features,5
8,Ordinal Features,False
9,High Cardinality Features,False


## Compare models

In [5]:
# Cross-validate the candidate classifiers and keep the one ranked first
# when the comparison table is sorted by accuracy.
best_model = compare_models(
    sort='Accuracy',
)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8249,0.8454,0.6801,0.8314,0.7414,0.6123,0.6246,0.013
ada,Ada Boost Classifier,0.8106,0.8345,0.7308,0.7597,0.7431,0.5933,0.5952,0.014
rf,Random Forest Classifier,0.8089,0.8394,0.7053,0.7698,0.7332,0.585,0.589,0.049
lightgbm,Light Gradient Boosting Machine,0.8087,0.8519,0.7089,0.7662,0.7334,0.585,0.5888,0.01
lr,Logistic Regression,0.8042,0.8511,0.6928,0.767,0.7258,0.5743,0.578,0.274
lda,Linear Discriminant Analysis,0.8009,0.8509,0.6799,0.7661,0.7175,0.5652,0.5698,0.004
ridge,Ridge Classifier,0.7993,0.0,0.6799,0.7633,0.716,0.5621,0.5669,0.003
et,Extra Trees Classifier,0.78,0.8055,0.6797,0.7207,0.6951,0.524,0.5281,0.046
dt,Decision Tree Classifier,0.7496,0.7378,0.6795,0.668,0.6695,0.4685,0.4725,0.005
knn,K Neighbors Classifier,0.6741,0.6966,0.4967,0.5778,0.5255,0.2832,0.2887,0.159


## Tune model

In [6]:
# Tune the hyperparameters of the comparison winner, using accuracy as the
# metric to optimise.
best_tuned = tune_model(
    estimator=best_model,
    optimize='Accuracy',
)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8889,0.9551,0.8333,0.8696,0.8511,0.7625,0.763
1,0.8095,0.8173,0.6667,0.8,0.7273,0.5828,0.5885
2,0.8571,0.8622,0.7917,0.8261,0.8085,0.6947,0.6951
3,0.7581,0.7837,0.6522,0.6818,0.6667,0.4769,0.4772
4,0.8065,0.7965,0.7391,0.7391,0.7391,0.5853,0.5853
5,0.7742,0.825,0.6087,0.7368,0.6667,0.4983,0.5035
6,0.871,0.8233,0.6522,1.0,0.7895,0.7023,0.7356
7,0.7903,0.8501,0.6957,0.7273,0.7111,0.5467,0.547
8,0.8226,0.892,0.6667,0.8421,0.7442,0.6112,0.6209
9,0.8226,0.9167,0.8333,0.7407,0.7843,0.6345,0.6377


## Ensemble Learning

### Bagging

In [7]:
# Wrap the comparison winner (the untuned `best_model`) in a bagging ensemble.
best_bagging = ensemble_model(
    estimator=best_model,
    method='Bagging',
)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9048,0.9573,0.8333,0.9091,0.8696,0.7948,0.7967
1,0.7778,0.8205,0.5417,0.8125,0.65,0.4966,0.5185
2,0.873,0.8504,0.7917,0.8636,0.8261,0.7264,0.7281
3,0.7742,0.7781,0.6957,0.6957,0.6957,0.5162,0.5162
4,0.7903,0.8082,0.6957,0.7273,0.7111,0.5467,0.547
5,0.7742,0.806,0.4783,0.8462,0.6111,0.4688,0.5067
6,0.8065,0.8473,0.5652,0.8667,0.6842,0.5534,0.5797
7,0.8226,0.8997,0.6522,0.8333,0.7317,0.6021,0.6122
8,0.8548,0.8575,0.6667,0.9412,0.7805,0.6767,0.6992
9,0.8548,0.9331,0.875,0.7778,0.8235,0.701,0.7045


### Boosting

In [8]:
# Train a Random Forest ('rf') as the base estimator ...
rf = create_model(estimator='rf')
# ... then build a boosting ensemble on top of it.
boosted = ensemble_model(
    estimator=rf,
    method='Boosting',
)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8889,0.9135,0.7917,0.9048,0.8444,0.7586,0.7627
1,0.746,0.8579,0.5417,0.7222,0.619,0.4343,0.4445
2,0.8413,0.8472,0.7917,0.7917,0.7917,0.6635,0.6635
3,0.7419,0.7882,0.6087,0.6667,0.6364,0.437,0.4381
4,0.7903,0.7709,0.6957,0.7273,0.7111,0.5467,0.547
5,0.7742,0.7826,0.5652,0.7647,0.65,0.4888,0.501
6,0.7903,0.8239,0.6522,0.75,0.6977,0.5384,0.5415
7,0.8226,0.8417,0.7391,0.7727,0.7556,0.6164,0.6168
8,0.7903,0.8969,0.7917,0.7037,0.7451,0.5681,0.5709
9,0.8387,0.8958,0.875,0.75,0.8077,0.6702,0.6761


### Blending

In [9]:
# Re-run the model comparison asking for three models (n_select=3),
# then blend those estimators into a single model.
top_three = compare_models(n_select=3)
blend = blend_models(estimator_list=top_three)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8889,0.9669,0.7917,0.9048,0.8444,0.7586,0.7627
1,0.746,0.8494,0.5,0.75,0.6,0.4247,0.4434
2,0.8571,0.8387,0.75,0.8571,0.8,0.6897,0.6934
3,0.7903,0.8094,0.6957,0.7273,0.7111,0.5467,0.547
4,0.7903,0.7843,0.6957,0.7273,0.7111,0.5467,0.547
5,0.7742,0.7871,0.5217,0.8,0.6316,0.479,0.5017
6,0.8065,0.8082,0.6087,0.8235,0.7,0.5618,0.5758
7,0.8226,0.8501,0.6522,0.8333,0.7317,0.6021,0.6122
8,0.8387,0.9013,0.6667,0.8889,0.7619,0.6437,0.6589
9,0.8548,0.9353,0.875,0.7778,0.8235,0.701,0.7045


## Evaluate model

In [10]:
# Open PyCaret's interactive evaluation widget for the tuned model.
evaluate_model(estimator=best_tuned)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…