# Titanic Challenge

Use the information on the passengers to predict which will survive. I'll be using the sklearn library with Logistic Regression, the Decision Tree classifier, and the Random Forest classifier. The dataset is available on Kaggle.

In [0]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
train, test = train_test_split(sns.load_dataset('titanic').drop(columns=['alive']), random_state=0)
target = 'survived'

print(train.head)

<bound method NDFrame.head of      survived  pclass     sex   age  ...  adult_male  deck  embark_town  alone
105         0       3    male  28.0  ...        True   NaN  Southampton   True
68          1       3  female  17.0  ...       False   NaN  Southampton  False
253         0       3    male  30.0  ...        True   NaN  Southampton  False
320         0       3    male  22.0  ...        True   NaN  Southampton   True
706         1       2  female  45.0  ...       False   NaN  Southampton   True
271         1       3    male  25.0  ...        True   NaN  Southampton   True
424         0       3    male  18.0  ...        True   NaN  Southampton  False
752         0       3    male  33.0  ...        True   NaN  Southampton   True
615         1       2  female  24.0  ...       False   NaN  Southampton  False
2           1       3  female  26.0  ...       False   NaN  Southampton   True
882         0       3  female  22.0  ...       False   NaN  Southampton   True
467         0       1 

In [0]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

#fill missing age values
train.age.fillna(train.age.mean(),inplace=True)
train.age.isnull().sum()

test.age.fillna(train.age.mean(),inplace=True)

#convert sex to integers
train['female'] = (train.sex =='female').astype(int)
test['female'] = (test.sex =='female').astype(int)

print(train.head)

<bound method NDFrame.head of      survived  pclass     sex   age  ...  deck  embark_town  alone female
105         0       3    male  28.0  ...   NaN  Southampton   True      0
68          1       3  female  17.0  ...   NaN  Southampton  False      1
253         0       3    male  30.0  ...   NaN  Southampton  False      0
320         0       3    male  22.0  ...   NaN  Southampton   True      0
706         1       2  female  45.0  ...   NaN  Southampton   True      1
271         1       3    male  25.0  ...   NaN  Southampton   True      0
424         0       3    male  18.0  ...   NaN  Southampton  False      0
752         0       3    male  33.0  ...   NaN  Southampton   True      0
615         1       2  female  24.0  ...   NaN  Southampton  False      1
2           1       3  female  26.0  ...   NaN  Southampton   True      1
882         0       3  female  22.0  ...   NaN  Southampton   True      1
467         0       1    male  56.0  ...   NaN  Southampton   True      0
403     

In [0]:
from sklearn.metrics import mean_absolute_error

features = ['adult_male','pclass','age']

#store models

models = [('Logistic Regression',LogisticRegression()),
         ('Decision Tree Classifier',DecisionTreeClassifier(max_depth=2)),
         ('Random Forest Classifier',RandomForestClassifier())]

#define compare models function

def compare_models():
  
  high_score = 0
  best_model = []
    
  for name, model in models:

    #display name
    print(name, '\n')
  
    #fit model
    model.fit(train[features],train[target])
  
    #calculate mae for training data
    ytrue= train[target]
    ypred= model.predict(train[features])
    trainmae= mean_absolute_error(ytrue,ypred)
  
    #calculate mae for testing data
    y_true= test[target]
    y_pred= model.predict(test[features])
    testmae= mean_absolute_error(y_true,y_pred)
  
    #calculate recall score
    trainrecall = recall_score(ytrue,ypred)
    testrecall = recall_score(y_true,y_pred) 
  
    #calculate accuracy score
    trainaccuracy = accuracy_score(ytrue,ypred)
    testaccuracy = accuracy_score(y_true,y_pred)
    
    #identify best model
    if testaccuracy + trainaccuracy > high_score:
      high_score = (testaccuracy + trainaccuracy) / 2
      best_model = name
    
    
    #print results
    print('TrainMAE:',trainmae)
    print('TestMAE:',testmae)
    print('TrainRecall:',trainrecall)
    print('TestRecall:',testrecall)
    print('TrainAccuracy:',trainaccuracy)
    print('TestAccuracy:',testaccuracy)
    print('\n')
    
  #return the model with the highest average accuracy score  
  return (best_model, high_score)  

best_result = compare_models()
print(best_result)


Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1347305389221557
TestMAE: 0.23766816143497757
TrainRecall: 0.8217054263565892
TestRecall: 0.6547619047619048
TrainAccuracy: 0.8652694610778443
TestAccuracy: 0.7623318385650224


('Random Forest Classifier', 0.8138006498214334)




In [0]:
#Create all possible variable combos
  
  import itertools

  features = ['age', 'female', 'pclass','sibsp','parch','fare','alone']
  feature_combos = []
  for L in range(0, len(features)+1):
    for subset in itertools.combinations(features, L):
        feature_combos.append(subset)
  print(feature_combos)

[(), ('age',), ('female',), ('pclass',), ('sibsp',), ('parch',), ('fare',), ('alone',), ('age', 'female'), ('age', 'pclass'), ('age', 'sibsp'), ('age', 'parch'), ('age', 'fare'), ('age', 'alone'), ('female', 'pclass'), ('female', 'sibsp'), ('female', 'parch'), ('female', 'fare'), ('female', 'alone'), ('pclass', 'sibsp'), ('pclass', 'parch'), ('pclass', 'fare'), ('pclass', 'alone'), ('sibsp', 'parch'), ('sibsp', 'fare'), ('sibsp', 'alone'), ('parch', 'fare'), ('parch', 'alone'), ('fare', 'alone'), ('age', 'female', 'pclass'), ('age', 'female', 'sibsp'), ('age', 'female', 'parch'), ('age', 'female', 'fare'), ('age', 'female', 'alone'), ('age', 'pclass', 'sibsp'), ('age', 'pclass', 'parch'), ('age', 'pclass', 'fare'), ('age', 'pclass', 'alone'), ('age', 'sibsp', 'parch'), ('age', 'sibsp', 'fare'), ('age', 'sibsp', 'alone'), ('age', 'parch', 'fare'), ('age', 'parch', 'alone'), ('age', 'fare', 'alone'), ('female', 'pclass', 'sibsp'), ('female', 'pclass', 'parch'), ('female', 'pclass', 'fare

In [0]:
#Run all possible combinations of variables with all available models to find the highest test accuracy
#return the best model, variable combination, and corresponding score at the end of the output
def score_comparison():
  high_score = 0
  best_model = []
  best_combo = []
  for combo in feature_combos:
    print(combo)
    result = compare_models()
    if result[1] > high_score:
      high_score = result[1]
      best_model = result[0]
      best_combo = combo
   
  print(best_model,'using the variables',best_combo, 'has the highest average accuracy score of', high_score)

score_comparison()

()
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12724550898203593
TestMAE: 0.2062780269058296
TrainRecall: 0.7945736434108527
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7937219730941704


('age',)
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainA



TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.21076233183856502
TrainRecall: 0.7906976744186046
TestRecall: 0.6785714285714286
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7892376681614349


('sibsp',)
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21076233183856502
TrainRecall: 0.8255813953488372
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8712574850299402
TestAccurac



TrainMAE: 0.1317365269461078
TestMAE: 0.19730941704035873
TrainRecall: 0.8178294573643411
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.8026905829596412


('age', 'female')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.18385650224215247
TrainRecall: 0.7984496124031008
TestRecall: 0.7380952380952381
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.8161434977578476


('age', 'pclass')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccu



TrainMAE: 0.12874251497005987
TestMAE: 0.2062780269058296
TrainRecall: 0.8023255813953488
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7937219730941704


('age', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12574850299401197
TestMAE: 0.22869955156950672
TrainRecall: 0.810077519379845
TestRecall: 0.7023809523809523
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.7713004484304933


('age', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy:



TrainMAE: 0.1347305389221557
TestMAE: 0.20179372197309417
TrainRecall: 0.7790697674418605
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8652694610778443
TestAccuracy: 0.7982062780269058


('female', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1347305389221557
TestMAE: 0.21076233183856502
TrainRecall: 0.7829457364341085
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8652694610778443
TestAccuracy: 0.7892376681614349


('female', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAc



TrainMAE: 0.12724550898203593
TestMAE: 0.21076233183856502
TrainRecall: 0.8410852713178295
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7892376681614349


('pclass', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12574850299401197
TestMAE: 0.21076233183856502
TrainRecall: 0.8217054263565892
TestRecall: 0.7261904761904762
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.7892376681614349


('pclass', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainA



TrainMAE: 0.1317365269461078
TestMAE: 0.21076233183856502
TrainRecall: 0.7945736434108527
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7892376681614349


('sibsp', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.2062780269058296
TrainRecall: 0.8333333333333334
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7937219730941704


('sibsp', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccu



TrainMAE: 0.13323353293413173
TestMAE: 0.19730941704035873
TrainRecall: 0.8217054263565892
TestRecall: 0.75
TrainAccuracy: 0.8667664670658682
TestAccuracy: 0.8026905829596412


('fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21524663677130046
TrainRecall: 0.8217054263565892
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7847533632286996


('age', 'female', 'pclass')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccurac



TrainMAE: 0.12874251497005987
TestMAE: 0.19282511210762332
TrainRecall: 0.8178294573643411
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.8071748878923767


('age', 'female', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13323353293413173
TestMAE: 0.19730941704035873
TrainRecall: 0.8062015503875969
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8667664670658682
TestAccuracy: 0.8026905829596412


('age', 'female', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestReca




TestAccuracy: 0.8026905829596412


('age', 'pclass', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13323353293413173
TestMAE: 0.18385650224215247
TrainRecall: 0.7945736434108527
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8667664670658682
TestAccuracy: 0.8161434977578476


('age', 'pclass', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.197309417



TrainMAE: 0.12574850299401197
TestMAE: 0.21076233183856502
TrainRecall: 0.8294573643410853
TestRecall: 0.7023809523809523
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.7892376681614349


('age', 'sibsp', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12724550898203593
TestMAE: 0.21076233183856502
TrainRecall: 0.813953488372093
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7892376681614349


('age', 'parch', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 



TrainMAE: 0.12574850299401197
TestMAE: 0.19282511210762332
TrainRecall: 0.8217054263565892
TestRecall: 0.7142857142857143
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.8071748878923767


('female', 'pclass', 'sibsp')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.21524663677130046
TrainRecall: 0.810077519379845
TestRecall: 0.6666666666666666
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7847533632286996


('female', 'pclass', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
Test



TrainMAE: 0.12724550898203593
TestMAE: 0.20179372197309417
TrainRecall: 0.813953488372093
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7982062780269058


('female', 'sibsp', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21524663677130046
TrainRecall: 0.813953488372093
TestRecall: 0.6785714285714286
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7847533632286996


('female', 'sibsp', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRe



TrainMAE: 0.1317365269461078
TestMAE: 0.2062780269058296
TrainRecall: 0.810077519379845
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7937219730941704


('female', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.21524663677130046
TrainRecall: 0.8217054263565892
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7847533632286996


('female', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRec



TrainMAE: 0.13023952095808383
TestMAE: 0.20179372197309417
TrainRecall: 0.813953488372093
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7982062780269058


('pclass', 'sibsp', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.2062780269058296
TrainRecall: 0.8178294573643411
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7937219730941704


('pclass', 'parch', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRe



TrainMAE: 0.13023952095808383
TestMAE: 0.2062780269058296
TrainRecall: 0.8449612403100775
TestRecall: 0.75
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7937219730941704


('sibsp', 'parch', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.18385650224215247
TrainRecall: 0.8023255813953488
TestRecall: 0.7380952380952381
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.8161434977578476


('sibsp', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
Trai



TrainMAE: 0.13323353293413173
TestMAE: 0.20179372197309417
TrainRecall: 0.8217054263565892
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8667664670658682
TestAccuracy: 0.7982062780269058


('age', 'female', 'pclass', 'sibsp')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.17488789237668162
TrainRecall: 0.7868217054263565
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.8251121076233184


('age', 'female', 'pclass', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.74031



0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.19730941704035873
TrainRecall: 0.8333333333333334
TestRecall: 0.75
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.8026905829596412


('age', 'female', 'sibsp', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21076233183856502
TrainRecall: 0.8178294573643411
TestRecall: 0.7023809523809523
TrainAccuracy:




TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1347305389221557
TestMAE: 0.20179372197309417
TrainRecall: 0.7984496124031008
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8652694610778443
TestAccuracy: 0.7982062780269058


('age', 'female', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904




TestAccuracy: 0.7892376681614349


('age', 'pclass', 'sibsp', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12724550898203593
TestMAE: 0.21076233183856502
TrainRecall: 0.8294573643410853
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7892376681614349


('age', 'pclass', 'sibsp', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
Te



TrainMAE: 0.12724550898203593
TestMAE: 0.21524663677130046
TrainRecall: 0.7984496124031008
TestRecall: 0.6785714285714286
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7847533632286996


('age', 'pclass', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.21076233183856502
TrainRecall: 0.7945736434108527
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7892376681614349


('age', 'sibsp', 'parch', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.740310077



TrainMAE: 0.12874251497005987
TestMAE: 0.21076233183856502
TrainRecall: 0.8333333333333334
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7892376681614349


('age', 'parch', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.19730941704035873
TrainRecall: 0.8410852713178295
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.8026905829596412


('female', 'pclass', 'sibsp', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.740310



TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.2242152466367713
TrainRecall: 0.8217054263565892
TestRecall: 0.6785714285714286
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7757847533632287


('female', 'pclass', 'parch', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.19730941704035873
TrainRecall: 0.7829457364341085
TestRecall: 0.6785714285714286
TrainAccuracy: 0.86



TrainMAE: 0.13023952095808383
TestMAE: 0.21524663677130046
TrainRecall: 0.813953488372093
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7847533632286996


('female', 'sibsp', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.21076233183856502
TrainRecall: 0.7984496124031008
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7892376681614349


('female', 'sibsp', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.74031



TrainMAE: 0.13023952095808383
TestMAE: 0.23766816143497757
TrainRecall: 0.7984496124031008
TestRecall: 0.6666666666666666
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7623318385650224


('pclass', 'sibsp', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.21076233183856502
TrainRecall: 0.8294573643410853
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7892376681614349


('pclass', 'sibsp', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403



TrainMAE: 0.12574850299401197
TestMAE: 0.2062780269058296
TrainRecall: 0.813953488372093
TestRecall: 0.6904761904761905
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.7937219730941704


('age', 'female', 'pclass', 'sibsp', 'parch')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.19730941704035873
TrainRecall: 0.810077519379845
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.8026905829596412


('age', 'female', 'pclass', 'sibsp', 'fare')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRe



TrainMAE: 0.12724550898203593
TestMAE: 0.21524663677130046
TrainRecall: 0.8372093023255814
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8727544910179641
TestAccuracy: 0.7847533632286996


('age', 'female', 'pclass', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21973094170403587
TrainRecall: 0.8178294573643411
TestRecall: 0.7023809523809523
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7802690582959642


('age', 'female', 'pclass', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
Tr



TrainMAE: 0.13023952095808383
TestMAE: 0.21524663677130046
TrainRecall: 0.810077519379845
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.7847533632286996


('age', 'female', 'sibsp', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.1317365269461078
TestMAE: 0.2062780269058296
TrainRecall: 0.7984496124031008
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8682634730538922
TestAccuracy: 0.7937219730941704


('age', 'female', 'parch', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRec





TrainMAE: 0.12574850299401197
TestMAE: 0.22869955156950672
TrainRecall: 0.8178294573643411
TestRecall: 0.7023809523809523
TrainAccuracy: 0.874251497005988
TestAccuracy: 0.7713004484304933


('age', 'pclass', 'sibsp', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.13023952095808383
TestMAE: 0.19730941704035873
TrainRecall: 0.810077519379845
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8697604790419161
TestAccuracy: 0.8026905829596412


('age', 'pclass', 'parch', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
Train




TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.2062780269058296
TrainRecall: 0.8023255813953488
TestRecall: 0.7142857142857143
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7937219730941704


('female', 'pclass', 'sibsp', 'parch', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.21973094170403587
TrainRecall: 0.8372093023255814
TestReca



TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.22869955156950672
TrainRecall: 0.8372093023255814
TestRecall: 0.7261904761904762
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7713004484304933


('pclass', 'sibsp', 'parch', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380




TestAccuracy: 0.7757847533632287


('age', 'female', 'pclass', 'sibsp', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifier 

TrainMAE: 0.20059880239520958
TestMAE: 0.19730941704035873
TrainRecall: 0.5116279069767442
TestRecall: 0.4880952380952381
TrainAccuracy: 0.7994011976047904
TestAccuracy: 0.8026905829596412


Random Forest Classifier 

TrainMAE: 0.12874251497005987
TestMAE: 0.2062780269058296
TrainRecall: 0.8178294573643411
TestRecall: 0.6904761904761905
TrainAccuracy: 0.8712574850299402
TestAccuracy: 0.7937219730941704


('age', 'female', 'pclass', 'parch', 'fare', 'alone')
Logistic Regression 

TrainMAE: 0.20508982035928144
TestMAE: 0.22869955156950672
TrainRecall: 0.7403100775193798
TestRecall: 0.75
TrainAccuracy: 0.7949101796407185
TestAccuracy: 0.7713004484304933


Decision Tree Classifie



# Findings

The random forest classifier model was the best model to use for this analysis. This worked best with the age, female, pclass, and sibsp variables, which returned a train accuracy score of 0.868 and a test accuracy score of 0.825, giving the highest average of 0.847. This makes sense considering age, sex and class were likely to be large factors in deciding who would survive. Sibsp is the number of siblings/spouses aboard, which also makes a lot of sense since its likely that those with a lot of family would survive. 