In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [6]:
test_df.head

<bound method NDFrame.head of      PassengerId  Pclass                                          Name  \
0            892       3                              Kelly, Mr. James   
1            893       3              Wilkes, Mrs. James (Ellen Needs)   
2            894       2                     Myles, Mr. Thomas Francis   
3            895       3                              Wirz, Mr. Albert   
4            896       3  Hirvonen, Mrs. Alexander (Helga E Lindqvist)   
..           ...     ...                                           ...   
413         1305       3                            Spector, Mr. Woolf   
414         1306       1                  Oliva y Ocana, Dona. Fermina   
415         1307       3                  Saether, Mr. Simon Sivertsen   
416         1308       3                           Ware, Mr. Frederick   
417         1309       3                      Peter, Master. Michael J   

        Sex   Age  SibSp  Parch              Ticket      Fare Cabin Embarked  
0 

In [7]:
train_df.head

<bound method NDFrame.head of      PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                     

In [8]:
train_df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [32]:
train_df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [33]:
test_df.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [34]:
train_df['Age'] = train_df['Age'].fillna(train_df['Age'].mean())
test_df['Age'] = test_df['Age'].fillna(test_df['Age'].mean())
test_df['Fare'] = test_df['Fare'].fillna(test_df['Fare'].mean())

In [35]:
train_df = train_df.drop(['Cabin','Name','Ticket', 'PassengerId'], axis = 1)
test_df = test_df.drop(['Cabin','Name','Ticket', 'PassengerId'], axis = 1)

In [36]:
train_df.head

<bound method NDFrame.head of      Survived  Pclass     Sex        Age  SibSp  Parch     Fare Embarked
0           0       3    male  22.000000      1      0   7.2500        S
1           1       1  female  38.000000      1      0  71.2833        C
2           1       3  female  26.000000      0      0   7.9250        S
3           1       1  female  35.000000      1      0  53.1000        S
4           0       3    male  35.000000      0      0   8.0500        S
..        ...     ...     ...        ...    ...    ...      ...      ...
886         0       2    male  27.000000      0      0  13.0000        S
887         1       1  female  19.000000      0      0  30.0000        S
888         0       3  female  29.699118      1      2  23.4500        S
889         1       1    male  26.000000      0      0  30.0000        C
890         0       3    male  32.000000      0      0   7.7500        Q

[891 rows x 8 columns]>

In [37]:
train_df = pd.get_dummies(train_df, columns=['Sex', 'Embarked', 'Pclass'])
test_df = pd.get_dummies(test_df, columns=['Sex', 'Embarked', 'Pclass'])

In [38]:
train_df.head

<bound method NDFrame.head of      Survived        Age  SibSp  Parch     Fare  Sex_female  Sex_male  \
0           0  22.000000      1      0   7.2500           0         1   
1           1  38.000000      1      0  71.2833           1         0   
2           1  26.000000      0      0   7.9250           1         0   
3           1  35.000000      1      0  53.1000           1         0   
4           0  35.000000      0      0   8.0500           0         1   
..        ...        ...    ...    ...      ...         ...       ...   
886         0  27.000000      0      0  13.0000           0         1   
887         1  19.000000      0      0  30.0000           1         0   
888         0  29.699118      1      2  23.4500           1         0   
889         1  26.000000      0      0  30.0000           0         1   
890         0  32.000000      0      0   7.7500           0         1   

     Embarked_C  Embarked_Q  Embarked_S  Pclass_1  Pclass_2  Pclass_3  
0             0      

In [39]:
test_df.head

<bound method NDFrame.head of           Age  SibSp  Parch      Fare  Sex_female  Sex_male  Embarked_C  \
0    34.50000      0      0    7.8292           0         1           0   
1    47.00000      1      0    7.0000           1         0           0   
2    62.00000      0      0    9.6875           0         1           0   
3    27.00000      0      0    8.6625           0         1           0   
4    22.00000      1      1   12.2875           1         0           0   
..        ...    ...    ...       ...         ...       ...         ...   
413  30.27259      0      0    8.0500           0         1           0   
414  39.00000      0      0  108.9000           1         0           1   
415  38.50000      0      0    7.2500           0         1           0   
416  30.27259      0      0    8.0500           0         1           0   
417  30.27259      1      1   22.3583           0         1           1   

     Embarked_Q  Embarked_S  Pclass_1  Pclass_2  Pclass_3  
0        

In [40]:
gnb_model = GaussianNB()
gnb_model.fit(X, y)
print("Gaussian Naive Bayes:", gnb_model.score(X, y).round(3))

Gaussian Naive Bayes: 0.679


In [42]:
gnb_model.fit(X, y)
gnb_pred = gnb_model.predict(X)

print('Accuracy Metrics for Naive Bayes:\n')
print(accuracy_score(y, gnb_pred).round(5), '\n')
print(confusion_matrix(y, gnb_pred), '\n')
print(classification_report(y, gnb_pred))

Accuracy Metrics for Naive Bayes:

0.67901 

[[469  80]
 [206 136]] 

              precision    recall  f1-score   support

           0       0.69      0.85      0.77       549
           1       0.63      0.40      0.49       342

    accuracy                           0.68       891
   macro avg       0.66      0.63      0.63       891
weighted avg       0.67      0.68      0.66       891

