# Titanic Survival Prediction using Naive Bayes

### Importing all libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

### Loading the dataset

In [2]:
data = pd.read_csv("titanicsurvival.csv")

In [3]:
data

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.2500,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.9250,1
3,1,female,35.0,53.1000,1
4,3,male,35.0,8.0500,0
...,...,...,...,...,...
886,2,male,27.0,13.0000,0
887,1,female,19.0,30.0000,1
888,3,female,,23.4500,0
889,1,male,26.0,30.0000,1


### Summarising the dataset

In [4]:
data.shape

(891, 5)

In [5]:
data.columns

Index(['Pclass', 'Sex', 'Age', 'Fare', 'Survived'], dtype='object')

In [6]:
data.head(10)

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.25,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.925,1
3,1,female,35.0,53.1,1
4,3,male,35.0,8.05,0
5,3,male,,8.4583,0
6,1,male,54.0,51.8625,0
7,3,male,2.0,21.075,0
8,3,female,27.0,11.1333,1
9,2,female,14.0,30.0708,1


In [7]:
data.tail(10)

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
881,3,male,33.0,7.8958,0
882,3,female,22.0,10.5167,0
883,2,male,28.0,10.5,0
884,3,male,25.0,7.05,0
885,3,female,39.0,29.125,0
886,2,male,27.0,13.0,0
887,1,female,19.0,30.0,1
888,3,female,,23.45,0
889,1,male,26.0,30.0,1
890,3,male,32.0,7.75,0


### Data Cleaning

In [8]:
data[data==0].count()

Pclass        0
Sex           0
Age           0
Fare         15
Survived    549
dtype: int64

In [9]:
data.isna().sum()

Pclass        0
Sex           0
Age         177
Fare          0
Survived      0
dtype: int64

In [10]:
age=round(data.Age.mean())
age

30

In [11]:
data=data.fillna(age)
data

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.2500,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.9250,1
3,1,female,35.0,53.1000,1
4,3,male,35.0,8.0500,0
...,...,...,...,...,...
886,2,male,27.0,13.0000,0
887,1,female,19.0,30.0000,1
888,3,female,30.0,23.4500,0
889,1,male,26.0,30.0000,1


In [12]:
data.isna().sum()

Pclass      0
Sex         0
Age         0
Fare        0
Survived    0
dtype: int64

In [13]:
data.replace(["male","female"],[1,0],inplace=True)

In [14]:
data

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,1,22.0,7.2500,0
1,1,0,38.0,71.2833,1
2,3,0,26.0,7.9250,1
3,1,0,35.0,53.1000,1
4,3,1,35.0,8.0500,0
...,...,...,...,...,...
886,2,1,27.0,13.0000,0
887,1,0,19.0,30.0000,1
888,3,0,30.0,23.4500,0
889,1,1,26.0,30.0000,1


### Dividing Dataset Into X, Y

In [15]:
X=data.iloc[:,:-1]
X

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,1,22.0,7.2500
1,1,0,38.0,71.2833
2,3,0,26.0,7.9250
3,1,0,35.0,53.1000
4,3,1,35.0,8.0500
...,...,...,...,...
886,2,1,27.0,13.0000
887,1,0,19.0,30.0000
888,3,0,30.0,23.4500
889,1,1,26.0,30.0000


In [16]:
Y=data.iloc[:,-1]
pd.DataFrame(Y)

Unnamed: 0,Survived
0,0
1,1
2,1
3,1
4,0
...,...
886,0
887,1
888,0
889,1


### Splitting the dataset

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)

In [19]:
x_train

Unnamed: 0,Pclass,Sex,Age,Fare
105,3,1,28.0,7.8958
68,3,0,17.0,7.9250
253,3,1,30.0,16.1000
320,3,1,22.0,7.2500
706,2,0,45.0,13.5000
...,...,...,...,...
835,1,0,39.0,83.1583
192,3,0,19.0,7.8542
629,3,1,30.0,7.7333
559,3,0,36.0,17.4000


In [20]:
x_test

Unnamed: 0,Pclass,Sex,Age,Fare
495,3,1,30.0,14.4583
648,3,1,30.0,7.5500
278,3,1,7.0,29.1250
31,1,0,30.0,146.5208
255,3,0,29.0,15.2458
...,...,...,...,...
167,3,0,45.0,27.9000
306,1,0,30.0,110.8833
379,3,1,19.0,7.7750
742,1,0,21.0,262.3750


In [21]:
pd.DataFrame(y_train)


Unnamed: 0,Survived
105,0
68,1
253,0
320,0
706,1
...,...
835,1
192,1
629,0
559,1


In [22]:
pd.DataFrame(y_test)

Unnamed: 0,Survived
495,0
648,0
278,0
31,1
255,1
...,...
167,0
306,1
379,0
742,1


### Training the model

In [23]:
from sklearn.naive_bayes import GaussianNB

In [24]:
model = GaussianNB()

In [25]:
model.fit(x_train,y_train)

### Predicting all records

In [26]:
y_pred=model.predict(x_test)

In [27]:
y_pred

array([0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 1])

In [28]:
print(np.column_stack((y_pred,y_test)))

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

### Predicting for specific records

In [29]:
pclassno=int(input("Enter the Passenger Class No.(1-3) : "))
age=int(input("Enter the age : "))
sex=int(input("Enter Age (1-Male,0-Female) : "))
fare=float(input("Enter Fare : "))
newperson=[[pclassno,sex,age,fare]]
result=model.predict(newperson)
print(result)

if(result==1):
    print("Person might be Survive")
else:
    print("person might not be survived")

Enter the Passenger Class No.(1-3) :  2
Enter the age :  34
Enter Age (1-Male,0-Female) :  1
Enter Fare :  78.23


[0]
person might not be survived


### Evaluation

In [30]:
from sklearn.metrics import accuracy_score,precision_score,recall_score

In [31]:
accu=accuracy_score(y_pred,y_test)*100
print("Accuracy of model is : ",accu,"%")

Accuracy of model is :  77.57847533632287 %


In [34]:
precision=precision_score(y_pred,y_test)
print("Precision Score of model is : ",precision)

Precision Score of model is :  0.75


In [35]:
recall=recall_score(y_pred,y_test)
print("Recall Score of model is : ",recall)

Recall Score of model is :  0.6847826086956522
