 # #ML-Mini-4 : Titanic Survival Prediction using Naive Bayes

### 1 - Importing Libraries

In [1]:
import pandas as pd
import numpy as np

### 2 - Uploading the Dataset

In [2]:
# Load the Titanic passenger records from the CSV file (path is relative to
# the notebook's working directory).
dts = pd.read_csv(filepath_or_buffer="Titanic_Dataset.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
dts

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.2500,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.9250,1
3,1,female,35.0,53.1000,1
4,3,male,35.0,8.0500,0
...,...,...,...,...,...
886,2,male,27.0,13.0000,0
887,1,female,19.0,30.0000,1
888,3,female,,23.4500,0
889,1,male,26.0,30.0000,1


### 3 - Summarizing the Dataset

In [3]:
# Report the dataset dimensions as a (rows, columns) tuple.
n_rows, n_cols = dts.shape
print((n_rows, n_cols))

(891, 5)


In [4]:
# Preview the first five passenger rows as plain text.
print(dts.head(n=5))

   Pclass     Sex   Age     Fare  Survived
0       3    male  22.0   7.2500         0
1       1  female  38.0  71.2833         1
2       3  female  26.0   7.9250         1
3       1  female  35.0  53.1000         1
4       3    male  35.0   8.0500         0


### 4 - Mapping Sex Data to Binary Value

In [5]:
# Encode the categorical 'Sex' column as integers: male -> 0, female -> 1.
sex_codes = {'male': 0, 'female': 1}

# Distinct values currently present in the column. Anything that is neither a
# known label nor an already-encoded code (including missing values) is
# rejected up front with a readable message instead of an opaque cast error.
gen_set = set(dts['Sex'])
unexpected = gen_set - set(sex_codes) - set(sex_codes.values())
if unexpected:
    raise ValueError(f"Unexpected values in 'Sex' column: {unexpected}")

# Labels are translated to codes; values that are already 0/1 pass through
# unchanged, so re-running this cell does not turn the column into NaN.
dts['Sex'] = dts['Sex'].map(lambda value: sex_codes.get(value, value)).astype(int)
print(dts.head(10))

   Pclass  Sex   Age     Fare  Survived
0       3    0  22.0   7.2500         0
1       1    1  38.0  71.2833         1
2       3    1  26.0   7.9250         1
3       1    1  35.0  53.1000         1
4       3    0  35.0   8.0500         0
5       3    0   NaN   8.4583         0
6       1    0  54.0  51.8625         0
7       3    0   2.0  21.0750         0
8       3    1  27.0  11.1333         1
9       2    1  14.0  30.0708         1


### 5 - Segregating Dataset into X [ Input / Independent Variable ] & Y [ Output / Dependent Variable ]

In [6]:
# Feature matrix: every column except the 'Survived' target.
# drop() returns a new frame, so dts itself keeps the target column.
X = dts.drop(columns=['Survived'])
X

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,0,22.0,7.2500
1,1,1,38.0,71.2833
2,3,1,26.0,7.9250
3,1,1,35.0,53.1000
4,3,0,35.0,8.0500
...,...,...,...,...
886,2,0,27.0,13.0000
887,1,1,19.0,30.0000
888,3,1,,23.4500
889,1,0,26.0,30.0000


In [7]:
# Target vector: the 'Survived' label column (0 / 1 values).
Y = dts['Survived']
Y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

### 6 - Finding and Imputing NA Values in Features

In [8]:
# List the feature columns that contain at least one missing value.
has_missing = X.isna().any()
X.columns[has_missing]

Index(['Age'], dtype='object')

In [9]:
# Replace missing ages with the mean of the known ages.
# NOTE(review): the mean is taken over every row of X, i.e. before the
# train/test split done later in the notebook — confirm that is acceptable.
X['Age'] = X['Age'].fillna(X['Age'].mean())

#### Testing Again to Check for NA Values

In [10]:
# Re-check per column for missing values; an empty Index means none remain.
X.columns[X.isnull().any(axis=0)]

Index([], dtype='object')

### 7 - Splitting Dataset into Train & Test Sets

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)

### 8 - Training our Model

In [12]:
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian Naive Bayes classifier on the training split.
# fit() hands back the estimator itself, so building and fitting are chained.
model = GaussianNB().fit(X_train, Y_train)
# Show the fitted estimator as the cell output.
model

GaussianNB()

### 9 - Predicting Whether Another Person of a Given Pclass, Gender, Age & Fare Would Survive or Not

In [13]:
# Collect one passenger's details interactively and predict survival with the
# fitted classifier `model`.
pclassNo = int(input("Enter Person's Pclass No : "))
gender = int(input("Enter Person's Gender (male:0, female:1) : "))
if gender not in (0, 1):
    # The model was trained on a 0/1 encoding; any other code is meaningless.
    raise ValueError("Gender must be 0 (male) or 1 (female)")
# Ages are stored as floats in the dataset, so fractional input is accepted too.
age = float(input("Enter Person's Age : "))
fare = float(input("Enter Person's Fare : "))

# Build a one-row frame with the training columns so the model receives the
# features under the same names and in the same order it was fitted with.
person = pd.DataFrame([[pclassNo, gender, age, fare]], columns=X_train.columns)
result = model.predict(person)
print(result)

# predict() returns one label per input row; inspect the single entry.
if result[0] == 1:
    print("This Person might have Survived :)")
else:
    print("This Person might have not Survived :(")

Enter Person's Pclass No : 1
Enter Person's Pclass No(male:0, female:1) : 0
Enter Person's Age : 68
Enter Person's Fare : 5
[0]
This Person might have not Survived :(




### 12 - Prediction Results for all Test Data

In [14]:
# Predict labels for the held-out test rows.
Y_pred = model.predict(X_test)
# Pair each prediction with its true label: column 0 = predicted, column 1 = actual.
pred_vs_actual = np.column_stack([Y_pred, Y_test])
print(pred_vs_actual)

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 1]
 [0 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

### 13 - Evaluating the Accuracy of the Model

In [15]:
from sklearn.metrics import accuracy_score

# Share of test rows whose predicted label matches the true one, as a percentage.
accuracy_pct = accuracy_score(Y_test, Y_pred) * 100
print("Accuracy of the Model is {0}%".format(accuracy_pct))

Accuracy of the Model is 77.57847533632287%
