# Loading Libraries

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
import matplotlib.pyplot as plt

In [4]:
import seaborn as sns

# Loading Dataset

In [5]:
import os

# Path to the Titanic CSV. The default is the original author's local path;
# set the TITANIC_CSV environment variable to point at the file on another
# machine instead of editing this cell.
DATASET_PATH = os.environ.get(
    "TITANIC_CSV",
    "D:/Pantech Solutions/6_TitanicSurvivalPrediction_NAIVEBAYES/titanicsurvival.csv",
)
dataset = pd.read_csv(DATASET_PATH)

In [6]:
dataset.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.25,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.925,1
3,1,female,35.0,53.1,1
4,3,male,35.0,8.05,0


# Summarize Dataset

In [7]:
dataset.shape

(891, 5)

In [8]:
dataset.size

4455

In [9]:
dataset.ndim

2

In [10]:
dataset.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Pclass,891.0,2.308642,0.836071,1.0,2.0,3.0,3.0,3.0
Age,714.0,29.699118,14.526497,0.42,20.125,28.0,38.0,80.0
Fare,891.0,32.204208,49.693429,0.0,7.9104,14.4542,31.0,512.3292
Survived,891.0,0.383838,0.486592,0.0,0.0,0.0,1.0,1.0


In [11]:
dataset.describe(include="O").T

Unnamed: 0,count,unique,top,freq
Sex,891,2,male,577


In [12]:
dataset.columns

Index(['Pclass', 'Sex', 'Age', 'Fare', 'Survived'], dtype='object')

In [13]:
dataset.groupby(by="Sex", sort=False).size()

Sex
male      577
female    314
dtype: int64

In [14]:
dataset.Sex.value_counts()

male      577
female    314
Name: Sex, dtype: int64

# Mapping Text Data into Binary Values

In [15]:
# Encode Sex as a binary feature: male -> 1, female -> 0.
# The lookup falls back to the value itself, so rows that are already encoded
# pass through unchanged and re-running this cell is safe. (With a plain dict
# map, a second run turns every value into NaN and astype(int) then fails.)
# A missing value is left as NaN, so the int cast still raises as before.
sex_codes = {"male": 1, "female": 0}
dataset["Sex"] = dataset["Sex"].map(lambda value: sex_codes.get(value, value)).astype(int)

In [16]:
dataset.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,1,22.0,7.25,0
1,1,0,38.0,71.2833,1
2,3,0,26.0,7.925,1
3,1,0,35.0,53.1,1
4,3,1,35.0,8.05,0


In [17]:
dataset.Sex.dtype

dtype('int32')

In [18]:
type(dataset.Sex[4])

numpy.int32

# Segregating the Dataset into 2 Parts: Independent & Dependent Variables

In [19]:
# Feature matrix: every column except the target label "Survived".
x = dataset.drop(columns=["Survived"])

In [20]:
x.head()

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,1,22.0,7.25
1,1,0,38.0,71.2833
2,3,0,26.0,7.925
3,1,0,35.0,53.1
4,3,1,35.0,8.05


In [21]:
# Target vector: the "Survived" label column.
y = dataset["Survived"]

In [22]:
y.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

# Finding & Filling Null Values in our Features X

In [23]:
x.isnull().any()

Pclass    False
Sex       False
Age        True
Fare      False
dtype: bool

In [24]:
x.columns[x.isna().any()]

Index(['Age'], dtype='object')

In [25]:
x.isna().sum()

Pclass      0
Sex         0
Age       177
Fare        0
dtype: int64

In [26]:
# Replace the missing ages with the mean of the observed ages.
# NOTE(review): the mean is taken over all rows, before the train/test split
# further down, so rows that end up in the test set contribute to the
# imputed value.
mean_age = x["Age"].mean()
x["Age"] = x["Age"].fillna(mean_age)

# Checking That No Null Values Remain

In [27]:
x.isna().any()

Pclass    False
Sex       False
Age       False
Fare      False
dtype: bool

# Splitting Dataset into Training and Testing

In [28]:
from sklearn.model_selection import train_test_split

In [29]:
# Hold out 25% of the rows for testing; the fixed seed keeps the shuffled
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=0,
)

In [30]:
# Report how many rows landed in each split; features and labels should match.
n_train_features, n_train_labels = len(x_train), len(y_train)
n_test_features, n_test_labels = len(x_test), len(y_test)
print(f"Count of x_train : {n_train_features}\t\tCount of y_train : {n_train_labels}")
print(f"Count of x_test  : {n_test_features}\t\tCount of y_test  : {n_test_labels}")

Count of x_train : 668		Count of y_train : 668
Count of x_test  : 223		Count of y_test  : 223


# Training the Model

In [31]:
from sklearn.naive_bayes import GaussianNB

In [32]:
model = GaussianNB()

In [33]:
model.fit(x_train, y_train)

GaussianNB()

# Predicting Whether a Person Survived or Not

In [34]:
# Collect one passenger's details interactively and predict survival.
# Gender must use the same encoding as the training data (male = 1, female = 0).
pclass = int(input("Enter Person's Pclass No.                 : "))
gender = int(input("Enter Person's Gender (Male:1 & Female:0) : "))
# Age is a float column in the dataset (the minimum age is 0.42), so parse it
# as float; int() would reject an input such as "0.42".
age    = float(input("Enter Person's Age                        : "))
fare   = float(input("Enter Person's Fare                       : "))

# Wrap the values in a one-row DataFrame carrying the training column names.
# The model was fitted on a DataFrame, and recent scikit-learn versions warn
# about missing feature names when given a bare list at predict time.
person = pd.DataFrame([[pclass, gender, age, fare]], columns=x_train.columns)
result = model.predict(person)
print(result)

# predict() returns one label per input row; read the single label explicitly
# instead of relying on the truthiness of an array comparison.
if result[0] == 1:
    print("Person might be survived")
else:
    print("Person might not be survived")

Enter Person's Pclass No.                 : 1
Enter Person's Gender (Male:1 & Female:0) : 0
Enter Person's Age                        : 19
Enter Person's Fare                       : 263
[1]
Person might be survived


# Prediction for all Test Data

In [35]:
y_prediction = model.predict(x_test)

In [36]:
# Side-by-side view: first column is the actual label, second the prediction.
actual_vs_predicted = np.column_stack((y_test, y_prediction))
print(actual_vs_predicted)

[[0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 1]
 [1 0]
 [1 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 0]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [0 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]

# Evaluating the Model 

In [37]:
from sklearn.metrics import confusion_matrix

In [38]:
from sklearn.metrics import accuracy_score

In [39]:
# Compute both metrics on the held-out test set, then report them.
# confusion_matrix is called as (actual, predicted).
test_confusion = confusion_matrix(y_test, y_prediction)
test_accuracy_pct = accuracy_score(y_test, y_prediction) * 100
print(f"Confusion Matrix of the Model : \n{test_confusion}")
print(f"Accuracy of the Model : {test_accuracy_pct}%")

Confusion Matrix of the Model : 
[[110  29]
 [ 21  63]]
Accuracy of the Model : 77.57847533632287%
