# Day - 6 Titanic Survival Prediction using Naive Bayes

This notebook predicts Titanic passenger survival with a Gaussian Naive Bayes classifier.
It loads the dataset, preprocesses the data, trains a GaussianNB model,
predicts survival for a single person from user input, and evaluates accuracy on the test data.

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [None]:
# Load Dataset from the CSV file (the path is relative to the working directory)
csv_path = 'data/5titanic_survival_prediction_naive_bayes.csv'
dataset = pd.read_csv(csv_path)

FileNotFoundError: [Errno 2] No such file or directory: 'titanicsurvival.csv'

In [None]:
# Summarize Dataset: (rows, columns) first, then the first five rows
print(dataset.shape, dataset.head(), sep='\n')

(891, 5)
   Pclass     Sex   Age     Fare  Survived
0       3    male  22.0   7.2500         0
1       1  female  38.0  71.2833         1
2       3  female  26.0   7.9250         1
3       1  female  35.0  53.1000         1
4       3    male  35.0   8.0500         0


In [None]:
# Map 'Sex' column to binary: female=0, male=1.
# The column is overwritten in place, so the cell must be safe to re-run:
# mapping already-encoded integers would turn every value into NaN and the
# integer cast would then fail. Skip the mapping when the column is already
# integer-typed.
SEX_CODES = {'female': 0, 'male': 1}
if not pd.api.types.is_integer_dtype(dataset['Sex']):
    sex_encoded = dataset['Sex'].map(SEX_CODES)
    unmapped = sex_encoded.isna()
    if unmapped.any():
        # Labels outside SEX_CODES (or missing values) cannot be cast to int;
        # report them instead of failing with an opaque NaN-cast error.
        bad_labels = dataset.loc[unmapped, 'Sex'].unique().tolist()
        raise ValueError("Unexpected values in 'Sex' column: {0}".format(bad_labels))
    dataset['Sex'] = sex_encoded.astype(int)
print(dataset.head())

<bound method NDFrame.head of      Pclass  Sex   Age     Fare  Survived
0         3    1  22.0   7.2500         0
1         1    0  38.0  71.2833         1
2         3    0  26.0   7.9250         1
3         1    0  35.0  53.1000         1
4         3    1  35.0   8.0500         0
..      ...  ...   ...      ...       ...
886       2    1  27.0  13.0000         0
887       1    0  19.0  30.0000         1
888       3    0   NaN  23.4500         0
889       1    1  26.0  30.0000         1
890       3    1  32.0   7.7500         0

[891 rows x 5 columns]>


In [None]:
# Split the frame into the target column (Y) and the remaining feature columns (X)
Y = dataset['Survived']
X = dataset.drop(columns='Survived')

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,1,22.0,7.2500
1,1,0,38.0,71.2833
2,3,0,26.0,7.9250
3,1,0,35.0,53.1000
4,3,1,35.0,8.0500
...,...,...,...,...
886,2,1,27.0,13.0000
887,1,0,19.0,30.0000
888,3,0,,23.4500
889,1,1,26.0,30.0000


In [None]:
# Replace missing values in the Age column with the mean of the known ages.
# NOTE: the mean is taken over all rows, i.e. before the train/test split.
X = X.assign(Age=X['Age'].fillna(X['Age'].mean()))

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [None]:
# List the feature columns that still contain at least one missing value
columns_with_nan = X.isna().any()
print(X.columns[columns_with_nan])

Index(['Age'], dtype='object')

In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=0,
    test_size=0.25,
)

In [None]:
# Fit a Gaussian Naive Bayes classifier on the training features and labels.
# The fit call stays as the last expression so the cell still displays the estimator.
model = GaussianNB()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict survival for a single person (example input).
# The values are read interactively, so this cell blocks until all four
# prompts have been answered.
pclassNo = int(input("Enter Person's Pclass number: "))
gender = int(input("Enter Person's Gender 0-female 1-male (0 or 1): "))
# Age is a float column in the data (e.g. 22.0), so accept fractional ages
# the same way Fare is accepted; int() would reject input such as "26.5".
age = float(input("Enter Person's Age: "))
fare = float(input("Enter Person's Fare: "))
# Build the row as a DataFrame that reuses the training column names and order.
# The model was fitted on a DataFrame, and passing a bare list makes
# scikit-learn warn that X does not have valid feature names.
person = pd.DataFrame([[pclassNo, gender, age, fare]], columns=X_train.columns)
result = model.predict(person)

Index([], dtype='object')

In [None]:
# `result` is the array returned by model.predict. Read its first label
# explicitly rather than relying on the truth value of a NumPy array, which
# is only defined when the array holds exactly one element.
survived = result[0] == 1
if survived:
    print("Person might be Survived")
else:
    print("Person might not be Survived")

In [None]:
# Predict on test data and print predicted vs. actual labels side by side
# (first column: prediction, second column: true label)
y_pred = model.predict(X_test)
print(np.stack([y_pred, y_test], axis=1))

GaussianNB(priors=None, var_smoothing=1e-09)

In [None]:
# Evaluate accuracy: compare the test-set predictions with the true labels and
# report the score as a percentage. accuracy_score is imported together with
# the other imports in the first cell of the notebook.
print("Accuracy of the Model: {0}%".format(accuracy_score(y_test, y_pred) * 100))

Enter Person's Pclass number: 1
Enter Person's Gender 0-female 1-male(0 or 1): 1
Enter Person's Age: 26
Enter Person's Fare: 30
[0]
Person might not be Survived
