In [4]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [13]:
# Importing Titanic Dataset from CSV file
titanic_data = pd.read_csv("titanic-dataset.csv")
titanic_data.head()

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,"Braund, Mr. Owen Harris",3,male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,female,26.0,0,0,STON/O2. 3101282,7.925,,S,1
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,female,35.0,1,0,113803,53.1,C123,S,1
4,5,"Allen, Mr. William Henry",3,male,35.0,0,0,373450,8.05,,S,0


In [14]:
# Data Prep
titanic_data.drop(['PassengerId','Name','SibSp','Parch','Cabin','Ticket','Embarked'],axis=1,inplace=True)
titanic_data.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.25,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.925,1
3,1,female,35.0,53.1,1
4,3,male,35.0,8.05,0


In [28]:
# Independent Variables
X = titanic_data.drop(['Survived'],axis=1)
X.head()

Unnamed: 0,Pclass,Sex,Age,Fare
0,3,male,22.0,7.25
1,1,female,38.0,71.2833
2,3,female,26.0,7.925
3,1,female,35.0,53.1
4,3,male,35.0,8.05


In [29]:
# Dependent/Target Variable
y = titanic_data['Survived']
y.shape

(891,)

In [30]:
dummies = pd.get_dummies(X['Sex'])
dummies.head()

Unnamed: 0,female,male
0,0,1
1,1,0
2,1,0
3,1,0
4,0,1


In [31]:
X = pd.concat([X,dummies],axis=1)
X.head()

Unnamed: 0,Pclass,Sex,Age,Fare,female,male
0,3,male,22.0,7.25,0,1
1,1,female,38.0,71.2833,1,0
2,3,female,26.0,7.925,1,0
3,1,female,35.0,53.1,1,0
4,3,male,35.0,8.05,0,1


In [32]:
X = X.drop(['Sex'],axis=1)
X.head()

Unnamed: 0,Pclass,Age,Fare,female,male
0,3,22.0,7.25,0,1
1,1,38.0,71.2833,1,0
2,3,26.0,7.925,1,0
3,1,35.0,53.1,1,0
4,3,35.0,8.05,0,1


In [33]:
X.columns[X.isna().any()]

Index(['Age'], dtype='object')

In [34]:
X['Age'].head(10)

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
5     NaN
6    54.0
7     2.0
8    27.0
9    14.0
Name: Age, dtype: float64

In [38]:
# Filling NA values
import math
X['Age'] = X['Age'].fillna(math.floor(X['Age'].mean()))
X['Age'].head(10)

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
5    29.0
6    54.0
7     2.0
8    27.0
9    14.0
Name: Age, dtype: float64

In [48]:
from sklearn.model_selection import train_test_split as tts
X_train, X_test, y_train, y_test = tts(X,y,test_size=0.1)

In [49]:
X_train.shape

(801, 5)

In [50]:
X_test.shape

(90, 5)

In [51]:
y_train.shape

(801,)

In [52]:
y_test.shape

(90,)

In [53]:
# Creating Naive Bayes Model
from sklearn.naive_bayes import GaussianNB as GNB
nb_model = GNB()

In [54]:
# Training Naive Bayes Model
nb_model.fit(X_train,y_train)

GaussianNB()

In [55]:
# Scoring Naive Bayes Model
print("Naive Bayes Model Score : {:.4f}".format(nb_model.score(X_test,y_test)))

Naive Bayes Model Score : 0.8000


In [57]:
# Predicting
y_predicted = nb_model.predict(X_test)
y_predicted

array([1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 0], dtype=int64)

In [58]:
# Results
results = pd.DataFrame({'Survived (Actual)':y_test,'Survived (Predicted)':y_predicted})
results.head()

Unnamed: 0,Survived (Actual),Survived (Predicted)
577,1,1
525,0,0
777,1,1
716,1,1
649,1,1


In [59]:
# Exporting Results
results.to_csv('result.csv',index=False)

In [60]:
# Saving Model in Binary File
import joblib
joblib.dump(nb_model,'titanic-survival-prediction-model')

['titanic-survival-prediction-model']