In [12]:
import pandas as pd
df = pd.read_csv("titanic.csv")
df.head()

Unnamed: 0,PassengerId,Name,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,1,"Braund, Mr. Owen Harris",3,male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,female,38.0,1,0,PC 17599,71.2833,C85,C,1
2,3,"Heikkinen, Miss. Laina",3,female,26.0,0,0,STON/O2. 3101282,7.925,,S,1
3,4,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,female,35.0,1,0,113803,53.1,C123,S,1
4,5,"Allen, Mr. William Henry",3,male,35.0,0,0,373450,8.05,,S,0


## Data Exploration

In [13]:
df.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis='columns',inplace=True)
df

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.2500,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.9250,1
3,1,female,35.0,53.1000,1
4,3,male,35.0,8.0500,0
...,...,...,...,...,...
886,2,male,27.0,13.0000,0
887,1,female,19.0,30.0000,1
888,3,female,,23.4500,0
889,1,male,26.0,30.0000,1


In [14]:
target = df.Survived
inputs = df.drop('Survived',axis='columns')

In [15]:
dummies = pd.get_dummies(inputs.Sex)
dummies.head(3)

Unnamed: 0,female,male
0,0,1
1,1,0
2,1,0


In [16]:
inputs = pd.concat([inputs,dummies],axis='columns')
inputs.head()

Unnamed: 0,Pclass,Sex,Age,Fare,female,male
0,3,male,22.0,7.25,0,1
1,1,female,38.0,71.2833,1,0
2,3,female,26.0,7.925,1,0
3,1,female,35.0,53.1,1,0
4,3,male,35.0,8.05,0,1


In [17]:
inputs.drop('Sex',axis='columns',inplace=True)
inputs.head()

Unnamed: 0,Pclass,Age,Fare,female,male
0,3,22.0,7.25,0,1
1,1,38.0,71.2833,1,0
2,3,26.0,7.925,1,0
3,1,35.0,53.1,1,0
4,3,35.0,8.05,0,1


In [18]:
inputs.columns[inputs.isna().any()]

Index(['Age'], dtype='object')

In [19]:
inputs.Age = inputs.Age.fillna(inputs.Age.mean())
inputs.head()

Unnamed: 0,Pclass,Age,Fare,female,male
0,3,22.0,7.25,0,1
1,1,38.0,71.2833,1,0
2,3,26.0,7.925,1,0
3,1,35.0,53.1,1,0
4,3,35.0,8.05,0,1


## Training the model

In [62]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(inputs,target,test_size =0.2)

In [63]:
len(X_train)

712

In [64]:
len(X_test)

179

In [65]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()

In [66]:
 model.fit(X_train,y_train)

In [67]:
model.score(X_test,y_test)

0.8100558659217877

In [68]:
model.predict(X_test)

array([0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 0, 1], dtype=int64)

In [69]:
model.predict_proba(X_test)

array([[9.16876412e-01, 8.31235885e-02],
       [9.10525307e-01, 8.94746928e-02],
       [9.66814195e-01, 3.31858046e-02],
       [9.86661778e-01, 1.33382218e-02],
       [2.77631394e-02, 9.72236861e-01],
       [2.75700208e-02, 9.72429979e-01],
       [9.89164453e-01, 1.08355467e-02],
       [5.74636000e-02, 9.42536400e-01],
       [9.10525307e-01, 8.94746928e-02],
       [9.88026607e-01, 1.19733925e-02],
       [9.99837708e-09, 9.99999990e-01],
       [8.10348422e-03, 9.91896516e-01],
       [9.87042918e-01, 1.29570818e-02],
       [3.61015546e-02, 9.63898445e-01],
       [6.00715053e-02, 9.39928495e-01],
       [9.84211161e-01, 1.57888387e-02],
       [9.74786354e-01, 2.52136464e-02],
       [7.11829361e-03, 9.92881706e-01],
       [9.73129110e-01, 2.68708903e-02],
       [9.86631707e-01, 1.33682933e-02],
       [2.51861832e-02, 9.74813817e-01],
       [9.73129110e-01, 2.68708903e-02],
       [9.88067514e-01, 1.19324856e-02],
       [9.87813847e-01, 1.21861530e-02],
       [9.751124