# 1) Load the data

In [None]:
import pandas as pd
import numpy as np
# Read the raw dataset from 'data.csv' (a path relative to the current
# working directory) into a DataFrame that later cells refine step by step.
data = pd.read_csv('data.csv')

# 2) Fix cell values and column types

In [None]:
# Turn the '?' placeholder into a real missing value everywhere in the frame,
# then cast the `age` and `fare` columns to float64.
data = (
    data
    .replace('?', np.nan)
    .astype({"age": np.float64, "fare": np.float64})
)

# 3) Generate various plots using `seaborn` and `matplotlib`

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One row of five panels, each split by `sex`:
# violin plots for age and fare against `survived`, and point plots of
# `survived` against sibsp, parch and pclass.
fig, axs = plt.subplots(ncols=5, figsize=(30, 5))

# (plotting function, x column, y column) for each panel, left to right.
panels = [
    (sns.violinplot, "survived", "age"),
    (sns.pointplot, "sibsp", "survived"),
    (sns.pointplot, "parch", "survived"),
    (sns.pointplot, "pclass", "survived"),
    (sns.violinplot, "survived", "fare"),
]
for ax, (draw, x_col, y_col) in zip(axs, panels):
    draw(x=x_col, y=y_col, hue="sex", data=data, ax=ax)

# 4) Check how various attributes correlate with survival

In [None]:
# Swap the string labels for integer codes (male -> 1, female -> 0).
# The replacement is applied to every column of the frame, not only `sex`.
sex_codes = {'male': 1, 'female': 0}
data = data.replace(sex_codes)

In [None]:
# Absolute correlation of each attribute with `survived`.
# Only numeric/bool columns are handed to corr(): pandas >= 2.0 no longer
# skips non-numeric columns by default and raises on text columns instead,
# so any remaining string column in the frame would break this cell.
data.select_dtypes(include=['number', 'bool']).corr().abs()[["survived"]]

In [None]:
# `relatives` is 1 when sibsp + parch is positive and 0 otherwise.
# Computed column-wise rather than with a per-row Python lambda; a missing
# value in either column compares as False and therefore yields 0.
data['relatives'] = (data['sibsp'].add(data['parch']) > 0).astype('int64')

# Re-check the absolute correlations with `survived`, now including the new
# column. Restricting to numeric/bool columns keeps corr() working on
# pandas >= 2.0, which raises on text columns instead of skipping them.
data.select_dtypes(include=['number', 'bool']).corr().abs()[["survived"]]

# 5) Keep only the best attributes for training a model

In [None]:
# Keep the five chosen predictors plus the label, and discard every row
# that has a missing value in any of those columns.
model_columns = ['sex', 'pclass', 'age', 'relatives', 'fare', 'survived']
data = data.loc[:, model_columns].dropna()

# 6) Create and train a model based on the Naïve Bayes algorithm

In [None]:
from sklearn.model_selection import train_test_split
# Split predictors and label into train/test parts: 20% of the rows are held
# out for testing, and random_state=0 makes the shuffle repeatable.
feature_columns = ['sex', 'pclass', 'age', 'relatives', 'fare']
features = data[feature_columns]
target = data['survived']
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=0,
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the predictors. The scaler is fitted on the training split only;
# that same fitted scaler is then used to transform both splits.
sc = StandardScaler().fit(x_train)
X_train, X_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
from sklearn.naive_bayes import GaussianNB
# Train a Gaussian Naive Bayes classifier on the scaled training data.
# fit() returns the estimator itself, so the bare `model` on the last line
# shows the same cell output as calling fit() last would.
model = GaussianNB().fit(X_train, y_train)
model

# 7) Check the model's accuracy

In [None]:
from sklearn import metrics
# Predict labels for the held-out split and print the fraction that match.
predict_test = model.predict(X_test)
print(metrics.accuracy_score(y_true=y_test, y_pred=predict_test))

# 8) Create a second model, but this time using a neural network

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Start an empty Keras Sequential model; its layers are added in the next cell.
# NOTE: this rebinds `model`, which until here referred to the GaussianNB
# classifier, so re-running the earlier accuracy cell afterwards would use
# this network instead.
model = Sequential()

In [None]:
# Network layout: two ReLU layers of 5 units each (the first one declares the
# 5 input features) followed by a single sigmoid unit as the output.
# Running this cell again appends the layers a second time to the same model.
layers = (
    Dense(5, kernel_initializer='uniform', activation='relu', input_dim=5),
    Dense(5, kernel_initializer='uniform', activation='relu'),
    Dense(1, kernel_initializer='uniform', activation='sigmoid'),
)
for layer in layers:
    model.add(layer)

In [None]:
# Print a summary of the network assembled in the previous cell.
model.summary()

In [None]:
# Configure training (Adam optimiser, binary cross-entropy loss, accuracy
# reported as a metric) and fit for 50 epochs in mini-batches of 32.
# fit() is left as the last expression so its return value remains the cell output.
model.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model.fit(x=X_train, y=y_train, epochs=50, batch_size=32)

# 9) Check the model's accuracy: is it better than Naïve Bayes?

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels by thresholding at 0.5.
# `Sequential.predict_classes` was removed from recent Keras/TensorFlow
# releases; this expression is its documented equivalent for a single
# sigmoid output and also works on the older releases that still had it.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(metrics.accuracy_score(y_test, y_pred))