# Neural Network with Titanic Data
Date: 17/8/2020

* To construct multiple neural networks (NNs) for the Titanic dataset.
* Will try to use both PyTorch and TensorFlow.
* Resources [Code](https://code.visualstudio.com/docs/python/data-science-tutorial)

In [1]:
# import essential packages
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Neural Network
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Report whether TensorFlow can see a GPU.
# The public tf.config API is used instead of the private
# tensorflow.python.client.device_lib module, and the result is printed rather
# than asserted so the notebook still runs end-to-end on a CPU-only machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPUs visible to TensorFlow:", len(gpu_devices))

In [3]:
# Load the training set and the test set from CSV files in the working directory.
# train_data carries the 'Survived' label used for fitting; test_data is the
# set that predictions are later generated for.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

#### Preprocessing

In [4]:
# Undersample the majority class: drop the first 207 third-class male
# non-survivors so that both outcomes end up with the same number of rows.
# A single combined mask is used instead of chained boolean indexing, which
# relies on pandas silently re-aligning each mask to the previously filtered frame.
n_drop = 207  # number of majority-class rows to remove
drop_mask = (
    (train_data['Sex'] == 'male')
    & (train_data['Pclass'] == 3)
    & (train_data['Survived'] == 0)
)
exclude_data = train_data.loc[drop_mask].iloc[:n_drop]
train_data_balance = train_data.drop(exclude_data.index)
train_data_balance.Survived.value_counts()

1    342
0    342
Name: Survived, dtype: int64

In [5]:
from sklearn.compose import make_column_transformer 
from sklearn.preprocessing import OneHotEncoder

In [14]:
# 1. Select the model columns and split them into features (train_nmf) and label (y_train).
train_nmf = train_data_balance.loc[:, ['Survived', 'Pclass', 'Sex', 'SibSp', 'Parch']]
y_train = train_nmf['Survived']
train_nmf = train_nmf.drop('Survived', axis='columns')

# One-hot encode 'Sex'; every other selected column is passed through unchanged.
column_trans = make_column_transformer((OneHotEncoder(), ['Sex']), remainder='passthrough')
X_train = column_trans.fit_transform(train_nmf)

# -------- #

# 2. Prepare the test data, X_test.
# Missing fares are filled with the median *Fare* of passengers sharing the same
# SibSp/Parch combination (using the Age median here would write ages into Fare).
med = test_data.groupby(['SibSp', 'Parch'])['Fare'].transform('median')
# Fall back to the overall median in case an entire group has no fare recorded.
test_data['Fare'] = test_data['Fare'].fillna(med).fillna(test_data['Fare'].median())

X_test = test_data.loc[:, ['Pclass', 'Sex', 'SibSp', 'Parch']]

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the balanced training data for evaluation.
# The labels are read from train_data_balance rather than from y_train because
# y_train is overwritten by this very statement: re-running the cell would
# otherwise try to split the full X_train against an already-reduced label
# vector and fail with a length mismatch.
x_train, x_test, y_train, y_test = train_test_split(
    X_train,
    train_data_balance['Survived'],
    test_size=0.1,
    random_state=0,
)

In [16]:
# Sanity check: number of training labels left after the hold-out split.
y_train.shape

(615,)

#### Model definition

In [17]:
# Small fully-connected binary classifier:
# 5 input features -> Dense(5, relu) -> Dense(5, relu) -> Dense(1, sigmoid).
model = Sequential([
    Dense(5, input_dim=5, activation='relu', kernel_initializer='uniform'),
    Dense(5, activation='relu', kernel_initializer='uniform'),
    Dense(1, activation='sigmoid', kernel_initializer='uniform'),
])

In [18]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 30        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 66
Trainable params: 66
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Configure training for binary classification: Adam optimiser,
# binary cross-entropy loss, accuracy tracked as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)

# Train silently (verbose=0) for 200 epochs in mini-batches of 32 samples.
model.fit(
    x_train,
    y_train,
    epochs=200,
    batch_size=32,
    verbose=0,
)

<tensorflow.python.keras.callbacks.History at 0x253cb8ad3a0>

#### Predicting

In [20]:
from sklearn import metrics

In [21]:
# Sequential.predict_classes is deprecated and absent from newer TensorFlow
# releases. For a single sigmoid output the equivalent is to threshold the
# predicted probability at 0.5. ravel() turns the (n, 1) output into a flat
# label vector.
y_pred = (model.predict(x_test) > 0.5).astype('int32').ravel()
print(metrics.accuracy_score(y_test, y_pred))

0.782608695652174


In [22]:
def make_submission (predictions, clf):
    """Write a submission CSV pairing test-set passenger ids with predictions.

    Parameters
    ----------
    predictions : array-like
        Values for the 'Survived' column, one per row of the global
        ``test_data`` frame.
    clf : str
        Tag identifying the model; the file is written to
        ``submission_<clf>.csv`` in the current working directory.
    """
    # Start from the PassengerId column of the global test set and attach the predictions.
    submission = test_data[['PassengerId']].assign(Survived=predictions)

    out_path = ''.join(('submission_', clf, '.csv'))
    submission.to_csv(out_path, index=False)

In [23]:
# Keep only the columns the model was trained on, in the same order as the training features.
feature_cols = ['Pclass', 'Sex', 'SibSp', 'Parch']
X_test = test_data[feature_cols]

In [24]:
# Encode the test features with the transformer already fitted on the training
# data (transform, not fit_transform: the encoder must not be re-fitted on test
# data). Then threshold the sigmoid output at 0.5, which replaces
# Sequential.predict_classes, absent from newer TensorFlow releases.
# ravel() yields a flat vector of 0/1 labels.
predictions = (model.predict(column_trans.transform(X_test)) > 0.5).astype('int32').ravel()

In [25]:
# Write the test-set predictions to 'submission_nn02.csv'.
make_submission(predictions, 'nn02')