In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from numpy.random import seed
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold

In [2]:
df = pd.read_csv("titanic_survival_data.csv")
df

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,no_cabin,Label
0,1,3,0,22.0,1,0,7.2500,0,2,0
1,2,1,1,38.0,1,0,71.2833,1,1,1
2,3,3,1,26.0,0,0,7.9250,0,2,1
3,4,1,1,35.0,1,0,53.1000,0,1,1
4,5,3,0,35.0,0,0,8.0500,0,2,0
...,...,...,...,...,...,...,...,...,...,...
886,887,2,0,27.0,0,0,13.0000,0,2,0
887,888,1,1,19.0,0,0,30.0000,0,1,1
888,889,3,1,24.0,1,2,23.4500,0,2,0
889,890,1,0,26.0,0,0,30.0000,1,1,1


In [3]:
# check whether there exist any null values
df.isnull().values.sum()

0

In [4]:
X = df[["PassengerId","Pclass",	"Sex",	"Age",	"SibSp",	"Parch",	"Fare",	"Embarked",	"no_cabin"]]
Y = df[["Label"]]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20)

In [5]:
model = Sequential()
model.add(Dense(10, input_dim=X.shape[1], activation='relu'))
model.add(Dense(8, activation='linear'))
model.add(Dense(1, activation='sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                100       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 88        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 197
Trainable params: 197
Non-trainable params: 0
_________________________________________________________________


In [6]:
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
training = model.fit(x_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=0)
val_acc = np.mean(training.history['val_accuracy'])
print("\n%s: %.2f%%" % ('Train set accuarcy', val_acc*100))
evaluate = model.evaluate(x_test, y_test)
print("\n%s: %.2f%%" % ('Test set accuracy', evaluate[1]*100))


Train set accuarcy: 71.92%

Test set accuracy: 78.21%


In [7]:
# Exercise 2
# Define the K-fold Cross Validator
kfold = KFold(n_splits=10, shuffle=True)

# K-fold Cross Validation model evaluation
fold_no = 1
for train, test in kfold.split(X.values, Y.values):
    print(X.values.shape)
    print(train.shape)
    model = Sequential()
    model.add(Dense(10, activation='relu'))
    model.add(Dense(7+fold_no, activation='linear'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
  # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} and hidden unit number {fold_no + 7} ...')

  # Fit data to model
    training = model.fit(x_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=0)

  # Generate generalization metrics
    val_acc = np.mean(training.history['val_accuracy'])
    print("\n%s: %.2f%%" % ('Train set accuarcy', val_acc*100))
    evaluate = model.evaluate(x_test, y_test)
    print("\n%s: %.2f%%" % ('Test set accuracy', evaluate[1]*100))

  # Increase fold number
    fold_no = fold_no + 1

(891, 9)
(801,)
------------------------------------------------------------------------
Training for fold 1 and hidden unit number 8 ...

Train set accuarcy: 73.40%

Test set accuracy: 79.89%
(891, 9)
(802,)
------------------------------------------------------------------------
Training for fold 2 and hidden unit number 9 ...

Train set accuarcy: 74.12%

Test set accuracy: 77.09%
(891, 9)
(802,)
------------------------------------------------------------------------
Training for fold 3 and hidden unit number 10 ...

Train set accuarcy: 69.55%

Test set accuracy: 74.86%
(891, 9)
(802,)
------------------------------------------------------------------------
Training for fold 4 and hidden unit number 11 ...

Train set accuarcy: 72.25%

Test set accuracy: 75.98%
(891, 9)
(802,)
------------------------------------------------------------------------
Training for fold 5 and hidden unit number 12 ...

Train set accuarcy: 73.05%

Test set accuracy: 79.33%
(891, 9)
(802,)
----------------