In [69]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import math

In [70]:
# Switch on seaborn's default plot theme for figures drawn later in the notebook.
sns.set_theme()

In [71]:
# Load the training data; the relative path is resolved against the working directory.
train_data = pd.read_csv('train.csv')

In [72]:
# Display the column labels of the loaded frame.
train_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

## Data format
- PassengerId : unique integer identifier
- Survived 0 or 1
- Pclass : 1, 2 or 3
- Sex : 'male' or 'female' **String only**
- Age : has **nan** values; they are filled below with the mean age of the rest of the dataset, rounded down (the median would be a more outlier-robust alternative)
- SibSp : 0,1,2,3,4,5,8
- Parch : 0,1,2,3,4,5,6
- Ticket : Number or string + number or just string
- Fare : Number
- Cabin : String + Number or **nan**, some have duplicate entries
- Embarked : 'S', 'C', 'Q' or **nan**, **String only**

In [73]:
# Impute missing ages with the mean of the known ages, rounded down to a whole year.
age_column = train_data['Age']
avg_age = math.floor(age_column.mean())
train_data['Age'] = age_column.fillna(avg_age)

In [74]:
# Replace missing embarkation ports with the placeholder category 'U'
# (presumably "unknown"), so those rows get their own one-hot column later.
train_data['Embarked'] = train_data['Embarked'].fillna('U')

In [75]:
# Discard the free-text columns that this notebook does not parse into features.
train_data.drop(columns=['Cabin', 'Ticket', 'Name'], inplace=True)

## Encode categorical attributes

In [76]:
# One-hot encode the two categorical columns.
# dtype='uint8' is passed explicitly because the default dummy dtype depends on
# the pandas version: pandas >= 2.0 returns booleans, and a frame mixing bool
# and numeric columns converts to an object array, which the later tensor
# conversion cannot handle. uint8 reproduces the 0/1 columns on every version.
one_hot_sex = pd.get_dummies(train_data['Sex'], dtype='uint8')
one_hot_Embarked = pd.get_dummies(train_data['Embarked'], dtype='uint8')

# The raw string columns are superseded by the dummy columns built above.
train_data.drop(columns=['Sex', 'Embarked'], inplace=True)

In [77]:
# Append both sets of dummy columns to the frame; rows are matched on the index.
train_data = train_data.join(one_hot_sex).join(one_hot_Embarked)

In [78]:
# Preview the first rows of the encoded frame.
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare,female,male,C,Q,S,U
0,1,0,3,22.0,1,0,7.25,0,1,0,0,1,0
1,2,1,1,38.0,1,0,71.2833,1,0,1,0,0,0
2,3,1,3,26.0,0,0,7.925,1,0,0,0,1,0
3,4,1,1,35.0,1,0,53.1,1,0,0,0,1,0
4,5,0,3,35.0,0,0,8.05,0,1,0,0,1,0


In [79]:
# Separate the label from the features. X_train is an alias of train_data (not a
# copy), and pop removes the 'Survived' column from that shared frame in place.
X_train = train_data
y_train = X_train.pop('Survived')


In [80]:
# Preview the first target values.
y_train.head()

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64

In [81]:
# Preview the first feature rows; 'Survived' is no longer among the columns.
X_train.head()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare,female,male,C,Q,S,U
0,1,3,22.0,1,0,7.25,0,1,0,0,1,0
1,2,1,38.0,1,0,71.2833,1,0,1,0,0,0
2,3,3,26.0,0,0,7.925,1,0,0,0,1,0
3,4,1,35.0,1,0,53.1,1,0,0,0,1,0
4,5,3,35.0,0,0,8.05,0,1,0,0,1,0


## Convert to Tensorflow model input

In [82]:
# Rebind both names to tensors built from the pandas objects; from here on
# X_train and y_train are no longer a DataFrame / Series.
X_train, y_train = tf.convert_to_tensor(X_train), tf.convert_to_tensor(y_train)

In [88]:
# Inspect the feature tensor's shape.
X_train.shape

TensorShape([891, 12])

In [84]:
# Create a Normalization layer over the last (feature) axis and let it derive
# its statistics from the training features.
# NOTE(review): in the cells visible here the adapted layer is never passed to
# the model, so training runs on unnormalized inputs — confirm whether that is intended.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X_train)

In [85]:
# Dataset.from_tensors would wrap the whole training set in ONE dataset element,
# so every epoch would be a single full-batch gradient step. Slice along the
# first axis instead (one element per passenger), reshuffle each epoch and
# train on mini-batches.
BATCH_SIZE = 32
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=X_train.shape[0])
    .batch(BATCH_SIZE)
)

## Create Model

In [91]:
def create_non_seq_model(input_dim=12, hidden_units=30, normalizer=None):
    """Build a functional (non-sequential) Keras model with a skip connection.

    The input feeds two stacked ReLU dense layers; the output of the second one
    is concatenated with the input itself before a final single-unit dense layer.

    Args:
        input_dim: Number of features per example. Defaults to 12.
        hidden_units: Width of each of the two hidden layers. Defaults to 30.
        normalizer: Optional preprocessing layer (for example an adapted
            ``tf.keras.layers.Normalization``) applied to the input before it is
            used by both the dense stack and the skip connection. With the
            default ``None`` the raw input is used.

    Returns:
        An uncompiled ``tf.keras.Model``. The output layer has no activation,
        so predictions are unbounded raw scores.
    """
    input_layer = tf.keras.layers.Input(shape=(input_dim,))
    # Only insert the preprocessing step when one was supplied, so the default
    # call builds exactly the same graph as before.
    features = input_layer if normalizer is None else normalizer(input_layer)
    hidden1 = tf.keras.layers.Dense(hidden_units, activation='relu')(features)
    hidden2 = tf.keras.layers.Dense(hidden_units, activation='relu')(hidden1)
    # Skip connection: the output layer sees the features and the learned representation.
    concat = tf.keras.layers.Concatenate()([features, hidden2])
    output = tf.keras.layers.Dense(1)(concat)
    model = tf.keras.Model(inputs=[input_layer], outputs=[output])
    return model

In [92]:
# Build the model with the function defined in the previous cell.
m = create_non_seq_model()

In [102]:
# 'Survived' is a 0/1 label, so train with binary cross-entropy rather than a
# regression loss. The model's output layer has no activation, hence
# from_logits=True. The accuracy metric thresholds the raw score at 0, which
# corresponds to a predicted probability of 0.5.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
m.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0)],
)

In [103]:
# Print the layer-by-layer summary of the model (output shapes, parameter counts, connections).
m.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 12)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 30)           390         ['input_1[0][0]']                
                                                                                                  
 dense_1 (Dense)                (None, 30)           930         ['dense[0][0]']                  
                                                                                                  
 concatenate (Concatenate)      (None, 42)           0           ['input_1[0][0]',                
                                                                  'dense_1[0][0]']            

## Training

In [104]:
# Train on train_ds for 20 epochs; the returned History object is shown as the cell output.
m.fit(train_ds, epochs = 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x151ab9fef40>