In [13]:
import numpy as np
import pandas as pd        # For loading and processing the dataset
import tensorflow as tf    # Of course, we need TensorFlow.
from sklearn.model_selection import train_test_split

# Read the CSV input file and show first 5 rows
df_train = pd.read_csv('input/train.csv')
df_train.head(5)

# To make 'Sex' numeric, we replace 'female' by 0 and 'male' by 1
df_train['Sex'] = df_train['Sex'].map({'female':0, 'male':1}).astype(int) 

# We replace 'Embarked' by three dummy variables 'Embarked_S', 'Embarked_C', and 'Embarked Q',
# which are 1 if the person embarked there, and 0 otherwise.
df_train = pd.concat([df_train, pd.get_dummies(df_train['Embarked'], prefix='Embarked')], axis=1)
df_train = df_train.drop('Embarked', axis=1)

# We normalize the age and the fare by subtracting their mean and dividing by the standard deviation
age_mean = df_train['Age'].mean()
age_std = df_train['Age'].std()
df_train['Age'] = (df_train['Age'] - age_mean) / age_std

fare_mean = df_train['Fare'].mean()
fare_std = df_train['Fare'].std()
df_train['Fare'] = (df_train['Fare'] - fare_mean) / fare_std

# In many cases, the 'Age' is missing - which can cause problems. Let's look how bad it is:
print("Number of missing 'Age' values: {:d}".format(df_train['Age'].isnull().sum()))

# A simple method to handle these missing values is to replace them by the mean age.
df_train['Age'] = df_train['Age'].fillna(df_train['Age'].mean())

df_train = df_train.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# With that, we're almost ready for training
df_train.head()

# Finally, we convert the Pandas dataframe to a NumPy array, and split it into a training and test set
X_train = df_train.drop('Survived', axis=1).to_numpy()
y_train = df_train['Survived'].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

# We'll build a classifier with two classes: "survived" and "didn't survive",
# so we create the according labels
# This is taken from https://www.kaggle.com/klepacz/titanic/tensor-flow
labels_train = (np.arange(2) == y_train[:,None]).astype(np.float32)
labels_test = (np.arange(2) == y_test[:,None]).astype(np.float32)

Number of missing 'Age' values: 177


In [14]:
df_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,1,-0.530005,1,0,-0.502163,0,0,1
1,2,1,1,0,0.57143,1,0,0.786404,1,0,0
2,3,1,3,0,-0.254646,0,0,-0.48858,0,0,1
3,4,1,1,0,0.364911,1,0,0.420494,0,0,1
4,5,0,3,1,0.364911,0,0,-0.486064,0,0,1


In [15]:
pd.DataFrame(data=X_train)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,476.0,1.0,1.0,2.003921e-16,0.0,0.0,0.398358,0.0,0.0,1.0
1,410.0,3.0,0.0,2.003921e-16,3.0,1.0,-0.135581,0.0,0.0,1.0
2,463.0,1.0,1.0,1.190988e+00,0.0,0.0,0.126693,0.0,0.0,1.0
3,162.0,2.0,0.0,7.091099e-01,0.0,0.0,-0.331114,0.0,0.0,1.0
4,769.0,3.0,1.0,2.003921e-16,1.0,0.0,-0.162078,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...
707,581.0,2.0,0.0,-3.234859e-01,1.0,1.0,-0.044356,0.0,0.0,1.0
708,861.0,3.0,1.0,7.779496e-01,2.0,0.0,-0.364151,0.0,0.0,1.0
709,213.0,3.0,1.0,-5.300051e-01,0.0,0.0,-0.502163,0.0,0.0,1.0
710,578.0,1.0,0.0,6.402701e-01,1.0,0.0,0.476840,0.0,0.0,1.0


In [16]:
model = tf.keras.Sequential([
    # tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation='softmax')
])

model.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
  loss=tf.keras.losses.BinaryCrossentropy(
    from_logits=True, label_smoothing=0, reduction="auto", name="binary_crossentropy"),
  metrics=['accuracy'],
)

In [17]:
x = model.layers[4].get_weights()

In [18]:
tbCallBack = tf.keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
model.fit(
  X_train,
  labels_train, # training targets
  epochs=50,
  callbacks=[tbCallBack]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fafb86e36d0>

In [19]:
print("Evaluate on test data")
results = model.evaluate(X_test, labels_test)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.7125011682510376, 0.5810055732727051]


In [20]:
print("Evaluate on train data")
results = model.evaluate(X_train, labels_train)
print("train loss, train acc:", results)

Evaluate on train data
train loss, train acc: [0.6905739903450012, 0.625]


In [21]:
np.savetxt('test_seen.out', X_test, header="{},{}".format(*X_test.shape), delimiter=",")
np.savetxt('labels_test_seen.out', labels_test, header="{},{}".format(*labels_test.shape), delimiter=",")

In [22]:
w1_eval = np.transpose(model.layers[0].get_weights()[0])
w2_eval = np.transpose(model.layers[2].get_weights()[0])
wo_eval = np.transpose(model.layers[4].get_weights()[0])

b1_eval = model.layers[0].get_weights()[1]
b2_eval = model.layers[2].get_weights()[1]
bo_eval = model.layers[4].get_weights()[1]

np.savetxt('w1.out', w1_eval, header="{},{}".format(*w1_eval.shape), delimiter=",")
np.savetxt('w2.out', w2_eval, header="{},{}".format(*w2_eval.shape), delimiter=",")
np.savetxt('wo.out', wo_eval, header="{},{}".format(*wo_eval.shape), delimiter=",")
np.savetxt('b1.out', b1_eval, header="{},{}".format(b1_eval.shape[0], 1), delimiter=",")
np.savetxt('b2.out', b2_eval, header="{},{}".format(b2_eval.shape[0], 1), delimiter=",")
np.savetxt('bo.out', bo_eval, header="{},{}".format(bo_eval.shape[0], 1), delimiter=",")

In [23]:
X_test.shape

(179, 10)

In [24]:
labels_test

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.

In [17]:
import onnx

In [18]:
import onnxmltools

In [19]:
onnx_model = onnxmltools.convert_keras(model)

tf executing eager_mode: True
tf.keras model eager_mode: False
The ONNX operator number change on the optimization: 15 -> 9
The maximum opset needed by this model is only 9.


In [21]:
onnxmltools.utils.save_model(onnx_model, 'model.onnx')