In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
import os  

In [None]:
# Read the training CSV, then show the number of non-null entries per column
# so columns with missing values are easy to spot.
titanic_df = pd.read_csv(filepath_or_buffer="./data/train.csv")
titanic_df.count()

In [None]:
# Binary-encode sex: 1 where the value is exactly "male", 0 for anything else
# (missing values therefore also map to 0).
titanic_df["sex_enc"] = (titanic_df["Sex"] == "male").astype(int)

In [None]:
# Impute missing ages with the mean of the observed ages. `mean_age` is kept
# as its own variable because the prediction data is filled with the same value.
mean_age = titanic_df["Age"].mean()
titanic_df["new_age"] = titanic_df["Age"].fillna(mean_age)
# Preview a handful of rows rather than rendering the whole frame.
titanic_df.head()

In [None]:
# Keep only the six model features plus the "Survived" label column.
input_df = titanic_df.loc[
    :,
    ["sex_enc", "SibSp", "Parch", "Survived", "Pclass", "Fare", "new_age"],
]

In [None]:
# Shuffle once with a fixed seed so the 80/10/10 split is reproducible across
# kernel restarts, then slice positionally. Plain `iloc` slicing is used
# instead of `np.split`, which routes DataFrames through the deprecated
# `DataFrame.swapaxes`.
shuffled_df = input_df.sample(frac=1, random_state=42)
train_end = int(0.8 * len(shuffled_df))
val_end = int(0.9 * len(shuffled_df))

train = shuffled_df.iloc[:train_end]
val = shuffled_df.iloc[train_end:val_end]
test = shuffled_df.iloc[val_end:]

# Every row must land in exactly one of the three partitions.
assert len(train) + len(val) + len(test) == len(input_df)

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Convert a feature/label DataFrame into a batched ``tf.data.Dataset``.

  Args:
    dataframe: Frame containing the 'Survived' label column and the feature
      columns "sex_enc", "SibSp", "Parch", "Pclass", "Fare" and "new_age".
      The caller's frame is not modified; a copy is used internally.
    shuffle: When true, shuffle with a buffer as large as the frame.
    batch_size: Number of rows per batch.

  Returns:
    A dataset yielding ``(features, label)`` batches.
  """
  frame = dataframe.copy()
  # Split the label off so it cannot end up among the features.
  targets = frame.pop('Survived')
  features = frame[["sex_enc", "SibSp", "Parch", "Pclass", "Fare", "new_age"]]
  dataset = tf.data.Dataset.from_tensor_slices((features, targets))
  if shuffle:
    # A buffer covering every row shuffles over the whole dataset.
    dataset = dataset.shuffle(buffer_size=len(frame))
  return dataset.batch(batch_size)

In [None]:
batch_size = 10
# Shuffle the training rows so the optimizer does not see the same batches in
# the same order on every epoch. Validation and test data keep a fixed order
# because ordering does not affect their metrics.
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [None]:
# Small fully-connected binary classifier: six input features -> 32 ReLU
# units -> dropout -> a single sigmoid output.
input_shape = (6,)
inputs = tf.keras.layers.Input(shape=input_shape)
x = tf.keras.layers.Dense(units=32, activation="relu")(inputs)
x = tf.keras.layers.Dropout(rate=0.5)(x)
output = tf.keras.layers.Dense(units=1, activation="sigmoid")(x)

model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)
# Train for 50 epochs, reporting validation metrics after each one.
model.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
# Score the trained model on the held-out test partition; evaluate() returns
# the loss followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x=test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
# Load the unlabeled data to predict on and apply the same feature
# engineering that was applied to the training frame.
test_data = pd.read_csv("./data/test.csv")
pred_in = test_data.copy()
pred_in["sex_enc"] = np.where(pred_in["Sex"]=="male",1,0)
# Reuse the training-set mean so both frames are imputed with the same value.
pred_in["new_age"] = pred_in["Age"].fillna(mean_age)
# Fare is a model input too: a missing value would reach the network as NaN,
# produce a NaN prediction, and then silently become 0 when thresholded.
# Fill any gaps with the training-set mean, mirroring the Age handling.
pred_in["Fare"] = pred_in["Fare"].fillna(titanic_df["Fare"].mean())

In [None]:
# Feature-only dataset (no labels), with columns in the same order used when
# training, batched with the shared batch size.
pred_ds = tf.data.Dataset.from_tensor_slices(
    pred_in.loc[:, ["sex_enc", "SibSp", "Parch", "Pclass", "Fare", "new_age"]]
).batch(batch_size)

In [None]:
# Run inference and wrap the model outputs in a single-column frame.
predictions = model.predict(x=pred_ds)
pred_pd = pd.DataFrame(data=predictions, columns=["pred_survived"])

In [None]:
# Convert model outputs to hard 0/1 labels: strictly above 0.6 counts as survived.
# NOTE(review): the accuracy reported during training/evaluation presumably uses
# Keras' default 0.5 threshold, so it may not match this 0.6 cut-off - confirm.
pred_pd["Survived"] = (pred_pd["pred_survived"] > 0.6).astype(int)

In [None]:
# Attach the predictions to the input rows by aligning on the row index
# (inner join, which is the pd.merge default).
out = pred_in.merge(pred_pd, how="inner", left_index=True, right_index=True)

In [None]:
# Keep only the passenger identifier and the predicted label for export.
out_pd = out.loc[:, ["PassengerId", "Survived"]]

In [None]:
# Make sure the output directory exists, then write the PassengerId/Survived
# table as CSV without the index column.
os.makedirs(name='./data/', exist_ok=True)
out_pd.to_csv(path_or_buf='./data/out.csv', index=False)