In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
import os  

In [None]:
# Read the Titanic training set from the local data directory and preview
# its first ten rows as the cell output.
titanic_df = pd.read_csv(filepath_or_buffer="./data/train.csv")
titanic_df.head(n=10)

In [None]:
# Binary-encode sex: 1 where Sex == "male", 0 for every other value.
titanic_df["sex_enc"] = np.where(titanic_df["Sex"].eq("male"), 1, 0)

In [None]:
# Fill missing ages with the mean of the observed ages; mean_age is kept as a
# notebook-level name because the prediction cell reuses it.
mean_age = titanic_df["Age"].mean()
titanic_df["new_age"] = titanic_df["Age"].fillna(value=mean_age)

In [None]:
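# Disabled alternative: bucket the imputed age into 20-year bins and one-hot
# encode the bins (the active feature list uses the z-scored "age_norm" instead).
# age_bins = [0, 20, 40, 60, 80, 100]
# age_lbl = [20, 40, 60, 80, 100]
# titanic_df['age_bin'] = pd.cut(titanic_df['new_age'], bins= age_bins, labels=age_lbl)
# titanic_df = pd.get_dummies(titanic_df, columns=["age_bin"])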

In [None]:
# Scale the numeric features:
#   age_norm   - z-score of the imputed age (mean / std of this frame)
#   sibsp_norm - min-max scaling of SibSp to [0, 1]
#   parch_norm - min-max scaling of Parch to [0, 1]
titanic_df = titanic_df.assign(
    age_norm=lambda d: (d["new_age"] - d["new_age"].mean()) / d["new_age"].std(),
    sibsp_norm=lambda d: (d["SibSp"] - d["SibSp"].min()) / (d["SibSp"].max() - d["SibSp"].min()),
    parch_norm=lambda d: (d["Parch"] - d["Parch"].min()) / (d["Parch"].max() - d["Parch"].min()),
)

In [None]:
# One-hot encode the port of embarkation (replaces "Embarked" with Embarked_* columns),
# then show how passengers are distributed over the ticket classes.
titanic_df = pd.get_dummies(data=titanic_df, columns=["Embarked"])
titanic_df["Pclass"].value_counts()

In [None]:
# One-hot encode the ticket class (replaces "Pclass" with Pclass_* columns).
titanic_df = pd.get_dummies(data=titanic_df, columns=["Pclass"])

In [None]:
# Shuffle once with a fixed seed so the split (and every downstream metric) is
# reproducible after Restart & Run All.
SPLIT_SEED = 42
shuffled_df = titanic_df.sample(frac=1, random_state=SPLIT_SEED)

# 80% train / 10% validation / 10% test, sliced positionally. Plain iloc slicing
# replaces np.split, which goes through DataFrame.swapaxes (deprecated in pandas 2.1).
n_rows = len(shuffled_df)
train_end, val_end = int(0.8 * n_rows), int(0.9 * n_rows)
train = shuffled_df.iloc[:train_end]
val = shuffled_df.iloc[train_end:val_end]
test = shuffled_df.iloc[val_end:]
assert len(train) + len(val) + len(test) == n_rows, "split lost or duplicated rows"

print("# training samples: " + str(train.shape))
print("# validation samples: " + str(val.shape))
print("# testing samples: " + str(test.shape))
# feature_list = ["sex_enc", "Parch", "SibSp", "Embarked_C", "Embarked_Q", "Embarked_S"
#                 , "Pclass_1", "Pclass_2", "Pclass_3", "age_bin_20", "age_bin_40", "age_bin_60", "age_bin_80", "age_bin_100"]
# Columns fed to the model; the dataset-building and prediction cells read this list.
feature_list = ["sex_enc", "parch_norm", "sibsp_norm", "Embarked_C", "Embarked_Q", "Embarked_S"
                , "Pclass_1", "Pclass_2", "Pclass_3", "age_norm"]

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Build a batched tf.data.Dataset of (features, label) pairs from a DataFrame.

  Args:
    dataframe: frame holding a 'Survived' column plus every column named in
      the notebook-level `feature_list`. A copy is used, so the caller's
      frame is left untouched.
    shuffle: when True, shuffle the examples with a buffer as large as the
      frame before batching.
    batch_size: number of examples per batch.

  Returns:
    A tf.data.Dataset yielding (features, labels) batches, where features has
    one float32 column per entry of `feature_list`.

  Raises:
    KeyError: if 'Survived' or any `feature_list` column is missing.
  """
  dataframe = dataframe.copy()
  labels = dataframe.pop('Survived')
  # Force a single float dtype. The selected columns mix bool (get_dummies
  # output in pandas >= 2.0), int and float; converting such a frame yields an
  # object array that TensorFlow cannot turn into a tensor.
  features = dataframe[feature_list].to_numpy(dtype="float32")
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [None]:
# Batch each split without shuffling; batch_size is reused when batching the
# prediction inputs.
batch_size = 10
train_ds, val_ds, test_ds = (
    df_to_dataset(split_df, shuffle=False, batch_size=batch_size)
    for split_df in (train, val, test)
)

In [None]:
# Small feed-forward binary classifier: one 32-unit ReLU hidden layer,
# 50% dropout, and a single sigmoid output.
input_shape = (len(feature_list),)
inputs = tf.keras.layers.Input(shape=input_shape)
x = tf.keras.layers.Dense(units=32, activation="relu")(inputs)
x = tf.keras.layers.Dropout(rate=0.5)(x)
output = tf.keras.layers.Dense(units=1, activation="sigmoid")(x)

model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"],
)
# Train for 50 epochs, reporting validation metrics after each one.
model.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
# Score the trained model on the held-out test split and report both metrics.
loss, accuracy = model.evaluate(test_ds)

for metric_label, metric_value in (("Loss: ", loss), ("Accuracy: ", accuracy)):
  print(metric_label, metric_value)

In [None]:
# Load the unlabeled test file and repeat the training-time feature
# engineering, reusing statistics taken from the training frame (mean_age and
# the titanic_df mean/std/min/max) so both sets are scaled identically.
test_data = pd.read_csv("./data/test.csv")
pred_in = test_data.copy()
pred_in["sex_enc"] = np.where(pred_in["Sex"]=="male", 1, 0)
pred_in["new_age"] = pred_in["Age"].fillna(mean_age)
pred_in = pd.get_dummies(pred_in, columns=["Embarked"])
pred_in = pd.get_dummies(pred_in, columns=["Pclass"])
# get_dummies only creates columns for categories that occur in this file.
# Add an all-zero column for any one-hot feature the model expects but this
# file lacks, so selecting feature_list later cannot fail with a KeyError.
for one_hot_col in feature_list:
  if one_hot_col.startswith(("Embarked_", "Pclass_")) and one_hot_col not in pred_in.columns:
    pred_in[one_hot_col] = 0
# pred_in['age_bin'] = pd.cut(pred_in['new_age'], bins= age_bins, labels=age_lbl)
# pred_in = pd.get_dummies(pred_in, columns=["age_bin"])
pred_in["age_norm"] = (pred_in["new_age"] - titanic_df["new_age"].mean())/titanic_df["new_age"].std()
pred_in["sibsp_norm"]=(pred_in["SibSp"] - titanic_df["SibSp"].min())/(titanic_df["SibSp"].max() - titanic_df["SibSp"].min())
pred_in["parch_norm"]=(pred_in["Parch"] - titanic_df["Parch"].min())/(titanic_df["Parch"].max() - titanic_df["Parch"].min())

In [None]:
# Convert the selected features to one float32 array before handing them to
# TensorFlow: the columns mix bool (get_dummies output in pandas >= 2.0), int
# and float, and such a frame converts to an object array that cannot become
# a tensor.
pred_features = pred_in[feature_list].to_numpy(dtype="float32")
pred_ds = tf.data.Dataset.from_tensor_slices(pred_features)
pred_ds = pred_ds.batch(batch_size)

In [None]:
# Run the model over the prediction batches and wrap the sigmoid outputs in a
# single-column frame.
predictions = model.predict(pred_ds)
pred_pd = pd.DataFrame(data=predictions, columns=["pred_survived"])

In [None]:
# Turn the predicted value into a class label: 1 when strictly above 0.5, else 0.
pred_pd["Survived"] = np.where(pred_pd["pred_survived"].gt(0.5), 1, 0)

In [None]:
# Attach the predictions to the input rows by joining on the row index.
out = pred_in.merge(pred_pd, left_index=True, right_index=True)

In [None]:
# Keep only the passenger id and the predicted label for the output file.
out_pd = out.loc[:, ["PassengerId", "Survived"]]

In [None]:
# Make sure the output directory exists, then write the predictions without
# the row index.
os.makedirs(name='./data/', exist_ok=True)
out_pd.to_csv(path_or_buf='./data/out.csv', index=False)