In [47]:
import pandas as pd
import tensorflow as tf
import numpy as np
import os  

In [48]:
# Read the training CSV, then display the number of non-null values per column.
# Columns whose count is below the row count (Age, Cabin, Embarked in the output
# below) contain missing values that later cells have to deal with.
titanic_df = pd.read_csv(filepath_or_buffer="./data/train.csv")
titanic_df.count()

PassengerId    891
Survived       891
Pclass         891
Name           891
Sex            891
Age            714
SibSp          891
Parch          891
Ticket         891
Fare           891
Cabin          204
Embarked       889
dtype: int64

In [49]:
# Binary-encode sex: 1 where Sex is exactly "male", 0 for every other value.
titanic_df["sex_enc"] = titanic_df["Sex"].eq("male").astype(int)

In [50]:
# Impute missing ages with the mean of the observed ages. `mean_age` stays a
# separate variable because the prediction cell reuses it to fill the test file.
mean_age = titanic_df["Age"].mean()
titanic_df["new_age"] = titanic_df["Age"].where(titanic_df["Age"].notna(), mean_age)

In [51]:
# One-hot encode the port of embarkation into Embarked_C / Embarked_Q / Embarked_S.
# dtype=int is explicit because pandas >= 2.0 defaults the indicator columns to bool;
# a feature frame that mixes bool with int/float columns converts to an object array,
# which TensorFlow cannot turn into a tensor. Integer 0/1 keeps the frame numeric.
# Rows with a missing Embarked value get 0 in all three indicator columns.
# NOTE: this consumes the "Embarked" column, so re-running the cell without first
# reloading the CSV raises a KeyError.
titanic_df = pd.get_dummies(titanic_df, columns=["Embarked"], dtype=int)
titanic_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,sex_enc,new_age,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,1,22.000000,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,0,38.000000,1,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,0,26.000000,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,0,35.000000,0,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,1,35.000000,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,1,27.000000,0,0,1
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,0,19.000000,0,0,1
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,0,29.699118,0,0,1
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,1,26.000000,1,0,0


In [52]:
# Shuffle once with a fixed seed so the 80/10/10 train/validation/test split is
# the same after every kernel restart (an unseeded shuffle changes the split, and
# therefore the reported metrics, on each run).
SPLIT_SEED = 42
shuffled_df = titanic_df.sample(frac=1, random_state=SPLIT_SEED)

# Positional slices replace np.split, which is deprecated for DataFrames in
# recent pandas releases; the cut points are unchanged.
train_end = int(0.8 * len(shuffled_df))
val_end = int(0.9 * len(shuffled_df))
train = shuffled_df.iloc[:train_end]
val = shuffled_df.iloc[train_end:val_end]
test = shuffled_df.iloc[val_end:]

# Columns fed to the model, in input order.
feature_list = ["sex_enc", "Pclass", "new_age", "Parch", "SibSp", "Embarked_C", "Embarked_Q", "Embarked_S" ]

In [53]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  """Build a batched tf.data.Dataset of (features, label) pairs from a DataFrame.

  Args:
    dataframe: DataFrame containing a 'Survived' column and every column named
      in the module-level `feature_list`. The frame is not modified.
    shuffle: When True, shuffle the rows using a buffer as large as the frame.
    batch_size: Number of rows per emitted batch.

  Returns:
    A tf.data.Dataset yielding (features, labels) batches, where features is a
    float32 tensor with one column per entry of `feature_list`.
  """
  labels = dataframe['Survived'].to_numpy()
  # Cast to one float dtype up front: a frame mixing bool (the pandas >= 2.0
  # get_dummies default) with int/float columns would otherwise become an
  # object array that TensorFlow cannot convert to a tensor.
  features = dataframe[feature_list].to_numpy(dtype='float32')
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  if shuffle:
    # shuffle() rejects a zero buffer size, so keep it >= 1 for an empty frame.
    ds = ds.shuffle(buffer_size=max(len(dataframe), 1))
  ds = ds.batch(batch_size)
  return ds

In [54]:
batch_size = 10
# Shuffle the training set only. Keras does not shuffle a tf.data.Dataset passed
# to fit(), so with shuffle=False every epoch would see the identical batch
# sequence. Validation and test order does not affect their metrics, so those
# stay unshuffled.
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [55]:
# Fix TensorFlow's global seed before any layer is created so weight
# initialisation and dropout masks start from the same state on a fresh
# "Restart & Run All"; without it every run trains a different model.
tf.random.set_seed(42)

# One input unit per engineered feature.
input_shape = (len(feature_list),)
inputs = tf.keras.layers.Input(shape=input_shape)
# Single hidden layer with dropout, then a sigmoid unit that outputs the
# predicted survival probability.
x = tf.keras.layers.Dense(32, activation="relu")(inputs)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1, activation="sigmoid")(x)

model = tf.keras.Model(inputs, output)
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(), metrics=["accuracy"])
model.fit(train_ds, epochs=50, validation_data=val_ds)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1fe11df75c8>

In [56]:
# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(test_ds)

for metric_label, metric_value in (("Loss: ", loss), ("Accuracy: ", accuracy)):
    print(metric_label, metric_value)

Loss:  0.4808747172355652
Accuracy:  0.8222222328186035


In [59]:
# Apply the training-time feature engineering to the unlabeled test file.
test_data = pd.read_csv("./data/test.csv")
pred_in = test_data.copy()
pred_in["sex_enc"] = np.where(pred_in["Sex"]=="male", 1, 0)
# Fill missing ages with the mean computed on the training data, not a new mean
# from the test file, so both sets are imputed with the same value.
pred_in["new_age"] = pred_in["Age"].fillna(mean_age)
# Pin the Embarked categories to the ports the model was trained on, so the
# Embarked_C / Embarked_Q / Embarked_S columns always exist even if a port does
# not occur in this file. Plain get_dummies only creates columns for values that
# are present. Any other or missing value encodes as all zeros.
pred_in["Embarked"] = pd.Categorical(pred_in["Embarked"], categories=["C", "Q", "S"])
# dtype=int keeps the indicators numeric (pandas >= 2.0 would default to bool).
pred_in = pd.get_dummies(pred_in, columns=["Embarked"], dtype=int)

In [60]:
# Feature-only dataset for inference (no labels), in the original row order.
# Convert to a float32 array first: a frame mixing bool/int/float columns
# would otherwise become an object array that TensorFlow cannot convert.
pred_features = pred_in[feature_list].to_numpy(dtype="float32")
pred_ds = tf.data.Dataset.from_tensor_slices(pred_features)
pred_ds = pred_ds.batch(batch_size)

In [61]:
# Run inference; the model's single sigmoid unit yields one probability per row,
# stored here as the "pred_survived" column.
predictions = model.predict(pred_ds)
pred_pd = pd.DataFrame({"pred_survived": predictions[:, 0]})

In [62]:
# Turn probabilities into hard labels: 1 when strictly above 0.6, otherwise 0.
# NOTE(review): the "accuracy" metric reported during training presumably uses a
# 0.5 cut-off, so it may not describe these 0.6-thresholded labels - confirm that
# the stricter threshold is intentional.
pred_pd["Survived"] = pred_pd["pred_survived"].gt(0.6).astype(int)

In [63]:
# Attach the predictions to the test rows by index; both frames carry the same
# default positional index, so row i of the predictions belongs to row i of pred_in.
out = pred_in.merge(right=pred_pd, left_index=True, right_index=True)

In [64]:
# Keep only the two columns written to the output file.
out_pd = out.loc[:, ["PassengerId", "Survived"]]

In [65]:
# Ensure the output directory exists, then write the predictions without the
# DataFrame index column.
os.makedirs(name='./data/', exist_ok=True)
out_pd.to_csv(path_or_buf='./data/out.csv', index=False)