In [229]:
import pandas as pd
import numpy as np
import itertools as it
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.layers import Flatten, Conv2D, MaxPool2D, ReLU, Dense
from keras.models import Sequential
from keras.optimizers import Adamax, Adam, Adadelta
from keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy

In [230]:
data = pd.read_csv("train.csv")
submission_input = pd.read_csv("test.csv")

In [231]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [232]:
def convert_cabin(x):
    if x != x:
        return 0
    return ord(x[0]) - ord('B')

def convert_embarked(x):
    if x == 'Q':
        return 0.5
    if x == 'S':
        return 1
    return 0


In [233]:
def clean(data_frame: pd.DataFrame):
    data_frame.drop(['Name','Ticket', 'PassengerId'], axis=1, inplace=True)
    data_frame.dropna()
    data_frame.Age = data_frame.Age.fillna(data_frame.Age.median()) / 80
    data_frame.Sex = pd.get_dummies(data_frame.Sex, drop_first=True) 
    data_frame.Cabin = np.array([convert_cabin(k) for k in data_frame.Cabin]) / 10
    data_frame.Embarked = [convert_embarked(k) for k in data_frame.Embarked] 
    data_frame.Fare = data_frame.Fare / 100.0
    data_frame.Parch /= 10
    data_frame.SibSp /= 10
    data_frame.Pclass /= 10

In [234]:
pId = submission_input.PassengerId
clean(submission_input)
clean(data)

In [235]:
data.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked
0,0,0.3,1,0.275,0.1,0.0,0.0725,0.0,1.0
1,1,0.1,0,0.475,0.1,0.0,0.712833,0.1,0.0
2,1,0.3,0,0.325,0.0,0.0,0.07925,0.0,1.0
3,1,0.1,0,0.4375,0.1,0.0,0.531,0.1,1.0
4,0,0.3,1,0.4375,0.0,0.0,0.0805,0.0,1.0


In [236]:
def shuffled_data(data):
    train, test = train_test_split(data, test_size=0.2)
    return (train.drop(['Survived'],axis=1),train['Survived'],test.drop(['Survived'], axis=1),test['Survived'])

In [237]:
accuracies = {}

In [238]:
model = Sequential()

model.add(Dense(64, activation='relu', input_shape=((8,))))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='sigmoid'))

In [239]:
x, y, x_test, y_test = shuffled_data(data)

In [241]:
model.compile(optimizer="rmsprop", loss=SparseCategoricalCrossentropy(), metrics=["accuracy", "top_k_categorical_accuracy"])


In [263]:
model.fit(x, y, epochs=1, validation_split=0.1)



<keras.callbacks.History at 0x21332e0ca30>

In [264]:
model.evaluate(x_test, y_test)



[0.644848108291626, 0.7932960987091064, 1.0]

In [259]:
p = [np.argmax(x) for x in model.predict(submission_input)]



In [260]:
submission = pd.DataFrame({"PassengerId":pId, "Survived":p})

In [261]:
submission.to_csv("submission.csv", mode='w', index=False)

In [262]:
submission

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0
