In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

import matplotlib.pyplot as plt
%matplotlib inline
# Any results you write to the current directory are saved as output.

In [None]:
def min_max_normalization(data):
    """Rescale ``data`` linearly so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    data : array-like
        Numeric values (e.g. a NumPy array or a pandas Series) that support
        element-wise subtraction and division.

    Returns
    -------
    The result of ``(data - min) / (max - min)``. When every value is
    identical the range is zero; zeros are returned in that case instead of
    the NaNs that a 0/0 division would produce.
    """
    lowest = np.min(data)
    value_range = np.max(data) - lowest
    # Only the scalar case is guarded; a non-scalar range (e.g. per-column
    # reductions) falls through to the plain formula below.
    if np.ndim(value_range) == 0 and value_range == 0:
        # Constant input: skip the 0/0 division but still return floats.
        return (data - lowest) * 0.0
    return (data - lowest) / value_range

In [None]:
def prepossessing(X):
    """Turn a raw passenger DataFrame into an all-numeric frame scaled by min-max.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the columns 'PassengerId', 'Sex', 'Embarked', 'Cabin',
        'Ticket', 'Name', 'Age', 'Parch' and 'SibSp'. Every column that
        remains after 'PassengerId' is dropped is min-max normalised at the
        end, so any extra columns have to be numeric.

    Returns
    -------
    pandas.DataFrame
        A new frame. The caller's frame is left untouched, so the function
        can be called again on the same input.

    Notes
    -----
    Category codes and min/max ranges are derived from ``X`` alone. Calling
    this separately on two frames (e.g. train and test) does not guarantee
    that the same code or scaled value means the same thing in both.
    """
    # Work on a copy: popping and overwriting columns on the caller's frame
    # made a second call fail with a KeyError on 'PassengerId'.
    X = X.copy()

    # PassengerId is an identifier, not a feature.
    X.pop('PassengerId')

    # Sex -> integer category code.
    X['Sex'] = X['Sex'].astype('category').cat.codes

    # Embarked: missing values default to 'S', then encode.
    X['Embarked'] = X['Embarked'].fillna('S').astype('category').cat.codes

    # Cabin: missing -> 'NoCabin'; keep only the first whitespace-separated
    # entry, then encode.
    first_cabin = X['Cabin'].fillna('NoCabin').apply(lambda x: x.split()[0])
    X['Cabin'] = first_cabin.astype('category').cat.codes

    # Ticket: purely numeric tickets collapse to 'X'; otherwise keep the first
    # token once '.' and '/' have been stripped.
    X['Ticket'] = X['Ticket'].apply(lambda x : x.replace('.', '').replace('/', '').strip().split()[0] if not x.isdigit() else 'X')
    X['Ticket'] = X['Ticket'].astype('category').cat.codes

    # Name: reduce to the title between ', ' and '.', merge rare/variant
    # titles, then encode.
    title = X['Name'].str.split(', ', expand = True)[1].str.split('.', expand = True)[0]
    title = title.replace(['Capt', 'Col', 'Don', 'Dr', 'Jonkheer', 'Major', 'Rev', 'Sir'], 'Rare')
    title = title.replace(['Mlle', 'Ms', 'Lady'], 'Miss')
    title = title.replace(['Mme', 'the Countess', 'Dona'], 'Mrs')
    X['Name'] = title.astype('category').cat.codes

    # Age: fill gaps with the median age of rows sharing the same title code,
    # falling back to the overall median when a title has no known age.
    overall_median_age = X['Age'].median()
    title_median_age = X.groupby('Name')['Age'].transform('median')
    X['Age'] = X['Age'].fillna(title_median_age).fillna(overall_median_age)

    # Fare (when present): nothing imputed it before, so a missing fare stayed
    # NaN through normalisation and ended up in the returned features.
    if 'Fare' in X.columns:
        X['Fare'] = X['Fare'].fillna(X['Fare'].median())

    # Family size counts the passenger plus parents/children and siblings/spouses.
    X['Family'] = X['Parch'] + X['SibSp'] + 1

    # IsAlone is 1 exactly when the family size is 1.
    X['IsAlone'] = (X['Family'] == 1).astype(int)

    # Min-max normalise every remaining column.
    for column in X.columns:
        X[column] = min_max_normalization(X[column])

    return X

In [None]:
def load_data():
    """Build NumPy train/test arrays from the global ``train``, ``test`` and ``submit`` frames.

    Returns
    -------
    tuple
        ``((X_train, Y_train), (X_test, Y_test))`` where the X arrays are the
        preprocessed features and the Y arrays are the 'Survived' columns of
        ``train`` and ``submit`` respectively.

    Notes
    -----
    The global frames are not modified, so this can be called repeatedly.
    NOTE(review): Y_test is read from ``submit`` (gender_submission.csv);
    confirm that column holds real outcomes before trusting any evaluation
    computed against it.
    """
    # Copy first: popping 'Survived' straight off the globals made a second
    # call fail with a KeyError.
    X_train = train.copy()
    Y_train = X_train.pop('Survived')
    X_test = test.copy()
    # Read instead of pop so `submit` keeps its column.
    Y_test = submit['Survived']

    X_train = prepossessing(X_train)
    X_test = prepossessing(X_test)

    return ((np.array(X_train), np.array(Y_train)), (np.array(X_test), np.array(Y_test)))

In [None]:
# Load the raw CSV files into the module-level frames that load_data() reads.
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')
# NOTE(review): load_data() takes its test labels (Y_test) from this frame's
# 'Survived' column — confirm it holds real outcomes rather than placeholder values.
submit = pd.read_csv('../input/gender_submission.csv')

In [None]:
# Build the feature/label arrays once, then report the dimensions of each.
(X_train, Y_train), (X_test, Y_test) = load_data()

for template, array in (
    ('X_train shape: {}', X_train),
    ('Y_train shape: {}', Y_train),
    ('X_test shape: {}', X_test),
    ('Y_test shape: {}', Y_test),
):
    print(template.format(array.shape))

In [None]:
from keras.models import Model
from keras.layers import Dense, Input
from keras.optimizers import Adam

In [None]:
def model(input_shape):
    """Assemble a fully connected binary classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed straight to ``Input``.

    Returns
    -------
    An uncompiled ``Model``: eight ReLU ``Dense`` layers of decreasing width
    followed by a single sigmoid unit.
    """
    hidden_widths = (2048, 1024, 1024, 512, 256, 128, 64, 32)

    X_input = Input(shape = input_shape)

    # Chain the hidden layers in order, each feeding the next.
    tensor = X_input
    for width in hidden_widths:
        tensor = Dense(width, activation = 'relu')(tensor)

    # Single sigmoid unit as the output layer.
    output = Dense(1, activation = 'sigmoid')(tensor)

    return Model(inputs = X_input, outputs = output)

In [None]:
# NOTE: this rebinds the name `model` from the builder function to the network
# it returns. Running this cell a second time without first re-running the
# cell that defines `model(input_shape)` would call the network object
# instead of the builder.
model = model((X_train.shape[1], ))
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as an additional metric.
model.compile(loss = 'binary_crossentropy', optimizer = Adam(lr = 0.000002), metrics = ['accuracy'])
# model.summary()
# Train for 400 epochs with validation_split = 0.1, i.e. a tenth of the training
# data is held out for validation. `history` is used by the plotting cells.
# NOTE(review): no random seed is set anywhere visible, so results will vary
# between runs.
history = model.fit(X_train, Y_train, epochs = 400, validation_split = 0.1)

In [None]:
def draw(loss, acc, title=None):
    """Plot a loss curve above an accuracy curve in one two-panel figure.

    Parameters
    ----------
    loss : sequence of float
        Values drawn in the upper panel.
    acc : sequence of float
        Values drawn in the lower panel.
    title : str, optional
        Figure title, e.g. to tell training curves from validation curves.
        No title is drawn when ``None`` (the default).

    Notes
    -----
    The x axis is the position of each value in its sequence; it is labelled
    'epoch' because the callers in this notebook pass Keras history lists.
    """
    # Explicit axes instead of the pyplot state machine, so each panel can be
    # labelled without relying on which subplot is "current".
    fig, (loss_ax, acc_ax) = plt.subplots(2, 1)

    loss_ax.plot(loss)
    loss_ax.set_ylabel('loss')

    acc_ax.plot(acc)
    acc_ax.set_ylabel('accuracy')
    acc_ax.set_xlabel('epoch')

    if title is not None:
        fig.suptitle(title)

    plt.show()

In [None]:
# Training curves. Keras before 2.3 records metrics=['accuracy'] under 'acc';
# later versions use 'accuracy', so pick whichever key is present.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
draw(history.history['loss'], history.history[acc_key])

In [None]:
# Validation curves. Keras before 2.3 records the metric under 'val_acc';
# later versions use 'val_accuracy', so pick whichever key is present.
val_acc_key = 'val_acc' if 'val_acc' in history.history else 'val_accuracy'
draw(history.history['val_loss'], history.history[val_acc_key])

In [None]:
# Evaluate on the test arrays; the model was compiled with one loss and one
# metric, so score[0] is the loss and score[1] the accuracy.
# NOTE(review): Y_test originates from gender_submission.csv via load_data() —
# confirm it is real ground truth before reading this as test accuracy.
score = model.evaluate(X_test, Y_test)
print('loss: {}'.format(score[0]))
print('accuracy: {}'.format(score[1]))

In [None]:
# Predict survival probabilities for the test features.
predict = model.predict(X_test)
# Threshold at 0.5 and flatten: the network ends in a single output unit, so
# the predictions come back as a 2-D column; a DataFrame column wants 1-D data.
predict = np.where(predict >= 0.5, 1, 0).ravel()
submit['Survived'] = predict
# Write the submission file without the index column.
submit.to_csv('submission.csv', index = False)