In [None]:

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subdirs, entries in os.walk('/kaggle/input'):
    for entry in entries:
        print(os.path.join(folder, entry))

# Any results you write to the current directory are saved as output.

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# Load the training and test CSV files into DataFrames.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (r"../input/titanic/train.csv", r"../input/titanic/test.csv")
)

In [None]:
# Display the training frame exactly as it was read from the CSV file.
train_data

In [None]:
# Display the test frame exactly as it was read from the CSV file.
test_data

In [None]:
from sklearn.impute import SimpleImputer

def nan_padding(data, columns):
    """Fill missing values in the given columns using ``SimpleImputer``.

    A default-configured ``SimpleImputer`` is fitted on each listed column
    independently and its output replaces that column (mean imputation is
    presumably the default strategy -- confirm against the installed
    scikit-learn version).

    Args:
        data: DataFrame containing every name listed in ``columns``.
        columns: Iterable of column names to impute.

    Returns:
        A new DataFrame with the listed columns imputed. ``data`` itself is
        left untouched.
    """
    # Work on a copy: assigning into ``data`` would rewrite the caller's
    # frame as a hidden side effect of a function that also returns it.
    padded = data.copy()
    for column in columns:
        imputer = SimpleImputer()
        filled = imputer.fit_transform(padded[column].values.reshape(-1, 1))
        # fit_transform hands back an (n, 1) array; flatten it so the column
        # is assigned from a plain 1-D vector.
        padded[column] = np.ravel(filled)
    return padded

# Columns whose missing entries are imputed in both datasets.
nan_columns = ["Age", "SibSp", "Parch"]

train_data = nan_padding(data=train_data, columns=nan_columns)
test_data = nan_padding(data=test_data, columns=nan_columns)
        

In [None]:
# Display the training frame after the missing-value imputation above.
train_data

In [None]:
# Keep the test-set passenger ids before "PassengerId" is dropped from the
# frame; they are needed again when the submission file is written.
test_passenger_id = test_data.loc[:, "PassengerId"]

In [None]:
def drop_not_concerned(data, columns):
    """Return ``data`` without the listed columns.

    Args:
        data: Source DataFrame; it is not modified.
        columns: Column label or list of labels to remove.

    Returns:
        A new DataFrame that lacks ``columns``.
    """
    trimmed = data.drop(columns=columns)
    return trimmed

# Columns removed from both datasets before modelling.
not_concerned_columns = ["PassengerId", "Name", "Ticket", "Fare", "Cabin", "Embarked"]
train_data = drop_not_concerned(data=train_data, columns=not_concerned_columns)
test_data = drop_not_concerned(data=test_data, columns=not_concerned_columns)

In [None]:
# Preview the training frame after the unused columns were dropped.
train_data.head()

In [None]:
# Preview the test frame after the unused columns were dropped.
test_data.head()

In [None]:
def dummy_data(data, columns):
    """Replace each listed column with its indicator (dummy) columns.

    For every name in ``columns`` the indicator columns produced by
    ``pd.get_dummies`` (prefixed with the column name) are appended on the
    right and the source column is removed.

    Args:
        data: Source DataFrame.
        columns: Iterable of column names to expand.

    Returns:
        The expanded DataFrame; ``data`` itself when ``columns`` is empty.
    """
    encoded = data
    for name in columns:
        indicators = pd.get_dummies(encoded[name], prefix=name)
        remaining = encoded.drop(name, axis=1)
        encoded = pd.concat([remaining, indicators], axis=1)
    return encoded

# Columns expanded into one indicator column per distinct value.
dummy_columns = ["Pclass"]
train_data = dummy_data(data=train_data, columns=dummy_columns)
test_data = dummy_data(data=test_data, columns=dummy_columns)


In [None]:
# Preview the test frame after "Pclass" was expanded into indicator columns.
test_data.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

def sex_to_int(data):
    """Encode the ``Sex`` column as integer labels.

    The encoder is fitted on the fixed vocabulary ``["male", "female"]``
    rather than on the data, so every dataset passed in shares one mapping
    (presumably female -> 0 and male -> 1, since LabelEncoder orders its
    classes -- confirm against the scikit-learn documentation).

    Args:
        data: DataFrame with a ``Sex`` column holding "male" / "female".

    Returns:
        A new DataFrame whose ``Sex`` column holds the integer codes. The
        input frame is not modified.
    """
    encoder = LabelEncoder()
    encoder.fit(["male", "female"])
    # Copy first so the caller's frame is not rewritten as a side effect,
    # matching the other helpers that return a new frame.
    encoded = data.copy()
    encoded["Sex"] = encoder.transform(encoded["Sex"])
    return encoded

# Encode "Sex" in both datasets, then preview the training frame.
train_data = sex_to_int(data=train_data)
test_data = sex_to_int(data=test_data)
train_data.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler

def normalize_age(data, scaler=None):
    """Rescale the ``Age`` column with a min-max scaler.

    Args:
        data: DataFrame with a numeric ``Age`` column.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied as-is, so several datasets can share one
            scaling. When omitted, a new ``MinMaxScaler`` is fitted on
            ``data`` itself, which is the original behaviour.

    Returns:
        A new DataFrame with ``Age`` rescaled. ``data`` is not modified.
    """
    ages = data["Age"].values.reshape(-1, 1)
    if scaler is None:
        scaled = MinMaxScaler().fit_transform(ages)
    else:
        scaled = scaler.transform(ages)
    normalized = data.copy()
    # The scaler returns an (n, 1) array; flatten it for column assignment.
    normalized["Age"] = np.ravel(scaled)
    return normalized

# Fit the scaler on the training ages only and reuse it for the test set.
# Scaling each dataset with its own min/max would map the same raw age to
# different feature values in train and test.
age_scaler = MinMaxScaler().fit(train_data["Age"].values.reshape(-1, 1))
train_data = normalize_age(train_data, age_scaler)
test_data = normalize_age(test_data, age_scaler)
train_data.head()

In [None]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

def split_valid_test_data(data, fraction=(1 - 0.8), random_state=None):
    """Split a labelled frame into training and validation parts.

    Args:
        data: DataFrame containing a ``Survived`` label column plus the
            feature columns.
        fraction: Share of rows held out for validation; forwarded to
            ``train_test_split`` as ``test_size``.
        random_state: Optional seed forwarded to ``train_test_split`` so the
            shuffle can be reproduced. The default ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        Tuple ``(train_x, train_y, valid_x, valid_y)``. ``train_x`` is a
        NumPy array, ``valid_x`` is left as a DataFrame, and both label
        parts come from the ``LabelBinarizer`` output.
    """
    labels = LabelBinarizer().fit_transform(data["Survived"])
    features = data.drop(["Survived"], axis=1)

    train_x, valid_x, train_y, valid_y = train_test_split(
        features, labels, test_size=fraction, random_state=random_state
    )
    # NOTE(review): only train_x is converted to an array; valid_x stays a
    # DataFrame. Left unchanged because callers may rely on it.
    return train_x.values, train_y, valid_x, valid_y

train_x, train_y, valid_x, valid_y = split_valid_test_data(data=train_data)

# Report the shape of each part and the first few training labels.
print(
    "train_x:{}".format(train_x.shape),
    "train_y:{}".format(train_y.shape),
    "train_y content:{}".format(train_y[:3]),
    "valid_x:{}".format(valid_x.shape),
    "valid_y:{}".format(valid_y.shape),
    sep="\n",
)
    
    

In [None]:
# from collections import namedtuple
from keras.utils import to_categorical
from sklearn.preprocessing import Binarizer

def build_neural_network(hidden_units=10, epochs=50, threshold=0.5,
                         output_path="evaluation_submission.csv"):
    """Train a small dense classifier and write test-set predictions to CSV.

    The function reads the module-level ``train_x`` / ``train_y`` (training),
    ``valid_x`` / ``valid_y`` (evaluation), ``test_data`` (prediction input)
    and ``test_passenger_id`` (submission ids) prepared by earlier cells.

    Args:
        hidden_units: Width of the single hidden ``Dense`` layer.
        epochs: Number of training epochs passed to ``model.fit``.
        threshold: Predicted probabilities strictly above this value are
            written as 1, all others as 0.
        output_path: Destination of the submission CSV.

    Returns:
        None. Side effects: prints the result of ``model.evaluate`` on the
        validation data and writes ``output_path`` with the passenger ids
        and a ``Survived`` column.
    """
    # Hand Keras plain float32 arrays: valid_x and test_data arrive as
    # DataFrames and the frames mix integer, float and indicator columns.
    features = np.asarray(train_x, dtype=np.float32)
    valid_features = np.asarray(valid_x, dtype=np.float32)
    test_features = np.asarray(test_data, dtype=np.float32)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_units, input_shape=(features.shape[1],),
                              kernel_initializer="glorot_uniform", activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(features, train_y, epochs=epochs)

    loss_and_metrics = model.evaluate(valid_features, valid_y, batch_size=100)
    print(loss_and_metrics)

    # The default batch size is used here; predicting one row per batch only
    # slowed inference down.
    y_data = model.predict(test_features)

    # Threshold with NumPy instead of ``Binarizer(0.5)``: recent scikit-learn
    # releases reject the positional threshold (the parameter is
    # keyword-only). The ">" comparison keeps the same cut-off rule.
    test_predict_result = (y_data > threshold).astype(np.int32)

    evaluation = test_passenger_id.copy().to_frame()
    # Flatten the (n, 1) prediction array so it is assigned as one column.
    evaluation["Survived"] = test_predict_result.ravel()
    evaluation.to_csv(output_path, index=False)


build_neural_network(10)

In [None]:
def get_barch(data_x, data_y, batch_size=32):
    """Yield consecutive ``(batch_x, batch_y)`` slices of ``batch_size`` items.

    Only full batches are produced (``len(data_x) // batch_size`` of them),
    so a trailing remainder shorter than ``batch_size`` is skipped.
    The name is presumably a typo for ``get_batch``; it is kept so existing
    callers keep working.

    Args:
        data_x: Sliceable sequence of inputs that supports ``len``.
        data_y: Sliceable sequence of targets, sliced with the same bounds.
        batch_size: Number of items per batch; must be at least 1.

    Yields:
        Tuples ``(batch_x, batch_y)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    batch_n = len(data_x) // batch_size
    # range() is required: iterating over the bare integer raised
    # "TypeError: 'int' object is not iterable" before any batch was produced.
    for i in range(batch_n):
        start = i * batch_size
        stop = start + batch_size
        yield data_x[start:stop], data_y[start:stop]
        