In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
%matplotlib inline
from __future__ import print_function
import collections
import math
import numpy as np
import os
import random
import tensorflow as tf
import zipfile
import pandas as pd
from matplotlib import pylab
from six.moves import range
from six.moves.urllib.request import urlretrieve
from sklearn.manifold import TSNE
from sklearn.cross_validation import train_test_split
import time
from tf_utils import random_mini_batches, convert_to_one_hot

seed = 3



In [2]:
def preTreatmentCabin(x):
    try:
        isinstance(float(x),(float))
        return '0'
    except:
        return x[0]
    
def preTreatmentData(treatment_data, with_label = True):
    # sex
    treatment_data['Sex'] = treatment_data['Sex'].replace(['male', 'female'],[0, 1])

    # age
    average_age = np.average(list(filter(lambda x: not np.isnan(x), treatment_data['Age'])))
    treatment_data['Age'] = np.nan_to_num(treatment_data['Age'], average_age)

    # Embarked
    data_embarked_unique = {}.fromkeys(treatment_data['Embarked']).keys()
    treatment_data['Embarked'] = treatment_data['Embarked'].replace(data_embarked_unique, np.arange(len(data_embarked_unique)))

    # Cabin
    data_cabin_string_list = list(map(preTreatmentCabin, list(treatment_data['Cabin'])))
    data_cabin_keys = {}.fromkeys(data_cabin_string_list).keys()
    treatment_data['Cabin'] = data_cabin_string_list
    treatment_data['Cabin'] = treatment_data['Cabin'].replace(data_cabin_keys, np.arange(len(data_cabin_keys)))

    # print(data)
    model_data = treatment_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]
    pre_treatment_data = model_data.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)), axis=0)

    if with_label:
        pre_treatment_data['Survived'] = treatment_data['Survived']
    
    return pre_treatment_data
    

In [16]:
# train_data
raw_train_data = pd.read_csv('train.csv')
train_data = preTreatmentData(raw_train_data)

# test_data
# raw_test_data = pd.read_csv('test.csv')

data = train_data.values
print("data shape:", data.shape)

train_data, test_data = train_test_split(data, test_size=0.2, random_state=seed)
print("train_data shape:", train_data.shape)
print("test_data shape:", test_data.shape)

train_set = train_data[:, 0:8].T
train_label = train_data[:, 8].reshape((-1, 1)).T
print("train_set shape:", train_set.shape)
print("train_label shape:", train_label.shape)

test_set = test_data[:, 0:8].T
test_label = test_data[:, 8].reshape((-1, 1)).T
print("test_set shape:", test_set.shape)
print("test_label shape:", test_label.shape)


data shape: (891, 9)
train_data shape: (712, 9)
test_data shape: (179, 9)
train_set shape: (8, 712)
train_label shape: (1, 712)
test_set shape: (8, 179)
test_label shape: (1, 179)


In [4]:
def mode_sigmoid(train_data, train_label, test_data, test_label, 
         learning_rate = 0.005, batch_size = 32, 
         n_epochs = 1000, seed = 3,
         threshold = 0.9, show_epoch_cost = False,
         is_train = True,
         layers = [10, 5]
        ):
    X = tf.placeholder(dtype = np.float32, shape = [None, 8])
    Y = tf.placeholder(dtype = np.float32, shape = [None, 1])
    
    hidden_layer = tf.layers.dense(inputs=X, units=8, activation=tf.nn.relu)
    
    for layer in layers:
        weight_l2_regularizer = tf.contrib.layers.l2_regularizer(0.001)
        hidden_layer = tf.layers.dense(inputs=hidden_layer, 
                                       units=layer, 
                                       activation=tf.nn.relu,
                                       kernel_regularizer=weight_l2_regularizer,
#                                        bias_regularizer=None,
#                                        activity_regularizer=None,
                                      )

    logits= tf.layers.dense(inputs=hidden_layer, units=1, activation=None)
    
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels = Y, logits = logits)

    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    
    cost = tf.reduce_mean(entropy) + 0.01 * sum(reg_losses)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    costs = []
    with tf.Session() as sess:
        merged_summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter('../tmp', sess.graph)
        start_time = time.time()
        sess.run(tf.global_variables_initializer())	

        for epoch in range(n_epochs): 
            epoch_cost = 0        
            n_batches = int(train_set.shape[1]/batch_size)
            seed = seed + 1
            minibatches = random_mini_batches(train_set, train_label, batch_size, seed)

            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch

                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X.T, Y: minibatch_Y.T})

                epoch_cost += minibatch_cost / n_batches

            # Print the cost every epoch
            if epoch % 100 == 0 and show_epoch_cost:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if epoch % 5 == 0 and show_epoch_cost:
                costs.append(epoch_cost)

        print('Total time: {0} seconds'.format(time.time() - start_time))
        print('Optimization Finished!')

        print('test the model:')

        result = tf.cast(tf.greater(logits, threshold), "float")
        correct_prediction = tf.equal(result , Y)

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: train_set.T, Y: train_label.T}))
        
        if is_train:
            print ("Test Accuracy:", accuracy.eval({X: test_set.T, Y: test_label.T}))
        else: 
            predict_label = result.eval({X: test_set.T})
            print ("result Y:", predict_label.T)  
            
        print ("-------------Done----------------------")

In [53]:
mode_sigmoid(train_data, train_label, test_data, test_label, 
     show_epoch_cost = True,
     learning_rate=0.005, 
     n_epochs=500, 
     layers=[8, 16, 32, 16, 8])

Cost after epoch 0: 0.674377
Cost after epoch 100: 0.337653
Cost after epoch 200: 0.285627
Cost after epoch 300: 0.261568
Cost after epoch 400: 0.269086
Total time: 12.2080979347229 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.907303
Test Accuracy: 0.72067
-------------Done----------------------


In [None]:
learning_rates = np.linspace(0.015, 0.005, 11)

for lr in learning_rates:
    print(">>> model with learning rate: ", lr)
    mode_sigmoid(train_data, train_label, test_data, test_label, 
         learning_rate = lr, 
         n_epochs=2000,
         layers=[8, 16, 32, 64, 32, 16, 8])
    print('-----------------------------------')

>>> model with learning rate:  0.015
Total time: 95.87288856506348 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.863764
Test Accuracy: 0.72067
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.014
Total time: 100.58641791343689 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.876405
Test Accuracy: 0.776536
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.013
Total time: 107.92197942733765 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.869382
Test Accuracy: 0.765363
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.012
Total time: 112.46878099441528 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.889045
Test Accuracy: 0.782123
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.011
Total

In [11]:
# train_data
raw_train_data = pd.read_csv('train.csv')
train_data = preTreatmentData(raw_train_data)

data = train_data.values
print("data shape:", data.shape)

train_data, test_data = train_test_split(data, test_size=0.2, random_state=seed)
print("train_data shape:", train_data.shape)
print("test_data shape:", test_data.shape)

train_set = train_data[:, 0:8].T
train_label = train_data[:, 8].reshape((-1, 1)).T
print("train_set shape:", train_set.shape)
print("train_label shape:", train_label.shape)

# test_data
raw_test_data = pd.read_csv('test.csv')
test_data = preTreatmentData(raw_test_data, with_label = False)

test_set = test_data.T
print("test_set shape:", test_set.shape)


data shape: (891, 9)
train_data shape: (712, 9)
test_data shape: (179, 9)
train_set shape: (8, 712)
train_label shape: (1, 712)
test_set shape: (8, 418)


In [13]:
mode_sigmoid(train_data, train_label, test_data, test_label, 
     learning_rate = 0.009, 
     n_epochs=2000, 
     is_train=False,
     layers=[8, 16, 32, 64, 32, 16, 8])

Total time: 42.88404107093811 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.907303
result Y: [[ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]

In [17]:
is_survived = np.array([  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  1,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  1,  1,  0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  0,  0,  1,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  1,  0,  0,  1,  0,  0,  0,  1,  0,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  1,  0,  1,  0,  0,  1,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  1,  1,  0,  0,  1,  1,  0,  0,  0,  1,  1,  0,  1,  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1,  1,  0,  0,  1,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  1,  1,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  1,  1,  1,  0,  1,  0,  0,  1,  0,  0,  1,  1,  0,  1,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  0,  0,  0,  1,  0,  0,  1,  1,  0,  1,  1,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0])

predict_label = pd.DataFrame()
predict_label['Survived'] = is_survived
predict_label['PassengerId'] = raw_test_data['PassengerId']
predict_label.to_csv('predict_label.csv',index=None)  

In [18]:
def mode_softmax(train_data, train_label, test_data, test_label, 
         learning_rate = 0.005, batch_size = 32, 
         n_epochs = 1000, seed = 3,
         threshold = 0.9, show_epoch_cost = False,
         is_train = True,
         layers = [10, 5]
        ):
    train_onehot_label = convert_to_one_hot(train_label.astype(int), 2)
    test_onehot_label = convert_to_one_hot(test_label.astype(int), 2)
    
    X = tf.placeholder(dtype = np.float32, shape = [None, 8])
    Y = tf.placeholder(dtype = np.float32, shape = [None, 2])
    
    hidden_layer = tf.layers.dense(inputs=X, units=8, activation=tf.nn.relu)
    
    for layer in layers:
        weight_l2_regularizer = tf.contrib.layers.l2_regularizer(0.001)
        hidden_layer = tf.layers.dense(inputs=hidden_layer, 
                                       units=layer, 
                                       activation=tf.nn.relu,
                                       kernel_regularizer=weight_l2_regularizer,
                                      )

    logits= tf.layers.dense(inputs=hidden_layer, units=2, activation=None)
    
    entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels = Y, logits = logits)
    
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    
    cost = tf.reduce_mean(entropy) + 0.01 * sum(reg_losses)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    costs = []
    with tf.Session() as sess:
        merged_summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter('../tmp', sess.graph)
        start_time = time.time()
        sess.run(tf.global_variables_initializer())	

        for epoch in range(n_epochs): 
            epoch_cost = 0        
            n_batches = int(train_set.shape[1]/batch_size)
            seed = seed + 1
            minibatches = random_mini_batches(train_set, train_onehot_label, batch_size, seed)

            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch

                _ , minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X.T, Y: minibatch_Y.T})

                epoch_cost += minibatch_cost / n_batches

            # Print the cost every epoch
            if epoch % 100 == 0 and show_epoch_cost:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if epoch % 5 == 0 and show_epoch_cost:
                costs.append(epoch_cost)

        print('Total time: {0} seconds'.format(time.time() - start_time))
        print('Optimization Finished!')

        print('test the model:')

        result = tf.cast(tf.greater(logits, threshold), "float")
        correct_prediction = tf.equal(result , Y)

        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: train_set.T, Y: train_onehot_label.T}))
        
        if is_train:
            print ("Test Accuracy:", accuracy.eval({X: test_set.T, Y: test_onehot_label.T}))
        else: 
            predict_label = result.eval({X: test_set.T})
            print ("result Y:", predict_label.T)  
            
        print ("-------------Done----------------------")

In [19]:
mode_softmax(train_data, train_label, test_data, test_label, 
     show_epoch_cost = True,
     learning_rate=0.003, 
     n_epochs=2000, 
     layers=[8, 4])

Cost after epoch 0: 0.691060
Cost after epoch 100: 0.369542
Cost after epoch 200: 0.329291
Cost after epoch 300: 0.319789
Cost after epoch 400: 0.334665
Cost after epoch 500: 0.302398
Cost after epoch 600: 0.291642
Cost after epoch 700: 0.300205
Cost after epoch 800: 0.288355
Cost after epoch 900: 0.278697
Cost after epoch 1000: 0.273979
Cost after epoch 1100: 0.273351
Cost after epoch 1200: 0.279642
Cost after epoch 1300: 0.253140
Cost after epoch 1400: 0.256041
Cost after epoch 1500: 0.263426
Cost after epoch 1600: 0.263023
Cost after epoch 1700: 0.266802
Cost after epoch 1800: 0.240990
Cost after epoch 1900: 0.244029
Total time: 48.33601117134094 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.883427
Test Accuracy: 0.756983
-------------Done----------------------


In [None]:

Train Accuracy: 0.836376
Test Accuracy: 0.72067

In [21]:
learning_rates = np.linspace(0.007, 0.003, 5)

for lr in learning_rates:
    print(">>> model with learning rate: ", lr)
    mode_softmax(train_data, train_label, test_data, test_label, 
         learning_rate = lr, 
         n_epochs=2000,
         layers=[20, 16, 8])
    print('-----------------------------------')

>>> model with learning rate:  0.007
Total time: 65.11208963394165 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.833567
Test Accuracy: 0.662011
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.006
Total time: 66.1773328781128 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.661517
Test Accuracy: 0.600559
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.005
Total time: 71.32303214073181 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.929775
Test Accuracy: 0.740223
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.004
Total time: 70.33622980117798 seconds
Optimization Finished!
test the model:
Train Accuracy: 0.902388
Test Accuracy: 0.75419
-------------Done----------------------
-----------------------------------
>>> model with learning rate:  0.003
Total tim

In [None]:
mode_softmax(train_data, train_label, test_data, test_label, 
     learning_rate = 0.009, 
     n_epochs=2000, 
     is_train=False,
     layers=[8, 16, 32, 64, 32, 16, 8])

In [None]:
is_survived = np.array([  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  1,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  1,  1,  0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  0,  0,  1,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  1,  0,  0,  1,  0,  0,  0,  1,  0,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  1,  0,  1,  0,  0,  1,  1,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  1,  0,  0,  1,  0,  1,  1,  0,  0,  1,  1,  0,  0,  0,  1,  1,  0,  1,  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  1,  1,  1,  0,  0,  1,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  1,  1,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  1,  1,  1,  0,  1,  0,  0,  1,  0,  0,  1,  1,  0,  1,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0,  1,  1,  0,  1,  0,  1,  1,  1,  0,  0,  0,  1,  0,  0,  1,  1,  0,  1,  1,  0,  1,  0,  1,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0])

predict_label = pd.DataFrame()
predict_label['Survived'] = is_survived
predict_label['PassengerId'] = raw_test_data['PassengerId']
predict_label.to_csv('predict_label.csv',index=None)  