# Data Training using TensorFlow

In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.python import debug as tf_debug
from sklearn.model_selection import train_test_split

In [2]:
X_train = pd.read_csv('./data/train_processed.csv')
X_predict = pd.read_csv('./data/test_processed.csv')

In [3]:
X_train.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Cabin,Embarked,UnknownAge,Baby,Child,Young,FamilySize,Alone,Title,Survived
0,3,1,0.273456,1,0,0.014151,7,0,0,0,0,1,2,0,2,0
1,1,0,0.473882,1,0,0.139136,2,1,0,0,0,0,2,0,1,1
2,3,0,0.323563,0,0,0.015469,7,0,0,0,0,1,1,1,1,1
3,1,0,0.436302,1,0,0.103644,2,0,0,0,0,0,2,0,1,1
4,3,1,0.436302,0,0,0.015713,7,0,0,0,0,0,1,1,2,0


Split into 3 sets: train, test and valid

In [4]:
y_train = X_train['Survived']
X_train, X_valid, y_train, y_valid = train_test_split(X_train.drop('Survived', axis=1), y_train, test_size=0.2)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

In [5]:
print('Training set', X_train.shape, y_train.shape)
print('Validation set', X_valid.shape, y_valid.shape)
print('Test set', X_test.shape, y_test.shape)

Training set (569, 15) (569,)
Validation set (179, 15) (179,)
Test set (143, 15) (143,)


In [6]:
def reformat(X, y):
    #X = X.reshape((-1, -1)).astype(np.float32)
    y = (np.arange(1) == y[:,None]).astype(np.float32)
    return X, y

In [7]:
X_train, y_train = reformat(X_train, y_train)
X_valid, y_valid = reformat(X_valid, y_valid)
X_test, y_test = reformat(X_test, y_test)

In [8]:
print('Training set', X_train.shape, y_train.shape)
print('Validation set', X_valid.shape, y_valid.shape)
print('Test set', X_test.shape, y_test.shape)

Training set (569, 15) (569, 1)
Validation set (179, 15) (179, 1)
Test set (143, 15) (143, 1)


In [9]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [10]:
batch_size = 16

graph = tf.Graph()
with graph.as_default():

    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, X_train.shape[1]))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y_train.shape[1]))
    tf_valid_dataset = tf.cast(tf.constant(X_valid.values), tf.float32)
    tf_test_dataset = tf.cast(tf.constant(X_test.values), tf.float32)

    weights = tf.Variable(tf.truncated_normal([X_train.shape[1], y_train.shape[1]]))
    biases = tf.Variable(tf.zeros([y_train.shape[1]]))

    logits = tf.matmul(tf_train_dataset, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [None]:
num_steps = 1000

with tf.Session(graph=graph) as session:
    
    tf.global_variables_initializer().run()
    
    session = tf_debug.LocalCLIDebugWrapperSession(session)
    #session.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    
    print("Initialized")
    for step in range(num_steps):

        offset = (step * batch_size) % (X_train.shape[0] - batch_size)

        batch_data = X_train.values[offset:(offset + batch_size), :]
        batch_labels = y_train[offset:(offset + batch_size), :]

        feed_dict = { tf_train_dataset : batch_data, tf_train_labels : batch_labels }
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)

        if (step % 20 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), y_valid))
            print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), y_test))

In [113]:
loss

<tf.Tensor 'Mean:0' shape=() dtype=float32>

In [105]:
batch_labels[0]

array([ 1.], dtype=float32)

In [77]:
X_train.shape

(35, 15)