In [8]:
# 6.1 Example: Learning XOR - GBC Book - Chapter 6 - pp. 166 to 171
# Some parts are inspired by the blog post
# Solving XOR with a Neural Network in TensorFlow
# by Stephen OMAN
# https://github.com/StephenOman/TensorFlowExamples/blob/master/xor%20nn/xor_nn.py

# Activation RELU + sigmoid for binary classification output + MSE loss function
import tensorflow as tf
import time
import numpy as np

# Graph-level seed so that Restart & Run All draws the same initial weights.
SEED = 0
tf.set_random_seed(SEED)

# H is the number of hidden units.
H = 5
# Step size used by the plain gradient-descent optimizer below.
LEARNING_RATE = 0.01

# The four XOR input pairs and their targets are fed as one fixed-size batch.
X = tf.placeholder(tf.float32, shape=[4,2], name = 'X')
Y = tf.placeholder(tf.float32, shape=[4,1], name = 'Y')

# W, c: input -> hidden weights and biases.  w, b: hidden -> output weights and bias.
W = tf.Variable(tf.truncated_normal([2,H],stddev=.1), name = "W")
w = tf.Variable(tf.truncated_normal([H,1],stddev=.1), name = "w")

c = tf.Variable(tf.zeros([H]), name = "c")
b = tf.Variable(tf.zeros([1]), name = "b")

with tf.name_scope("hidden_layer"):
    h = tf.nn.relu(tf.add(tf.matmul(X, W),c))

with tf.name_scope("output"):
    # Keep the pre-activation around: losses that take logits need it.
    logits = tf.add(tf.matmul(h,w),b)
    y_estimated = tf.sigmoid(logits)

with tf.name_scope("loss"):
    # Mean squared error between the sigmoid output and the targets.
    loss = tf.reduce_mean(tf.squared_difference(y_estimated, Y))

# Alternative: for a binary classifier, cross entropy usually works better than MSE.
# The op applies the sigmoid itself, so it must receive the pre-activation
# `logits` (not `y_estimated`), and its per-example output has to be averaged:
#    loss = tf.reduce_mean(
#        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))

# Alternative: incorporate an L2 norm penalty on the weights into the loss function
# regularizer = .01 * ( tf.nn.l2_loss(W) + tf.nn.l2_loss(w))
# loss = loss + regularizer

# Alternative: a naive direct implementation of the same MSE loss function
#     n_instances = X.get_shape().as_list()[0]
#     loss = tf.reduce_sum(tf.pow(y_estimated - Y, 2))/ n_instances

# Alternative: clamp the predictions to [1e-10, 1.0] before computing the loss.
# This is value clipping of the network output, not gradient clipping: it does
# not bound the gradients themselves. To clip gradients, split minimize() into
# optimizer.compute_gradients(), clip each gradient, then apply_gradients().
#     n_instances = X.get_shape().as_list()[0]
#     loss = tf.reduce_sum(tf.pow(tf.clip_by_value(y_estimated,1e-10,1.0) - Y,2))/(n_instances)

with tf.name_scope("train"):
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

# XOR truth table: inputs and the matching expected outputs, row for row.
INPUT_XOR = [[0,0],[0,1],[1,0],[1,1]]
OUTPUT_XOR = [[0],[1],[1],[0]]
init = tf.global_variables_initializer()

In [9]:
def _print_rows(label, rows):
    """Print `label`, then every element of `rows` on its own indented line."""
    print(label)
    for element in rows:
        print('    ',element)


# Number of gradient-descent steps, and how often the model state is reported.
N_EPOCHS = 100001
REPORT_EVERY = 10000

# The same full XOR batch is fed on every step and for every report.
xor_feed = {X: INPUT_XOR, Y: OUTPUT_XOR}

with tf.Session() as sess:
    sess.run(init)
    # time.time() measures wall-clock time; time.clock() returned CPU time on
    # Unix and no longer exists in Python 3.8+.
    t_start = time.time()
    for epoch in range(N_EPOCHS):
        sess.run(train_step, feed_dict=xor_feed)
        if epoch % REPORT_EVERY == 0:
            # Fetch predictions, parameters and loss in a single run so that
            # they all describe the same state of the variables.
            y_val, w_val, b_val, W_val, c_val, loss_val = sess.run(
                [y_estimated, w, b, W, c, loss], feed_dict=xor_feed)
            print("_"*80)
            print('Epoch: ', epoch)
            _print_rows('   y_estimated: ', y_val)
            _print_rows('   w: ', w_val)
            _print_rows('   b ', b_val)
            _print_rows('   W: ', W_val)
            _print_rows('   c: ', c_val)
            print('   loss: ', loss_val)
    t_end = time.time()
    print("_"*80)
    print('Elapsed time ', t_end - t_start)

________________________________________________________________________________
('Epoch: ', 0)
   y_estimated: 
('    ', array([ 0.50000209], dtype=float32))
('    ', array([ 0.50312966], dtype=float32))
('    ', array([ 0.50357848], dtype=float32))
('    ', array([ 0.50068295], dtype=float32))
   w: 
('    ', array([ 0.03306176], dtype=float32))
('    ', array([ 0.11288327], dtype=float32))
('    ', array([ 0.16865432], dtype=float32))
('    ', array([-0.02655187], dtype=float32))
('    ', array([ 0.07728685], dtype=float32))
   b 
('    ', -9.2877817e-06)
   W: 
('    ', array([-0.0314922 ,  0.03919289, -0.10571464, -0.03876255,  0.12808494], dtype=float32))
('    ', array([-0.0981788 , -0.00032208,  0.10235868,  0.17900729, -0.10121072], dtype=float32))
   c: 
('    ', 0.0)
('    ', -6.0540157e-07)
('    ', 0.00010470759)
('    ', 1.270941e-07)
('    ', -4.1415913e-07)
('   loss: ', 0.24849999)
________________________________________________________________________________
('Epoch