# Theano XOR Example

First, let's set up our imports.

In [1]:
from __future__ import print_function

import theano
import theano.tensor as T
import numpy as np
import time

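Initialize our input data `X` and target data `y` as Theano shared variables. Note that, as written, `y` is 1 when the two inputs are equal and 0 when they differ, so the targets actually encode XNOR (the complement of XOR); the task is still not linearly separable, so the network and training procedure below are unaffected.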

In [2]:
# Each row of X_value is one pair of binary inputs; the matching row of
# y_value is the target for that pair.
# NOTE(review): the targets are 1 for equal inputs ([0, 0], [1, 1]) and 0
# otherwise, i.e. XNOR rather than the XOR named in the title -- confirm intent.
X_value = np.asarray([[0, 1], [1, 0], [0, 0], [1, 1]])
y_value = np.asarray([[0], [0], [1], [1]])

# Wrap the arrays in shared variables so the compiled functions below can
# read them without taking explicit inputs.
X = theano.shared(value=X_value, name='X')
y = theano.shared(value=y_value, name='y')

print('X: {}\ny: {}'.format(*(var.get_value() for var in (X, y))))

X: [[0 1]
 [1 0]
 [0 0]
 [1 1]]
y: [[0]
 [0]
 [1]
 [1]]


Instantiate a NumPy random number generator with a fixed seed, so that the weight initialization is reproducible

In [3]:
# Fixed seed so that the random weight initialisation is repeatable.
seed = 1234
rng = np.random.RandomState(seed)

A helper function that creates the weight matrix and bias vector (as Theano shared variables) for a single layer

In [4]:
def layer(*shape):
    """Create the parameters of one fully connected layer.

    Args:
        *shape: exactly two integers; the first is used as the number of
            rows of the weight matrix and the second as its number of
            columns (and as the length of the bias vector).

    Returns:
        A ``(W, b)`` tuple of Theano shared variables. ``W`` is drawn from
        the module-level ``rng`` uniformly in ``[-limit, limit)`` with
        ``limit = 4 * sqrt(6 / sum(shape))`` (a scaled Glorot/Xavier-style
        bound); ``b`` is all zeros. Both use ``theano.config.floatX``.

    Raises:
        AssertionError: if ``shape`` does not contain exactly two entries.
    """
    assert len(shape) == 2
    float_type = theano.config.floatX

    # Half-width of the symmetric uniform interval the weights come from.
    limit = 4. * np.sqrt(6. / sum(shape))

    weights_init = np.asarray(
        rng.uniform(low=-limit, high=limit, size=shape),
        dtype=float_type,
    )
    bias_init = np.zeros(shape[1], dtype=float_type)

    # The shape tuple is embedded in the names (e.g. 'W_(2, 5)') so that
    # layers can be told apart when inspecting the graph.
    W = theano.shared(value=weights_init, name='W_{}'.format(shape),
                      borrow=True, strict=False)
    b = theano.shared(value=bias_init, name='b_{}'.format(shape),
                      borrow=True, strict=False)
    return W, b

Use that function to generate the weights and biases of our two layers

In [5]:
# Hidden layer: 2 inputs -> 5 units; output layer: 5 units -> 1 output.
# The calls are evaluated left to right, so the hidden layer draws from
# `rng` first, exactly as if the two layers were created on separate lines.
(W1, b1), (W2, b2) = layer(2, 5), layer(5, 1)

# Show the initial hidden-layer weights.
print(W1.get_value())

[[-2.28477995  0.90440604 -0.46122329  2.1135257   2.07365784]
 [-1.68430669 -1.65563108  2.23583464  3.39323698  2.78436792]]


Construct the network, using mean squared error for cost and subgradient descent as the optimizer

In [6]:
# Forward pass: ReLU hidden layer followed by a sigmoid output unit.
hidden = T.nnet.relu(T.dot(X, W1) + b1)
output = T.nnet.sigmoid(T.dot(hidden, W2) + b2)  # The whole network

# Mean squared error between the targets and the network output.
cost = T.mean((y - output) ** 2)

# Subgradient descent optimizer: every parameter takes a step of size
# `learning_rate` against its gradient of the cost.
learning_rate = 0.1
params = [W1, W2, b1, b2]
grads = T.grad(cost, params)  # one gradient per entry of `params`, same order
updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]

Construct training and testing functions

In [7]:
# Neither function takes inputs: the data and the parameters are all shared
# variables that the compiled graphs read directly.

# train(): returns nothing, applies one round of `updates` per call.
train = theano.function([], [], updates=updates)

# test(): returns the current value of `cost` without changing any parameter.
test = theano.function([], cost)

Train and evaluate the network

In [8]:
# Cost with the freshly initialised parameters.
print('Cost before:', test())

# Time a fixed number of update steps; each train() call applies `updates`
# once, using all four samples held in the shared variables.
n_steps = 10000
start = time.time()
for i in range(n_steps):
    train()
end = time.time()

# Cost after training, and the wall-clock time spent in the loop.
print('Cost after:', test())
print('Time (s):', end - start)

Cost before: 0.475550048714
Cost after: 0.000385103087629
Time (s): 0.398609161377
