This notebook contains Theano exercises not related to machine learning.

The exercises work in the following way:

- You have a cell with TODOs that raise errors with a description of what is needed.
- The cell contains a description at the top.
- Run the cell (ctrl-enter) to execute it. At first, it raises an error.
- Modify the cell to implement what is asked in the error.
- If you correctly implement all the TODOs and run the cell, it should print "SUCCESS!" at the end (there is validation code in the cell). If not, try again.
- If you want to see the solution, execute the cell that starts with "%load" after the exercise.


In [4]:
# %load 01_scalar_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_scalar():
    """
    Creates a fresh symbolic Theano scalar with ``T.scalar()`` and
    returns it.
    """

    scalar_var = T.scalar()
    return scalar_var


def log(x):
    """
    x: a Theano scalar

    Returns the symbolic expression ``T.log(x)``.
    """

    log_of_x = T.log(x)
    return log_of_x


def add(x, y):
    """
    x: a Theano scalar
    y: a Theano scalar

    Returns the sum ``x + y``.
    """

    total = x + y
    return total

# Build the symbolic graph d = a + log(b).
a = make_scalar()
b = make_scalar()
c = log(b)
d = add(a, c)
# Compile the graph into a callable taking values for (a, b).
f = function([a, b], d)
# Cast the test inputs to the dtypes of the symbolic variables.
# np.asarray(..., dtype=...) is used instead of np.cast, which was removed
# in NumPy 2.0. Separate *_value names keep a and b bound to the symbolic
# variables, as in the other cells.
a_value = np.asarray(1., dtype=a.dtype)
b_value = np.asarray(2., dtype=b.dtype)
actual = f(a_value, b_value)
# Reference result computed directly with NumPy.
expected = 1. + np.log(2.)
assert np.allclose(actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")

SUCCESS!


In [None]:
# %load 01_scalar_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_scalar():
    """
    Creates a fresh symbolic Theano scalar with ``T.scalar()`` and
    returns it.
    """

    scalar_var = T.scalar()
    return scalar_var


def log(x):
    """
    x: a Theano scalar

    Returns the symbolic expression ``T.log(x)``.
    """

    log_of_x = T.log(x)
    return log_of_x


def add(x, y):
    """
    x: a Theano scalar
    y: a Theano scalar

    Returns the sum ``x + y``.
    """

    total = x + y
    return total

# Build the symbolic graph d = a + log(b).
a = make_scalar()
b = make_scalar()
c = log(b)
d = add(a, c)
# Compile the graph into a callable taking values for (a, b).
f = function([a, b], d)
# Cast the test inputs to the dtypes of the symbolic variables.
# np.asarray(..., dtype=...) is used instead of np.cast, which was removed
# in NumPy 2.0. Separate *_value names keep a and b bound to the symbolic
# variables, as in the other cells.
a_value = np.asarray(1., dtype=a.dtype)
b_value = np.asarray(2., dtype=b.dtype)
actual = f(a_value, b_value)
# Reference result computed directly with NumPy.
expected = 1. + np.log(2.)
assert np.allclose(actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [6]:
# %load 02_vector_mat_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_vector():
    """
    Creates a fresh symbolic Theano vector with ``T.vector()`` and
    returns it.
    """

    vector_var = T.vector()
    return vector_var


def make_matrix():
    """
    Creates a fresh symbolic Theano matrix with ``T.matrix()`` and
    returns it.
    """

    matrix_var = T.matrix()
    return matrix_var


def elemwise_mul(a, b):
    """
    a: A theano tensor (the validation code in this cell passes a vector)
    b: A theano tensor (the validation code in this cell passes a vector)
    Returns the elementwise product ``a * b``
    """

    product = a * b
    return product


def matrix_vector_mul(a, b):
    """
    a: A theano matrix
    b: A theano vector
    Returns ``T.dot(a, b)``, the matrix-vector product of a and b
    """

    product = T.dot(a, b)
    return product

# Build the symbolic graph e = dot(d, a * b).
a = make_vector()
b = make_vector()
c = elemwise_mul(a, b)
d = make_matrix()
e = matrix_vector_mul(d, c)

# Compile the graph into a callable taking values for (a, b, d).
f = function([a, b, d], e)

# Seeded random inputs, cast to the dtypes of the symbolic variables.
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(5).astype(a.dtype)
b_value = rng.rand(5).astype(b.dtype)
# Reference result computed directly with NumPy.
c_value = a_value * b_value
d_value = rng.randn(5, 5).astype(d.dtype)
expected = np.dot(d_value, c_value)

actual = f(a_value, b_value, d_value)

assert np.allclose(actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")

SUCCESS!


In [None]:
# %load 02_vector_mat_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_vector():
    """
    Creates a fresh symbolic Theano vector with ``T.vector()`` and
    returns it.
    """

    vector_var = T.vector()
    return vector_var


def make_matrix():
    """
    Creates a fresh symbolic Theano matrix with ``T.matrix()`` and
    returns it.
    """

    matrix_var = T.matrix()
    return matrix_var


def elemwise_mul(a, b):
    """
    a: A theano tensor (the validation code in this cell passes a vector)
    b: A theano tensor (the validation code in this cell passes a vector)
    Returns the elementwise product ``a * b``
    """

    product = a * b
    return product


def matrix_vector_mul(a, b):
    """
    a: A theano matrix
    b: A theano vector
    Returns ``T.dot(a, b)``, the matrix-vector product of a and b
    """

    product = T.dot(a, b)
    return product

# Build the symbolic graph e = dot(d, a * b).
a = make_vector()
b = make_vector()
c = elemwise_mul(a, b)
d = make_matrix()
e = matrix_vector_mul(d, c)

# Compile the graph into a callable taking values for (a, b, d).
f = function([a, b, d], e)

# Seeded random inputs, cast to the dtypes of the symbolic variables.
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(5).astype(a.dtype)
b_value = rng.rand(5).astype(b.dtype)
# Reference result computed directly with NumPy.
c_value = a_value * b_value
d_value = rng.randn(5, 5).astype(d.dtype)
expected = np.dot(d_value, c_value)

actual = f(a_value, b_value, d_value)

assert np.allclose(actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [8]:
# %load 03_tensor_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_tensor(dim):
    """
    Returns a new Theano tensor with no broadcastable dimensions.
    dim: the total number of dimensions of the tensor.

    The tensor type is built with dtype 'float32' and a broadcastable
    pattern of ``dim`` False entries; calling the type yields the variable.
    """

    no_broadcast = (False,) * dim
    tensor_type = T.TensorType(dtype='float32', broadcastable=no_broadcast)
    return tensor_type()


def broadcasted_add(a, b):
    """
    a: a 3D theano tensor
    b: a 4D theano tensor
    Returns c, a 4D theano tensor, where

    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]

    for all i, j, k, l
    """

    # Reverse the axes of a and insert a broadcastable axis ('x') at
    # position 1 so the result lines up with b's (i, j, k, l) layout.
    a_aligned = a.dimshuffle(2, 'x', 1, 0)
    return a_aligned + b


def partial_max(a):
    """
    a: a 4D theano tensor

    Returns b, a theano matrix, where

    b[i, j] = max_{k,l} a[i, k, l, j]

    for all i, j
    """

    # Reduce over the two middle axes in a single max call.
    middle_axes = (1, 2)
    return a.max(axis=middle_axes)

# Build the symbolic graph d = partial_max(broadcasted_add(a, b)).
a = make_tensor(3)
b = make_tensor(4)
c = broadcasted_add(a, b)
d = partial_max(c)

# Compile the graph into a callable taking values for (a, b).
f = function([a, b], d)

# Seeded random inputs, cast to the dtypes of the symbolic variables.
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(2, 2, 2).astype(a.dtype)
b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
# NumPy reference: reverse a's axes, add a length-1 axis at position 1,
# then take the max over axes 1 and 2 (two successive axis=1 reductions).
c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
expected = c_value.max(axis=1).max(axis=1)

actual = f(a_value, b_value)

assert np.allclose(actual, expected), (actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


SUCCESS!


In [None]:
# %load 03_tensor_soln.py
import numpy as np
from theano import function
import theano.tensor as T


def make_tensor(dim):
    """
    Returns a new Theano tensor with no broadcastable dimensions.
    dim: the total number of dimensions of the tensor.

    The tensor type is built with dtype 'float32' and a broadcastable
    pattern of ``dim`` False entries; calling the type yields the variable.
    """

    no_broadcast = (False,) * dim
    tensor_type = T.TensorType(dtype='float32', broadcastable=no_broadcast)
    return tensor_type()


def broadcasted_add(a, b):
    """
    a: a 3D theano tensor
    b: a 4D theano tensor
    Returns c, a 4D theano tensor, where

    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]

    for all i, j, k, l
    """

    # Reverse the axes of a and insert a broadcastable axis ('x') at
    # position 1 so the result lines up with b's (i, j, k, l) layout.
    a_aligned = a.dimshuffle(2, 'x', 1, 0)
    return a_aligned + b


def partial_max(a):
    """
    a: a 4D theano tensor

    Returns b, a theano matrix, where

    b[i, j] = max_{k,l} a[i, k, l, j]

    for all i, j
    """

    # Reduce over the two middle axes in a single max call.
    middle_axes = (1, 2)
    return a.max(axis=middle_axes)

# Build the symbolic graph d = partial_max(broadcasted_add(a, b)).
a = make_tensor(3)
b = make_tensor(4)
c = broadcasted_add(a, b)
d = partial_max(c)

# Compile the graph into a callable taking values for (a, b).
f = function([a, b], d)

# Seeded random inputs, cast to the dtypes of the symbolic variables.
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(2, 2, 2).astype(a.dtype)
b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
# NumPy reference: reverse a's axes, add a length-1 axis at position 1,
# then take the max over axes 1 and 2 (two successive axis=1 reductions).
c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
expected = c_value.max(axis=1).max(axis=1)

actual = f(a_value, b_value)

assert np.allclose(actual, expected), (actual, expected)
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [10]:
# %load 04_function_soln.py
from theano import tensor as T
from theano import function


def evaluate(x, y, expr, x_value, y_value):
    """
    x: A theano variable
    y: A theano variable
    expr: A theano expression involving x and y
    x_value: A numpy value
    y_value: A numpy value

    Compiles expr into a theano function of (x, y) and returns the result
    of calling it with x_value and y_value.
    """

    compiled = function([x, y], expr)
    return compiled(x_value, y_value)


# Two symbolic integer scalars and their sum.
x = T.iscalar()
y = T.iscalar()
z = x + y
# Substituting 1 and 2 must give 3.
assert evaluate(x, y, z, 1, 2) == 3
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


SUCCESS!


In [None]:
# %load 04_function_soln.py
from theano import tensor as T
from theano import function


def evaluate(x, y, expr, x_value, y_value):
    """
    x: A theano variable
    y: A theano variable
    expr: A theano expression involving x and y
    x_value: A numpy value
    y_value: A numpy value

    Compiles expr into a theano function of (x, y) and returns the result
    of calling it with x_value and y_value.
    """

    compiled = function([x, y], expr)
    return compiled(x_value, y_value)


# Two symbolic integer scalars and their sum.
x = T.iscalar()
y = T.iscalar()
z = x + y
# Substituting 1 and 2 must give 3.
assert evaluate(x, y, z, 1, 2) == 3
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [12]:
# %load 05_shared_soln.py
import numpy as np
from collections import OrderedDict
from theano import function
from theano import shared


def make_shared(shape):
    """
    shape: the shape of the tensor to store

    Returns a theano shared variable whose initial value is
    ``np.zeros(shape)``.
    """
    initial_value = np.zeros(shape)
    return shared(initial_value)


def exchange_shared(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Swaps the values stored in a and b: both values are read with
    get_value, then written back crosswise with set_value.
    """
    old_a, old_b = a.get_value(), b.get_value()
    a.set_value(old_b)
    b.set_value(old_a)


def make_exchange_func(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Returns f
    where f is a theano function with no inputs and no outputs whose
    updates assign b to a and a to b, i.e. calling f swaps the values
    in a and b
    """

    # Insertion order is preserved: the update for a first, then for b.
    swap_updates = OrderedDict([(a, b), (b, a)])
    return function([], updates=swap_updates)


# Both shared variables must be created with the requested shape.
a = make_shared((5, 4, 3))
assert a.get_value().shape == (5, 4, 3)
b = make_shared((5, 4, 3))
# Check b here; the previous version re-checked a and never validated b.
assert b.get_value().shape == (5, 4, 3)
# Give a and b distinguishable contents: zeros in a, ones in b.
a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
# Swap through get_value / set_value.
exchange_shared(a, b)
assert np.all(a.get_value() == 1.)
assert np.all(b.get_value() == 0.)
# Swap back through a compiled theano function; it must return an empty list.
f = make_exchange_func(a, b)
rval = f()
assert isinstance(rval, list)
assert len(rval) == 0
assert np.all(a.get_value() == 0.)
assert np.all(b.get_value() == 1.)

# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


SUCCESS!


In [None]:
# %load 05_shared_soln.py
import numpy as np
from collections import OrderedDict
from theano import function
from theano import shared


def make_shared(shape):
    """
    shape: the shape of the tensor to store

    Returns a theano shared variable whose initial value is
    ``np.zeros(shape)``.
    """
    initial_value = np.zeros(shape)
    return shared(initial_value)


def exchange_shared(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Swaps the values stored in a and b: both values are read with
    get_value, then written back crosswise with set_value.
    """
    old_a, old_b = a.get_value(), b.get_value()
    a.set_value(old_b)
    b.set_value(old_a)


def make_exchange_func(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Returns f
    where f is a theano function with no inputs and no outputs whose
    updates assign b to a and a to b, i.e. calling f swaps the values
    in a and b
    """

    # Insertion order is preserved: the update for a first, then for b.
    swap_updates = OrderedDict([(a, b), (b, a)])
    return function([], updates=swap_updates)


# Both shared variables must be created with the requested shape.
a = make_shared((5, 4, 3))
assert a.get_value().shape == (5, 4, 3)
b = make_shared((5, 4, 3))
# Check b here; the previous version re-checked a and never validated b.
assert b.get_value().shape == (5, 4, 3)
# Give a and b distinguishable contents: zeros in a, ones in b.
a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
# Swap through get_value / set_value.
exchange_shared(a, b)
assert np.all(a.get_value() == 1.)
assert np.all(b.get_value() == 0.)
# Swap back through a compiled theano function; it must return an empty list.
f = make_exchange_func(a, b)
rval = f()
assert isinstance(rval, list)
assert len(rval) == 0
assert np.all(a.get_value() == 0.)
assert np.all(b.get_value() == 1.)

# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [14]:
# %load 06_grad_soln.py
# Fill in the TODOs in this exercise, then run the cell (or the
# corresponding exercise script) to see if your solution works!
#
from theano import tensor as T


def grad_sum(x, y, z):
    """
    x: A theano variable
    y: A theano variable
    z: A theano expression involving x and y

    Returns dz / dx + dz / dy, built by summing the list of gradients
    that T.grad returns for [x, y].
    """

    partials = T.grad(z, [x, y])
    return sum(partials)

# z = x + y, so dz/dx + dz/dy is expected to be 2.
x = T.scalar()
y = T.scalar()
z = x + y
s = grad_sum(x, y, z)
# Evaluate the symbolic sum of gradients at x = 0, y = 0.
assert s.eval({x: 0, y: 0}) == 2
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


SUCCESS!


In [None]:
# %load 06_grad_soln.py
# Fill in the TODOs in this exercise, then run the cell (or the
# corresponding exercise script) to see if your solution works!
#
from theano import tensor as T


def grad_sum(x, y, z):
    """
    x: A theano variable
    y: A theano variable
    z: A theano expression involving x and y

    Returns dz / dx + dz / dy, built by summing the list of gradients
    that T.grad returns for [x, y].
    """

    partials = T.grad(z, [x, y])
    return sum(partials)

# z = x + y, so dz/dx + dz/dy is expected to be 2.
x = T.scalar()
y = T.scalar()
z = x + y
s = grad_sum(x, y, z)
# Evaluate the symbolic sum of gradients at x = 0, y = 0.
assert s.eval({x: 0, y: 0}) == 2
# Parenthesized form prints the same text on Python 2 and Python 3.
print("SUCCESS!")


In [16]:
# Exercise 7
# This code has a bug. Run this cell to see it.
# Use a Theano flag (easy in a shell, harder in IPython) or an extra parameter to a function
# to find the cause and fix it.
# Do not find the bug by inspecting the code. This is to show you how to find a bug
# in more complicated cases where code inspection isn't working well.
#

import numpy as np
from theano import function
from theano import tensor as T
# Symbolic graph built from a single input vector `a`.
a = T.vector()
b = T.log(a)
c = T.nnet.sigmoid(b)
d = T.sqrt(c)
e = T.concatenate((d, c), axis=0)
f = b * c * d
g = e + f
h = g / c
# Compile the graph into a function of `a`.
fn = function([a], h)
# Call it on a length-3 vector of ones cast to a's dtype; this call raises
# the error shown below the cell.
fn(np.ones((3,)).astype(a.dtype))


ValueError: Input dimension mis-match. (input[0].shape[0] = 6, input[1].shape[0] = 3)
Apply node that caused the error: Elemwise{Composite{((i0 / i1) + (i2 * i3))}}[(0, 0)](Join.0, sigmoid.0, Elemwise{log,no_inplace}.0, Elemwise{sqrt,no_inplace}.0)
Toposort index: 4
Inputs types: [TensorType(float32, vector), TensorType(float32, vector), TensorType(float32, vector), TensorType(float32, vector)]
Inputs shapes: [(6,), (3,), (3,), (3,)]
Inputs strides: [(4,), (4,), (4,), (4,)]
Inputs values: ['not shown', array([ 0.5,  0.5,  0.5], dtype=float32), array([ 0.,  0.,  0.], dtype=float32), array([ 0.70710677,  0.70710677,  0.70710677], dtype=float32)]
Outputs clients: [['output']]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [None]:
# %load 07_mode.py
import numpy as np
from theano import function
from theano import tensor as T
from theano import config

# Same graph as the exercise cell; the bug is left in on purpose so the
# error can be reproduced under a different compilation mode.
a = T.vector()
b = T.log(a)
c = T.nnet.sigmoid(b)
d = T.sqrt(c)
# e concatenates d and c along axis 0, so it is twice as long as either.
e = T.concatenate((d, c), axis=0)
# f is an elementwise product of b, c and d.
f = b * c * d
# This is the first bad line: e and f have different lengths (6 vs 3 for
# the length-3 input below), so the elementwise add cannot line them up.
g = e + f
h = g / c
# Compile with mode='FAST_COMPILE'. NOTE(review): presumably this disables
# most graph optimizations so the error points back at the line that created
# the failing node, as the HINT in the recorded error output suggests; confirm
# against the Theano documentation.
fn = function([a], h, mode='FAST_COMPILE')
# Still raises: the graph itself has not been fixed.
fn(np.ones((3,)).astype(a.dtype))


In [17]:
# Exercise 8
# This exercise is different. The initial version already works.
# You must modify it as described below and it should still give the same output.

# Modify and execute the polynomial example to have the reduction (the sum() call) done by scan.
import numpy
import theano
import theano.tensor as T

# Symbolic inputs: the polynomial coefficients and the point x to evaluate at.
coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000

# Generate the components of the polynomial
full_range = theano.tensor.arange(max_coefficients_supported)
# Each scan step receives one coefficient, its power (taken from full_range)
# and the non-sequence x, and produces the term coeff * x ** power.
components, updates = theano.scan(fn=lambda coeff, power, free_var:
                                   coeff * (free_var ** power),
                                outputs_info=None,
                                sequences=[coefficients, full_range],
                                non_sequences=x)

# The reduction that the exercise asks to move into scan.
polynomial = components.sum()
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                     outputs=polynomial)

# 1 + 0 * 3 + 2 * 3 ** 2 = 19
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
# Parenthesized form prints the same text on Python 2 and Python 3.
print(calculate_polynomial(test_coeff, 3))
# 19.0

19.0


In [None]:
# %load 08_scan_polynomial_soln.py
import numpy

import theano
import theano.tensor as tt

# Symbolic inputs: the polynomial coefficients and the point x to evaluate at.
coefficients = tt.vector("coefficients")
x = tt.scalar("x")
max_coefficients_supported = 10000

# Generate the components of the polynomial
full_range = tt.arange(max_coefficients_supported)


# Initial value (0, as float64) of the running sum carried between scan steps.
outputs_info = tt.as_tensor_variable(numpy.asarray(0, 'float64'))

# Each step adds the term coeff * x ** power to the previous running sum.
# The lambda receives the sequence items first, then the prior output, then
# the non-sequence x.
components, updates = theano.scan(
    fn=lambda coeff, power, prior_value, free_var:
    prior_value + (coeff * (free_var ** power)),
    sequences=[coefficients, full_range],
    outputs_info=outputs_info,
    non_sequences=x)

# components holds every partial sum; the last one is the full polynomial.
polynomial = components[-1]
calculate_polynomial = theano.function(
    inputs=[coefficients, x],
    outputs=polynomial, updates=updates)

# 1 + 0 * 3 + 2 * 3 ** 2 = 19
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
# Parenthesized form prints the same text on Python 2 and Python 3.
print(calculate_polynomial(test_coeff, 3))
# 19.0
