In [1]:
import numpy
import theano.tensor as T
from theano import function

Using cuDNN version 5103 on context None
Preallocating 10867/11439 Mb (0.950000) on cuda
Mapped name None to device cuda: Tesla K40c (0000:81:00.0)


# Basics: Algebra

In [2]:
x = T.dscalar('x') #dscalar is 0 dimensional arrays (scalar) of doubles
y = T.dscalar('y')
z = x + y
f = function([x,y],z) #f outputs a numpy ndarray w/ 0 dimensions

In [3]:
f(2,3)

array(5.0)

In [4]:
numpy.allclose(f(16.3,12.1),28.4)

True

In [5]:
#z just a var, can use pp (pretty print)
from theano import pp
pp(z)

'(x + y)'

Now, let's add two matrices!

In [6]:
x = T.dmatrix('x') #dmatrix is the type for matrices of doubles
y = T.dmatrix('y')
z = x+y
f = function([x,y],z)

In [7]:
f([[1,2],[3,4]],[[10,20],[30,40]])

array([[ 11.,  22.],
       [ 33.,  44.]])

In [8]:
#can also use numpy arrays directly as inputs
a = numpy.array([[1,2],[3,4]])
b = numpy.array([[10,20],[30,40]])
f(a,b)

array([[ 11.,  22.],
       [ 33.,  44.]])

Exercise: The code below is intended to compute $ a^2 + b^2 + 2 \cdot a \cdot b $

In [9]:
a = T.vector()
b = T.vector()
out = a**2 + b**2 + 2*a*b
f = function([a,b],out)
f([1,2],[4,5]) #will print out [25,49] since 1+4=5 and 2+5=7

array([ 25.,  49.], dtype=float32)

# More examples!

First will be the logistic function (sigmoid)

In [10]:
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
logistic = function([x],s)
logistic([[0,1],[-1,-2]]) #will output another 2x2 matrix where we've computed the sigmoid of each of the entries elemntwise

array([[ 0.5       ,  0.73105858],
       [ 0.26894142,  0.11920292]])

We'll now verify the following equation: $$ s(x) = \frac{1}{1 + e^{-x}} = \frac{1 + \tanh(x/2)}{2}. $$

In [11]:
s2 = (1 + T.tanh(x/2))/2
logistic2 = function([x],s2)
logistic2([[0,1],[-1,-2]])

array([[ 0.5       ,  0.73105858],
       [ 0.26894142,  0.11920292]])

# Computing multiple things at once

In [12]:
a,b = T.dmatrices('a','b')
diff = a - b
abs_diff = abs(diff)
diff_squared = diff**2
f = function([a,b],[diff,abs_diff,diff_squared])

In [13]:
f([[1,1],[1,1]],[[0,1],[2,3]])

[array([[ 1.,  0.],
        [-1., -2.]]), array([[ 1.,  0.],
        [ 1.,  2.]]), array([[ 1.,  0.],
        [ 1.,  4.]])]

# Setting default argument value

e.g. if we have a function that takes two numbers and we want an automatic value for one of them if we only give it one function as input

In [14]:
from theano import In
x,y = T.dscalars('x','y')
z = x + y
f = function([x, In(y, value=1)], z) #the In class lets us create an instance where we can make y's default value 1.
f(33)

array(34.0)

In [15]:
f(33,7)

array(40.0)

# Shared variables

In [16]:
from theano import shared
state = shared(0)
inc = T.iscalar('inc')
accumulator = function([inc],state,updates=[(state,state+inc)])

New things:
The shared thing above constructs shared variables--they have a value that can be shared between functions. They can be used in symbolic expressions, but also have an internal value that defines the value taken on by the symbol in $all$ functions that use it. We can access and modify that value using .get_value() and .set_value()

The updates parameter of the function. We must supply it with a list of pairs of the form (shared_variable, new expression). Each time it runs, it will replace the .value of the shared variable with the new expression. Above, we replace the state's value with the value of state plus the increment.

In [17]:
print(state.get_value())
accumulator(1)
print(state.get_value())
accumulator(300)
print(state.get_value())

0
1
301


In [18]:
state.set_value(-1)
accumulator(3)
print(state.get_value())

2


Here, we'll define another function that can also update the value of the shared variable

In [19]:
decrementor = function([inc],state,updates=[(state,state-inc)])
decrementor(2)
print(state.get_value())

0


If we want to express a formula using the shared variable but do $not$ want to use its value, we can use the givens parameter of the function to replace a node in a graph for the purpose of a function

In [20]:
fn_of_state = state*2 + inc
#this type of foo has to match state, which we replace w/ givens
foo = T.scalar(dtype=state.dtype)
skip_shared = function([inc,foo], fn_of_state, givens=[(state,foo)])
skip_shared(1,3) #we're using 3 as the value of state in this function, but NOT as state.value

print(state.get_value()) #this should still be 0!

0


# Copying Functions

In [21]:
accumulator(10)
print(state.get_value())

10


We'll use a copy to create a similar accumulator with its own internal state using the swap parameter, a dictionary of shared variables to exchange. 

In [22]:
new_state = shared(0)
new_accumulator = accumulator.copy(swap={state:new_state})
new_accumulator(100)
#this should now accumulate new_state, and act just as the original accumulator did!
print(new_state.get_value())

100


In [23]:
print(state.get_value()) #this should be left the same

10


In [24]:
#we'll create a copy with updates removed
null_accumulator = accumulator.copy(delete_updates=True)

In [25]:
null_accumulator(9000)
print(state.get_value())

10


# Using Random Numbers

In [26]:
from theano.tensor.shared_randomstreams import RandomStreams
srng = RandomStreams(seed=234)
rv_u = srng.uniform((2,2)) #random stream of 2x2 matrices drawn from a uniform distribution
rv_n = srng.normal((2,2))
f = function([],rv_u) #calling f() gives random uniform numbers
g = function([],rv_n,no_default_updates=True) #no_default_updates means if we call g multiple times get same numbers
nearly_zeros = function([],rv_u+rv_u - 2*rv_u) # a random var drawn at most once during function execution, so
# this will return approx 0 even though rv_u drawn thrice in the fxn

In [27]:
f_val0 = f()
f_val1 = f() #different numbers

In [28]:
g_val0 = g()
g_val1 = g() #the same numbers as above!

Seeding streams:

In [29]:
rng_val = rv_u.rng.get_value(borrow=True) #get rng val for rv_u
rng_val.seed(89234) #seed the generator
rv_u.rng.set_value(rng_val,borrow=True) #assign back seeded rng

In [30]:
#can seed all vars allocated by RandomStreams object
srng.seed(902340)

Sharing streams between functions:

In [31]:
state_after_v0 = rv_u.rng.get_value().get_state()
nearly_zeros()

v1 = f()
rng = rv_u.rng.get_value(borrow=True)
rng.set_state(state_after_v0)
rv_u.rng.set_value(rng,borrow=True)
v2 = f() #v2 != v1
v3 = f() #v3 == v1

In [32]:
from __future__ import print_function
from theano.sandbox.rng_mrg import MRG_RandomStreams

In [36]:
class Graph():
    def __init__(self,seed=123):
        self.rng = RandomStreams(seed)
        self.y = self.rng.uniform(size=(1,))

In [37]:
g1 = Graph(seed=123)
f1 = function([],g1.y)

In [38]:
g2 = Graph(seed=987)
f2 = function([],g2.y)

In [40]:
f1()

array([ 0.59044123], dtype=float32)

In [41]:
f2()

array([ 0.55421311], dtype=float32)

In [44]:
def copy_random_state(g1,g2):
    if isinstance(g1.rng, MRG_RandomStreams):
        g2.rng.rstate = g1.rng.rstate
        
    for(su1,su2) in zip(g1.rng.state_updates,g2.rng.state_updates):
        su2[0].set_value(su1[0].get_value())

In [45]:
copy_random_state(g1,g2) #now copy state of random num generators
f1()

array([ 0.23715077], dtype=float32)

In [46]:
f2()

array([ 0.23715077], dtype=float32)

# Logistic Regression

In [49]:
rng = numpy.random

N = 400
feats = 784

#generate dataset D = (input_values,target_class)
D = (rng.rand(N,feats), rng.randint(size=N,low=0,high=2))
training_steps = 10000

x = T.dmatrix('x')
y = T.dvector('y')

#initialize weight matrix w randomly, then bias vector b
w = shared(rng.randn(feats),name="w")
b = shared(0.,name="b")

#print("Initial model: ")
#print(w.get_value())
#print(b.get_value())

#construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x,w) - b)) #prob that target = 1
prediction = p_1 > 0.5
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) #the crossentropy cost function!
cost = xent.mean() + 0.01 *  (w**2).sum() #cost to minimize
gw,gb = T.grad(cost,[w,b])


#Compile!
train = function(
            inputs=[x,y],
            outputs=[prediction,xent],
            updates=((w,w-0.1*gw),(b,b-0.1*gb))) #update wt, bias vectors w/ lr = 0.1
predict = function(inputs=[x],outputs=prediction)

#Train
for i in range(training_steps):
    pred,err = train(D[0],D[1])
    
    
print("Final model: ")
print(w.get_value())
print(b.get_value())
print("Target values for D: ")
print(D[1])
print("prediction on D: ")
print(predict(D[0]))

Final model: 
[-0.17144017 -0.12235735 -0.06632164  0.57092876  0.05504196 -0.0648716
  0.04003755  0.07577378 -0.03931145 -0.17406691 -0.0935531  -0.2391577
  0.61475667  0.21190433  0.14095704 -0.06256582  0.00132707  0.00696094
  0.04634598 -0.0507064   0.16730067  0.06043857  0.0847003   0.30302936
 -0.15396835  0.1418503  -0.0903475   0.05249203 -0.18861605  0.04015244
  0.11246356  0.27502742  0.08662615 -0.27868303  0.25967119 -0.00244696
 -0.12203641 -0.39151491 -0.17180096  0.48442904  0.1062769   0.13057024
  0.59204579  0.05835904 -0.19221535  0.06428506  0.1079226  -0.34206992
  0.14868259 -0.2548779  -0.1134257   0.10915145 -0.05214226  0.20209501
 -0.1601143   0.11239614 -0.25225932  0.27854799  0.03342658 -0.01406118
 -0.09763272 -0.03961872  0.06302567  0.18062666  0.01893438  0.03311746
 -0.03956982 -0.05927962  0.27202194  0.02021152  0.23111604  0.295915
 -0.36726737  0.00108652  0.09938558  0.43200296  0.05230785 -0.23255005
 -0.08899803  0.13055701 -0.02591016  0.2

# Derivatives/Gradients in Theano

In [50]:
x = T.dscalar('x')
y = x**2
gy = T.grad(y,x)
pp(gy) #print grad prior to optimization

'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'

In [51]:
f = function([x],gy)
f(4)

array(8.0)

In [54]:
numpy.allclose(f(94.2),188.4)

True

Gradient of the logistic functinion sigmoid:

In [56]:
x = T.dmatrix('x')
s = T.sum(1 / (1 + T.exp(-x)))
gs = T.grad(s,x)
dlogistic = function([x],gs)
dlogistic([[0,1],[-1,-2]])

array([[ 0.25      ,  0.19661193],
       [ 0.19661193,  0.10499359]])

Now we'll compute the Jacobian

In [60]:
from theano import scan
x = T.dvector('x')
y = x**2
J,updates = scan(lambda i,y,x: T.grad(y[i],x), sequences=T.arange(y.shape[0]), non_sequences=[y,x])
f = function([x],J,updates=updates)
f([4,4])

array([[ 8.,  0.],
       [ 0.,  8.]])

# Conditions

In [65]:
from theano import tensor as T
from theano.ifelse import ifelse
import theano, time, numpy

a,b = T.scalars('a', 'b')
x,y = T.matrices('x', 'y')

z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))

f_switch = theano.function([a, b, x, y], z_switch,
                           mode=theano.Mode(linker='vm'))
f_lazyifelse = theano.function([a, b, x, y], z_lazy,
                               mode=theano.Mode(linker='vm'))

val1 = 0.
val2 = 1.
big_mat1 = numpy.ones((10000, 1000))
big_mat2 = numpy.ones((10000, 1000))

n_times = 10

tic = time.clock()
for i in range(n_times):
    f_switch(val1, val2, big_mat1, big_mat2)
print('time spent evaluating both values %f sec' % (time.clock() - tic))

tic = time.clock()
for i in range(n_times):
    f_lazyifelse(val1, val2, big_mat1, big_mat2)
print('time spent evaluating one value %f sec' % (time.clock() - tic))

TypeError: Bad input argument to theano function with name "<ipython-input-65-62025576ae8d>:12" at index 2 (0-based).  
Backtrace when that variable is created:

  File "/home/dbashir/.local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/dbashir/.local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/dbashir/.local/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/dbashir/.local/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/dbashir/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/dbashir/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2822, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/dbashir/.local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-65-62025576ae8d>", line 6, in <module>
    x,y = T.matrices('x', 'y')
TensorType(float32, matrix) cannot store a value of dtype float64 without risking loss of precision. If you do not mind this loss, you can: 1) explicitly cast your data to float32, or 2) set "allow_input_downcast=True" when calling "function". Value: "array([[ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       ..., 
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.]])"

# Loops/Scan

In [69]:
import numpy as np

# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")

results,updates = theano.scan(lambda v: T.tanh(T.dot(v,W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)

#test values
x = np.eye(2, dtype = theano.config.floatX)
w = np.ones((2,2), dtype = theano.config.floatX)
b = np.ones((2), dtype = theano.config.floatX)
b[1] = 2

print(compute_elementwise(x,w,b))

#compare w numpy
print(np.tanh(x.dot(w) + b))

[[ 0.96402758  0.99505478]
 [ 0.96402758  0.99505478]]
[[ 0.96402758  0.99505478]
 [ 0.96402758  0.99505478]]


In [71]:
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")

results,updates = theano.scan(lambda y,p,x_tm1: T.tanh(T.dot(x_tm1,W) + T.dot(y,U) + T.dot(p,V)),
                             sequences = [Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X,W,Y,U,P,V], outputs=results)

#test values
x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = np.ones((2,2), dtype=theano.config.floatX)
y = np.ones((5,2), dtype=theano.config.floatX)
y[0, :] = 3
u = np.ones((2,2), dtype=theano.config.floatX)
p = np.ones((5,2), dtype=theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)

print(compute_seq(x, w, y, u, p, v))

# comparison with numpy
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
    x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print(x_res)

[[ 0.99999994  0.99999994]
 [ 0.99998772  0.99998772]
 [ 0.99998772  0.99998772]
 [ 0.99998772  0.99998772]
 [ 1.          1.        ]]
[[ 0.99999994  0.99999994]
 [ 0.99998772  0.99998772]
 [ 0.99998772  0.99998772]
 [ 0.99998772  0.99998772]
 [ 1.          1.        ]]


# Convolution