## Baby step - Algebra

In [20]:
import numpy
import theano.tensor as T
from theano import function
import theano

### Adding two Scalars

In [4]:
x = T.dscalar('x') #creating a symbol. dscalar is a 0-D array of doubles
y = T.dscalar('y')
z = x+y #another tensor variable
f = function([x,y],z) #create a function with x,y as inputs and z as output

In [5]:
f(2,3) # we have created a function f(x,y)

array(5.0)

In [6]:
numpy.allclose(f(16.3,12.1),28.4)

True

In [9]:
k = T.dscalar() #create an unnamed symbol

In [10]:
from theano import pp
print(pp(z)) #pretty print out the computation associated to z

(x + y)


In [14]:
z.eval({x: 16.3,y:12.1}) #no need to create the function([x,y],z)

array(28.4)

eval() will be slow the first time you call it on a variable – it needs to call function() to compile the expression behind the scenes. Subsequent calls to eval() on that same variable will be fast, because the variable caches the compiled function. 

### Adding two Matrices

In [15]:
x = T.dmatrix('x')
y = T.dmatrix('y')
z = x+y
f = function([x,y],z)

In [16]:
f([[1,2],[3,4]],[[10,20],[30,40]])

array([[ 11.,  22.],
       [ 33.,  44.]])

In [18]:
import numpy
f(numpy.array([[1,2],[3,4]]),numpy.array([[10,20],[30,40]])) 
#we can also use numpy arrays as input

array([[ 11.,  22.],
       [ 33.,  44.]])

### Exercise

In [22]:
a = T.vector() #declare a variable
out = a + a**10 #build a symbolic expression
f = function([a],out)
print(f([0,1,2]))

[    0.     2.  1026.]


In [24]:
b = T.vector()
out = a**2 + b**2 + 2*a*b
f = function([a,b],out)
print f([1,2],[4,5])

[ 25.  49.]


## More examples

### Logistic function

In [26]:
x = T.dmatrix('x')
s = 1/(1+T.exp(-x))
logistic = function([x],s)
logistic([[0,1],[-1,-2]])

array([[ 0.5       ,  0.73105858],
       [ 0.26894142,  0.11920292]])

Operations are elementwise because all of its operations, division, addition, exponentiation and division are themselves elementwise operations

In [27]:
s2 = (1+T.tanh(x/2))/2
logistic2 = function([x],s2)
logistic2([[0,1],[-1,-2]])

array([[ 0.5       ,  0.73105858],
       [ 0.26894142,  0.11920292]])

#### Computing more tha one thing at the same time

In [30]:
a,b, = T.dmatrices('a','b')
diff = a-b
abs_diff = abs(diff)
diff_squred = diff**2
f=function([a,b],[diff,abs_diff,diff_squred])
f([[1, 1], [1, 1]], [[0, 1], [2, 3]])

[array([[ 1.,  0.],
        [-1., -2.]]), array([[ 1.,  0.],
        [ 1.,  2.]]), array([[ 1.,  0.],
        [ 1.,  4.]])]

In [33]:
f([[1, 1], [1, 1]], [[0, 1], [2, 3]])[2]

array([[ 1.,  0.],
       [ 1.,  4.]])

#### Setting default value to argument/

In [35]:
x,y = T.dscalars('x','y')
z = x+y
f = function([x,theano.In(y,value=1)],z)
print f(33)
print f(33,2)

34.0
35.0


### Using shared Variables

In [37]:
state = theano.shared(0) #its value is shared between many functions
inc = T.iscalar('inc')
accumulator = function([inc],state,updates = [(state,state+inc)])
#the updates means that whenever this function runs, it will replace the .value
# of each shared variable with the result of the corresponding expression

In [38]:
#accumulator replaces the state's value with the sum of the state and
#increment amount
print(state.get_value())
accumulator(1)

0


array(0)

In [39]:
print state.get_value()
accumulator(300)
print state.get_value()

1
301


In [40]:
state.set_value(-1) #reset the state
print accumulator(3)
print state.get_value()

-1
2


In [41]:
#we can define more than one function to use the same shared variable
decrementor = function([inc],state,updates=[(state,state-inc)])
print decrementor(2)
print state.get_value()

2
0


In [43]:
#we do not want to use the value of state hence we use the givens paramater
#of function
fn_of_state = state* 2 + inc #fn_of_state is a function defined by state
foo = T.scalar(dtype = state.dtype)
skip_shared = function([inc,foo],fn_of_state,givens = [(state,foo)])
print skip_shared(1,3) #we're using 3 for the state, not state.value
print state.get_value()

7
0


In practice, a good way of thinking about the givens is as a mechanism that allows you to replace any part of your formula with a different expression that evaluates to a tensor of same shape and dtype.

### Copying functions

In [45]:
state = theano.shared(0)
inc = T.iscalar('inc')
accumulator = function([inc],state,updates = [(state,state+inc)])
print accumulator(10)
print state.get_value()

0
10


In [47]:
# create a copy of the accumulator with a new shared variable
new_state = theano.shared(0)
new_accumulator = accumulator.copy(swap={state:new_state})
print new_accumulator(100)
new_state.get_value()

[array(0)]


array(100)

In [48]:
print state.get_value() #state of first function is left untouched

10


In [58]:
#we create a copy with updates removed
null_accumulator = accumulator.copy()
print null_accumulator(9000)
print state.get_value()

[array(18010)]
27010


## Using random numbers

In [59]:
from theano.tensor.shared_randomstreams import RandomStreams

In [61]:
srng = RandomStreams(seed=234)
rv_u = srng.uniform((2,2))
rv_n = srng.normal((2,2))
f = function([],rv_u)
g = function([],rv_n,no_default_updates=True) #not updating rv_n.rng
nearly_zeros = function([],rv_u+rv_u - 2*rv_u)

In [62]:
f_val0 = f()
f_val1 = f()

In [63]:
print f_val0, f_val1

[[ 0.12672381  0.97091597]
 [ 0.13989098  0.88754825]] [[ 0.31971415  0.47584377]
 [ 0.24129163  0.42046081]]


In [64]:
g_val0 = g()
g_val1 = g()
print g_val0, g_val1

[[ 0.37328447 -0.65746672]
 [-0.36302373 -0.97484625]] [[ 0.37328447 -0.65746672]
 [-0.36302373 -0.97484625]]


The extra argument no_default_updates = True makes the RNG state not affected by calling the returned function. So calling $g$ multiple times will return the same numbers

In [65]:
nearly_zeros()

array([[ 0.,  0.],
       [ 0.,  0.]])

In [66]:
rng_val = rv_u.rng.get_value(borrow=True)   # Get the rng for rv_u
rng_val.seed(89234)                         # seeds the generator
rv_u.rng.set_value(rng_val, borrow=True)    # Assign back seeded rng

In [67]:
srng.seed(902340) 

## Derivatives in Theano

In [68]:
x = T.dscalar('x')
y = x **2
gy = T.grad(y,x)
theano.pp(gy)

'((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))'

In [69]:
f = function([x],gy)
f(4)

array(8.0)

In [70]:
x = T.dmatrix('x')
s = T.sum(1/(1+T.exp(-x)))
gs = T.grad(s,x)
dlogistic = function([x],gs)
dlogistic([[0,1],[-1,-2]])

array([[ 0.25      ,  0.19661193],
       [ 0.19661193,  0.10499359]])

### Comptuing the Jacobian

In [71]:
x = T.dvector('x')
y = x ** 2
J, updates = theano.scan(lambda i, y,x : T.grad(y[i], x), 
                         sequences=T.arange(y.shape[0]), non_sequences=[y,x])
#we generate a sequence of ints from 0 to y.shape[0] using T.arange.
#tghen we loop through this sequence at each step, we compute the 
#gradient of element y[i] with respect to x. scan automatically concatenates all of
# these rows, generating a matrix which corresponds
#to the Jacobian
f = theano.function([x], J, updates=updates)
f([4, 4])

array([[ 8.,  0.],
       [ 0.,  8.]])

## Computing the Hessian

In [72]:
x = T.dvector('x')
y = x **2
cost = y.sum()
gy = T.grad(cost,x)
H, updates = theano.scan(lambda i, gy,x : T.grad(gy[i],x),
                        sequences = T.arange(gy.shape[0]),
                        non_sequences = [gy,x])
f = theano.function([x],H,updates = updates)
f([4,4])

array([[ 2.,  0.],
       [ 0.,  2.]])