In [1]:
import theano
import theano.tensor as T
import theano.tensor.nnet as nnet
import numpy as np

In [21]:
x = T.dvector()
y = T.dscalar()

This was adopted from the Theano tutorial [here](http://outlace.com/Beginner-Tutorial-Theano/)

This tutorial only included a layer function with an activation function, while the examples given in the book kept the last layer linear. I'll test out both of these cases (as well as testing out the sigmoid function vs relu)

In [15]:
def layerActivation(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)
    h = nnet.sigmoid(m)
    return h

def layer(x, w):
    b = np.array([1], dtype=theano.config.floatX)
    new_x = T.concatenate([x, b])
    m = T.dot(w.T, new_x)
    return m

In [16]:
def grad_desc(cost, theta):
    alpha = 0.1
    return theta - (alpha * T.grad(cost, wrt=theta))

In [19]:
theta1 = theano.shared(np.array(np.random.rand(3,3), dtype=theano.config.floatX))
theta2 = theano.shared(np.array(np.random.rand(4,1), dtype=theano.config.floatX))

In [20]:
hid1 = layerActivation(x, theta1) # hidden

TypeError: Join() can only join tensors with the same number of dimensions.

To define my cost functions, I'll need to include two versions: one for a mean squared error and one for a mean absolute error. I have named these fcMSE and fcMAE

In [7]:
out1 = T.sum(layerActivation(hid1, theta2)) #output layer
fcMSE = (out1 - y)**2 #cost expression, mean squared error
fcMAE = np.abs(out1 - y) #cost expression, mean absolute error

Lets start out by looking at the mean squared error.

In [8]:
cost = theano.function(inputs=[x, y], outputs=fcMSE, updates=[
        (theta1, grad_desc(fcMSE, theta1)),
        (theta2, grad_desc(fcMSE, theta2))])
run_forward = theano.function(inputs=[x], outputs=out1)

Now I'll define my data. I'm just using the analytic function given in the HW to create a matrix, and define my x and y

In [13]:
from itertools import product

data = []

for i in product(np.arange(0,10), np.arange(0,10)):
    data.append([i[0], i[1], 2*(i[0]**2) - 3*(i[1]**2) + 1])
    
data = np.array(data)

x = data[:,[0,1]].reshape(100,2)
y = np.array(data[:,2])

Now I could use this x and y to examine my results

In [14]:

cur_cost = 0
for i in range(10000):
    for k in range(len(x)):
        cur_cost = cost(x[k], y[k]) #call our Theano-compiled cost function, it will auto update weights
    if i % 500 == 0: #only print the cost every 500 epochs/iterations (to save space)
        print('Cost: %s' % (cur_cost,))

Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0
Cost: 6400.0


In [11]:
x

array([[0, 0],
       [0, 1],
       [0, 2],
       [0, 3],
       [0, 4],
       [0, 5],
       [0, 6],
       [0, 7],
       [0, 8],
       [0, 9],
       [1, 0],
       [1, 1],
       [1, 2],
       [1, 3],
       [1, 4],
       [1, 5],
       [1, 6],
       [1, 7],
       [1, 8],
       [1, 9],
       [2, 0],
       [2, 1],
       [2, 2],
       [2, 3],
       [2, 4],
       [2, 5],
       [2, 6],
       [2, 7],
       [2, 8],
       [2, 9],
       [3, 0],
       [3, 1],
       [3, 2],
       [3, 3],
       [3, 4],
       [3, 5],
       [3, 6],
       [3, 7],
       [3, 8],
       [3, 9],
       [4, 0],
       [4, 1],
       [4, 2],
       [4, 3],
       [4, 4],
       [4, 5],
       [4, 6],
       [4, 7],
       [4, 8],
       [4, 9],
       [5, 0],
       [5, 1],
       [5, 2],
       [5, 3],
       [5, 4],
       [5, 5],
       [5, 6],
       [5, 7],
       [5, 8],
       [5, 9],
       [6, 0],
       [6, 1],
       [6, 2],
       [6, 3],
       [6, 4],
       [6, 5],
       [6,