In [228]:
import numpy as np
import matplotlib.pyplot as pp
# import scipy.linalg as la
%matplotlib inline

data = np.loadtxt(open("RegressionData.txt","rb"), delimiter=" ", skiprows=0)
# print data

X = data[:,0]  # random numbers x_n drawn from uniform distribution over [0,1]
print X
Y = data[:,1]  # labels/target values t_n for x_n, generated using 2*pi*x_n, gaussian noise with standard deviation sigma=0.25
print Y

[ 0.5503  0.9206  0.5359  0.6081  0.0202  0.8545  0.2357  0.4847  0.3996
  0.1957]
[-0.5894 -0.2507 -0.0468 -0.3402  0.2857 -1.0683  0.8605 -0.0801  0.6837
  1.185 ]


# 3.1

In [292]:
def init(X, layers=[10, 3, 10]):
    rows = len(X)+1  # include bias node
    columns = len(layers)

    # activations, 3 columns: bias + input layer, 1 hidden layer, output layer
    S = np.zeros((rows, columns))
    
    # bias
    S[0,:] = -1.

    # connection weights W[from-layer][from-index, to-index]
    W = [np.random.uniform(-0.5, 0.5, rows*rows).reshape(rows, rows) for i in range(columns-1)]
#     W = [np.ones((rows, rows)) for i in range(columns-1)]
    for w in range(len(W)):
        left = layers[w]  # max from-index
        right = layers[w+1]  # max to-index
        
        for i in range(rows):
            for j in range(rows):
                # only allow weights between nodes that exist in the model
                # may not come from too high index
                # may not go to too high index
                # may not go to bias node
                if i > left or j > right or j == 0:
                    W[w][i,j] = 0.
#         print W[w], "W["+str(w)+"]"
    
    # transfer fct
    f = np.tanh
#     f = lambda x: x
    # 1st derivative of transfer fct
    fd = lambda h : (1./np.cosh(h))**2

    return S, W, f, fd

# calculate activations
def forward(S, W, X, f):
    rows, columns = S.shape
    
    # init
    H = np.zeros(S.shape)
    S[1:,0] = X.T
    H[1:,0] = X.T
    
    # propagation
    for v in range(1, columns):
        S[1:,v] = H[1:,v] = W[v-1].T.dot(S[:,v-1])[1:]
        if v < columns-1:
            S[1:,v] = f(H[1:,v])
    
    return H, S

# calculate output error as quadratic error
def output_error(Yt, Yx):  # target labels, computed labels
#     print Yt, len(Yt)
#     print Yx, len(Yx)
    return ((Yt-Yx)**2)/2.

# calculate local error for each training point
def backward(S, H, W, Eo, fd):
    rows, columns = S.shape
    
    # init
    D = np.zeros((rows, columns))
    D[1:,-1] = 1  # =1 if f = identity
    
    # propagate
    for v in range(columns-1)[::-1]:
        D[:,v] = fd(H[:,v]) * (D[:,v+1].T.dot(W[v]))
    
    return D

def gradient(S, D, Yt, Yx):
    rows, columns = S.shape
    # 1st factor as per script p. 17 (1.9)
    dedy = np.insert(Yx - Yt, 0, 0)  # row vector
    print dedy, "dedy"
    
    # 2nd factor as per script p. 18 (1.11)
    dydw = [D[:,i+1,np.newaxis] * S[np.newaxis,:,i] for i in range(columns-1)]
    # dydw should now be a quadratic matrix indexed as dydw[from,to]
#     print dydw, "dydw"
    print dydw[0].shape, "dydw[0].shape"
    
    # script p. 17 (1.8)
    dedw = [dydw[i] * dedy[np.newaxis,:] for i in range(len(dydw))]
    print dedw[i].shape, "dedw[i].shape"
    
    # script p. 17 (1.7)
    N = len(Yt)
    G = (1./N) * (np.sum(dedw, 1))
    
    return G

def weight_update(W, G, eta):
    print W[0].shape
    print G.shape
    
    for w in range(len(W)):
        W[w] = W[w] - eta*G[w]
    
    return W

def ex31(X, Yt, S, W, f, fd):
#     rows, columns = S.shape
#     W[0][1,1] = 1
#     W[1][1,3] = 1
    H, S = forward(S, W, X, f)
#     print H, "total input of transfer fct: H"
    print S, "activations S"
    
    Eo = output_error(Yt, S[1:,-1])
#     print Eo, "output error"
    
    D = backward(S, H, W, Eo, fd)
    print D, "local error D"

    G = gradient(S, D, Yt, S[1:,-1])
    print G, "gradient eta*G"
    print G.shape, "gradient shape"
    
    eta = .5
    W = weight_update(W, G, eta)

# cache variables
_S, _W, _f, _fd = init(X)
ex31(X, Y, _S, _W, _f, _fd)

[[-1.         -1.         -1.        ]
 [ 0.5503      0.19311316  0.16833644]
 [ 0.9206      0.69444663 -0.20397982]
 [ 0.5359     -0.71719249  0.54402566]
 [ 0.6081      0.         -0.15778227]
 [ 0.0202      0.          0.52256829]
 [ 0.8545      0.         -0.64352454]
 [ 0.2357      0.         -0.32804654]
 [ 0.4847      0.         -0.07573577]
 [ 0.3996      0.          0.83123771]
 [ 0.1957      0.         -0.08519495]] activations S
[[ 0.          0.          0.        ]
 [ 0.58233134 -0.4416484   1.        ]
 [ 0.13213969  0.02561372  1.        ]
 [-0.32902132 -0.42620419  1.        ]
 [ 0.          0.04266189  1.        ]
 [ 0.          0.06881398  1.        ]
 [ 0.         -0.68065353  1.        ]
 [ 0.         -0.05264848  1.        ]
 [ 0.          0.32292986  1.        ]
 [ 0.          0.71617221  1.        ]
 [ 0.          0.6170374   1.        ]] local error D
[ 0.          0.75773644  0.04672018  0.59082566  0.18241773  0.23686829
  0.42477546 -1.18854654  0.00436423  0