In [14]:
import numpy as np

# Shared weights of the "green" hidden neuron: 4 input weights followed by the
# bias (the forward pass appends a constant 1.0 to every input window).
wGreenNeuron = np.array([1.73673761, 1.89791391, -2.10677342, -0.14891209, 0.58306155])
# Shared weights of the "orange" hidden neuron, same layout as the green one.
wOrangeNeuron = np.array([-2.25923303, 0.13723954, -0.70121322, -0.62078008, -0.47961976])
# Output neuron: 6 weights for the green activations, 6 for the orange
# activations, then the bias (13 values in total).
wOutputNeuron = np.array([1.20973877, -1.07518386, 0.80691921, -0.29078347, -0.22094764, -0.16915604,
    1.10083444, 0.08251052, -0.00437558, -1.72255825, 1.05755642, -2.51791281, -1.91064012])

# Training inputs: 6 samples (rows) with 9 input values each.
X = np.array([
    [0, 0, 0.8, 0.4, 0.4, 0.1, 0, 0, 0],
    [0, 0.3, 0.3, 0.8, 0.3, 0, 0, 0, 0],
    [0, 0, 0, 0, 0.3, 0.3, 0.8, 0.3, 0],
    [0, 0, 0, 0, 0, 0.8, 0.4, 0.4, 0.1],
    [0.8, 0.4, 0.4, 0.1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0.3, 0.3, 0.8, 0.3],
])

# Desired output (-1 or +1) for each row of X.
y = np.array([-1, 1, 1, -1, -1, 1])

## Forward pass

In [15]:
def shared_layer(w, n):
    """Build a layer of ``n`` tanh neurons that all share the weight vector ``w``.

    ``w`` holds the input weights followed by the bias as its last entry.
    The returned callable takes an input vector ``x`` and evaluates the same
    neuron on ``n`` overlapping windows ``x[i : i + len(w) - 1]`` for
    ``i = 0 .. n-1``, returning the ``n`` activations as a NumPy array.
    """
    window = w.shape[0] - 1  # inputs seen by one neuron (bias excluded)

    def neuron(patch):
        # Append a constant 1.0 so the last entry of ``w`` acts as the bias.
        return np.tanh(np.dot(w, np.append(patch, 1.0)))

    def layer(x):
        activations = [neuron(x[start: start + window]) for start in range(n)]
        return np.array(activations)

    return layer

# Two weight-shared hidden layers; each evaluates its neuron at 6 input positions.
layerGreen = shared_layer(wGreenNeuron, 6)
layerOrange = shared_layer(wOrangeNeuron, 6)


def layerOutput(alpha, beta):
    """Output neuron: tanh of the weighted sum of both hidden layers plus bias.

    ``alpha`` and ``beta`` are the activation vectors of the green and orange
    layers; they are concatenated in that order and extended with a constant
    1.0 so the last entry of ``wOutputNeuron`` acts as the bias.
    """
    hidden = np.concatenate((alpha, beta))
    return np.tanh(np.dot(wOutputNeuron, np.append(hidden, 1.0)))


def model(x):
    """Full forward pass: input vector ``x`` -> scalar network output."""
    return layerOutput(layerGreen(x), layerOrange(x))

In [16]:
# Forward pass on the first training sample (its target is y[0] = -1).
model(X[0])

-0.98629150472508542

## Backward pass

**Output layer**

$$
\delta^H = \frac{\partial E}{\partial \nu^H} = -2 e (1-(y^H)^2)
$$

$$
\Delta w_{i}^H = -\alpha \delta^H y_i^{H-1}
$$

$$
\Delta b^H = -\alpha \delta^H
$$

**Hidden layer**

$$
\delta_j^h = \frac{\partial E}{\partial \nu_j^h} = \left( \sum_{l = 1}^{L^{h+1}} \frac{\partial E}{\partial \nu_l^{h+1}} \frac{\partial \nu_l^{h+1}}{\partial y_j^h} \right) \frac{\partial y_j^h}{\partial \nu_j^h}
=  \frac{\partial E}{\partial \nu^H} \frac{\partial \nu^H}{\partial y_j^h}  \frac{\partial y_j^h}{\partial \nu_j^h}
=\delta^H w_j^H (1-(y_j^h)^2)
$$

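$$
\Delta w_{i}^h = -\alpha \sum_{j=1}^{L^h} {\delta_j^h \frac{\partial \nu_j^h}{\partial w_i^h}}
=-\alpha \sum_{j=1}^{L^h} {\delta_j^h  {x_{j+i-1}}}
$$

$$
\Delta b^h = -\alpha \sum_{j=1}^{L^h} {\delta_j^h \frac{\partial \nu_j^h}{\partial b^h}}
=-\alpha \sum_{j=1}^{L^h} {\delta_j^h }
$$

Here $L^h$ is the number of neurons in the weight-shared hidden layer (6 for each of the two layers in this network); every one of them contributes to the update of the same shared weights.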

In [29]:
def _shared_weight_update(delta, x, n_weights, alpha):
    """Gradient-descent step for the weights of one weight-shared hidden neuron.

    ``delta`` holds dE/dnu for every position of the shared neuron. Weight
    ``i`` sees input ``x[i + j]`` at position ``j``, so its step is
    ``-alpha * sum_j delta[j] * x[i + j]``; the bias step is
    ``-alpha * sum_j delta[j]``. Returns the ``n_weights`` weight steps
    followed by the bias step, matching the layout of the weight vectors.
    """
    n_positions = delta.shape[0]
    weight_steps = [-alpha * np.dot(delta, x[i: i + n_positions]) for i in range(n_weights)]
    bias_step = -alpha * np.sum(delta)
    return np.array(weight_steps + [bias_step])


def train_one(x, d, alpha):
    """Compute one back-propagation step for a single training sample.

    The weight updates are only computed and returned; the global weight
    vectors are NOT modified by this function.

    Parameters
    ----------
    x : input vector of the sample.
    d : desired network output for ``x``.
    alpha : learning rate.

    Returns
    -------
    dict with the inputs (``x``, ``d``, ``alpha``), the forward activations
    (``y_green``, ``y_orange``, ``y_output``), the error ``e``, the local
    gradients (``delta_out``, ``delta_green``, ``delta_orange``) and the
    weight steps (``update_out``, ``update_green``, ``update_orange``).
    These are the same keys the previous ``locals()``-based return exposed.
    """
    # Forward pass.
    y_green = layerGreen(x)
    y_orange = layerOrange(x)
    y_output = layerOutput(y_green, y_orange)

    # Output neuron: with E = e**2 and y = tanh(nu), dE/dnu = -2 e (1 - y**2).
    e = d - y_output
    delta_out = -2.0 * e * (1.0 - y_output * y_output)
    update_out = -alpha * delta_out * np.append(np.concatenate((y_green, y_orange)), 1.0)

    # Hidden layers: back-propagate through the output weight attached to each
    # hidden activation. The slice bounds follow the hidden-layer sizes rather
    # than being hard-coded, so they stay correct if the layer widths change.
    n_green = y_green.shape[0]
    n_orange = y_orange.shape[0]
    delta_green = delta_out * wOutputNeuron[:n_green] * (1.0 - y_green * y_green)
    delta_orange = (delta_out * wOutputNeuron[n_green:n_green + n_orange]
                    * (1.0 - y_orange * y_orange))

    update_green = _shared_weight_update(delta_green, x, wGreenNeuron.shape[0] - 1, alpha)
    update_orange = _shared_weight_update(delta_orange, x, wOrangeNeuron.shape[0] - 1, alpha)

    # Explicit mapping instead of ``locals()`` so helper temporaries never leak
    # into the result and the set of keys is stable.
    return {
        'x': x,
        'd': d,
        'alpha': alpha,
        'y_green': y_green,
        'y_orange': y_orange,
        'y_output': y_output,
        'e': e,
        'delta_out': delta_out,
        'update_out': update_out,
        'delta_green': delta_green,
        'delta_orange': delta_orange,
        'update_green': update_green,
        'update_orange': update_orange,
    }
    
# One backward pass on the first sample with learning rate 0.2; the cell displays the returned dict.
train_one(X[0], y[0], 0.2)

{'update_orange': array([  3.16047797e-05,   1.38911525e-05,  -3.13270817e-05,
         -1.96539452e-05,   2.07629719e-04]),
 'update_green': array([ -2.93791112e-06,   3.72379012e-05,  -3.12354539e-05,
         -4.94013179e-06,   5.05783631e-06]),
 'update_out': array([  1.22681049e-04,  -1.24430975e-04,  -1.42433219e-04,
         -1.41761412e-04,  -1.34243954e-04,  -9.54300872e-05,
          1.28231347e-04,   1.06848843e-04,   1.47584893e-04,
          1.32142694e-04,   1.31181625e-04,   9.07603998e-05,
         -1.49307818e-04]),
 'delta_out': 0.00074653909200695517,
 'e': -0.013708495274914578,
 'y_output': -0.98629150472508542,
 'y_orange': array([-0.8588388 , -0.71562792, -0.98846058, -0.88503533, -0.8785985 ,
        -0.6078744 ]),
 'y_green': array([-0.82166527,  0.83338553,  0.95395687,  0.94945739,  0.89910867,
         0.63914997]),
 'd': -1,
 'x': array([ 0. ,  0. ,  0.8,  0.4,  0.4,  0.1,  0. ,  0. ,  0. ]),
 'delta_orange': array([  2.15641153e-04,   3.00519004e-05,  -7.4

**Compared with auto-differentiation**

In [None]:
def train_one_autodiff(x, d, alpha):
    """Unfinished stub meant to mirror ``train_one`` for the autodiff comparison.

    Only the forward pass is executed. ``d`` and ``alpha`` are not used in any
    computation yet; they only show up in the returned mapping because the
    function returns ``locals()``.

    Returns the dict of all local names: ``x``, ``d``, ``alpha``, ``y_green``,
    ``y_orange`` and ``y_output``.
    """
    y_green = layerGreen(x)
    y_orange = layerOrange(x)
    y_output = layerOutput(y_green, y_orange)
    # TODO: compute the updates below via automatic differentiation.
    # The commented-out lines are the manual formulas copied from ``train_one``,
    # kept as a reference. As copied they are incomplete: ``delta_green`` and
    # ``delta_orange`` are used but never defined here.
#     e = d - y_output
#     delta_out = -2.0 * e * (1.0 - y_output*y_output)
#     update_out = -alpha * delta_out * np.append(np.concatenate((y_green, y_orange)), 1.0)
#     update_green = np.array(
#         [-alpha * np.dot(delta_green, x[i: i+delta_green.shape[0]]) for i in range(wGreenNeuron.shape[0]-1)] +
#         [-alpha * np.sum(delta_green)])
#     update_orange = np.array(
#         [-alpha * np.dot(delta_orange, x[i: i+delta_orange.shape[0]]) for i in range(wOrangeNeuron.shape[0]-1)] +
#         [-alpha * np.sum(delta_orange)])
    # NOTE: ``locals()`` exposes every local name, so renaming or adding a
    # local variable changes the keys of the returned dict.
    return locals()