In [1]:
import numpy as np
# Problem dimensions used throughout the notebook:
#   n - number of samples (rows of X, length of y)
#   d - number of input features (columns of X and of W)
#   k - number of hidden ReLU units (rows of W, length of v)
n, d, k = 300, 14, 20

In [2]:
def relu(data):
    """Rectified linear unit applied element-wise.

    Returns a new array holding ``max(x, 0)`` for every entry ``x`` of
    ``data``; the input is not modified.
    """
    floor = 0
    activated = np.maximum(data, floor)
    return activated

def dRelu(data, Z):
    """Gate ``data`` by the ReLU derivative evaluated at ``Z``.

    Entries of ``data`` are copied through wherever ``Z`` is strictly
    positive; every other position is 0. The result is a freshly allocated
    array with the shape of ``Z`` and NumPy's default float dtype (it is
    created with ``np.zeros``). ``data`` must be indexable by a boolean
    mask of ``Z``'s shape.
    """
    active = Z > 0  # positions where the ReLU output is non-zero
    gated = np.zeros(Z.shape)
    gated[active] = data[active]
    return gated

In [3]:
def forward(X, W, v):
    """Forward pass of the one-hidden-layer ReLU network.

    In this notebook ``X`` is built as (n, d), ``W`` as (k, d) and ``v`` as
    (k,), which makes the hidden activations (n, k) and the predictions (n,).

    Returns:
        (Z, yhat): the hidden-layer activations ``relu(W @ X.T).T`` and the
        network output ``Z @ v``.
    """
    pre_activation = W @ X.T  # mat-mat, one column per sample
    hidden = relu(pre_activation).T  # back to one row per sample
    prediction = hidden @ v  # mat-vec
    return hidden, prediction

In [4]:
def backward(error, Z, v, X, W):
    """Backpropagate the output error to both weight sets.

    With ``error = yhat - y`` (as the training loop passes it), the returned
    arrays are the gradients of ``0.5 * sum(error ** 2)`` with respect to
    ``W`` and ``v``. ``W`` is accepted for signature symmetry but is not read.

    Returns:
        (dW, dv): gradient for the hidden-layer weights and for the output
        weights, in that order.
    """
    # Kept purely as a guard: the unpacking raises unless X is 2-D.
    _num_rows, _num_cols = X.shape
    grad_v = Z.T @ error  # mat-vec
    upstream = np.outer(error, v)  # error pushed back onto the activations
    gated = dRelu(upstream, Z)  # zero where the ReLU was inactive
    grad_W = gated.T @ X  # mat-mat
    return grad_W, grad_v

In [5]:
def update(W, v, dW, dv, alpha, n_samples=None):
    """Apply one gradient-descent step to ``W`` and ``v`` in place.

    Both parameter arrays are modified in place and also returned, so
    callers may either rely on the mutation or rebind the results.

    Args:
        W, v: parameter arrays to update.
        dW, dv: gradients matching ``W`` and ``v``.
        alpha: learning rate.
        n_samples: number of samples the gradients were summed over; the
            step is ``alpha / n_samples``. When omitted, the notebook-level
            global ``n`` is used, which preserves the original behaviour.

    Returns:
        (W, v): the same array objects that were passed in, after the step.
    """
    if n_samples is None:
        # Backward-compatible fallback to the notebook-level sample count.
        n_samples = n
    step = alpha / n_samples
    W -= step * dW
    v -= step * dv
    return W, v

In [6]:
# setting up data
# A seeded generator makes the run reproducible: Restart & Run All now
# prints the same MSE trace every time instead of a fresh random one.
SEED = 0
rng = np.random.default_rng(SEED)
W = rng.random((k, d))  # hidden-layer weights, uniform in [0, 1)
v = rng.random(k)  # output weights
X = rng.random((n, d))  # inputs
y = rng.random(n)  # targets

# training loop
alpha = 0.001  # learning rate
num_iterations = 20
for iteration in range(num_iterations):
    Z, yhat = forward(X, W, v)
    residual = yhat - y  # computed once; reused for the loss and the gradient
    mse = np.sum(residual ** 2) / n
    print("mse: ", mse)
    dW, dv = backward(residual, Z, v, X, W)
    W, v = update(W, v, dW, dv, alpha)

mse:  1062.0920266355738
mse:  535.5798159056685
mse:  279.76656603244106
mse:  148.79588918389402
mse:  79.91747568322813
mse:  43.17539443000831
mse:  23.42529924704887
mse:  12.764588562345306
mse:  6.996935903186636
mse:  3.8725489057362967
mse:  2.1788299210238984
mse:  1.2602927262684043
mse:  0.7620276971358071
mse:  0.49169552759715224
mse:  0.3450063211467373
mse:  0.26539548939203833
mse:  0.22217835233457406
mse:  0.19870764866942242
mse:  0.1859512344940013
mse:  0.17900843762497043
