In [90]:
import numpy as np
# parameter setup
# n: rows of X / length of y, d: columns of X and W, k: rows of W / length of v
n, d, k = 300, 14, 20

In [91]:
def relu(data):
    """Apply the rectified linear unit element-wise.

    Entries of ``data`` below zero are replaced by zero; all other
    entries pass through unchanged.
    """
    rectified = np.maximum(data, 0)
    return rectified

def dRelu(data, Z):
    """Pass a gradient through the ReLU, masking by the sign of ``Z``.

    Where ``Z > 0`` the matching entry of ``data`` is copied to the
    result; every other entry of the result is zero. The returned array
    is freshly allocated with the shape of ``Z``.
    """
    positive = Z > 0  # positions whose gradient is kept
    grad = np.zeros(Z.shape)
    grad[positive] = data[positive]
    return grad

In [92]:
def forward(X, W, v):
    """Run the forward pass of the one-hidden-layer ReLU network.

    Computes the hidden activations ``Z = relu(W @ X.T).T`` and the
    prediction ``yhat = Z @ v``.

    Returns the pair ``(Z, yhat)``.
    """
    pre_activation = W @ X.T  # mat-mat
    Z = relu(pre_activation).T  # activate, then transpose back
    yhat = Z @ v  # mat-vec
    return Z, yhat

In [93]:
def backward(error, Z, v, X, W):
    """Compute the gradients that ``update`` applies to ``W`` and ``v``.

    ``error`` is the residual passed in by the caller, ``Z`` the hidden
    activations returned by ``forward``. Both gradients are averaged over
    the first dimension of ``X``. ``W`` is accepted for signature
    symmetry but is not read here.

    Returns the pair ``(dW, dv)``.
    """
    num_rows, _ = X.shape  # X is expected to be two-dimensional
    # Gradient with respect to the hidden activations, then masked by the
    # ReLU so that only entries with positive activation contribute.
    grad_hidden = np.outer(error, v)  # outer product
    grad_pre = dRelu(grad_hidden, Z)
    dv = (Z.T @ error) / num_rows  # mat-vector
    dW = (grad_pre.T @ X) / num_rows  # mat-mat
    return dW, dv

In [94]:
def update(W, v, dW, dv, alpha):
    """Take one gradient-descent step of size ``alpha``.

    ``W`` and ``v`` are modified in place (for NumPy arrays) and also
    returned, so callers may either rebind the result or rely on the
    mutation.
    """
    w_step = alpha * dW
    W -= w_step
    v_step = alpha * dv
    v -= v_step
    return W, v

In [96]:
# setting up data
# Draw everything from one seeded generator so that Restart & Run All
# reproduces the same weights, data and loss curve on every run.
rng = np.random.default_rng(0)
W = rng.random((k, d))  # first-layer weights, uniform in [0, 1)
v = rng.random(k)  # output weights
X = rng.random((n, d))  # synthetic inputs
y = rng.random(n)  # synthetic targets

# training loop
alpha = 0.001  # gradient-descent step size
num_iterations = 20
for iteration in range(num_iterations):
    Z, yhat = forward(X, W, v)
    mse = np.mean((y - yhat) ** 2)
    print("mse: ", mse)
    # The residual is passed as (prediction - target), which is the sign
    # convention the gradient step in `update` expects.
    dW, dv = backward(yhat - y, Z, v, X, W)
    W, v = update(W, v, dW, dv, alpha)

mse:  1501.3498801251972
mse:  767.6849596459
mse:  412.0526727358918
mse:  226.76410157520561
mse:  126.52067743320958
mse:  71.16300406001197
mse:  40.240892363028664
mse:  22.856135484372377
mse:  13.046105954493436
mse:  7.498648724053874
mse:  4.357753906737876
mse:  2.5781269862709917
mse:  1.569346459062586
mse:  0.9973556546080302
mse:  0.6729606085397626
mse:  0.4889483575545233
mse:  0.3845420766512706
mse:  0.32528140878698203
mse:  0.2916248705981824
mse:  0.27249014811609606
