In [1]:
import numpy as np

In [2]:
def linear_forward(X, w, b):
    return X @ w + b

def relu_forward(X):
    return np.maximum(X, 0)

def linear_backward(X, w, b, dlinear):
    db = dlinear.mean(axis=0)
    dw = X.T @ dlinear
    dX = dlinear @ w.T
    return dX, dw, db

def relu_backward(X, drelu):
    return (X > 0) * drelu

In [3]:
def error_func(predicted, true):
    return np.square(predicted - true).mean()

def derror_func(predicted, true):
    return (predicted - true) * (2 / np.prod(predicted.shape))

In [4]:
INPUT_SIZE = 10
OUTPUT_SIZE = 2
LINEAR_SIZE = 10
SAMPLE_SIZE = 100

lr = 0.01

In [5]:
inp = np.random.normal(size=(100, INPUT_SIZE))
out = np.random.normal(size=(100, OUTPUT_SIZE))

linear1_w = np.random.normal(size=(INPUT_SIZE, LINEAR_SIZE))
linear1_b = np.random.normal(size=LINEAR_SIZE)
linear2_w = np.random.normal(size=(LINEAR_SIZE, OUTPUT_SIZE))
linear2_b = np.random.normal(size=OUTPUT_SIZE)

lr = 0.01
for i in range(500):

    linear1_out = linear_forward(inp, linear1_w, linear1_b)
    relu_out = relu_forward(linear1_out)
    linear2_out = linear_forward(relu_out, linear2_w, linear2_b)

    error = error_func(linear2_out, out)
    if i % 50 == 0:
        print(i, error)
    dlinear2_out = derror_func(linear2_out, out)

    drelu_out, dlinear2_w, dlinear2_b = linear_backward(relu_out, linear2_w, linear2_b, dlinear2_out)
    dlinear1_out = relu_backward(linear1_out, drelu_out)
    dinp, dlinear1_w, dlinear1_b = linear_backward(inp, linear1_w, linear1_b, dlinear1_out)
    
    linear1_w -= lr * dlinear1_w
    linear1_b -= lr * dlinear1_b
    linear2_w -= lr * dlinear2_w
    linear2_b -= lr * dlinear2_b
print(i, error)

0 24.54572734903997
50 2.436612368551602
100 1.6554576287017528
150 1.4168705912377784
200 1.2947483194218647
250 1.213519032381727
300 1.163767340124988
350 1.1361367378962677
400 1.1160875356622313
450 1.0994470453328942
499 1.0853518093838914


# Dropout

In [6]:
def dropout_forward(X, p, eval):
    if eval:
        return X * (1 - p)
    mask = (np.random.uniform(low=0, high=1.0, size=X.shape) > p)
    return X * mask, mask

def dropout_backward(mask, ddropout):
    return mask * ddropout

In [7]:
p = 0.5

In [8]:
inp = np.random.normal(size=(100, INPUT_SIZE))
out = np.random.normal(size=(100, OUTPUT_SIZE))

linear1_w = np.random.normal(size=(INPUT_SIZE, LINEAR_SIZE))
linear1_b = np.random.normal(size=LINEAR_SIZE)
linear2_w = np.random.normal(size=(LINEAR_SIZE, OUTPUT_SIZE))
linear2_b = np.random.normal(size=OUTPUT_SIZE)

lr = 0.01
for i in range(500):

    linear1_out = linear_forward(inp, linear1_w, linear1_b)
    relu_out = relu_forward(linear1_out)
    dropout_out, dropout_mask = dropout_forward(relu_out, p, False)
    linear2_out = linear_forward(dropout_out, linear2_w, linear2_b)

    error = error_func(linear2_out, out)
    if i % 50 == 0:
        print(i, error)
    dlinear2_out = derror_func(linear2_out, out)

    ddropout_out, dlinear2_w, dlinear2_b = linear_backward(relu_out, linear2_w, linear2_b, dlinear2_out)
    drelu_out = dropout_backward(dropout_mask, ddropout_out)
    dlinear1_out = relu_backward(linear1_out, drelu_out)
    dinp, dlinear1_w, dlinear1_b = linear_backward(inp, linear1_w, linear1_b, dlinear1_out)
    
    linear1_w -= lr * dlinear1_w
    linear1_b -= lr * dlinear1_b
    linear2_w -= lr * dlinear2_w
    linear2_b -= lr * dlinear2_b
print(i, error)

0 29.514139819124566
50 4.373964836022686
100 2.7392116645099356
150 2.061508707697005
200 1.8867119014298404
250 1.6556199967182132
300 1.315392639414344
350 1.3191449906631487
400 1.4437900664847456
450 1.2892275810071152
499 1.319652864542099
