In [1]:
import numpy as np

In [2]:
a = np.array([1, 2, 3])
print("a: ", a)
b = np.array([(1, 2, 3), (4, 5, 6)])
print("b:\n", b)
b1 = np.array([(1, 2, 3), (4, 5, 6)], dtype=float)
print("b1:\n", b1)

a:  [1 2 3]
b:
 [[1 2 3]
 [4 5 6]]
b1:
 [[1. 2. 3.]
 [4. 5. 6.]]


In [3]:
np.linspace(0,2,9)

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

In [4]:
mat = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

In [5]:
print("Martrix 1:\n", mat)
print("\nMatrix Addition:\n", mat + mat, "\n\nMatrix Subtraction:\n", mat - mat)
print("\nMatrix Multiplication:\n", mat * mat, "\n\nMatrix Division:\n", mat / mat)
print("\nMatrix Exponentiation:\n", mat ** 2, "\n\nMatrix Transpose:\n", mat.T)

Martrix 1:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]

Matrix Addition:
 [[ 2  4  6]
 [ 8 10 12]
 [14 16 18]] 

Matrix Subtraction:
 [[0 0 0]
 [0 0 0]
 [0 0 0]]

Matrix Multiplication:
 [[ 1  4  9]
 [16 25 36]
 [49 64 81]] 

Matrix Division:
 [[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]

Matrix Exponentiation:
 [[ 1  4  9]
 [16 25 36]
 [49 64 81]] 

Matrix Transpose:
 [[1 4 7]
 [2 5 8]
 [3 6 9]]


2-Layer Neural Network

Input → Linear → ReLU → Linear → Output


In [6]:
import numpy as np

np.random.seed(42)

# dataset
X = np.random.randn(100, 3) # 100 samples, 3 features
y = np.random.randn(100, 1) # regression target

print(X, y)

[[ 0.49671415 -0.1382643   0.64768854]
 [ 1.52302986 -0.23415337 -0.23413696]
 [ 1.57921282  0.76743473 -0.46947439]
 [ 0.54256004 -0.46341769 -0.46572975]
 [ 0.24196227 -1.91328024 -1.72491783]
 [-0.56228753 -1.01283112  0.31424733]
 [-0.90802408 -1.4123037   1.46564877]
 [-0.2257763   0.0675282  -1.42474819]
 [-0.54438272  0.11092259 -1.15099358]
 [ 0.37569802 -0.60063869 -0.29169375]
 [-0.60170661  1.85227818 -0.01349722]
 [-1.05771093  0.82254491 -1.22084365]
 [ 0.2088636  -1.95967012 -1.32818605]
 [ 0.19686124  0.73846658  0.17136828]
 [-0.11564828 -0.3011037  -1.47852199]
 [-0.71984421 -0.46063877  1.05712223]
 [ 0.34361829 -1.76304016  0.32408397]
 [-0.38508228 -0.676922    0.61167629]
 [ 1.03099952  0.93128012 -0.83921752]
 [-0.30921238  0.33126343  0.97554513]
 [-0.47917424 -0.18565898 -1.10633497]
 [-1.19620662  0.81252582  1.35624003]
 [-0.07201012  1.0035329   0.36163603]
 [-0.64511975  0.36139561  1.53803657]
 [-0.03582604  1.56464366 -2.6197451 ]
 [ 0.8219025   0.08704707

In [7]:
input_dim = 3
hidden_dim = 5
output_dim = 1

# He initialization for ReLU
W1 = np.random.randn(input_dim, hidden_dim) * np.sqrt(2 / input_dim)
b1 = np.zeros((1, hidden_dim))

W2 = np.random.randn(hidden_dim, output_dim) * np.sqrt(2 / hidden_dim)
b2 = np.zeros((1, output_dim))

W1, b1, W2, b2

(array([[-1.30184473, -0.48938766,  0.00428146,  0.03835949, -0.36747692],
        [ 0.50855484, -0.87170843, -0.11625236,  0.09822097,  0.42003755],
        [ 0.58103111, -0.91826642, -1.25259898,  1.04321876,  0.27133325]]),
 array([[0., 0., 0., 0., 0.]]),
 array([[-0.47338445],
        [ 0.98103465],
        [ 0.07315906],
        [ 0.74585303],
        [ 0.04270244]]),
 array([[0.]]))

In [8]:
# forward pass

def relu(z):
    return np.maximum(0, z)

def forward(X):
    z1 = X @ W1 + b1        # linear
    a1 = relu(z1)           # activation
    z2 = a1 @ W2 + b2       # output layer (linear)
    return z1, a1, z2

# loss function

def mse_loss(y, y_hat):
    return np.mean((y - y_hat) ** 2)

# backprop

def relu_grad(z):
    # z = np.array([-2, 0, 3])
    # z > 0 => array([False, False, True])
    # astype(float) => array([0., 0., 1.])
    return (z > 0).astype(float) 


def backward(X, y, z1, a1, y_hat):
    global W1, b1, W2, b2
    
    n = X.shape[0]

    # dL/dy_hat
    dL_dy = 2 * (y_hat - y) / n

    # layer 2 gradients
    dW2 = a1.T @ dL_dy # @ means matrix multiplication
    db2 = np.sum(dL_dy, axis=0, keepdims=True)

    # backprop to hidden layer
    da1 = dL_dy @ W2.T
    dz1 = da1 * relu_grad(z1)

    # layer 1 gradients
    dW1 = X.T @ dz1
    db1 = np.sum(dz1, axis=0, keepdims=True)

    return dW1, db1, dW2, db2


In [10]:
lr = 0.01
epochs = 500

for epoch in range(epochs + 1):
    z1, a1, y_hat = forward(X)
    loss = mse_loss(y, y_hat)

    dW1, db1, dW2, db2 = backward(X, y, z1, a1, y_hat)

    # update
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")    


Epoch 0, Loss: 0.7370
Epoch 100, Loss: 0.7294
Epoch 200, Loss: 0.7231
Epoch 300, Loss: 0.7177
Epoch 400, Loss: 0.7146
Epoch 500, Loss: 0.7121
