In [200]:
!pip install numpy datasets



In [228]:
import numpy as np
from datasets import load_dataset
import math
import PIL.Image as Image

In [6]:
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Args:
        Z: Array-like of pre-activation values.

    Returns:
        Array with the same shape as ``Z`` in which every negative entry is
        replaced by 0 and every other entry is kept unchanged.
    """
    # np.maximum is the element-wise maximum of two operands; np.max is a
    # reduction whose second positional argument is `axis`, so np.max(0, Z)
    # does not compute ReLU.
    return np.maximum(0, Z)

In [259]:
def softmax(Z):
    """Compute the softmax of ``Z`` along axis 0.

    Each column (slice along axis 0) is normalised independently so that its
    entries are non-negative and sum to 1.

    Args:
        Z: Array of scores; normalisation runs over axis 0.

    Returns:
        Array with the same shape as ``Z`` holding the normalised exponentials.
    """
    # Shift every column by its own maximum. Softmax is invariant to a
    # per-column shift, and this keeps the largest exponent at exp(0) = 1.
    # Shifting by the global maximum instead can underflow a whole column to
    # zeros, which then divides 0 by 0.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exps = np.exp(shifted)
    # Named `totals` so the builtin `sum` is not shadowed.
    totals = np.sum(exps, axis=0, keepdims=True)
    return exps / totals

In [260]:
# Sanity check: softmax normalises each of the 2 columns to sum to 1,
# so the total over the whole array should be 2.
A = softmax(np.random.randn(10, 2))
print(A.sum())
# Compare with a tolerance: the floating-point total is rarely exactly 2.
assert math.isclose(2, A.sum())

2.0000000000000004


In [206]:
def linear_forward(A_prev, W, b):
    """Compute the affine step ``W . A_prev + b`` of a layer.

    Args:
        A_prev: Input to the layer (right operand of the dot product).
        W: Weight matrix (left operand of the dot product).
        b: Bias added to the product.

    Returns:
        A pair ``(Z, cache)`` where ``Z`` is the affine output and ``cache``
        is the tuple ``(A_prev, W, b)`` of the inputs, kept for the backward
        pass.
    """
    weighted = np.dot(W, A_prev)
    pre_activation = weighted + b
    return pre_activation, (A_prev, W, b)

In [7]:
def forward_activation(Z, activation):
    """Apply the named activation function to ``Z``.

    Args:
        Z: Pre-activation values.
        activation: Either ``"relu"`` or ``"softmax"``.

    Returns:
        A pair ``(A, Z)``: the activated values and the unchanged input ``Z``
        (returned alongside so the caller can keep it for the backward pass).

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    if activation == "relu":
        return relu(Z), Z
    if activation == "softmax":
        return softmax(Z), Z
    # Fail loudly here instead of falling through and returning None, which
    # would only surface later as an unpacking error at the call site.
    raise ValueError(f"Unsupported activation: {activation!r}")

In [203]:
def compute_cost(A, Y, m, eps=1e-12):
    """Compute the cross-entropy cost ``-sum(Y * log(A)) / m``.

    Args:
        A: Predicted probabilities.
        Y: Target values with the same shape as ``A``.
        m: Divisor applied to the summed loss.
        eps: Lower bound applied to ``A`` before taking the logarithm.

    Returns:
        The scalar cost.
    """
    # Floor the probabilities so log() never sees an exact 0. Otherwise
    # log(0) = -inf, and 0 * -inf = NaN poisons the sum even where Y is 0.
    safe_A = np.maximum(A, eps)
    return -np.sum(Y * np.log(safe_A)) / m

In [262]:
# Build a 10x2 one-hot target matrix: column 0 marks row 0, column 1 marks row 1.
Y = np.zeros((10, 2))
Y[0, 0] = 1
Y[1, 1] = 1
print(Y)
# Random scores pushed through softmax stand in for predictions.
A = softmax(np.random.randn(10, 2))
print(A)
# Cost over the 2 columns.
print(compute_cost(A, Y, 2))

[[1. 0.]
 [0. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
[[0.08943704 0.13513204]
 [0.07254956 0.09116356]
 [0.37362549 0.10807862]
 [0.10479987 0.08596945]
 [0.07538426 0.06104204]
 [0.00954156 0.05769868]
 [0.07634918 0.16520432]
 [0.13501739 0.11714793]
 [0.02936771 0.09566227]
 [0.03392796 0.08290109]]
2.4046602078576287


In [249]:
def linear_backward(dZ, cache):
    """Backward pass of the affine step ``Z = W . A_prev + b``.

    Args:
        dZ: Gradient of the cost with respect to ``Z``; same shape as ``Z``.
        cache: Tuple ``(A_prev, W, b)`` as produced by ``linear_forward``.

    Returns:
        A tuple ``(dW, db, dA_prev)``:
        ``dW`` has the shape of ``W``, ``db`` is a column with one entry per
        row of ``dZ``, and ``dA_prev`` has the shape of ``A_prev``.
    """
    A_prev, W, _ = cache
    # The second axis of A_prev is used as the divisor, i.e. the gradients
    # are averaged over the columns.
    m = A_prev.shape[1]

    # The weight gradient is built from the upstream gradient dZ, not from W.
    dW = np.dot(dZ, A_prev.T) / m
    # Reduce over axis 1 so db is a column that can be subtracted from a
    # column-shaped bias like the one linear_forward broadcasts.
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dW, db, dA_prev

In [216]:
def relu_activation_backward(Z):
    """Return the element-wise derivative of ReLU evaluated at ``Z``.

    Args:
        Z: Array of pre-activation values.

    Returns:
        Array shaped and typed like ``Z`` holding 1 where ``Z > 0`` and 0
        everywhere else (including where ``Z == 0``).
    """
    positive = Z > 0
    # Cast the boolean mask back to Z's dtype to get the 0/1 derivative.
    return positive.astype(Z.dtype)

In [250]:
# Visual check: show random inputs followed by the 0/1 mask of their positive entries.
Z = np.random.randn(10, 2)
print(Z, relu_activation_backward(Z), sep="\n")

[[ 0.53912934 -1.50990938]
 [ 0.2898538  -0.50601689]
 [-0.46231117  0.91124587]
 [ 0.03983663 -0.99073278]
 [-0.15204389  1.06318523]
 [-0.221117    1.20075688]
 [ 0.87472124 -1.76064672]
 [-0.51140411  1.81999754]
 [-0.18035394 -1.26079972]
 [-0.50956385 -0.04391935]]
[[1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 0.]
 [0. 0.]]


In [223]:
def softmax_activation_backward(Y_hat, Y):
    """Return the difference between predictions and targets.

    Args:
        Y_hat: Predicted values.
        Y: Target values, broadcast-compatible with ``Y_hat``.

    Returns:
        ``Y_hat - Y``, computed element-wise.
    """
    # NOTE(review): presumably used as the gradient w.r.t. the softmax input
    # when paired with a cross-entropy cost - confirm against the caller.
    dZ = Y_hat - Y
    return dZ

In [224]:
def update_params(W, b, grads, learning_rate):
    """Apply one gradient-descent step to a layer's parameters.

    Args:
        W: Current weights.
        b: Current biases.
        grads: Pair ``(dW, db)`` of gradients for ``W`` and ``b``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        A pair ``(W, b)`` holding the updated parameters. The inputs are not
        modified in place, so the caller must use the returned values.
    """
    dW, db = grads
    W = W - dW * learning_rate
    b = b - db * learning_rate
    # Rebinding W and b above only changes local names; return them so the
    # update actually reaches the caller.
    return W, b

In [273]:
# Number of rows in the weight matrix / bias vector initialised below.
n_h = 10
# Weights: standard-normal samples scaled by 0.01, shape (n_h, 784).
# NOTE(review): 784 presumably equals 28*28, the flattened image size - confirm.
W1 = np.random.randn(n_h,784) * 0.01
# Biases start at zero, stored as a column of shape (n_h, 1).
b1 = np.zeros((n_h,1))

# Load the 'mnist' dataset and keep its 'train' split.
dataset = load_dataset('mnist')
train_dataset = dataset['train']
# Will hold one flattened (1-D) pixel array per training example.
train_images = []

# Target matrix with 10 rows, filled one column per example by the loop below.
# NOTE(review): 60000 is hard-coded, presumably the number of examples in the
# train split - confirm (len(train_dataset) would avoid the magic number).
Y = np.zeros((10, 60000))
for i, example in enumerate(train_dataset):
    train_images.append(np.array(example['image']).flatten())
    label = example['label']
    # One-hot encoding: row index is the label, column index is the example.
    Y[label][i] = 1