# Neural Networks

In [2]:
import numpy as np

from sklearn.datasets import fetch_openml

In [3]:
samples, targets = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)

In [95]:
# Normalization
samples = samples / 255.

In [181]:
def get_one_hot(targets, nb_classes):
    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
    return res.reshape(list(targets.shape)+[nb_classes])


def sigmoid(x):
    return 1. / (1. + np.exp(-x))


def sigmoid_grad(out, grad_in):
    return out * (1 - out) * grad_in


def softmax(x):
    num = np.exp(x - np.max(x))
    
    return num / np.sum(num, axis=0, keepdims=True)


# def softmax_grad(out, grad_in):
#       return (np.einsum('ij,jk->ijk', out, np.eye(out.shape[-1])) \
#            - np.einsum('ij,ik->ijk', out, out))
    
    
def softmax_grad(probs, bp_err):
    dim = probs.shape[1]
    output = np.empty(probs.shape)
    for j in range(dim):
        d_prob_over_xj = - (probs * probs[:,[j]])  # i.e. prob_k * prob_j, no matter k==j or not
        d_prob_over_xj[:,j] += probs[:,j]   # i.e. when k==j, +prob_j
        output[:,j] = np.sum(bp_err * d_prob_over_xj, axis=1)
    return output


def cross_entropy_loss(pred, target):
    return -target * np.log(pred)


def cross_entropy_grad(pred, target):
    return pred - target

In [166]:
num_features = samples.shape[1]
num_classes = 10

# Hidden layer configuration
num_nodes = 200
layer1_weights = (np.random.rand(num_nodes, num_features + 1) - 0.5)

# Output layer configuration
layer2_weights = (np.random.rand(num_classes, num_nodes + 1) - 0.5)

# Forward Pass

In [167]:
layer1_a = layer1_weights @ np.concatenate((np.ones((samples.shape[0], 1)), samples), axis=1).T
layer1_z = sigmoid(layer1_a)
layer2_a = layer2_weights @ np.concatenate((np.ones((layer1_a.shape[1], 1)), layer1_a.T), axis=1).T
layer2_z = softmax(layer2_a)

In [168]:
loss = cross_entropy_loss(layer2_z.T, get_one_hot(targets.astype(int), 10))

In [169]:
print(np.sum(np.mean(loss, axis=0)))

16.49095674734101


# Backward Pass

In [188]:
d_loss = cross_entropy_grad(layer2_z.T, get_one_hot(targets.astype(int), 10))
d_layer2_z = softmax_grad(layer2_z.T, d_loss)
print(layer2_weights.shape)
# d_layer2_weights = d_layer2_z * np.concatenate((np.ones((layer1_a.shape[1], 1)), layer1_a.T), axis=1).T

(10, 201)


In [180]:
print(d_softmax[0])

[-1.75858561e-11 -5.64890638e-15 -1.27426123e-03 -5.86122676e-21
 -6.17716959e-02 -5.42094108e-05 -1.74975953e-04 -4.26398328e-11
  6.32751425e-02 -2.12157850e-12]
