In [1]:
import numpy as np

In [2]:
# Toy dataset: five samples (rows), three features each (columns).
samples = np.array(
    [
        [1, 2, 3],
        [6, 7, 8],
        [7, 8, 9],
        [3, 4, 5],
        [4, 5, 6],
    ]
)

# Convention: the input matrix is written as a capital X. This is an alias
# of `samples` (same array object), not a copy.
X = samples

# One boolean class label per sample row.
targets = np.array([False, True, True, False, False])

In [3]:
# `uniques` holds the sorted distinct labels. With return_inverse=True we also
# get, for every element of `targets`, the index of its value inside `uniques`,
# so `uniques[indicies]` reconstructs the original `targets` array.
uniques, indicies = np.unique(targets, return_inverse=True)

In [4]:
# Allocate the one-hot target matrix, initially all zeros:
# one row per sample, one column per distinct class label.
binary = len(uniques)      # number of distinct classes
n_samples = len(samples)   # number of sample rows
y = np.zeros(shape=(n_samples, binary))

In [5]:
# Integer-array indexing: pair row i with the class index of sample i and
# set exactly that entry to 1, turning the zero matrix into one-hot rows.
row_ids = np.arange(n_samples)
y[row_ids, indicies] = 1
y

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [6]:
# Seed the global RNG so the hidden-layer weights are identical on every run.
np.random.seed(10)

n_features = X.shape[1]  # number of feature columns in X
n_hidden = 4             # number of hidden units

# Hidden-layer weights drawn uniformly from [-0.5, 0.5); biases start at zero.
w_h = np.random.uniform(low=-0.5, high=0.5, size=(n_features, n_hidden))
b_h = np.zeros(shape=(1, n_hidden))

# Demonstration only: a bias row of ones, to show that a (1, n_hidden) bias
# is broadcast onto every sample row of the matrix product.
b_h_show = np.ones(shape=(1, n_hidden))
print(np.dot(X, w_h))
print(X @ w_h + b_h_show)

[[-0.72433282 -2.26463532  0.08585342  2.13004535]
 [-1.02964036 -8.09519327  0.17120801  6.94368505]
 [-1.09070187 -9.26130486  0.18827893  7.90641299]
 [-0.84645584 -4.5968585   0.11999526  4.05550123]
 [-0.90751735 -5.76297009  0.13706617  5.01822917]]
[[ 0.27566718 -1.26463532  1.08585342  3.13004535]
 [-0.02964036 -7.09519327  1.17120801  7.94368505]
 [-0.09070187 -8.26130486  1.18827893  8.90641299]
 [ 0.15354416 -3.5968585   1.11999526  5.05550123]
 [ 0.09248265 -4.76297009  1.13706617  6.01822917]]


In [7]:
# Hidden-layer pre-activation: weighted sum of the inputs plus the bias row.
h1 = np.matmul(X, w_h) + b_h

# ReLU activation: negative entries become 0, positive entries pass through.
a1 = np.maximum(0, h1)
a1

array([[0.        , 0.        , 0.08585342, 2.13004535],
       [0.        , 0.        , 0.17120801, 6.94368505],
       [0.        , 0.        , 0.18827893, 7.90641299],
       [0.        , 0.        , 0.11999526, 4.05550123],
       [0.        , 0.        , 0.13706617, 5.01822917]])

In [8]:
# Re-seed so the output-layer weights are reproducible on their own.
np.random.seed(100)

# Output layer: one column per class (`binary` holds the class count).
w_o = np.random.uniform(low=-0.5, high=0.5, size=(n_hidden, binary))
b_o = np.zeros(shape=(1, binary))

# Raw, unnormalised class scores (logits) for every sample.
o = np.matmul(a1, w_o) + b_o
o

array([[0.32118171, 0.66159156],
       [1.10083177, 2.19782851],
       [1.25676178, 2.5050759 ],
       [0.63304174, 1.27608634],
       [0.78897175, 1.58333373]])

In [9]:
# Numerically stable softmax: subtracting each row's maximum before
# exponentiating keeps exp() from overflowing and does not change the result.
row_max = np.max(o, axis=1, keepdims=True)
e_x = np.exp(o - row_max)

# Normalise every row so its entries sum to 1 (class probabilities).
y_hat = e_x / e_x.sum(axis=1, keepdims=True)
y_hat

array([[0.41570992, 0.58429008],
       [0.25030304, 0.74969696],
       [0.22299211, 0.77700789],
       [0.34455863, 0.65544137],
       [0.31123284, 0.68876716]])

In [10]:
import torch

# Cross-check: PyTorch's softmax over the class dimension (dim=1) should
# reproduce the hand-written NumPy result above.
torch.tensor(o).softmax(dim=1)

tensor([[0.4157, 0.5843],
        [0.2503, 0.7497],
        [0.2230, 0.7770],
        [0.3446, 0.6554],
        [0.3112, 0.6888]], dtype=torch.float64)

In [None]:
# Machine epsilon for 64-bit floats: the gap between 1.0 and the next
# representable float (not the smallest representable number).
np.finfo(np.float64).eps

2.220446049250313e-16

In [None]:
# Bound the predicted probabilities to [eps, 1 - eps] so that a probability of
# exactly 0 can never reach np.log, where log(0) would evaluate to -inf.
y_hat_clipped = y_hat.clip(
    min=np.finfo(float).eps,
    max=1 - np.finfo(float).eps,
)



In [17]:
# Categorical cross-entropy. The one-hot rows of `y` keep only the log
# probability of the true class; negate it to get a per-sample loss, then
# average over all samples.
neg_logs = -(y * np.log(y_hat_clipped)).sum(axis=1)
cce_loss = neg_logs.mean()
cce_loss

0.7301727079439226

Forward-Pass complete

A one-hot vector is just a fancy way of saying “I’m exactly one of these things, and nothing else.”