In [2]:
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from tqdm import tqdm

In [3]:
m = 1000

X, y = make_blobs(n_samples=m,
                  n_features=3,
                  shuffle=True,
                  random_state=42)

train_size = 750

X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((750, 3), (750,), (250, 3), (250,))

In [4]:
X_train[:10]

array([[-1.62313406,  8.87720831,  4.84001326],
       [ 1.71267112, -5.15279335, -6.78367587],
       [-1.94449656,  8.37968109,  5.63563107],
       [-8.99181204,  8.71078033,  3.16892415],
       [-4.30124272,  8.56751262,  5.84480176],
       [-9.5004586 ,  6.72571453,  2.36111484],
       [-3.43328046,  6.40173712,  5.59024852],
       [-9.15841974,  6.92686076,  2.50482428],
       [-2.84344471,  8.69643922,  4.95764587],
       [-7.03616295,  7.70662313,  2.50260489]])

In [5]:
# matching torch model test

bias = np.array([-0.4236,  0.5018,  0.1081])

weights = np.array(
        [[ 0.4414,  0.4792, -0.1353],
        [ 0.5304, -0.1265,  0.1165],
        [-0.2811,  0.3391,  0.5090]]
)

<img src="https://www.gstatic.com/education/formulas2/472522532/en/softmax_function.svg">

In [6]:
sample = X_train[:2]
print(sample)
sample[:,None].shape

[[-1.62313406  8.87720831  4.84001326]
 [ 1.71267112 -5.15279335 -6.78367587]]


(2, 1, 3)

In [11]:
def softmax(X):
    a = np.exp(X)
    return a/np.sum(a)

def forward(X):
    pred = weights.dot(X) + bias
    #pred = softmax(pred)
    return pred

def softmax_multi(X):
    expd = np.exp(X)
    sums = np.sum(expd, axis=1)
    return (expd.T/sums).T
    
def forward_multi(X):
    pred = np.sum(weights*X[:,None], axis=2) + bias
    #red = softmax_multi(pred)
    return pred

def one_hot_batch(a):
    return np.squeeze(np.eye(3)[a.reshape(-1)])

def cross_entropy_loss(y_pred, y_true):
    y_pred = softmax(y_pred)
    lce = -np.sum(y_true*(np.log(y_pred)))
    return lce

def cross_entropy_loss_multi(y_pred, y_true):
    y_pred = softmax_multi(y_pred)
    lce = -np.sum(y_true*np.log(y_pred))
    return lce/(len(y_true))

def compute_loss_gradients(y_pred, y_true):
    dL = -(y_true / y_pred) + (1 - y_true) / (1 - y_pred)
    return dL


# single

i = 5

sample = X_train[i]
label = one_hot_batch(y_train[i])
logits = forward(sample)
loss = cross_entropy_loss(logits, label)

print("Logits:  ", logits)
print("Softmax: ", softmax(logits))
print("Loss:    ", loss)
print("\n")



# multi

i = 5

sample = X_train[:i]
labels = one_hot_batch(y_train[:i])
logits = forward_multi(sample)
loss = cross_entropy_loss_multi(logits, labels)


print(f"Logits: {logits.shape}\n", logits)
print(f"\nSoftmax: {softmax(logits).shape}\n", softmax_multi(logits))
print("\nLoss:\n", loss)
print("\n")



d = compute_loss_gradients(logits, labels)
d

Logits:   [-1.71359886 -5.11297625  6.26117616]
Softmax:  [3.43910028e-04 1.14845504e-05 9.99644605e-01]
Loss:     0.0003554577461493537


Logits: (5, 3)
 [[ 2.45905305 -0.91821561  6.03819107]
 [-1.21901419  1.27173088 -5.5735351 ]
 [ 1.97114151 -0.93303961  6.36478406]
 [-0.64713534 -5.00019115  7.20250637]
 [ 0.99258183 -2.18245008  7.19742695]]

Softmax: (5, 3)
 [[2.71173407e-02 9.25801654e-04 9.71956858e-01]
 [7.64343987e-02 9.22583527e-01 9.82074543e-04]
 [1.21966860e-02 6.68300838e-04 9.87135013e-01]
 [3.89737730e-04 5.01492372e-06 9.99605247e-01]
 [2.01538116e-03 8.42283621e-05 9.97900390e-01]]

Loss:
 2.860418402503541




array([[-0.4066606 ,  0.52131783, -0.19848394],
       [ 0.45065056, -0.78632989,  0.15212515],
       [-0.50732025,  0.51731997, -0.18640079],
       [ 0.60711465,  0.16666136, -0.13884056],
       [-1.00747361,  0.31422331, -0.16135729]])