In [13]:
# Relu forward and backward tests
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Load data
X = np.load(r'../with-torch-tests/relu-layer/X_relu.npy')
Y = np.load(r'../with-torch-tests/relu-layer/out_relu.npy')
dY = np.load(r'../with-torch-tests/relu-layer/up_grad_relu.npy')
dX = np.load(r'../with-torch-tests/relu-layer/down_grad_relu.npy')

# Convert to PyTorch tensors
# make sure to set requires_grad=True for the input tensor so that the Autograd engine can compute the gradients
X_torch = torch.from_numpy(X).to(torch.float32).requires_grad_(True)
dY_torch = torch.from_numpy(dY).to(torch.float32)

# Forward pass with ReLU
loss = nn.CrossEntropyLoss()
Y_torch = loss(X_torch)

# Compare the forward pass results
print("Forward pass comparison:")
print("Y (numpy):", Y[0, 0:5])
print("Y_torch:", Y_torch.detach().numpy()[0, 0:5])
print("Match:", np.allclose(Y, Y_torch.detach().numpy(), atol=1e-4, rtol=1e-4))


# Validate the backward pass
Y_torch.backward(dY_torch)

# Get the gradients from X_torch
dX_torch = X_torch.grad

# Compare the backward pass results
print("\nBackward pass comparison:")
print("dX (numpy):", dX[0, 0:5])
print("dX_torch:", dX_torch.numpy()[0, 0:5])
print("Match:", np.allclose(dX, dX_torch.numpy(), atol=1e-4, rtol=1e-4))

tensor([[ 0.0281,  0.0416,  0.0497, -0.2930,  0.1736],
        [ 0.0384,  0.1314,  0.0290,  0.1127, -0.3116],
        [ 0.0245,  0.0842,  0.0635, -0.2918,  0.1196]])


In [1]:
import torch
import torch.nn.functional as F

def crossentropy_softmax_backward_cpu(dlogits, dlosses, probs, targets):
    B, V = dlogits.shape
    for b in range(B):
        dlogits_b = dlogits[b]
        probs_b = probs[b]
        dloss = dlosses[b]
        ix = targets[b]
        for i in range(V):
            p = probs_b[i]
            indicator = 1.0 if i == ix else 0.0
            dlogits_b[i] += (p - indicator) * dloss

def crossentropy_softmax_backward(dlogits, dlosses, probs, targets):
    B = len(targets)
    V = len(probs) // B

    # Backwards through both softmax and crossentropy
    for b in range(B):
        for i in range(V):
            indicator = 1.0 if i == targets[b] else 0.0
            dlogits[i + b * V] += probs[i + b * V]*(probs[i + b * V] - indicator) * dlosses[b]


# Input data
B = 2
V = 3
dlogits = torch.zeros(B, V, requires_grad=True)
dlosses = torch.tensor([0.1, 0.2])
probs = F.softmax(torch.rand(B, V), dim=1)
targets = torch.tensor([1, 0])

# convert arrays to numpy
dlogits_numpy = dlogits.detach().numpy()
dlosses_numpy = dlosses.numpy()
probs_numpy = probs.detach().numpy()
targets_numpy = targets.numpy()

# Custom implementation
crossentropy_softmax_backward(dlogits_numpy, dlosses_numpy, probs_numpy, targets_numpy)

# Compute gradients using PyTorch autograd
loss = F.cross_entropy(dlogits, targets)
loss.backward()

# Compare gradients
print("Gradients - Custom Implementation:")
print(dlogits.grad)

print("\nGradients - PyTorch Autograd:")
print(dlogits.grad.numpy())


Gradients - Custom Implementation:
tensor([[ 0.1673, -0.3337,  0.1664],
        [-0.3319,  0.1663,  0.1656]])

Gradients - PyTorch Autograd:
[[ 0.1672683  -0.33369532  0.16642703]
 [-0.3318987   0.16631456  0.16558418]]


In [5]:
import torch
import torch.nn.functional as F

# Assuming you have your model predictions and ground truth labels
predictions = torch.randn(3, 5, requires_grad=True)
labels = torch.tensor([2, 0, 4])

# Compute cross-entropy loss
loss = F.cross_entropy(predictions, labels)

# Perform backward pass
loss.backward()

# Access gradients
gradients = predictions.grad

print("Gradients:")
print(gradients)


Gradients:
tensor([[ 0.1281,  0.0192, -0.3127,  0.1316,  0.0338],
        [-0.3003,  0.0147,  0.1182,  0.1163,  0.0511],
        [ 0.0260,  0.0581,  0.0192,  0.0581, -0.1615]])


In [19]:
import torch

# Validation
B = 3  # Batch size
V = 4  # Vocabulary size

# Random input tensors
dlogits = torch.tensor([[0.567248, 0.515976, 0.648122, 0.720573, 0.385052],
 [0.380139, 0.0264595, 0.901303, 0.209906, 0.861324],
 [0.545885, 0.964293, 0.628193, 0.372692, 0.949461]]
)
dlosses = torch.tensor([[-0.0282296, -0.454024, 0.604358, -0.817072, -0.103977],
 [ 0.280313, 0.946532, 0.321757, 0.581225, 0.12656],
 [-0.919248, 0.76281, -0.863277, -0.406415, 0.970153]])

# probs = torch.softmax(torch.randn(B, V), dim=1)
targets = torch.tensor([1,0,0])

# PyTorch's autograd requires gradients to be calculated
dlogits.requires_grad = True

# Compute loss and gradients
loss = torch.nn.CrossEntropyLoss()(dlogits, targets)
loss.backward()

# Print and compare gradients
print("\nGradients from PyTorch autograd:")
print(dlogits.grad)



Gradients from PyTorch autograd:
tensor([[ 0.0662, -0.2704,  0.0718,  0.0772,  0.0552],
        [-0.2763,  0.0400,  0.0960,  0.0481,  0.0922],
        [-0.2772,  0.0852,  0.0609,  0.0472,  0.0840]])
