## Derivación a Mano

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Toy data: four token ids stored as a column vector, plus their binary labels.
X = torch.tensor([0,3,4,3]).reshape(-1,1)
m = X.shape[0]  # number of examples; the hand-derived gradient divides by this
y = torch.tensor([1,0,1,0]).reshape(-1,1).float()

# Embedding table: one row per vocabulary entry (5 rows), 3 components per row.
E = torch.tensor([[0.1,0.2,0.3],[0,0.1,-0.1],[-0.2,0,0.2],[0.4,0,-0.1],[0.2,-0.2,0.1]], requires_grad=True)

# One-hot encoding of the token ids. num_classes is pinned to the number of
# rows of E: without it, one_hot sizes the encoding from the largest id present
# in X, so S.T @ ... would stop matching E's shape whenever the last
# vocabulary entry is absent from the batch.
S = F.one_hot(X.squeeze(-1), num_classes=E.shape[0]).float()
# Weights (as a column vector) and bias of the single output unit.
W1 = torch.tensor([0.5,-0.25,0.1]).reshape(-1,1)
b = torch.tensor([0.1])


In [2]:
# Embedding lookup: pick the row of E that corresponds to each token id.
token_ids = X.squeeze(-1)
e = E[token_ids]
e

tensor([[ 0.1000,  0.2000,  0.3000],
        [ 0.4000,  0.0000, -0.1000],
        [ 0.2000, -0.2000,  0.1000],
        [ 0.4000,  0.0000, -0.1000]], grad_fn=<IndexBackward0>)

In [3]:
# Linear projection of every embedded token onto the single output unit.
phi1 = torch.matmul(e, W1)
phi1


tensor([[0.0300],
        [0.1900],
        [0.1600],
        [0.1900]], grad_fn=<MmBackward0>)

In [4]:
# Logits: projection plus the bias (b is broadcast over the rows).
Z = torch.add(phi1, b)
Z

tensor([[0.1300],
        [0.2900],
        [0.2600],
        [0.2900]], grad_fn=<AddBackward0>)

In [5]:
# Predicted probabilities: element-wise sigmoid of the logits.
p = Z.sigmoid()
p

tensor([[0.5325],
        [0.5720],
        [0.5646],
        [0.5720]], grad_fn=<SigmoidBackward0>)

In [6]:
# Hand-derived gradient with respect to E: the residual (p - y) is routed back
# to the rows of E through S.T, scaled by the output weights, and divided by m.
residual = p - y
(S.T @ residual @ W1.T) / m

tensor([[-0.0584,  0.0292, -0.0117],
        [ 0.0000, -0.0000,  0.0000],
        [ 0.0000, -0.0000,  0.0000],
        [ 0.1430, -0.0715,  0.0286],
        [-0.0544,  0.0272, -0.0109]], grad_fn=<DivBackward0>)

## Derivación en PyTorch

In [7]:
class EmbeddingMLP(nn.Module):
  """Embedding lookup followed by one linear unit that outputs a raw logit.

  The parameters are initialised from the notebook-level tensors ``E``
  (embedding table), ``W1`` (column vector of output weights) and ``b``
  (bias), so the forward pass uses the same values as the step-by-step
  computation earlier in the notebook.
  """

  def __init__(self):
    super().__init__()
    # Layer sizes come from E, so the declared shapes always agree with the
    # values copied in below.
    vocab_size, emb_dim = E.shape
    self.emb = nn.Embedding(vocab_size, emb_dim)
    # in_features is the embedding width: the layer consumes emb's output.
    self.fc = nn.Linear(emb_dim, 1)
    # copy_ raises on a shape mismatch and gives the module its own storage,
    # so it does not alias E, W1 or b.
    with torch.no_grad():
      self.emb.weight.copy_(E)
      self.fc.weight.copy_(W1.T)
      self.fc.bias.copy_(b)

  def forward(self,x):
    """Return one logit per token id in ``x`` (last axis has size 1)."""
    x = self.emb(x)
    x = self.fc(x)
    return x

# Build the model and a loss that applies the sigmoid to raw logits itself.
model = EmbeddingMLP()
criterion = nn.BCEWithLogitsLoss()

# X is a column of ids, so the logits carry one extra trailing axis; give the
# labels the same trailing axis before comparing them.
logits = model(X)
targets = y.unsqueeze(-1)
loss = criterion(logits, targets)

# Backpropagate so that model.emb.weight.grad is populated.
loss.backward()

In [8]:
# The autograd gradient of the embedding table must agree with the
# hand-derived expression S.T @ (p - y) @ W1.T / m; fail loudly if it does not
# rather than relying on comparing the two printed tensors by eye.
torch.testing.assert_close(
    model.emb.weight.grad,
    (S.T @ (p - y) @ W1.T / m).detach(),
)
model.emb.weight.grad

tensor([[-0.0584,  0.0292, -0.0117],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.1430, -0.0715,  0.0286],
        [-0.0544,  0.0272, -0.0109]])