In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torchinfo import summary
import numpy as np

In [2]:
class LinearFunction(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(1))

    def forward(self, x):
        return self.a * x

In [3]:
model = LinearFunction()
batch_size = 1
summary(model, input_size=(batch_size, 1))

Layer (type:depth-idx)                   Output Shape              Param #
LinearFunction                           [1, 1]                    1
Total params: 1
Trainable params: 1
Non-trainable params: 0
Total mult-adds (M): 0
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [4]:
model.a

Parameter containing:
tensor([-0.7021], device='cuda:0', requires_grad=True)

In [5]:
model(3)

tensor([-2.1064], device='cuda:0', grad_fn=<MulBackward0>)

In [6]:
x = torch.linspace(start=0, end=1, steps=10)
y_true = 2 * x + torch.rand(10)/5
y_true

tensor([0.0562, 0.4220, 0.5055, 0.7469, 0.9412, 1.2882, 1.4002, 1.6240, 1.9462,
        2.0788])

In [7]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)


In [8]:
num_epochs = 100
for n in range(num_epochs):
    y_pred = model(x)
    loss = loss_fn(y_pred, y_true)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [9]:
model.a

Parameter containing:
tensor([-0.7021], device='cuda:0', requires_grad=True)

In [10]:
model.a.grad

In [11]:
model.zero_grad
x_test = torch.rand(1,5)
x_test.requires_grad = True
x_test


tensor([[0.9059, 0.3891, 0.1664, 0.7872, 0.5475]], requires_grad=True)

In [12]:
y_pred = model(x_test)
y_pred

tensor([[1.9445, 0.8351, 0.3571, 1.6896, 1.1753]], grad_fn=<MulBackward0>)

In [16]:
for i in range(5):
    print(torch.autograd.grad(y_pred[:,i], x_test, retain_graph=True)[0]) # takes only scalar yi = y[:,i]

tensor([[2.1464, 0.0000, 0.0000, 0.0000, 0.0000]])
tensor([[0.0000, 2.1464, 0.0000, 0.0000, 0.0000]])
tensor([[0.0000, 0.0000, 2.1464, 0.0000, 0.0000]])
tensor([[0.0000, 0.0000, 0.0000, 2.1464, 0.0000]])
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 2.1464]])
