In [20]:
import torch
from torch import nn, Tensor
from torch.nn import Parameter, Softmax, ReLU, Linear, Sequential, MSELoss

In [23]:
f = nn.Softmax(dim=0)
x = nn.Parameter(Tensor([-1., 0., 1., 2., 5., -6.]), requires_grad=True)
y = f(x)
print(y)

test = nn.Parameter(Tensor([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]]))
print(test.shape)
with torch.no_grad():
    target = y+ Tensor([-1., 0., 1., 1, -5, -1.])

loss_fn = nn.MSELoss()
loss = loss_fn(y, target)
loss.backward()

print(x.grad)

tensor([2.3008e-03, 6.2543e-03, 1.7001e-02, 4.6213e-02, 9.2822e-01, 1.5503e-05],
       grad_fn=<SoftmaxBackward0>)
torch.Size([2, 2, 3])
tensor([-2.7458e-03, -9.5486e-03, -3.1623e-02, -8.5959e-02,  1.2989e-01,
        -1.8501e-05])


In [56]:
criterion = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, 1.0, 0.1]])

target1 = torch.tensor([0])
fake_param1 = Parameter(torch.zeros_like(logits), requires_grad=True)
loss1 = criterion(logits+fake_param1, target1)
print(loss1.item())
loss1.backward()
print(fake_param1.grad)

print("")

target2 = torch.tensor([[0.9, 0.1, 0.]])
fake_param2 = Parameter(torch.zeros_like(logits), requires_grad=True)
loss2 = criterion(logits+fake_param2, target2)
print(loss2.item())
loss2.backward()
print(fake_param2.grad)



0.4170299470424652
tensor([[-0.3410,  0.2424,  0.0986]])

0.5170299410820007
tensor([[-0.2410,  0.1424,  0.0986]])


In [33]:
conv1d = nn.Conv1d(in_channels=2, out_channels=3, kernel_size=3, padding=0)

model = Sequential(conv1d)
model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

x = Tensor([[ [0.,1.,2.,3.,4.,5.], [0.,-1.,-2.,-3.,-4.,-5.] ]])
x.requires_grad = True
y = model(x)
print(y)
loss_fn = MSELoss()
with torch.no_grad():
    target = y+Tensor([[ [-1.,0.,-2.,-1.], [1.,0.,-2.,-4.], [-2.,-1.,0.,-1.] ]])
print(target)
loss = loss_fn(y, target)
loss.backward()


print("FIRST CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)
print("")
print("--------------------------")

print("SECOND CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)


tensor([[[ -6., -10., -14., -18.],
         [  6.,   8.,  10.,  12.],
         [ -2.,  -3.,  -4.,  -5.]]], grad_fn=<ConvolutionBackward0>)
tensor([[[ -7., -10., -16., -19.],
         [  7.,   8.,   8.,   8.],
         [ -4.,  -4.,  -4.,  -6.]]])
FIRST CONVOLUTION
Gradient wrt x:
tensor([[[-0.8333, -0.6667, -1.0000, -0.8333,  0.1667, -0.1667],
         [-1.3333, -0.3333,  0.6667,  0.3333, -0.1667, -0.8333]]])
Gradient wrt weights:
tensor([[[ 1.1667,  1.8333,  2.5000],
         [-1.1667, -1.8333, -2.5000]],

        [[ 2.6667,  3.5000,  4.3333],
         [-2.6667, -3.5000, -4.3333]],

        [[ 0.6667,  1.3333,  2.0000],
         [-0.6667, -1.3333, -2.0000]]])
Gradient wrt bias:
tensor([0.6667, 0.8333, 0.6667])

--------------------------
SECOND CONVOLUTION
Gradient wrt x:
tensor([[[-0.8333, -0.6667, -1.0000, -0.8333,  0.1667, -0.1667],
         [-1.3333, -0.3333,  0.6667,  0.3333, -0.1667, -0.8333]]])
Gradient wrt weights:
tensor([[[ 1.1667,  1.8333,  2.5000],
         [-1.1667, -1.833

In [20]:
linear = nn.Linear(in_features=3, out_features=2, bias=True)

linear.weight = Parameter(torch.tensor([[-1.,  2., -3.],
                                        [ 4.,  5., -6.]]))

linear.bias = Parameter(torch.tensor([-1., 3.]))  

model = nn.Sequential(linear)

x = Parameter(Tensor([0., 1., 2.]))
y = model(x)
print(y)

loss_fn = MSELoss()
with torch.no_grad():
    target = Tensor([-4., -3.])
loss = loss_fn(y, target)
loss.backward(retain_graph=True)
print(x.grad)

loss.backward()
print(x.grad)





tensor([-5., -4.], grad_fn=<ViewBackward0>)
tensor([-3., -7.,  9.])
tensor([ -6., -14.,  18.])


In [19]:
x = torch.tensor([0., 1., 2.], requires_grad=True)
grad_out = torch.tensor([-4., -3.])

linear = Linear(in_features=3, out_features=2, bias=True)

with torch.no_grad():
    linear.weight.copy_(torch.tensor([[-1.,  2., -3.],
                                      [ 4.,  5., -6.]]))
    linear.bias.copy_(torch.tensor([-1., 3.]))

y = linear(x)

y.backward(grad_out)

print("propagated_loss:")
print(x.grad)

print("weight.grad:")
print(linear.weight.grad)

print("bias.grad:")
print(linear.bias.grad)


propagated_loss:
tensor([ -8., -23.,  30.])
weight.grad:
tensor([[-0., -4., -8.],
        [-0., -3., -6.]])
bias.grad:
tensor([-4., -3.])


In [27]:
# Input
x = Tensor([[[0., 1., 2., 3., 4., 5.],
             [0., -1., -2., -3., -4., -5.]]])  # shape: (batch=1, in_channels=2, length=6)
x.requires_grad = True

# Conv1d layer
conv1d = nn.Conv1d(
    in_channels=2,
    out_channels=3,
    kernel_size=3,
    stride=3,
    dilation=2,
    padding=1,
    bias=True
)

# Set weights manually
conv1d.weight = Parameter(torch.tensor([
    [[-1., 0., 1.], [-1., 2., 3.]],    # out_channel 0
    [[0., 0., 0.], [1., -1., -2.]],    # out_channel 1
    [[-2., -1., -2.], [-3., -1., 0.]]  # out_channel 2
], dtype=torch.float32))

conv1d.bias = Parameter(torch.tensor([0., 1., 2.], dtype=torch.float32))

# Sequential model
model = nn.Sequential(conv1d)

# Forward pass
y = model(x)
print("Output y:")
print(y)

# Define target for MSE loss
target = y + Tensor([[[ -1., 0., -2., -1.],
                      [ 1., 0., -2., -4.],
                      [ -2., -1., 0., -1.]]])

# Compute loss and backward
loss_fn = nn.MSELoss()
loss = loss_fn(y, target)
loss.backward()

print("Input gradient x.grad:")
print(x.grad)


Output y:
tensor([[[-8., -8.],
         [ 8.,  3.],
         [-4.,  4.]]], grad_fn=<ConvolutionBackward0>)


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (2) must match the size of tensor b (4) at non-singleton dimension 2

In [49]:
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import Parameter

x = Tensor([[[0., 1., 2., 3., 4., 5.],      # channel 0
             [0., -1., -2., -3., -4., -5.]]])  # channel 1

x.requires_grad = True

print("Input shape:", x.shape)

conv1d = nn.Conv1d(
    in_channels=2,
    out_channels=3,
    kernel_size=3,
    stride=3,
    dilation=2,
    padding=1,
    bias=True
)

model = Sequential(conv1d)
model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

#print("\nWeight shape:", conv1d.weight.shape)
#print("\nBias:", conv1d.bias.tolist())

x = Tensor([[ [0.,1.,2.,3.,4.,5.], [0.,-1.,-2.,-3.,-4.,-5.] ]])
x.requires_grad = True
y = model(x)
print(y)
print("Output shape:", y.shape)



loss_fn = MSELoss()
with torch.no_grad():
    target = Tensor([[ [-4.,-3.],[-2.,-1.], [0.,1.] ]])
print(target)
loss = loss_fn(y, target)
loss.backward()


print("FIRST CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)
print("")
print("--------------------------")

print("SECOND CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)


Input shape: torch.Size([1, 2, 6])
tensor([[[-8., -8.],
         [ 8.,  3.],
         [-4.,  4.]]], grad_fn=<ConvolutionBackward0>)
Output shape: torch.Size([1, 3, 2])
tensor([[[-4., -3.],
         [-2., -1.],
         [ 0.,  1.]]])
tensor(30.3333, grad_fn=<MseLossBackward0>)
FIRST CONVOLUTION
Gradient wrt x:
tensor([[[  0.0000,   1.3333,  -0.3333,   1.3333,  -1.0000,   0.0000],
         [  0.0000,  -4.6667,   0.0000, -10.6667,  -5.6667,   0.0000]]])
Gradient wrt weights:
tensor([[[ -3.3333,  -8.0000,  -4.0000],
         [  3.3333,   8.0000,   4.0000]],

        [[  2.6667,   8.6667,  10.0000],
         [ -2.6667,  -8.6667, -10.0000]],

        [[  2.0000,   2.6667,  -4.0000],
         [ -2.0000,  -2.6667,   4.0000]]])
Gradient wrt bias:
tensor([-3.0000,  4.6667, -0.3333])

--------------------------
SECOND CONVOLUTION
Gradient wrt x:
tensor([[[  0.0000,   1.3333,  -0.3333,   1.3333,  -1.0000,   0.0000],
         [  0.0000,  -4.6667,   0.0000, -10.6667,  -5.6667,   0.0000]]])
Gradient 

In [59]:
output = torch.tensor([1., 2., -3.], requires_grad=True)
label = torch.tensor([-5., -4., 2.])

loss_fn = MSELoss()
loss = loss_fn(output, label)
loss.backward()

print(output.grad)


tensor([ 4.0000,  4.0000, -3.3333])
