In [3]:
import torch
from torch import nn, Tensor
from torch.nn import Parameter, Softmax, ReLU, Linear, Sequential, MSELoss

In [4]:
f = nn.Softmax(dim=0)
x = nn.Parameter(Tensor([-1., 0., 1., 2., 5., -6.]), requires_grad=True)
y = f(x)
print(y)

with torch.no_grad():
    target = Tensor([2.e-03, 7.e-03, 2e-02, 7.e-02, 9.e-01, 1.e-3])
    
print(target.sum())
    
def loss_fn(output, target):
    return torch.sum(-torch.log(output) * target)

loss = loss_fn(y, target)
print(loss)
loss.backward()
print(x.grad)
x.grad.zero_()

loss_fn2 = nn.CrossEntropyLoss()
loss2 = loss_fn2(x, target)
loss2.backward()

print(x.grad)




torch.Size([6])
tensor([2.3008e-03, 6.2543e-03, 1.7001e-02, 4.6213e-02, 9.2822e-01, 1.5503e-05],
       grad_fn=<SoftmaxBackward0>)
tensor(1.0000)
tensor(0.4225, grad_fn=<SumBackward0>)
tensor([ 0.0003, -0.0007, -0.0030, -0.0238,  0.0282, -0.0010])
tensor([ 0.0003, -0.0007, -0.0030, -0.0238,  0.0282, -0.0010])


In [19]:
x = nn.Parameter(Tensor([0.8, 0.2, 0.]), requires_grad=True)
target = Tensor([0.9, 0.1, 0.])

loss = loss_fn(x, target)

loss.backward()
print(x.grad)

tensor([-1.1250, -0.5000,     nan])


In [23]:
x = nn.Parameter(Tensor([2., 1., 0.1]), requires_grad=True)

with torch.no_grad():
    target = Tensor([0.9, 0.1, 0.])
    
loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(x, target)
loss.backward()

print(x.grad)



tensor([-0.2410,  0.1424,  0.0986])


In [56]:
criterion = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, 1.0, 0.1]])

target1 = torch.tensor([0])
fake_param1 = Parameter(torch.zeros_like(logits), requires_grad=True)
loss1 = criterion(logits+fake_param1, target1)
print(loss1.item())
loss1.backward()
print(fake_param1.grad)

print("")

target2 = torch.tensor([[0.9, 0.1, 0.]])
fake_param2 = Parameter(torch.zeros_like(logits), requires_grad=True)
loss2 = criterion(logits+fake_param2, target2)
print(loss2.item())
loss2.backward()
print(fake_param2.grad)



0.4170299470424652
tensor([[-0.3410,  0.2424,  0.0986]])

0.5170299410820007
tensor([[-0.2410,  0.1424,  0.0986]])


In [3]:
conv1d = nn.Conv1d(in_channels=2, out_channels=3, kernel_size=3, padding=0)

model = Sequential(conv1d)
model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

x = Tensor([[ [0.,1.,2.,3.,4.,5.], [0.,-1.,-2.,-3.,-4.,-5.] ]])
x.requires_grad = True
y = model(x)
print(y)
loss_fn = MSELoss()
with torch.no_grad():
    target = y+Tensor([[ [-1.,0.,-2.,-1.], [1.,0.,-2.,-4.], [-2.,-1.,0.,-1.] ]])
print(target)
loss = loss_fn(y, target)
loss.backward()


print("FIRST CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)
print("")
print("--------------------------")

print("SECOND CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)


tensor([[[ -2.,  -6., -10., -14., -18., -10.],
         [  3.,   6.,   8.,  10.,  12.,   2.],
         [  0.,  -2.,  -3.,  -4.,  -5.,   6.]]],
       grad_fn=<ConvolutionBackward0>)


RuntimeError: The size of tensor a (6) must match the size of tensor b (4) at non-singleton dimension 2

In [20]:
linear = nn.Linear(in_features=3, out_features=2, bias=True)

linear.weight = Parameter(torch.tensor([[-1.,  2., -3.],
                                        [ 4.,  5., -6.]]))

linear.bias = Parameter(torch.tensor([-1., 3.]))  

model = nn.Sequential(linear)

x = Parameter(Tensor([0., 1., 2.]))
y = model(x)
print(y)

loss_fn = MSELoss()
with torch.no_grad():
    target = Tensor([-4., -3.])
loss = loss_fn(y, target)
loss.backward(retain_graph=True)
print(x.grad)

loss.backward()
print(x.grad)





tensor([-5., -4.], grad_fn=<ViewBackward0>)
tensor([-3., -7.,  9.])
tensor([ -6., -14.,  18.])


In [19]:
x = torch.tensor([0., 1., 2.], requires_grad=True)
grad_out = torch.tensor([-4., -3.])

linear = Linear(in_features=3, out_features=2, bias=True)

with torch.no_grad():
    linear.weight.copy_(torch.tensor([[-1.,  2., -3.],
                                      [ 4.,  5., -6.]]))
    linear.bias.copy_(torch.tensor([-1., 3.]))

y = linear(x)

y.backward(grad_out)

print("propagated_loss:")
print(x.grad)

print("weight.grad:")
print(linear.weight.grad)

print("bias.grad:")
print(linear.bias.grad)


propagated_loss:
tensor([ -8., -23.,  30.])
weight.grad:
tensor([[-0., -4., -8.],
        [-0., -3., -6.]])
bias.grad:
tensor([-4., -3.])


In [60]:
# Input
x = Tensor([[[0., 1., 2., 3., 4., 5.],
             [0., -1., -2., -3., -4., -5.]]])  # shape: (batch=1, in_channels=2, length=6)
x.requires_grad = True

# Conv1d layer
conv1d = nn.Conv1d(
    in_channels=2,
    out_channels=3,
    kernel_size=3,
    stride=3,
    dilation=2,
    padding=1,
    bias=True
)

# Set weights manually
conv1d.weight = Parameter(torch.tensor([
    [[-1., 0., 1.], [-1., 2., 3.]],    # out_channel 0
    [[0., 0., 0.], [1., -1., -2.]],    # out_channel 1
    [[-2., -1., -2.], [-3., -1., 0.]]  # out_channel 2
], dtype=torch.float32))

conv1d.bias = Parameter(torch.tensor([0., 1., 2.], dtype=torch.float32))

# Sequential model
model = nn.Sequential(conv1d)

# Forward pass
y = model(x)
print("Output y:")
print(y)

# Define target for MSE loss
target = y + Tensor([[[ -1., 0., -2., -1.],
                      [ 1., 0., -2., -4.],
                      [ -2., -1., 0., -1.]]])

# Compute loss and backward
loss_fn = nn.MSELoss()
loss = loss_fn(y, target)
loss.backward()

print("Input gradient x.grad:")
print(x.grad)


Output y:
tensor([[[-8., -8.],
         [ 8.,  3.],
         [-4.,  4.]]], grad_fn=<ConvolutionBackward0>)


RuntimeError: The size of tensor a (2) must match the size of tensor b (4) at non-singleton dimension 2

In [6]:
import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import Parameter

x = Tensor([[[0., 1., 2., 3., 4., 5.],      # channel 0
             [0., -1., -2., -3., -4., -5.]]])  # channel 1

x.requires_grad = True

print("Input shape:", x.shape)

conv1d = nn.Conv1d(
    in_channels=2,
    out_channels=3,
    kernel_size=3,
    stride=3,
    dilation=2,
    padding=1,
    bias=True
)

model = Sequential(conv1d)
model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

#print("\nWeight shape:", conv1d.weight.shape)
#print("\nBias:", conv1d.bias.tolist())

x = Tensor([[ [0.,1.,2.,3.,4.,5.], [0.,-1.,-2.,-3.,-4.,-5.] ]])
x.requires_grad = True
y = model(x)
print(y)
print("Output shape:", y.shape)



loss_fn = MSELoss()
with torch.no_grad():
    target = Tensor([[ [-4.,-3.],[-2.,-1.], [0.,1.] ]])
print(target)
loss = loss_fn(y, target)
loss.backward()


print("FIRST CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)
print("")
print("--------------------------")

print("SECOND CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)


Input shape: torch.Size([1, 2, 6])
tensor([[[-8., -8.],
         [ 8.,  3.],
         [-4.,  4.]]], grad_fn=<ConvolutionBackward0>)
Output shape: torch.Size([1, 3, 2])
tensor([[[-4., -3.],
         [-2., -1.],
         [ 0.,  1.]]])
FIRST CONVOLUTION
Gradient wrt x:
tensor([[[  0.0000,   1.3333,  -0.3333,   1.3333,  -1.0000,   0.0000],
         [  0.0000,  -4.6667,   0.0000, -10.6667,  -5.6667,   0.0000]]])
Gradient wrt weights:
tensor([[[ -3.3333,  -8.0000,  -4.0000],
         [  3.3333,   8.0000,   4.0000]],

        [[  2.6667,   8.6667,  10.0000],
         [ -2.6667,  -8.6667, -10.0000]],

        [[  2.0000,   2.6667,  -4.0000],
         [ -2.0000,  -2.6667,   4.0000]]])
Gradient wrt bias:
tensor([-3.0000,  4.6667, -0.3333])

--------------------------
SECOND CONVOLUTION
Gradient wrt x:
tensor([[[  0.0000,   1.3333,  -0.3333,   1.3333,  -1.0000,   0.0000],
         [  0.0000,  -4.6667,   0.0000, -10.6667,  -5.6667,   0.0000]]])
Gradient wrt weights:
tensor([[[ -3.3333,  -8.0000,  

In [59]:
output = torch.tensor([1., 2., -3.], requires_grad=True)
label = torch.tensor([-5., -4., 2.])

loss_fn = MSELoss()
loss = loss_fn(output, label)
loss.backward()

print(output.grad)


tensor([ 4.0000,  4.0000, -3.3333])


In [58]:
x = torch.tensor([[[0., 1., 2., 3., 4., 5.],
                   [0., -1., -2., -3., -4., -5.]]], requires_grad=True)

model = nn.Sequential(
    nn.Conv1d(in_channels=2, out_channels=3, kernel_size=3, dilation=2, stride=1, padding="same"),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(in_features=6, out_features=2)
)

model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

model[3].weight = Parameter(torch.tensor([
    [1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1.],
    [-1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0.]
], dtype=torch.float32))
model[3].bias = Parameter(torch.tensor([0., 1.], dtype=torch.float32))

y = model(x)

with torch.no_grad():
    target = torch.tensor([[0.1, 0.9]])
loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(y, target)

print("Model output:", y)
print("Loss:", loss.item())

loss.backward()
print("Input gradient x.grad:")
print(x.grad)


Model output: tensor([[-1., -2.]], grad_fn=<AddmmBackward0>)
Loss: 1.213261604309082
Input gradient x.grad:
tensor([[[ 0.0000,  0.0000,  1.2621,  1.2621,  0.6311,  0.6311],
         [-1.8932,  1.8932, -0.6311,  1.2621,  2.5242, -1.2621]]])


In [None]:

# Input: 1 Batch, 1 Channel, Länge 6
x = torch.tensor([[[0., 1., 2., 3., 4., 5.]]], requires_grad=True)

model = nn.Sequential(
    nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, dilation=2, stride=1, padding="same"),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(in_features=6, out_features=1)  # Output-Größe = Input-Größe
)

# Conv1d-Gewicht: (out_channels=1, in_channels=1, kernel_size=3)
model[0].weight = Parameter(Tensor([[[-1., 0., 1.]]]))
model[0].bias = Parameter(Tensor([0.]))

# Linear-Gewicht: (out_features=6, in_features=6)
model[3].weight = Parameter(torch.tensor([
    [-1., 0., 1., -1., 2., 3.]
], dtype=torch.float32))
model[3].bias = Parameter(torch.tensor([-1.], dtype=torch.float32))

# Forward
y = model(x)

print("Input shape:", x.shape)
print("Conv output shape:", model[0](x).shape)  # vor Flatten
print("Model output shape:", y.shape)
print("Model output:", y)

# Loss
target = torch.tensor([[-4.]])  # gleiche Größe wie Output
loss_fn = nn.MSELoss()
loss = loss_fn(y, target)

print("Loss:", loss.item())

# Backward
loss.backward()
print("Input gradient x.grad:")
print(x.grad)


In [5]:
conv1d = nn.Conv1d(in_channels=2, out_channels=3, kernel_size=3, padding="same")

model = Sequential(conv1d)
model[0].weight = Parameter(
    Tensor([[[-1., 0., 1.], [-1., 2., 3.]], 
            [[0., 0., 0.], [1., -1., -2.]], 
            [[-2., -1., -2.], [-3., -1., 0.]]]))
model[0].bias = Parameter(Tensor([0.,1.,2.]))

x = Tensor([[ [0.,1.,2.,3.,4.,5.], [0.,-1.,-2.,-3.,-4.,-5.] ]])
x.requires_grad = True
y = model(x)
print(y)
loss_fn = MSELoss()
with torch.no_grad():
    target = y+Tensor([[[ -2.,  -6., -10., -14., -18., -10.],
         [  3.,   6.,   8.,  10.,  12.,   2.],
         [  0.,  -2.,  -3.,  -4.,  -5.,   6.]]])
print(target)
loss = loss_fn(y, target)
loss.backward()


print("FIRST CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)
print("")
print("--------------------------")

print("SECOND CONVOLUTION")
print("Gradient wrt x:")
print(x.grad)
print("Gradient wrt weights:")
print(model[0].weight.grad)
print("Gradient wrt bias:")
print(model[0].bias.grad)


tensor([[[ -2.,  -6., -10., -14., -18., -10.],
         [  3.,   6.,   8.,  10.,  12.,   2.],
         [  0.,  -2.,  -3.,  -4.,  -5.,   6.]]],
       grad_fn=<ConvolutionBackward0>)
tensor([[[ -4., -12., -20., -28., -36., -20.],
         [  6.,  12.,  16.,  20.,  24.,   4.],
         [  0.,  -4.,  -6.,  -8., -10.,  12.]]])
FIRST CONVOLUTION
Gradient wrt x:
tensor([[[-1.1111, -1.7778, -2.5556, -3.1111,  0.3333,  1.5556],
         [-1.2222,  0.1111,  2.1111,  3.8889, 12.3333, 11.7778]]])
Gradient wrt weights:
tensor([[[ 14.6667,  21.1111,  21.1111],
         [-14.6667, -21.1111, -21.1111]],

        [[ -8.0000, -12.2222, -15.4444],
         [  8.0000,  12.2222,  15.4444]],

        [[  0.2222,   1.1111,   6.0000],
         [ -0.2222,  -1.1111,  -6.0000]]])
Gradient wrt bias:
tensor([ 6.6667, -4.5556,  0.8889])

--------------------------
SECOND CONVOLUTION
Gradient wrt x:
tensor([[[-1.1111, -1.7778, -2.5556, -3.1111,  0.3333,  1.5556],
         [-1.2222,  0.1111,  2.1111,  3.8889, 12.333

In [54]:
import torch
from torch import nn
from torch.nn.parameter import Parameter

# ----------------------------
# Input: 1 Batch, 2 Channels, Länge 6
# ----------------------------
x = torch.tensor([[[0., 1., 2., 3., 4., 5.],
                   [0., -1., -2., -3., -4., -5]]], requires_grad=True)

# ----------------------------
# Modell: Conv1d -> ReLU -> Linear
# ----------------------------
model = nn.Sequential(
    nn.Conv1d(in_channels=2, out_channels=3, kernel_size=3, stride=1, dilation=2, padding="same"),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(in_features=3*6, out_features=2)
)

# ----------------------------
# Gewichte manuell setzen (deterministisch)
# ----------------------------
# Conv1d: (out_channels=3, in_channels=2, kernel_size=3)
conv_weights = torch.tensor([
    [[1., 0., -1.], [0., 1., 0.]],      # Out-Channel 0
    [[-1., 1., 0.], [1., -1., 1.]],     # Out-Channel 1
    [[0., -1., 1.], [1., 0., -1.]]      # Out-Channel 2
], dtype=torch.float32)
conv_bias = torch.tensor([0., 1., -1.], dtype=torch.float32)

model[0].weight = Parameter(conv_weights)
model[0].bias = Parameter(conv_bias)

# Linear: (out_features=2, in_features=18)
linear_weights = torch.tensor([
    [1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1.],
    [-1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0., -1., 1., 0.]
], dtype=torch.float32)
linear_bias = torch.tensor([0., 1.], dtype=torch.float32)

model[3].weight = Parameter(linear_weights)
model[3].bias = Parameter(linear_bias)

# ----------------------------
# Forward-Pass
# ----------------------------
y = model(x)
print("Input shape:", x.shape)
print("Conv output shape (vor Flatten):", model[0](x).shape)
print("Model output shape:", y.shape)
print("Model output:", y)

# ----------------------------
# Target
# ----------------------------
target = torch.tensor([[-4., -3.]], dtype=torch.float32)

# ----------------------------
# Loss
# ----------------------------
loss_fn = nn.MSELoss()
loss = loss_fn(y, target)
print("Loss:", loss.item())

# ----------------------------
# Backward-Pass
# ----------------------------
loss.backward()
print("Gradient w.r.t input x.grad:")
print(x.grad)

# Optional: Gradienten der Gewichte
print("Gradient Conv1d weight.grad:")
print(model[0].weight.grad)
print("Gradient Linear weight.grad:")
print(model[3].weight.grad)


Input shape: torch.Size([1, 2, 6])
Conv output shape (vor Flatten): torch.Size([1, 3, 6])
Model output shape: torch.Size([1, 2])
Model output: tensor([[-3.,  2.]], grad_fn=<AddmmBackward0>)
Loss: 13.0
Gradient w.r.t input x.grad:
tensor([[[ 5., -5., -9., 10.,  4., -5.],
         [-2., -4., 10., -6., -5.,  5.]]])
Gradient Conv1d weight.grad:
tensor([[[  0.,   0.,   0.],
         [  0.,   0.,   0.]],

        [[  7.,  13.,  -4.],
         [ -7., -13.,   4.]],

        [[ -4.,  -9., -17.],
         [  4.,   9.,  17.]]])
Gradient Linear weight.grad:
tensor([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  5.,  5.,  3.,  4.,
          5.,  5.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  5.,  0., 25., 25., 15., 20.,
         25., 25.,  0.,  0.]])
