In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.autograd import Function

class CustomAutogradFunction(Function):
    """Identity autograd function that can log the gradients passing through it.

    ``forward`` returns its input unchanged and ``backward`` returns
    ``grad_output`` unchanged, so placing this function between two layers
    leaves both the computed values and the propagated gradients as they were.
    Its only effect is the optional diagnostic printing in ``backward``.

    Set ``CustomAutogradFunction.verbose = False`` before a long training run:
    ``backward`` is invoked once per ``loss.backward()`` call, so with printing
    enabled every batch dumps a full gradient tensor to the output.
    """

    # Class-level switch for the diagnostic prints in ``backward``.
    # Defaults to True so existing callers keep seeing the same output.
    verbose = True

    @staticmethod
    def forward(ctx, input):
        """Return ``input`` unchanged.

        Args:
            ctx: Autograd context object supplied by ``Function.apply``.
            input: Tensor to pass through.

        Returns:
            The same tensor that was passed in.
        """
        # Kept as scaffolding: a custom gradient added to ``backward`` can read
        # the forward input back through ``ctx.saved_tensors``.
        ctx.save_for_backward(input)

        # You can perform your custom operation here if needed
        return input

    @staticmethod
    def backward(ctx, grad_output):
        """Optionally print the incoming gradient, then pass it through.

        Args:
            ctx: Autograd context object; ``ctx.saved_tensors`` holds the
                tensor stored by ``forward``.
            grad_output: Gradient of the loss with respect to the output of
                ``forward``.

        Returns:
            ``grad_output`` unchanged, used as the gradient for ``input``.
        """
        if CustomAutogradFunction.verbose:
            # Print information during the backward pass
            print("Backward pass: Gradients shape:", grad_output.shape)
            print("Gradients values:", grad_output)

        # You can perform your custom gradient calculation here if needed
        # (the forward input is available as ``ctx.saved_tensors[0]``).
        return grad_output

In [23]:
# Define a simple neural network class
class NeuralNetwork(nn.Module):
    """Fully connected classifier with a gradient-inspection step after fc1.

    Data flows ``fc1 -> CustomAutogradFunction -> ReLU -> fc2``.

    Args:
        input_size: Number of features per input sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        # Sub-modules are created in the order fc1, relu, fc2 so that parameter
        # initialisation and module registration order stay as they were.
        self.fc1 = nn.Linear(input_size, hidden_size)

        # ``[:]`` slices of fc1's weight and bias ``.data``.
        # NOTE(review): basic slicing of a tensor normally yields a view, not a
        # copy, so these would track the weights as they are trained. If a
        # frozen snapshot of the initial values was intended, use ``.clone()``
        # instead - confirm the intent.
        first_layer_params = (self.fc1.weight, self.fc1.bias)
        self.explainable = [param.data[:] for param in first_layer_params]

        # Plain callable (not an nn.Module): applies the custom autograd function.
        self.custom_layer = CustomAutogradFunction.apply

        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the fc2 outputs for the batch ``x``."""
        hidden = self.relu(self.custom_layer(self.fc1(x)))
        return self.fc2(hidden)

In [24]:
# Hyperparameters
input_size = 784  # MNIST images are 28x28 pixels
hidden_size = 128
output_size = 10  # 10 classes (digits 0-9)
learning_rate = 0.001
batch_size = 64
num_epochs = 10
seed = 0  # Fixed RNG seed so a fresh Restart & Run All repeats the same run

# Seed PyTorch's global generator before any model or data loader is created:
# weight initialisation and the shuffled batch order both draw from it.
torch.manual_seed(seed)

# Preprocessing applied to every MNIST image: convert it to a tensor, then
# normalise with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])



In [25]:
# MNIST training split (root ./data, download=True) with the preprocessing
# pipeline attached, served in shuffled mini-batches of `batch_size`.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Create the model, the loss function and the optimizer
model = NeuralNetwork(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [26]:


# Training Loop
# Set training mode explicitly so re-running this cell after a model.eval()
# elsewhere in the notebook still trains in the right mode.
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    num_batches = 0
    for data, targets in train_loader:
        # Flatten every sample to one row; fc1 then checks the feature width,
        # so a size mismatch raises instead of silently re-batching.
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # One summary line per epoch; skipped if the loader produced no batches.
    if num_batches:
        print(f'Epoch {epoch + 1}/{num_epochs} - mean batch loss: {running_loss / num_batches:.4f}')

print('Training finished.')
# NOTE(review): this saves the whole module object rather than its state_dict,
# so loading presumably needs the NeuralNetwork and CustomAutogradFunction
# definitions available. Left unchanged so that any existing code that loads
# 'numbers.pth' as a full model keeps working - consider
# torch.save(model.state_dict(), ...) if no such loader exists.
torch.save(model, 'numbers.pth')

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0014],
        [ 0.0009,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0008],
        [-0.0007,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0010],
        ...,
        [ 0.0007,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0013],
        [ 0.0004,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0008],
        [-0.0006,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0009]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-0.0009,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0005],
        [-0.0010,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0005],
        [ 0.0010,  0.0000,  0.0000,  ...,  0.0000,  0.0000, -0.0007],
        ...,
        [-0.0002,  0.0000,  0.0011,  ...,  0.0000,  0.0000, -0.0005],
        [-0.0008,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0003],
        [ 0.0010,  0.0000,  0.0000,  ...,  0.0000,  0.0000,

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1234e-04,  0.0000e+00, -5.0240e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.3928e-05],
        [-4.1938e-04,  0.0000e+00,  4.6243e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.0381e-04],
        [-2.7041e-04,  0.0000e+00, -7.8475e-04,  ...,  0.0000e+00,
          0.0000e+00, -5.7915e-04],
        ...,
        [ 7.9696e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7106e-04],
        [-4.1695e-04,  0.0000e+00,  9.1800e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.8084e-04],
        [ 1.0140e-03,  0.0000e+00, -1.1940e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.5315e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0004,  0.0000,  0.0012,  ...,  0.0000,  0.0000,  0.0013],
        [ 0.0007,  0.0000,  0.0012,  ...,  0.0000,  0.0000,  0.0014],
        [-0.0003,  0.0000, -0.0011,  ...,  0.0000,  0.0000, -0.0006],
        ...,
 

Gradients values: tensor([[-5.1363e-04,  0.0000e+00, -2.6300e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.8886e-04],
        [-1.0868e-04,  0.0000e+00, -3.3519e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9575e-04],
        [ 4.2290e-04,  0.0000e+00, -5.7834e-04,  ...,  0.0000e+00,
          0.0000e+00, -5.2305e-04],
        ...,
        [ 1.9147e-04,  0.0000e+00, -3.9375e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.3709e-05],
        [-1.1530e-03,  0.0000e+00, -5.4956e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1830e-03],
        [-3.2103e-04,  0.0000e+00,  8.0777e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.7998e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.8711e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.3959e-04],
        [ 3.6154e-05,  0.0000e+00, -4.5530e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.6938e-05],
        [-1.3586e-03,  0.0000e+00,  8.5886e-04,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.4242e-04,  0.0000e+00,  1.3770e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.1434e-04],
        [-6.0303e-05,  0.0000e+00, -7.1959e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.6331e-05],
        [-7.0914e-04,  0.0000e+00,  4.5822e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.1425e-04],
        ...,
        [ 2.2152e-05,  0.0000e+00, -1.7620e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.4936e-05],
        [ 1.2035e-03,  0.0000e+00, -1.9158e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.4271e-03],
        [-7.2017e-06,  0.0000e+00, -1.0622e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.1846e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.8008e-05,  0.0000e+00, -1.5746e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1695e-04],
        [-1.5974e-04,  0.0000e+00,  1.9291e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.8559e-05],
        [-5.9228e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3925e-04,  0.0000e+00, -1.9156e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.2154e-03],
        [-7.6560e-06,  0.0000e+00, -9.3165e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.3808e-05,  0.0000e+00, -3.4923e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.6531e-05],
        ...,
        [-7.1247e-06,  0.0000e+00, -6.4882e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0613e-05],
        [ 2.2057e-04,  0.0000e+00, -1.7410e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.3408e-05],
        [ 2.9212e-04,  0.0000e+00, -1.1746e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.0764e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.1624e-05,  0.0000e+00,  1.5998e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9661e-04],
        [-2.1810e-05,  0.0000e+00,  6.0181e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.8231e-04],
        [-6.8085e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4423e-05,  0.0000e+00, -1.5077e-03,  ...,  0.0000e+00,
          0.0000e+00, -7.4810e-04],
        [ 3.1801e-06,  0.0000e+00, -7.4888e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.9692e-06],
        [-5.3804e-06,  0.0000e+00, -4.8008e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.0909e-06],
        ...,
        [-1.8088e-06,  0.0000e+00, -5.3952e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2463e-05],
        [-3.9855e-06,  0.0000e+00,  1.5643e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.6802e-06],
        [ 3.0433e-06,  0.0000e+00, -1.4762e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7757e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.1116e-04,  0.0000e+00,  1.7864e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.0681e-04],
        [-3.0059e-05,  0.0000e+00, -2.0495e-03,  ...,  0.0000e+00,
          0.0000e+00, -7.5510e-04],
        [-8.9020e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1025e-05,  0.0000e+00,  1.3429e-07,  ...,  0.0000e+00,
          0.0000e+00,  4.1236e-06],
        [-5.4228e-04,  0.0000e+00, -1.7789e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.3495e-03],
        [ 9.6102e-05,  0.0000e+00, -1.5895e-04,  ...,  0.0000e+00,
          0.0000e+00,  9.0707e-05],
        ...,
        [ 4.7837e-04,  0.0000e+00, -2.0228e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.0924e-04],
        [ 1.2857e-05,  0.0000e+00, -1.6542e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.4722e-05],
        [ 3.2325e-05,  0.0000e+00, -2.7473e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.1970e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3763e-04,  0.0000e+00, -3.2785e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.1658e-04],
        [-1.0249e-05,  0.0000e+00,  3.2824e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.8585e-04],
        [ 6.0353e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7478e-04,  0.0000e+00,  4.1606e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.5728e-04],
        [-1.7782e-05,  0.0000e+00,  1.1604e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.4972e-05],
        [-3.6600e-04,  0.0000e+00,  6.0830e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.5853e-04],
        ...,
        [ 1.2571e-04,  0.0000e+00, -1.7997e-03,  ...,  0.0000e+00,
          0.0000e+00, -3.2740e-04],
        [-1.8803e-05,  0.0000e+00, -1.4735e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5265e-05],
        [ 3.9761e-07,  0.0000e+00, -2.7025e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3521e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0529e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3972e-07],
        [-3.0741e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0312e-05],
        [ 9.9119e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.5582e-06,  0.0000e+00,  9.4926e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.3430e-06],
        [ 4.5449e-06,  0.0000e+00, -1.0588e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0918e-05],
        [ 1.1511e-06,  0.0000e+00, -2.5306e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.5896e-06],
        ...,
        [-3.2757e-05,  0.0000e+00, -4.0756e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.8970e-05],
        [ 1.6260e-04,  0.0000e+00, -2.0713e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2420e-04],
        [ 4.8205e-05,  0.0000e+00, -6.6049e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.7783e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0877e-04,  0.0000e+00,  1.6608e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.2030e-03],
        [ 1.0583e-04,  0.0000e+00, -2.7862e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.3826e-04],
        [-8.1801e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0559e-05,  0.0000e+00, -2.2769e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.4013e-05],
        [-1.4005e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3526e-04],
        [ 2.5301e-05,  0.0000e+00,  8.8195e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.1889e-05],
        ...,
        [-1.5561e-04,  0.0000e+00,  1.3920e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.3457e-05],
        [ 7.9150e-05,  0.0000e+00, -3.9126e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.4487e-04],
        [ 4.9634e-06,  0.0000e+00,  9.8934e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.1687e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1756e-04,  0.0000e+00,  1.0734e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1246e-04],
        [-1.0725e-05,  0.0000e+00,  2.3490e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.6222e-05],
        [ 3.5485e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5694e-04,  0.0000e+00,  1.1107e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.8021e-04,  0.0000e+00, -7.3087e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.4193e-04],
        [-2.7509e-05,  0.0000e+00,  1.0299e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.4545e-06],
        ...,
        [ 1.3878e-05,  0.0000e+00,  3.1531e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.3092e-05],
        [-9.8157e-05,  0.0000e+00,  1.3704e-04,  ...,  0.0000e+00,
          0.0000e+00,  7.4300e-05],
        [-2.0902e-07,  0.0000e+00, -4.2977e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.8802e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4390e-04,  0.0000e+00,  4.0304e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.0541e-04],
        [ 1.0174e-05,  0.0000e+00, -2.2350e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.2868e-05],
        [ 3.6680e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.3391e-05,  0.0000e+00,  3.1635e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.4393e-04],
        [ 1.2761e-04,  0.0000e+00,  3.1713e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.6318e-04],
        [-1.1546e-05,  0.0000e+00,  4.3519e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.8174e-07],
        ...,
        [ 6.8378e-04,  0.0000e+00, -9.6048e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.2379e-04],
        [ 8.7802e-07,  0.0000e+00,  1.0838e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.0645e-07],
        [-1.9283e-06,  0.0000e+00, -7.0079e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.4362e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0305e-06,  0.0000e+00,  2.0067e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.4936e-05],
        [ 8.4571e-05,  0.0000e+00, -3.2470e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.7872e-05],
        [-8.8739e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1632e-05,  0.0000e+00,  8.2678e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.9254e-06],
        [ 1.1292e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6132e-06],
        [-2.2288e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.3562e-06],
        ...,
        [ 3.6237e-04,  0.0000e+00, -7.7761e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.5224e-04],
        [-1.1947e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4064e-04],
        [-1.2545e-05,  0.0000e+00,  4.4341e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.5628e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.5387e-06,  0.0000e+00,  1.8486e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0774e-06],
        [-8.0671e-07,  0.0000e+00, -6.8615e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.5390e-06],
        [ 7.5120e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7150e-06,  0.0000e+00, -6.2366e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.3522e-06],
        [-1.4197e-05,  0.0000e+00,  6.6099e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.8147e-06],
        [ 1.3101e-03,  0.0000e+00,  1.5385e-03,  ...,  0.0000e+00,
          0.0000e+00,  2.2087e-03],
        ...,
        [ 0.0000e+00,  0.0000e+00, -6.2383e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.9692e-04],
        [ 1.4073e-04,  0.0000e+00, -4.1811e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.6372e-04],
        [-1.6638e-04,  0.0000e+00,  8.7732e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.2430e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8124e-05,  0.0000e+00,  2.4190e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.6774e-05],
        [ 1.1174e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8586e-03],
        [-2.0700e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000,  0.0000,  0.0012,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0016,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0013,  0.0000,  0.0004,  ...,  0.0000,  0.0000, -0.0024],
        ...,
        [ 0.0012,  0.0000,  0.0002,  ...,  0.0000,  0.0000,  0.0009],
        [-0.0003,  0.0000,  0.0004,  ...,  0.0000,  0.0000, -0.0003],
        [ 0.0003,  0.0000, -0.0010,  ...,  0.0000,  0.0000,  0.0001]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.9581e-04,  0.0000e+00,  9.8402e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.3134e-04],
        [ 1.8834e-04,  0.0000e+00,  2.5632e-03,  ...,  0.0000e+00,
          0.0000e+00,  9.1944e-04],
        [ 1.2061e-06,  0.0000e+00,  1.0275e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.6956e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  2.0522e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00

Gradients values: tensor([[ 6.5291e-05,  0.0000e+00, -1.1140e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.4731e-04],
        [-5.7237e-05,  0.0000e+00,  6.2544e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.9697e-05],
        [-3.5599e-05,  0.0000e+00, -1.9626e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.0007e-05],
        ...,
        [-1.5282e-05,  0.0000e+00,  2.3321e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.2505e-05],
        [ 1.9092e-03,  0.0000e+00,  3.1432e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.5363e-03],
        [-1.7599e-05,  0.0000e+00,  3.6789e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.0903e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1854e-04,  0.0000e+00,  9.3612e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.1919e-04],
        [-4.8869e-06,  0.0000e+00, -7.5414e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.8037e-05],
        [-2.0618e-05,  0.0000e+00,  1.4100e-05,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0736e-03,  0.0000e+00,  3.2966e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4109e-03],
        [ 6.7352e-05,  0.0000e+00, -7.2006e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.1494e-05],
        [-4.2933e-05,  0.0000e+00, -9.7309e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.2102e-05],
        ...,
        [ 2.3705e-05,  0.0000e+00, -1.6987e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.7570e-03],
        [ 3.0254e-08,  0.0000e+00, -5.3842e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8251e-05,  0.0000e+00, -1.2443e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3728e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1474e-05,  0.0000e+00, -3.2646e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.8364e-05],
        [-1.1303e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1886e-05],
        [ 3.0378e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.5165e-06,  0.0000e+00,  2.4632e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8047e-05],
        [ 7.6923e-04,  0.0000e+00, -1.9986e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.3808e-04],
        [-6.8932e-05,  0.0000e+00, -1.6032e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.7956e-05],
        ...,
        [-4.1812e-06,  0.0000e+00,  1.4386e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9712e-05],
        [-2.0394e-03,  0.0000e+00,  4.9678e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.4711e-03],
        [ 4.7618e-04,  0.0000e+00, -1.2227e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.3412e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.0615e-06,  0.0000e+00, -4.0522e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.7011e-06],
        [-8.7840e-05,  0.0000e+00,  3.7568e-04,  ...,  0.0000e+00,
          0.0000e+00, -5.4010e-04],
        [-3.5652e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.9642e-06,  0.0000e+00,  5.4589e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.3199e-06],
        [-3.6610e-06,  0.0000e+00,  2.1989e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.0777e-08],
        [ 4.8354e-06,  0.0000e+00,  6.0467e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.7185e-06],
        ...,
        [-4.8965e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.4228e-07],
        [ 2.4235e-07,  0.0000e+00, -8.5258e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1579e-06,  0.0000e+00,  1.1299e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.0145e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3909e-04,  0.0000e+00,  3.3455e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.1974e-04],
        [-2.1589e-05,  0.0000e+00, -5.8031e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.8968e-05],
        [ 4.5504e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.9770e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.7803e-05],
        [-6.5050e-05,  0.0000e+00,  2.5926e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3519e-04],
        [-4.9928e-05,  0.0000e+00,  7.8469e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9735e-04],
        ...,
        [-3.4746e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.7577e-06],
        [ 1.2288e-07,  0.0000e+00, -2.4295e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -3.0939e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.5570e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.0731e-06,  0.0000e+00, -9.1869e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4024e-05],
        [ 4.4432e-05,  0.0000e+00, -4.6952e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.7860e-04],
        [-3.4393e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.1562e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2656e-04],
        [ 1.3186e-04,  0.0000e+00, -2.5874e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.3680e-04],
        [-1.1974e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5431e-03],
        ...,
        [-1.4972e-05,  0.0000e+00, -1.3181e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2433e-05],
        [-3.0690e-05,  0.0000e+00,  1.6677e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.4186e-05],
        [-1.5439e-03,  0.0000e+00, -1.4476e-03,  ...,  0.0000e+00,
          0.0000e+00, -9.9400e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8349e-04,  0.0000e+00, -2.8179e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.4919e-04],
        [ 7.7965e-06,  0.0000e+00, -1.1612e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.8384e-05],
        [-2.6208e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6351e-06,  0.0000e+00, -1.6916e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.6391e-05],
        [ 2.1766e-06,  0.0000e+00, -1.2373e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.9711e-06],
        [ 3.6556e-06,  0.0000e+00,  3.3103e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2983e-05],
        ...,
        [ 5.1497e-04,  0.0000e+00, -3.7514e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.8373e-04],
        [ 3.8144e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4648e-06],
        [ 8.5956e-06,  0.0000e+00, -1.6928e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.6271e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2315e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.4078e-04],
        [-3.9588e-05,  0.0000e+00,  5.2061e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.5307e-05],
        [ 6.6873e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5240e-05,  0.0000e+00, -4.6719e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.1773e-06],
        [ 1.9169e-05,  0.0000e+00, -4.5595e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.3257e-05],
        [-9.7834e-07,  0.0000e+00, -6.2567e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.4793e-06],
        ...,
        [-1.5222e-03,  0.0000e+00, -2.4458e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.0883e-03],
        [ 4.9355e-05,  0.0000e+00, -3.2804e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3867e-05],
        [-4.5258e-08,  0.0000e+00, -2.1879e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.6094e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4049e-05,  0.0000e+00, -4.0384e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.5232e-05],
        [ 1.2436e-04,  0.0000e+00,  1.6360e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.4240e-04],
        [ 1.5452e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8201e-06,  0.0000e+00, -5.7155e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.9122e-06],
        [ 2.3351e-04,  0.0000e+00,  2.0056e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.9500e-04],
        [-1.3313e-05,  0.0000e+00,  3.7057e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.5734e-06],
        ...,
        [ 3.8977e-06,  0.0000e+00, -8.6146e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.3899e-06],
        [ 8.4504e-04,  0.0000e+00, -3.8302e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.2778e-04],
        [-1.9843e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3777e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.2374e-05,  0.0000e+00,  2.2841e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.5958e-06,  0.0000e+00,  4.4316e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.9035e-06],
        [ 1.2779e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2614e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6039e-04],
        [-2.2205e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.1228e-05],
        [ 4.5230e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4796e-05],
        ...,
        [ 1.3528e-05,  0.0000e+00,  9.6838e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.6885e-06],
        [ 1.9455e-04,  0.0000e+00,  9.8299e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.9230e-04],
        [ 3.1544e-05,  0.0000e+00,  2.5595e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.0846e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6583e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.5641e-08,  0.0000e+00, -2.8510e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.0496e-06],
        [ 1.0441e-

Gradients values: tensor([[-6.2288e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3416e-06],
        [-8.5488e-08,  0.0000e+00, -3.4388e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0113e-05],
        [-1.0480e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5254e-03],
        ...,
        [ 1.9722e-06,  0.0000e+00, -4.2547e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.1383e-06],
        [-1.1401e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7508e-05],
        [-2.8920e-06,  0.0000e+00,  6.2561e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.3256e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7901e-04,  0.0000e+00,  1.6282e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.4672e-04],
        [ 6.1797e-06,  0.0000e+00,  2.2123e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5401e-05],
        [-2.5144e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3821e-06,  0.0000e+00, -7.5360e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.3706e-06],
        [-5.8981e-05,  0.0000e+00, -1.7572e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1192e-04],
        [-4.3625e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.3523e-08],
        ...,
        [-1.0976e-06,  0.0000e+00, -8.4693e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9790e-05],
        [-1.9351e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3645e-04],
        [-1.3763e-04,  0.0000e+00,  1.8326e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.8981e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.2277e-04,  0.0000e+00,  2.2212e-03,  ...,  0.0000e+00,
          0.0000e+00,  2.3074e-03],
        [-6.1082e-04,  0.0000e+00, -2.1076e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.1531e-03],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4314e-04,  0.0000e+00,  9.2620e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.3155e-04],
        [-1.1095e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.5920e-04],
        [-6.0787e-04,  0.0000e+00, -1.4644e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.7623e-03],
        ...,
        [ 3.5453e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0258e-04],
        [-3.1792e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5311e-04],
        [ 0.0000e+00,  0.0000e+00, -2.9519e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.9817e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.3276e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9924e-05],
        [-6.5760e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0989e-04],
        [ 3.1412e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.9078e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5564e-06],
        [-2.6124e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0766e-07],
        [-1.0490e-03,  0.0000e+00,  8.2370e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.0858e-04],
        ...,
        [-6.4007e-05,  0.0000e+00,  3.2075e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.2929e-04],
        [ 3.9573e-05,  0.0000e+00, -8.5539e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.9463e-05],
        [-4.0516e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6037e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.0097e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.1790e-06,  0.0000e+00,  1.0002e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.6418e-06],
        [-4.5451e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.0640e-06,  0.0000e+00, -1.5302e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.6351e-05],
        [ 7.9604e-04,  0.0000e+00, -3.8249e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.5805e-03],
        [ 6.4551e-06,  0.0000e+00, -7.9681e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.2854e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -4.7777e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.9532e-05],
        [ 2.4670e-08,  0.0000e+00, -1.2195e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.0279e-06,  0.0000e+00, -1.3495e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0677e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.1174e-05,  0.0000e+00,  4.9559e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.7732e-04],
        [-9.4030e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6969e-04],
        [ 6.3986e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1062e-04,  0.0000e+00, -2.5377e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3346e-04],
        [ 7.8330e-05,  0.0000e+00, -6.1360e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0274e-05],
        [ 0.0000e+00,  0.0000e+00, -3.6632e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.2025e-06],
        ...,
        [-6.8509e-06,  0.0000e+00, -6.9874e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.3224e-06],
        [ 1.6693e-07,  0.0000e+00, -3.9109e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.9663e-07],
        [-2.1744e-07,  0.0000e+00, -1.2662e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.7728e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.7445e-04,  0.0000e+00, -9.5897e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.5108e-04],
        [ 2.8400e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.5400e-04],
        [ 8.3119e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8106e-06,  0.0000e+00, -8.9916e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.1202e-05],
        [-5.5373e-05,  0.0000e+00, -1.1419e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.4632e-05],
        [ 5.0886e-06,  0.0000e+00, -1.1600e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.6380e-06],
        ...,
        [ 1.3300e-06,  0.0000e+00,  1.9846e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.3000e-06],
        [ 4.9137e-06,  0.0000e+00, -1.3622e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.4169e-06,  0.0000e+00, -2.7524e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.0122e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.2981e-05,  0.0000e+00, -1.3615e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.6118e-05],
        [-1.0718e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7836e-06],
        [-3.6181e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4248e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4891e-04],
        [-1.3390e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4147e-06],
        [-1.0643e-05,  0.0000e+00,  2.6900e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.3422e-05],
        ...,
        [ 3.9273e-05,  0.0000e+00, -3.6404e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.4310e-04],
        [ 2.4911e-06,  0.0000e+00, -4.0012e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.6060e-06],
        [-5.7033e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1338e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.1282e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3668e-06],
        [-3.5677e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5702e-05],
        [-4.5977e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.2385e-06,  0.0000e+00,  4.4460e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.4891e-06],
        [ 9.7277e-05,  0.0000e+00,  4.7365e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.0771e-04],
        [-9.8807e-05,  0.0000e+00, -1.7305e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.7549e-04],
        ...,
        [-9.4335e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.9758e-07],
        [-1.5132e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.3378e-05],
        [-9.8040e-05,  0.0000e+00,  8.7309e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7682e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.2629e-04,  0.0000e+00, -3.7037e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.4206e-03],
        [-2.1400e-08,  0.0000e+00,  1.5855e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.9195e-08],
        [-2.8319e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4690e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1252e-06],
        [ 9.2343e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.4269e-07],
        [-1.0637e-07,  0.0000e+00,  9.8571e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.3953e-06],
        ...,
        [ 1.5276e-04,  0.0000e+00,  1.3169e-04,  ...,  0.0000e+00,
          0.0000e+00,  9.3059e-05],
        [-1.8584e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.2370e-04],
        [ 2.2531e-03,  0.0000e+00,  5.8818e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.0225e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.1747e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0955e-03],
        [-3.1719e-06,  0.0000e+00, -6.4567e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.1415e-05],
        [-3.2368e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.2007e-06,  0.0000e+00, -2.6817e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1480e-05],
        [ 2.1362e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2231e-03],
        [ 3.8754e-07,  0.0000e+00, -1.1263e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-4.7992e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2937e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2080e-06],
        [ 9.1309e-05,  0.0000e+00, -3.6209e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.1548e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.2405e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2504e-05],
        [-7.5327e-04,  0.0000e+00,  7.6768e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1474e-04],
        [-1.2857e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7566e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6506e-04],
        [ 1.3623e-04,  0.0000e+00, -1.5364e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.5675e-04],
        [-4.3369e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2844e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -9.4487e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.9918e-06],
        [-1.4797e-07,  0.0000e+00, -9.5871e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.3713e-07],
        [ 0.0000e+00,  0.0000e+00, -7.7635e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.3357e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.5602e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1717e-03],
        [-4.0110e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0218e-05],
        [-2.4203e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.7808e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.8391e-06],
        [ 3.9009e-05,  0.0000e+00,  2.7886e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.6902e-05],
        [-1.7374e-05,  0.0000e+00,  5.3999e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.9353e-05],
        ...,
        [-5.7698e-05,  0.0000e+00, -7.4079e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2944e-04],
        [-7.9068e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.8532e-06],
        [-1.7894e-06,  0.0000e+00, -3.3807e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0671e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.6700e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1490e-05],
        [ 1.8988e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.9570e-06],
        [ 1.0666e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.9875e-06,  0.0000e+00, -7.0321e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.8877e-06],
        [-2.9253e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0064e-06],
        [-5.7045e-05,  0.0000e+00, -1.1900e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.6082e-05],
        ...,
        [ 1.9505e-05,  0.0000e+00,  2.9458e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.9251e-05],
        [-2.1244e-06,  0.0000e+00, -1.3969e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9937e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0086e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4829e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.0246e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.6839e-07],
        [-2.0044e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8490e-03,  0.0000e+00, -2.0821e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.2149e-03],
        [ 1.2805e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.3146e-05],
        [-7.8724e-06,  0.0000e+00, -2.1884e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2545e-05],
        ...,
        [-5.9747e-04,  0.0000e+00, -2.9786e-03,  ...,  0.0000e+00,
          0.0000e+00, -7.2971e-04],
        [-1.2226e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6715e-04],
        [-1.4964e-05,  0.0000e+00, -1.8459e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2122e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.2589e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6628e-03],
        [ 8.9636e-08,  0.0000e+00, -1.7666e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8141e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9441e-04,  0.0000e+00,  6.6004e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.5267e-04],
        [ 9.0477e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1710e-06],
        [-8.2800e-08,  0.0000e+00, -1.7748e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.4923e-07],
        ...,
        [ 7.9985e-04,  0.0000e+00,  4.8872e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.3603e-04],
        [ 3.4490e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.8335e-05],
        [ 1.1635e-04,  0.0000e+00,  7.5621e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.2504e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.4542e-04,  0.0000e+00, -2.8438e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.1125e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.1115e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.9886e-05,  0.0000e+00, -8.7410e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.1057e-05],
        [ 2.8037e-05,  0.0000e+00, -1.6242e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.8154e-05],
        [-2.1230e-06,  0.0000e+00,  2.6463e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.9451e-05],
        ...,
        [-5.1920e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.4468e-06],
        [-5.4646e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6237e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0838e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.2234e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1910e-04],
        [ 3.6788e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2145e-04],
        [ 2.0394e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.6318e-05,  0.0000e+00,  6.0984e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.3095e-04],
        [ 3.1628e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9025e-05],
        [ 5.6300e-05,  0.0000e+00,  7.1432e-04,  ...,  0.0000e+00,
          0.0000e+00,  7.8394e-05],
        ...,
        [ 3.8395e-05,  0.0000e+00, -6.7144e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.9880e-05],
        [ 2.6864e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7677e-06,  0.0000e+00,  7.9398e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.8233e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6506e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1537e-04],
        [-5.6674e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3160e-05],
        [-1.1416e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.5800e-07,  0.0000e+00, -4.2847e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.4044e-06],
        [ 1.0215e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.2823e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9916e-04],
        ...,
        [-9.3610e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0701e-05],
        [-1.1681e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1365e-05],
        [-9.2068e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3451e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0669e-05,  0.0000e+00, -2.5518e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.4612e-05],
        [ 2.7502e-06,  0.0000e+00, -2.4399e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.8629e-06],
        [ 1.3570e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1335e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3255e-05],
        [ 2.7382e-07,  0.0000e+00, -2.3704e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.7724e-04,  0.0000e+00,  3.7383e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.1753e-03],
        ...,
        [ 1.5026e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.3160e-05,  0.0000e+00,  3.3786e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5419e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9625e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.4646e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9769e-05],
        [-8.3942e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6113e-06],
        [ 3.8633e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9617e-06,  0.0000e+00, -1.8328e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9110e-06,  0.0000e+00,  9.8301e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2070e-05],
        [-1.6580e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6911e-05],
        ...,
        [-1.0029e-07,  0.0000e+00, -2.9632e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2253e-05],
        [-1.0015e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3210e-05],
        [ 4.3716e-07,  0.0000e+00, -1.0055e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.0048e-04,  0.0000e+00, -1.9338e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.4380e-04],
        [-1.0925e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1785e-06],
        [-8.4237e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.2061e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.3407e-07,  0.0000e+00,  1.1652e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.9539e-06],
        [-6.2192e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 3.4766e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.6666e-06,  0.0000e+00, -7.6971e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.0775e-04,  0.0000e+00,  6.6824e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.6446e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0708e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7417e-03],
        [-2.8521e-06,  0.0000e+00,  2.2102e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4063e-06],
        [-1.3027e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.3483e-04,  0.0000e+00, -1.9709e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.7889e-03],
        [ 6.3818e-05,  0.0000e+00,  1.1476e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.1132e-04],
        [ 2.5124e-06,  0.0000e+00,  2.9246e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.6551e-07],
        ...,
        [-1.4626e-03,  0.0000e+00,  3.0157e-03,  ...,  0.0000e+00,
          0.0000e+00, -6.2582e-04],
        [ 3.2309e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2253e-05],
        [ 1.4794e-04,  0.0000e+00,  3.2828e-03,  ...,  0.0000e+00,
          0.0000e+00,  2.6014e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7138e-07,  0.0000e+00, -4.6172e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.1878e-07,  0.0000e+00, -5.0923e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.1976e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.4760e-08,  0.0000e+00, -1.4024e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.8706e-07],
        [ 1.5482e-06,  0.0000e+00, -1.7116e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.3869e-06],
        [-9.8429e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5958e-05],
        ...,
        [-7.5801e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.8106e-04],
        [-5.0885e-06,  0.0000e+00,  1.5238e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.6324e-06],
        [ 2.9777e-04,  0.0000e+00, -1.5966e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.5827e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0677e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6336e-05],
        [-1.3843e-03,  0.0000e+00,  9.4844e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0940e-03],
        [ 2.1544e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1238e-04,  0.0000e+00, -3.7709e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1242e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.0338e-06],
        [-3.3153e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2898e-04],
        ...,
        [-1.4919e-07,  0.0000e+00, -3.4783e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1724e-06],
        [ 2.3094e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6595e-06,  0.0000e+00, -5.2520e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.1995e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.6161e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0785e-04],
        [-5.5242e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5270e-07],
        [-3.4810e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4155e-05,  0.0000e+00,  2.0026e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3741e-04,  0.0000e+00,  1.5257e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.3670e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0497e-06],
        ...,
        [-2.1492e-07,  0.0000e+00, -6.4138e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.1505e-07],
        [ 6.1020e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1165e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.3195e-06,  0.0000e+00, -8.3706e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.8020e-05],
        [ 8.3119e-06,  0.0000e+00, -5.7939e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.8838e-05],
        [ 3.2414e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.3576e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.8287e-05],
        [-1.6451e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0636e-07],
        [-2.1488e-06,  0.0000e+00, -3.9081e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.1676e-06],
        ...,
        [ 1.8783e-05,  0.0000e+00,  1.9843e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.6642e-05],
        [ 1.8823e-04,  0.0000e+00,  2.5352e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.6111e-04],
        [-3.7220e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9701e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9905e-05,  0.0000e+00, -1.5205e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1543e-05],
        [-7.1045e-06,  0.0000e+00, -5.5589e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.6230e-06],
        [-8.4347e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4749e-04,  0.0000e+00,  2.9148e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.8147e-04],
        [ 1.0017e-06,  0.0000e+00, -6.5529e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.9998e-06],
        [-1.0363e-06,  0.0000e+00,  1.7373e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3869e-06],
        ...,
        [ 3.1051e-03,  0.0000e+00,  2.2530e-04,  ...,  0.0000e+00,
          0.0000e+00,  7.3579e-04],
        [ 7.9630e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.5582e-04],
        [-7.7632e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2990e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7761e-08,  0.0000e+00, -4.3033e-08,  ...,  0.0000e+00,
          0.0000e+00,  6.4816e-08],
        [-6.6925e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.3274e-07],
        [ 3.1523e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8725e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3284e-06],
        [ 3.0193e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0930e-05],
        [-1.4920e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6768e-06],
        ...,
        [-4.6895e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.9801e-06,  0.0000e+00, -2.0139e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.0113e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.8289e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2536e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0476e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8906e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6673e-05],
        [ 2.9822e-04,  0.0000e+00, -6.5750e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.9380e-04],
        [ 6.5942e-06,  0.0000e+00,  8.6963e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.1660e-06],
        ...,
        [ 4.4087e-06,  0.0000e+00,  1.3177e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.0592e-06],
        [-3.8827e-07,  0.0000e+00, -8.9233e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.2461e-06],
        [ 1.9213e-05,  0.0000e+00, -2.2901e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.2709e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.4879e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.6831e-05],
        [-1.5712e-05,  0.0000e+00, -6.1527e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.1896e-05],
        [ 5.3666e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.9104e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.9368e-07,  0.0000e+00, -7.4157e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.4562e-06],
        [-7.8887e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0588e-06],
        ...,
        [-9.6354e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9801e-06],
        [ 5.7084e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8947e-06],
        [-1.5930e-05,  0.0000e+00,  2.0453e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.0624e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.9241e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.4870e-04],
        [ 2.2784e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6977e-04],
        [ 1.2558e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.2746e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5895e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.6943e-06],
        [-5.5598e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8547e-06],
        ...,
        [ 8.9041e-06,  0.0000e+00, -2.4996e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7504e-05],
        [ 3.9893e-05,  0.0000e+00,  1.1224e-04,  ...,  0.0000e+00,
          0.0000e+00, -5.0275e-05],
        [ 1.9707e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.9705e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0823e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1753e-04],
        [-1.1134e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.9963e-07],
        [ 1.3585e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0245e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5909e-05],
        [ 4.7307e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6578e-06],
        [ 3.6076e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3599e-06],
        ...,
        [-9.0522e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8900e-05],
        [ 6.1292e-05,  0.0000e+00,  2.2530e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.2714e-04],
        [-3.4163e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.8585e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.7692e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1842e-04],
        [-8.9083e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3466e-06],
        [ 4.0119e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9305e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4123e-06],
        [-9.4115e-06,  0.0000e+00, -3.2191e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0587e-05],
        [ 0.0000e+00,  0.0000e+00, -1.1007e-03,  ...,  0.0000e+00,
          0.0000e+00, -9.3765e-04],
        ...,
        [-4.5466e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8444e-06],
        [ 6.1530e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8301e-06],
        [-6.5676e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7438e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.8228e-05,  0.0000e+00, -5.0341e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.3359e-05],
        [-2.6759e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7811e-06],
        [-7.9994e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3555e-05,  0.0000e+00,  5.2143e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0689e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6136e-05,  0.0000e+00,  2.3570e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5269e-05],
        ...,
        [-4.1288e-10,  0.0000e+00, -2.1489e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.3454e-07],
        [ 0.0000e+00,  0.0000e+00, -3.0241e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.6790e-04],
        [-4.6069e-06,  0.0000e+00, -4.3916e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.2473e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0053e-05,  0.0000e+00, -3.6456e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.7593e-05],
        [-3.2026e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.0699e-05],
        [ 1.2015e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.5061e-05,  0.0000e+00, -3.4584e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.9088e-05],
        [ 2.7924e-07,  0.0000e+00, -3.9298e-07,  ...,  0.0000e+00,
          0.0000e+00,  8.4395e-07],
        [-1.6559e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1350e-06],
        ...,
        [ 4.3211e-04,  0.0000e+00, -1.1700e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.2784e-03],
        [ 2.6004e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.6126e-06,  0.0000e+00,  6.2830e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.6479e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6575e-06,  0.0000e+00, -3.0968e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.6614e-06],
        [ 2.0161e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.8778e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.0356e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.6651e-07,  0.0000e+00,  1.2566e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.1060e-07],
        [-7.3136e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7511e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00, -3.7132e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.4226e-05],
        [ 3.3869e-05,  0.0000e+00, -1.0773e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0998e-04],
        [ 9.6705e-06,  0.0000e+00, -1.4765e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6841e-06],
        [-4.9248e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5614e-06],
        [ 1.8268e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.8055e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7529e-05],
        [ 1.5780e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6352e-05],
        ...,
        [ 1.7581e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.9783e-05],
        [-4.9780e-08,  0.0000e+00, -5.6340e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.1754e-06,  0.0000e+00,  3.6578e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.7818e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4460e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5758e-05],
        [-9.9113e-08,  0.0000e+00, -7.5506e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.5424e-07],
        [-5.3632e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.9209e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7609e-05],
        [ 6.8955e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.5381e-07],
        [ 1.6068e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.4446e-04],
        ...,
        [ 1.2359e-05,  0.0000e+00, -3.6307e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.6459e-05],
        [-7.8142e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1219e-03],
        [-4.3098e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9750e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.5942e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2393e-05],
        [ 1.5675e-05,  0.0000e+00,  1.5161e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3329e-05],
        [-1.1798e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.2889e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8538e-07],
        [ 2.1788e-05,  0.0000e+00,  1.2109e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0854e-04],
        [-6.2219e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1242e-05],
        ...,
        [ 5.5402e-06,  0.0000e+00,  1.6376e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.2259e-05],
        [ 1.4586e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9373e-04],
        [ 2.3712e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9729e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.5763e-08,  0.0000e+00, -1.2957e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.0210e-07],
        [ 2.0785e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8678e-05],
        [ 1.3477e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.3441e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9988e-06],
        [-9.7302e-07,  0.0000e+00, -1.5547e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.8935e-07],
        [-1.0418e-06,  0.0000e+00, -4.8459e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.8536e-06],
        ...,
        [ 3.9199e-05,  0.0000e+00, -1.7859e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.4330e-04],
        [-3.7846e-07,  0.0000e+00, -4.0488e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.6812e-07],
        [ 2.0203e-04,  0.0000e+00,  1.8877e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.7966e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.1095e-07,  0.0000e+00, -2.7331e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1677e-06],
        [ 9.4555e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3807e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.2345e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8259e-05],
        [-1.0213e-05,  0.0000e+00, -9.1836e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7953e-05],
        [ 9.9624e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.6961e-06,  0.0000e+00, -3.4487e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.4624e-05],
        [-9.1363e-05,  0.0000e+00, -6.2067e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.5415e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2239e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1224e-06,  0.0000e+00,  1.3218e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.0692e-06],
        [ 8.0602e-06,  0.0000e+00,  1.2784e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0319e-05],
        [-1.1815e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9399e-07,  0.0000e+00, -2.4906e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.7336e-06],
        [ 3.2242e-06,  0.0000e+00, -6.6658e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.0981e-06],
        [ 3.0811e-06,  0.0000e+00, -6.7018e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.5991e-06],
        ...,
        [-3.9547e-04,  0.0000e+00,  1.1961e-03,  ...,  0.0000e+00,
          0.0000e+00,  6.6253e-04],
        [-5.6534e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5149e-06],
        [-1.7586e-05,  0.0000e+00,  1.8714e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.3886e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9766e-06,  0.0000e+00, -1.2693e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.1738e-06],
        [-6.9512e-05,  0.0000e+00, -8.0698e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1500e-04],
        [-8.8087e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7463e-05,  0.0000e+00,  4.6745e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0329e-05],
        [ 7.1365e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0706e-06,  0.0000e+00, -6.4249e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.5228e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4692e-05],
        [ 1.3998e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.5774e-07,  0.0000e+00, -7.2373e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.1979e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.0344e-04,  0.0000e+00, -1.1294e-03,  ...,  0.0000e+00,
          0.0000e+00, -3.3422e-04],
        [-1.7185e-08,  0.0000e+00, -4.0446e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.8726e-08],
        [-1.4758e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.1380e-04,  0.0000e+00,  1.2751e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.4251e-04],
        [-9.6688e-06,  0.0000e+00,  1.3477e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.2209e-04,  0.0000e+00, -4.9400e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.4971e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00, -3.9608e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.4842e-06],
        [ 0.0000e+00,  0.0000e+00, -3.9461e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.8977e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8128e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3285e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6840e-04],
        [ 8.2187e-06,  0.0000e+00,  7.3966e-07,  ...,  0.0000e+00,
          0.0000e+00,  5.0472e-06],
        [ 1.4808e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.7562e-05,  0.0000e+00,  6.1253e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.0288e-05],
        [-6.9346e-07,  0.0000e+00, -2.2713e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1319e-06],
        [ 3.4325e-05,  0.0000e+00, -3.0105e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.7331e-05],
        ...,
        [-2.2391e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9342e-07,  0.0000e+00, -1.6955e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4845e-06,  0.0000e+00, -1.7748e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.4853e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0669e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6081e-07],
        [ 3.1863e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.6676e-05,  0.0000e+00, -2.4930e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.7846e-07,  0.0000e+00,  2.3393e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.9370e-07],
        [ 4.2374e-05,  0.0000e+00, -1.3639e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.0360e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.4340e-06],
        [-8.8390e-06,  0.0000e+00,  1.1742e-04,  ...,  0.0000e+00,
          0.0000e+00,  8.5757e-05],
        [-2.6397e-05,  0.0000e+00,  3.8120e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9001e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8041e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6503e-05],
        [-3.0964e-05,  0.0000e+00,  6.8811e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4134e-05],
        [-1.2513e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0770e-06,  0.0000e+00, -3.0503e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7778e-06],
        [ 1.9678e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.9035e-07],
        [-6.7763e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.6614e-06],
        ...,
        [ 1.3571e-03,  0.0000e+00, -9.1854e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.1136e-03],
        [ 3.0502e-06,  0.0000e+00,  1.0464e-06,  ...,  0.0000e+00,
          0.0000e+00,  7.2702e-06],
        [ 5.6991e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6127e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6855e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3035e-03,  0.0000e+00, -9.4245e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.8265e-04],
        [ 1.6792e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.0564e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3348e-04],
        [ 0.0000e+00,  0.0000e+00, -2.5632e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9264e-04],
        [ 1.7689e-03,  0.0000e+00,  1.2154e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.6042e-03],
        ...,
        [ 1.5894e-07,  0.0000e+00,  3.8105e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.2779e-07],
        [-3.0011e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3005e-08,  0.0000e+00, -4.4308e-08,  ...,  0.0000e+00,
          0.0000e+00,  6.4824e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.5424e-07,  0.0000e+00, -1.4062e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.4457e-06],
        [ 9.5702e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.2367e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.2803e-07,  0.0000e+00, -1.7763e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.0708e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2135e-05],
        [-6.2650e-07,  0.0000e+00,  6.1808e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9040e-06],
        ...,
        [-1.4350e-06,  0.0000e+00,  1.5540e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.7923e-06],
        [-1.9792e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2968e-05],
        [ 1.9856e-06,  0.0000e+00,  3.2578e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.8428e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.2407e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.7139e-04,  0.0000e+00,  1.4957e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.0435e-04],
        [ 1.0510e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7309e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6406e-04],
        [ 1.8448e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8984e-05],
        [-1.8701e-07,  0.0000e+00, -3.0082e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.0745e-07],
        ...,
        [ 6.9250e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3720e-05],
        [ 8.0037e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9875e-05],
        [ 1.7685e-07,  0.0000e+00,  2.7317e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7287e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.0013e-08,  0.0000e+00,  2.3835e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.4519e-07],
        [ 1.0681e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.5180e-07],
        [-3.5224e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.8394e-05,  0.0000e+00, -9.6577e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.9329e-05],
        [ 6.3142e-06,  0.0000e+00, -1.4767e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.6179e-05],
        [ 5.6622e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1053e-03],
        ...,
        [-7.8440e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.8258e-04],
        [ 1.1334e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1036e-05],
        [-1.3695e-05,  0.0000e+00, -2.8380e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.6699e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6153e-06,  0.0000e+00, -2.2102e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.6988e-07],
        [ 9.1069e-06,  0.0000e+00, -9.9904e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.4622e-06],
        [ 4.8356e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3389e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2490e-04],
        [ 4.3617e-05,  0.0000e+00,  2.5592e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.8557e-04],
        [-2.6230e-07,  0.0000e+00, -2.6111e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.4907e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.9530e-06],
        [-3.3117e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0334e-05],
        [-2.7010e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0796e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4108e-06,  0.0000e+00, -2.6852e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.8955e-06],
        [-3.1489e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.2779e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4410e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4861e-06],
        [-2.3523e-05,  0.0000e+00, -6.7805e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.6806e-05],
        [-1.5911e-05,  0.0000e+00, -2.9496e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2279e-05],
        ...,
        [-2.1522e-07,  0.0000e+00,  9.3599e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.2394e-06],
        [ 8.7220e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4937e-04],
        [-2.0599e-05,  0.0000e+00,  3.1457e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1504e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7486e-05],
        [ 3.3546e-07,  0.0000e+00, -1.6597e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.8683e-07],
        [-5.6863e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6836e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6143e-04],
        [ 2.2182e-04,  0.0000e+00, -2.0910e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3552e-04],
        [-4.1699e-05,  0.0000e+00,  4.6606e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.0967e-04],
        ...,
        [ 5.1207e-04,  0.0000e+00, -3.0282e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2473e-06,  0.0000e+00, -6.3876e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.4099e-06],
        [ 1.9737e-06,  0.0000e+00,  1.0818e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4709e-05,  0.0000e+00,  1.0541e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.8877e-05],
        [-5.1608e-06,  0.0000e+00,  5.0801e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.4733e-05],
        [ 1.4658e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6402e-05,  0.0000e+00,  2.0877e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.5098e-04],
        [ 2.7100e-06,  0.0000e+00, -4.6911e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.9736e-06,  0.0000e+00,  1.4094e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.9775e-05],
        ...,
        [ 1.2261e-05,  0.0000e+00,  8.7519e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.0655e-05],
        [ 1.2819e-06,  0.0000e+00, -5.6389e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.6455e-06],
        [-2.3480e-08,  0.0000e+00, -5.9770e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.7487e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3999e-05,  0.0000e+00, -4.4303e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1143e-05],
        [-3.4891e-05,  0.0000e+00,  2.6912e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.3926e-05],
        [-1.3791e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4957e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0184e-06],
        [-2.7870e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.7242e-05],
        [-3.1345e-04,  0.0000e+00,  4.0071e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.7656e-04],
        ...,
        [-9.8495e-05,  0.0000e+00, -3.5332e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1983e-04],
        [-4.3982e-06,  0.0000e+00, -3.1369e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.5254e-06],
        [-1.3215e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4521e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.0537e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1009e-04],
        [ 8.4277e-06,  0.0000e+00, -5.6071e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.4806e-05],
        [-1.2466e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.5819e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8614e-04],
        [-8.7579e-04,  0.0000e+00,  1.6853e-03,  ...,  0.0000e+00,
          0.0000e+00, -7.2130e-04],
        [-2.1910e-03,  0.0000e+00, -2.8145e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.3686e-03],
        ...,
        [-1.3094e-05,  0.0000e+00,  1.9214e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0102e-05],
        [ 1.7217e-04,  0.0000e+00,  1.4301e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.8368e-04],
        [ 8.3481e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.4722e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.7219e-07,  0.0000e+00, -4.2499e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1150e-06],
        [ 3.5888e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4896e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.2173e-07,  0.0000e+00, -1.8003e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.2961e-07],
        [-3.7617e-04,  0.0000e+00,  5.0539e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.6318e-04],
        [ 0.0000e+00,  0.0000e+00,  7.1118e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.8354e-07],
        ...,
        [-4.8963e-05,  0.0000e+00,  4.3371e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.5749e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6928e-05],
        [ 1.3012e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2018e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7654e-06,  0.0000e+00,  1.7974e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.9448e-06],
        [-3.3226e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.3285e-05],
        [-3.4037e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5191e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.4247e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.6768e-07],
        ...,
        [ 3.5378e-06,  0.0000e+00, -2.1905e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4786e-05],
        [-3.6796e-07,  0.0000e+00, -1.5279e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.6077e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.8153e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2814e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4600e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  3.1257e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6491e-06,  0.0000e+00,  3.8910e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.3693e-05],
        [-2.1562e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5183e-07],
        ...,
        [-1.4613e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.6703e-06,  0.0000e+00, -3.5306e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.4129e-05],
        [ 1.0456e-03,  0.0000e+00, -1.6530e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.6697e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.0186e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.8607e-06],
        [-2.4182e-04,  0.0000e+00, -1.2041e-03,  ...,  0.0000e+00,
          0.0000e+00, -4.7393e-04],
        [-4.7779e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0806e-03,  0.0000e+00, -1.1578e-03,  ...,  0.0000e+00,
          0.0000e+00,  6.6387e-04],
        [ 5.4837e-09,  0.0000e+00, -2.4082e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.9998e-07,  0.0000e+00,  4.3791e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.6147e-07],
        ...,
        [ 4.8987e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0654e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.2104e-04,  0.0000e+00, -5.8152e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.2211e-06,  0.0000e+00,  2.9246e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0102e-05],
        [ 4.6392e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4247e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.0034e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5288e-06],
        [ 1.3912e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6537e-04],
        [ 1.1109e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.9019e-05],
        ...,
        [ 4.2991e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5384e-05,  0.0000e+00,  2.2921e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1822e-06,  0.0000e+00,  5.5905e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7286e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.8027e-03],
        [ 1.1109e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0273e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0913e-05,  0.0000e+00, -1.5544e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.8041e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4178e-07,  0.0000e+00, -5.4161e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1176e-06],
        ...,
        [ 2.6982e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.4370e-06,  0.0000e+00,  1.3594e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.6812e-06],
        [-3.2502e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3540e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.4387e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6735e-06],
        [-1.1104e-06,  0.0000e+00, -1.7528e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.9527e-06],
        [-2.4376e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.4270e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0469e-05],
        [ 2.7801e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0933e-04],
        [ 7.0137e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.9574e-05],
        ...,
        [ 3.1332e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.6020e-04],
        [ 1.5337e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1230e-04],
        [-8.5929e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4844e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.1029e-06,  0.0000e+00,  3.8894e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.3683e-07],
        [-2.8654e-07,  0.0000e+00, -5.6299e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.5612e-06],
        [ 1.3607e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.6657e-06,  0.0000e+00,  4.8033e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.1262e-05],
        [ 1.0052e-05,  0.0000e+00,  5.0754e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.7896e-06],
        [-4.5121e-05,  0.0000e+00,  1.7888e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.6758e-05],
        ...,
        [-7.6993e-06,  0.0000e+00,  1.7850e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2233e-05],
        [-3.8435e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7486e-05],
        [ 2.3705e-07,  0.0000e+00, -5.6232e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.9960e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3084e-05,  0.0000e+00,  8.5965e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.3951e-08],
        [ 1.5041e-07,  0.0000e+00, -1.1530e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.0677e-07],
        [ 6.7140e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.1383e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7841e-06],
        [ 1.1498e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8418e-05],
        [ 3.7189e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3440e-07],
        ...,
        [-6.5991e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.9644e-06],
        [ 1.6337e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.6108e-07],
        [ 7.6404e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.3355e-07,  0.0000e+00, -6.4477e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.0622e-07],
        [ 2.6476e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7718e-07],
        [-4.3402e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.4742e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.5371e-05],
        [ 2.7963e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1266e-07],
        [ 1.3553e-05,  0.0000e+00, -1.2182e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.6588e-05],
        ...,
        [-2.6417e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.5311e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.4004e-06],
        [-1.0016e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0443e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0156e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.4938e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3768e-07],
        [-4.1484e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5579e-05,  0.0000e+00, -2.2003e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.0436e-05],
        [ 3.2554e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1714e-04],
        ...,
        [ 2.6528e-05,  0.0000e+00, -1.9110e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.6605e-05],
        [ 8.0474e-07,  0.0000e+00, -5.0511e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.2123e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9721e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.8390e-04,  0.0000e+00,  4.2569e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.1209e-03],
        [ 0.0000e+00,  0.0000e+00,  1.3663e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.1151e-06],
        [-2.3472e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6923e-05,  0.0000e+00,  7.7810e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.2503e-04],
        [-6.4960e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.2132e-08],
        [ 5.5948e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1314e-04],
        ...,
        [ 3.5914e-07,  0.0000e+00, -3.3188e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7676e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.8769e-06,  0.0000e+00, -1.9592e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.7190e-05],
        [-1.9684e-05,  0.0000e+00, -1.3051e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.9321e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7403e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3492e-07,  0.0000e+00, -1.5409e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.6780e-05,  0.0000e+00,  8.5866e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.8212e-05],
        ...,
        [ 2.7424e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.6405e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3090e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0823e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1430e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.6685e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5970e-05],
        [-3.3313e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.9917e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8623e-05],
        [ 1.5719e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2283e-08,  0.0000e+00,  5.9119e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.0210e-06,  0.0000e+00, -6.2285e-07,  ...,  0.0000e+00,
          0.0000e+00,  9.8612e-07],
        [ 3.3266e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.5845e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.3037e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -9.9112e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.7087e-06],
        [ 7.8704e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.6724e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3286e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0835e-06],
        [-8.9774e-06,  0.0000e+00,  5.6540e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.7003e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7556e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00, -3.1521e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.5273e-06],
        [-2.7474e-07,  0.0000e+00, -2.9716e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.4733e-07],
        [-1.8374e-06,  0.0000e+00,  1.4756e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.6455e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.2006e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1872e-06],
        [-3.0891e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2474e-06],
        [-1.6743e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3895e-05,  0.0000e+00,  1.2135e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.2647e-05],
        [ 3.6863e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1928e-03],
        [ 5.9188e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  1.6937e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0264e-05],
        [ 6.8185e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1027e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1159e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.1378e-04,  0.0000e+00,  2.6277e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.1472e-04],
        [-2.4804e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4448e-07],
        [-1.0857e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1847e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5300e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.1013e-06],
        [ 3.8109e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2571e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -6.3457e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.4933e-05],
        [ 0.0000e+00,  0.0000e+00, -1.8631e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2797e-05],
        [-2.3279e-04,  0.0000e+00,  3.8906e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.1063e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1194e-05,  0.0000e+00,  1.8825e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.6161e-06],
        [-1.7869e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.5910e-06,  0.0000e+00, -1.9318e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.6979e-05],
        [ 7.4249e-07,  0.0000e+00, -6.6671e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.9657e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.2837e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1013e-05],
        [ 3.7736e-05,  0.0000e+00, -3.6144e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.5264e-04],
        [ 6.1761e-08,  0.0000e+00,  4.3528e-07,  ...,  0.0000e+00,
          0.0000e+00,  4.6400e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.9043e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.1891e-05],
        [ 6.0297e-05,  0.0000e+00,  1.4524e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.8747e-04],
        [-1.9009e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1073e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9463e-06],
        [-1.7560e-07,  0.0000e+00, -3.7688e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.3640e-07],
        [-4.3950e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3690e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.3507e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.6395e-04],
        [-3.6709e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4377e-04,  0.0000e+00, -2.2242e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.9901e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  2.3690e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3026e-06],
        [-1.0368e-05,  0.0000e+00,  1.5448e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0558e-04],
        [-4.9158e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.4233e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.9803e-06],
        [-1.7964e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3521e-04],
        [ 3.4826e-05,  0.0000e+00,  4.1047e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -5.1112e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.4740e-07],
        [ 0.0000e+00,  0.0000e+00, -4.0043e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.7103e-05],
        [ 1.0011e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.3825e-05,  0.0000e+00,  5.1680e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.4523e-05],
        [-1.5794e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6733e-06],
        [-1.8240e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.4040e-07,  0.0000e+00, -7.4980e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.4380e-06],
        [-1.7027e-07,  0.0000e+00, -3.7759e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.6416e-07],
        [ 1.2578e-06,  0.0000e+00, -1.4080e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.5403e-06],
        ...,
        [-4.5389e-07,  0.0000e+00,  3.4247e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.1853e-07],
        [-6.9199e-09,  0.0000e+00, -1.4215e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.7923e-08],
        [-3.7335e-06,  0.0000e+00,  5.9417e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.7620e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7944e-05,  0.0000e+00, -1.2277e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.4637e-05],
        [-9.1799e-07,  0.0000e+00, -2.0347e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.4779e-07],
        [-1.6988e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -6.6774e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.4279e-05],
        [ 7.8093e-07,  0.0000e+00,  5.0076e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.4263e-07],
        [ 0.0000e+00,  0.0000e+00, -4.1194e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.6975e-05,  0.0000e+00, -1.0913e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.8210e-05],
        [ 1.1010e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.5977e-06],
        [ 3.7279e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5341e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4179e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5626e-04],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7086e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.4780e-05],
        [-1.6120e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6815e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0214e-06],
        ...,
        [-1.1610e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1294e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.3165e-04],
        [-8.8724e-05,  0.0000e+00,  1.6098e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0811e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.1043e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.3206e-07],
        [-6.3920e-05,  0.0000e+00, -1.1196e-03,  ...,  0.0000e+00,
          0.0000e+00, -8.3245e-05],
        [-2.4002e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8720e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0763e-05,  0.0000e+00,  3.4287e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.5686e-05],
        [-8.2868e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.8907e-04,  0.0000e+00,  1.9497e-03,  ...,  0.0000e+00,
          0.0000e+00, -5.2494e-04],
        [ 3.3883e-06,  0.0000e+00,  1.0063e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.6591e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2727e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1093e-07],
        [ 1.0558e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2227e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3740e-04,  0.0000e+00,  6.7694e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.5235e-04],
        [-5.2368e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2875e-07],
        [-8.5102e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.9528e-05,  0.0000e+00, -3.9886e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7226e-05],
        [ 6.4494e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.7975e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1352e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.1169e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7909e-05],
        [ 9.7477e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3170e-06],
        [ 1.2277e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4639e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.6520e-06],
        [-3.5418e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.8334e-06],
        [-3.9994e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.4098e-06],
        ...,
        [-6.3710e-06,  0.0000e+00, -5.2571e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1205e-05],
        [-1.1523e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8339e-06],
        [ 0.0000e+00,  0.0000e+00,  4.9356e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.9586e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3816e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9163e-04],
        [ 1.7828e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4298e-06,  0.0000e+00,  7.7085e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.3395e-06],
        [-7.4713e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0712e-06],
        [ 3.8802e-07,  0.0000e+00, -1.2425e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.5000e-05,  0.0000e+00,  1.5262e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5731e-05],
        [ 3.4376e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0741e-04],
        [ 2.5898e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9469e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8150e-05,  0.0000e+00,  3.9095e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0079e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.1047e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6620e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.4209e-07],
        [-5.4832e-06,  0.0000e+00,  1.5144e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.2577e-06],
        [ 3.5611e-06,  0.0000e+00, -2.3068e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.1694e-06],
        ...,
        [ 2.6411e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0907e-03],
        [-6.8627e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1163e-06],
        [ 1.1612e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0405e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9092e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7412e-04],
        [ 1.2192e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2650e-04],
        [-1.3780e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8901e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2714e-05],
        [ 2.3487e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0466e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0546e-06],
        ...,
        [ 1.9547e-05,  0.0000e+00, -1.7430e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.3060e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2007e-04],
        [-2.1489e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4675e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.9029e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.7570e-07],
        [ 0.0000e+00,  0.0000e+00,  2.7685e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.5319e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7804e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3864e-04],
        [-4.1441e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2620e-05],
        [ 1.5775e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-4.2237e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.8195e-05],
        [ 5.6932e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.8287e-06],
        [ 0.0000e+00,  0.0000e+00, -9.0080e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.5615e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  9.5340e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3064e-05,  0.0000e+00,  5.6280e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.7073e-06],
        [ 0.0000e+

Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -8.2612e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.3550e-06],
        [ 0.0000e+00,  0.0000e+00, -6.5889e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.0047e-06],
        [-1.0525e-07,  0.0000e+00,  1.4075e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0243e-06],
        ...,
        [ 1.5635e-03,  0.0000e+00, -9.9914e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.7654e-04],
        [-3.8804e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0652e-05],
        [-6.7439e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3995e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8072e-05,  0.0000e+00, -1.3875e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.9745e-05],
        [ 4.3768e-10,  0.0000e+00, -1.0270e-08,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6729e-06,  0.0000e+00, -3.4467e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.6334e-06],
        [ 9.8637e-04,  0.0000e+00, -1.7487e-03,  ...,  0.0000e+00,
          0.0000e+00,  2.8146e-03],
        [ 1.2495e-06,  0.0000e+00, -3.2931e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.3945e-06],
        ...,
        [-6.2463e-09,  0.0000e+00, -4.1716e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.3087e-09],
        [ 6.6134e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.4099e-07,  0.0000e+00, -8.5862e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.3713e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8102e-08,  0.0000e+00, -7.0127e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.8329e-07,  0.0000e+00, -1.5912e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.8993e-06],
        [ 7.5657e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.4932e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0623e-06],
        [-3.9551e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7193e-07],
        [ 6.5622e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0451e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4421e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.3067e-05],
        [ 2.7828e-06,  0.0000e+00,  1.9466e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.5060e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8208e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4426e-04],
        [-1.3505e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2211e-05],
        [-2.1709e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8541e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5365e-06],
        [ 9.1615e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3116e-07],
        [ 0.0000e+00,  0.0000e+00,  8.0755e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.7856e-06],
        ...,
        [ 9.1847e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6528e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.2355e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.4571e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.0269e-06,  0.0000e+00, -5.9433e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.4763e-06],
        [-1.0172e-04,  0.0000e+00,  6.7923e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.7720e-04],
        [-1.4494e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3481e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.5020e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0958e-05],
        [-1.4966e-05,  0.0000e+00,  2.4957e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.5675e-06],
        ...,
        [-2.1824e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4702e-06],
        [ 1.7167e-07,  0.0000e+00,  3.3823e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.5873e-07],
        [ 1.4592e-05,  0.0000e+00, -7.1322e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.0870e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8526e-06,  0.0000e+00, -7.1597e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9900e-06],
        [ 1.5726e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8044e-07],
        [-2.2034e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8355e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.2477e-07],
        [ 1.2152e-05,  0.0000e+00, -1.4545e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0112e-05],
        [ 1.8455e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.3494e-09],
        [ 0.0000e+00,  0.0000e+00,  2.1567e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.6615e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2466e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.2345e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.3938e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.9490e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7503e-08],
        [-2.7791e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.1297e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0261e-06],
        ...,
        [-5.2989e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3044e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.0432e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3394e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.7971e-06,  0.0000e+00, -2.3854e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.6230e-06],
        [ 9.7124e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.1874e-06],
        [ 2.4131e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5412e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3651e-04],
        [ 7.6511e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.2530e-05],
        ...,
        [ 1.9159e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.6033e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1322e-05],
        [-3.8337e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.8262e-08,  0.0000e+00, -1.7133e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.1961e-07],
        [ 6.0192e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.0498e-05],
        [-3.0963e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.2596e-06,  0.0000e+00,  1.4713e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.5319e-06],
        [-1.3323e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3187e-06],
        [-1.0471e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.0170e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.3907e-06],
        [ 2.4149e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.3588e-06,  0.0000e+00, -1.3393e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1239e-03,  0.0000e+00, -3.7871e-03,  ...,  0.0000e+00,
          0.0000e+00, -2.2618e-03],
        [-6.1773e-04,  0.0000e+00,  1.1099e-03,  ...,  0.0000e+00,
          0.0000e+00, -9.0200e-04],
        [-1.1217e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.8546e-06,  0.0000e+00,  3.7717e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1735e-06,  0.0000e+00, -1.2694e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.8707e-06],
        [ 2.1885e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.3473e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.7222e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.5232e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4625e-09,  0.0000e+00, -3.4247e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.6867e-09],
        [ 5.1242e-06,  0.0000e+00, -4.9054e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.0001e-05],
        [-9.3628e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3124e-04,  0.0000e+00,  2.3776e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.5374e-05,  0.0000e+00,  2.2098e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.3009e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-6.4906e-08,  0.0000e+00, -2.1327e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.6402e-08],
        [ 2.8006e-06,  0.0000e+00,  7.0268e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.9420e-06],
        [ 2.5646e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2934e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.8387e-06,  0.0000e+00,  2.5836e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.9039e-06],
        [ 4.4812e-06,  0.0000e+00,  3.6112e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.2783e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0379e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.8056e-07,  0.0000e+00, -7.1073e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6150e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6230e-04],
        ...,
        [-4.1120e-08,  0.0000e+00,  6.0819e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.7121e-08],
        [ 3.7298e-05,  0.0000e+00, -1.5437e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.2393e-05],
        [-1.2247e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1830e-06,  0.0000e+00, -9.8475e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.1535e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1634e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.7292e-06,  0.0000e+00, -4.2793e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.8986e-05],
        [-4.3321e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0629e-06],
        [ 8.4487e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4105e-08],
        ...,
        [ 1.3333e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.6740e-07],
        [ 9.4177e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3736e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.8221e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8185e-06],
        [ 9.3704e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.8676e-07],
        [-2.6165e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.5381e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.9740e-07,  0.0000e+00, -3.1306e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9052e-06],
        [-3.2964e-06,  0.0000e+00, -1.1499e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.5824e-06],
        ...,
        [-6.3752e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9851e-05],
        [-6.3546e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3551e-08],
        [ 7.1481e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2470e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4156e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.4465e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.2485e-05],
        [ 6.4081e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.0291e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0731e-05,  0.0000e+00,  1.6152e-04,  ...,  0.0000e+00,
          0.0000e+00,  4.8282e-06],
        [-5.1795e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 8.3501e-05,  0.0000e+00,  1.7215e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.0237e-04],
        [ 8.2968e-07,  0.0000e+00, -1.0419e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1135e-05,  0.0000e+00,  1.0490e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9420e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4018e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1381e-06],
        [-4.5021e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1557e-04],
        [-7.5061e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.8646e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.9299e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2326e-06],
        [-1.8819e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.9234e-05],
        [-2.5516e-04,  0.0000e+00, -5.5328e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.2522e-04],
        [ 2.4971e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.0810e-06,  0.0000e+00, -4.3728e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.8553e-06],
        [-3.2187e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5131e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.8254e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3644e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7001e-05],
        [ 0.0000e+00,  0.0000e+00,  2.1165e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.8958e-05],
        ...,
        [ 9.1006e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.6465e-09,  0.0000e+00, -1.1022e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.3295e-09],
        [ 4.0552e-06,  0.0000e+00,  7.5486e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.9004e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8294e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8652e-05],
        [ 5.7819e-06,  0.0000e+00,  1.0681e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.6035e-06],
        [-2.7023e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.0024e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3077e-04,  0.0000e+00,  2.7573e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3912e-04],
        [-7.4633e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1241e-06],
        ...,
        [ 7.7997e-04,  0.0000e+00, -3.8674e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.8795e-03],
        [ 2.3792e-04,  0.0000e+00, -6.0092e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.1409e-03],
        [-6.8043e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5193e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8384e-08,  0.0000e+00, -3.1649e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.5538e-07],
        [ 2.0765e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4734e-06],
        [-1.0344e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.6601e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4800e-07,  0.0000e+00,  1.1229e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3511e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1339e-05],
        ...,
        [-3.3287e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7795e-07],
        [ 1.5679e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.0296e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.8102e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.0594e-06,  0.0000e+00,  4.7018e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.4518e-05],
        [-2.6248e-04,  0.0000e+00,  2.3012e-03,  ...,  0.0000e+00,
          0.0000e+00,  4.3044e-04],
        [ 5.8273e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3493e-05,  0.0000e+00, -3.0730e-04,  ...,  0.0000e+00,
          0.0000e+00, -5.3598e-05],
        [-5.2477e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0068e-05],
        [ 1.2143e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.0278e-06,  0.0000e+00,  2.2634e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.0278e-06],
        [ 3.3794e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.9729e-04],
        [-3.4518e-07,  0.0000e+00, -5.2487e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.9304e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.3177e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2488e-07],
        [ 1.1265e-07,  0.0000e+00,  1.6474e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.5966e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.9529e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4364e-07],
        [ 3.4530e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -3.9545e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.3463e-07],
        ...,
        [ 1.6307e-06,  0.0000e+00,  1.7528e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.4771e-08,  0.0000e+00, -2.5016e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.1951e-07],
        [-2.6711e-05,  0.0000e+00, -3.6085e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5309e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7424e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.1383e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1440e-06],
        [-1.2686e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7820e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.4305e-06],
        [-1.5150e-07,  0.0000e+00,  1.9640e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -8.2248e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.6595e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8988e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9862e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.9256e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8203e-

Gradients values: tensor([[ 4.1910e-06,  0.0000e+00, -3.3523e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0430e-05],
        [ 4.0027e-08,  0.0000e+00, -1.7638e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.6716e-07],
        [ 0.0000e+00,  0.0000e+00,  2.3568e-03,  ...,  0.0000e+00,
          0.0000e+00, -5.8297e-05],
        ...,
        [-1.8447e-06,  0.0000e+00,  4.8531e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.5899e-06],
        [ 8.4242e-06,  0.0000e+00, -2.3045e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.6620e-05],
        [-1.2873e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5114e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.9171e-05],
        [-8.5362e-06,  0.0000e+00,  1.4704e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.6053e-06],
        [-3.7730e-05,  0.0000e+00, -1.7181e-03,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.6767e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4937e-04],
        [-5.4313e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.7919e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.7222e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4782e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.9657e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7388e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.3303e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.0557e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.7941e-07,  0.0000e+00,  6.4174e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.3573e-07],
        [-1.1699e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6169e-05],
        [ 0.0000e+00,  0.0000e+00, -2.5913e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.6730e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2458e-06,  0.0000e+00, -5.4495e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8371e-06],
        [-8.0900e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0185e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8366e-06,  0.0000e+00, -1.8552e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.6694e-06],
        [-2.4852e-04,  0.0000e+00,  4.1753e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.5348e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.3058e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.1613e-07],
        [ 1.9754e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0473e-05],
        [-2.0345e-07,  0.0000e+00, -3.1608e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.2813e-06],
        ...,
        [ 1.3980e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.2135e-06],
        [-1.0679e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.0168e-05],
        [-7.8953e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4614e-06,  0.0000e+00,  1.4428e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9297e-06],
        [ 4.7460e-04,  0.0000e+00, -5.0305e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.7867e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1107e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5339e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7152e-05],
        [ 1.4048e-08,  0.0000e+00, -4.8710e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.0911e-03,  0.0000e+00,  1.6824e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.8186e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.3840e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.5626e-08],
        [ 3.0521e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.2890e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.3228e-07,  0.0000e+00,  2.3001e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.8490e-07],
        [ 0.0000e+00,  0.0000e+00,  5.7404e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.0755e-06],
        [ 1.4511e-04,  0.0000e+00,  3.0122e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1202e-05],
        ...,
        [-1.9047e-05,  0.0000e+00,  6.3437e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.0808e-06],
        [-1.4999e-06,  0.0000e+00,  1.5964e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.4391e-06],
        [ 7.3655e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.2344e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5451e-07,  0.0000e+00, -2.2601e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9079e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.2676e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4731e-07],
        [ 1.7039e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2917e-05,  0.0000e+00,  4.6360e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8284e-05],
        ...,
        [ 1.9632e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.4314e-05,  0.0000e+00, -2.7267e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2988e-04],
        [ 2.0554e-05,  0.0000e+00,  4.5546e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.9940e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.7176e-05,  0.0000e+00,  1.3181e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5105e-07,  0.0000e+00, -1.5284e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.3934e-07],
        [-1.2572e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.1161e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.1093e-08],
        [-2.3138e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3681e-06],
        [ 3.6267e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2050e-07],
        ...,
        [-6.9605e-08,  0.0000e+00, -2.4914e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.6945e-07],
        [-1.4555e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.8121e-07],
        [ 0.0000e+00,  0.0000e+00, -8.0983e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.6055e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.9462e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.8320e-05],
        [ 0.0000e+00,  0.0000e+00, -8.2068e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8876e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4366e-04,  0.0000e+00, -3.8272e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.6913e-06,  0.0000e+00, -6.2393e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9168e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 5.8680e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.9422e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4562e-08,  0.0000e+00, -9.8610e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5987e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -4.3662e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.6250e-06],
        [-8.4381e-09,  0.0000e+00, -3.8385e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.5377e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0123e-06],
        [ 9.9605e-05,  0.0000e+00, -5.9843e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0524e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-8.1812e-08,  0.0000e+00, -3.0659e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.6684e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.8344e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6717e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1090e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.0467e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.4629e-05],
        [-3.5868e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.3219e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2458e-06,  0.0000e+00, -3.2721e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0696e-05],
        ...,
        [ 6.9247e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5952e-06,  0.0000e+00,  2.0097e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.2344e-06],
        [ 7.8325e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0186e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8137e-06,  0.0000e+00,  3.9024e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0581e-05,  0.0000e+00,  1.2127e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.5568e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.0271e-07,  0.0000e+00, -1.4126e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.4006e-09],
        [-1.3615e-09,  0.0000e+00, -6.2303e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.6638e-09],
        [ 6.5167e-07,  0.0000e+00,  6.5026e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.2547e-07],
        ...,
        [-2.8423e-06,  0.0000e+00, -3.7018e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9699e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.9276e-06],
        [-7.2815e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7812e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.5034e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4957e-06],
        [ 5.7457e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0481e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.2613e-06],
        [ 2.9348e-06,  0.0000e+00, -1.3337e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.1566e-08],
        [ 9.1180e-08,  0.0000e+00, -9.6590e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5424e-06],
        ...,
        [ 3.5285e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7301e-08],
        [-1.4588e-08,  0.0000e+00, -4.4871e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3967e-06,  0.0000e+00, -7.7364e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.2842e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9277e-07,  0.0000e+00, -2.1756e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.5584e-07],
        [ 1.2791e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9569e-06],
        [ 3.0877e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7639e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9401e-04],
        [ 3.0635e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4141e-05],
        [ 1.0515e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-4.6227e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9371e-06],
        [ 1.7619e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9317e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2074e-05],
        [-6.2131e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5656e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0100e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5405e-06],
        [ 5.9043e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1004e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.0671e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.2981e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7088e-07],
        [ 1.0531e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2598e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.0819e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6518e-07],
        [-3.7361e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0914e-05,  0.0000e+00, -1.4295e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.4720e-05],
        [-5.5362e-08,  0.0000e+00, -1.3514e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.2196e-08],
        [-2.8623e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8306e-04],
        ...,
        [ 6.6129e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0836e-06],
        [-1.6190e-09,  0.0000e+00, -5.2524e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.1339e-09],
        [-4.3065e-04,  0.0000e+00,  1.1345e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.0858e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4362e-04],
        [-2.7225e-07,  0.0000e+00, -9.1076e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.1848e-07],
        [-2.5612e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.2826e-06,  0.0000e+00, -3.1018e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.9690e-06],
        [ 9.4325e-06,  0.0000e+00, -7.2727e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.9816e-06],
        [-7.1775e-09,  0.0000e+00,  2.2629e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.3373e-09],
        ...,
        [ 8.1506e-07,  0.0000e+00,  1.3914e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.7822e-07],
        [ 0.0000e+00,  0.0000e+00,  4.8956e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.2693e-06,  0.0000e+00, -1.0742e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0029e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.2327e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8817e-04],
        [ 3.6011e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9106e-07],
        [-4.7548e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.0440e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9955e-06],
        [-4.1027e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6320e-08],
        [ 3.8500e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9788e-07],
        ...,
        [ 1.2696e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7977e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6028e-06],
        [ 2.1109e-04,  0.0000e+00,  4.1239e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.9936e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7971e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1722e-07],
        [-3.6702e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5444e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.5946e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.5991e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.7077e-06],
        [ 1.2760e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5638e-07],
        ...,
        [ 2.4540e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2559e-06],
        [-2.3107e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5445e-06],
        [ 4.9187e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2243e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9029e-05,  0.0000e+00, -2.2640e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.0682e-05],
        [-4.1480e-05,  0.0000e+00,  8.3357e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.2779e-05],
        [ 4.8191e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8054e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8366e-06],
        [-3.1460e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0587e-05],
        [-1.7724e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8254e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.9362e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.4112e-07],
        [-6.1728e-07,  0.0000e+00, -2.6204e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3099e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.8572e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4035e-05],
        [ 4.5138e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3611e-05],
        [ 2.1658e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.8016e-06,  0.0000e+00,  1.0657e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.3400e-06],
        [-8.5716e-09,  0.0000e+00, -5.7644e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.2467e-08],
        [-3.7963e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0388e-07],
        ...,
        [ 3.3518e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6984e-05],
        [ 2.6752e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.6415e-07],
        [ 1.8301e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.8350e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1983e-07],
        [-1.7856e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9317e-07],
        [-2.4220e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.5883e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9677e-04],
        [-1.9271e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0187e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.5933e-09],
        ...,
        [-6.0858e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8885e-05],
        [-3.3911e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0750e-04],
        [-5.8347e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.5926e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0250e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3890e-05],
        [-9.9428e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -6.4301e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.6832e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.5120e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2186e-05],
        ...,
        [ 7.9259e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2367e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7430e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9769e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7348e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1035e-04],
        [ 9.6445e-07,  0.0000e+00, -1.0180e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.5477e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6987e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8018e-05],
        [ 1.6303e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2896e-03],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.3488e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00, -8.6014e-09,  ...,  0.0000e+00,
          0.0000e+00, -5.2547e-09],
        [-1.7747e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3541e-07],
        [-2.2490e-06,  0.0000e+00, -7.4931e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.2363e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6053e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4101e-08],
        [ 6.4597e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.0767e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.3357e-06,  0.0000e+00,  4.1410e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.1799e-06],
        [-3.7074e-10,  0.0000e+00, -4.6663e-09,  ...,  0.0000e+00,
          0.0000e+00, -5.3804e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.1428e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.3909e-07],
        [ 7.9460e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1113e-04],
        [-5.2387e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3368e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7210e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5098e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3349e-06],
        [-1.5607e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4578e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6167e-05],
        [ 2.2020e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.1051e-05],
        [-4.6158e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2430e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1281e-05],
        [ 5.8949e-04,  0.0000e+00,  1.7065e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.4517e-03],
        [-2.5006e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.5067e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3054e-05],
        [ 2.8582e-05,  0.0000e+00, -6.8390e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7488e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  7.7384e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7834e-04,  0.0000e+00,  2.1574e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.4894e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0934e-04],
        ...,
        [-9.5288e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5325e-04],
        [-2.4474e-06,  0.0000e+00,  4.1387e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.3771e-05],
        [ 0.0000e+00,  0.0000e+00, -4.2771e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.4677e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4839e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1452e-04,  0.0000e+00, -2.4108e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.1295e-04],
        [ 2.3774e-08,  0.0000e+00, -1.0928e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.6480e-09],
        [ 0.0000e+00,  0.0000e+00,  6.4275e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.2639e-05],
        ...,
        [-2.2218e-07,  0.0000e+00, -2.8270e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.6437e-07],
        [ 1.0080e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.7008e-06,  0.0000e+00, -6.7609e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.0403e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.0815e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3082e-07],
        [ 0.0000e+00,  0.0000e+00,  1.8721e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3912e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.0668e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5927e-06],
        [-5.4985e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.4061e-05],
        [-3.5152e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.5476e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3671e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0783e-08],
        [ 8.5167e-08,  0.0000e+00,  5.6947e-08,  ...,  0.0000e+00,
          0.0000e+00,  3.3590e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1905e-04,  0.0000e+00, -1.9482e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.3641e-05],
        [-6.8857e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4183e-07],
        [-6.8969e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6067e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5476e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.3538e-09,  0.0000e+00, -1.0724e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.0867e-08],
        ...,
        [-2.3568e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1751e-05],
        [-3.2392e-06,  0.0000e+00, -4.5038e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.4676e-06],
        [ 0.0000e+00,  0.0000e+00,  1.6068e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.1432e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4903e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.2529e-07],
        [ 9.7018e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.5865e-04],
        [ 5.3670e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1478e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1131e-07],
        [-1.0977e-04,  0.0000e+00, -4.4985e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.9800e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00,  1.3262e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1826e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.6371e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.2547e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0746e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1299e-06],
        [ 2.1680e-07,  0.0000e+00, -3.0237e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.3721e-07],
        [ 8.4309e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0010e-06,  0.0000e+00,  2.4071e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2186e-06],
        [-3.4356e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3512e-04],
        [ 6.0142e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.3810e-05,  0.0000e+00, -7.7371e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.0996e-05],
        [-1.5861e-07,  0.0000e+00, -2.3815e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.8672e-07],
        [ 8.3305e-07,  0.0000e+00,  4.2966e-08,  ...,  0.0000e+00,
          0.0000e+00,  8.2920e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.4516e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.3956e-04],
        [-1.3195e-05,  0.0000e+00,  1.2834e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.1165e-06],
        [ 9.1870e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8104e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0613e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2366e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2614e-04],
        ...,
        [ 1.6886e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8194e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.4831e-09,  0.0000e+00,  2.0445e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.0435e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7996e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.3770e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9358e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.4577e-05,  0.0000e+00,  4.8870e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.1584e-05],
        [ 1.2449e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.5664e-05],
        [ 7.8955e-10,  0.0000e+00,  3.3415e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 8.3168e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3347e-05,  0.0000e+00, -3.1176e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.9468e-05],
        [ 0.0000e+00,  0.0000e+00, -3.7657e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9942e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6828e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.4386e-06,  0.0000e+00, -1.6262e-05,  ...,  0.0000e+00,
          0.0000e+00,  6.6147e-06],
        [ 1.5854e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0012e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7853e-04],
        [ 1.6529e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1678e-06,  0.0000e+00, -4.0005e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.6598e-06],
        ...,
        [-1.5632e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1599e-07],
        [ 5.2845e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0101e-07],
        [ 3.7719e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6052e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.9464e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.0215e-08,  0.0000e+00, -1.9261e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.2360e-08],
        [ 0.0000e+00,  0.0000e+00, -6.2989e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.5189e-08],
        [-1.0681e-05,  0.0000e+00,  1.0442e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1464e-05],
        ...,
        [ 1.8287e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2120e-08,  0.0000e+00, -3.6837e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.1158e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7376e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6125e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.0191e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.4211e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5592e-07],
        [-6.0316e-05,  0.0000e+00,  8.8004e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.1330e-04],
        [ 3.1404e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.2864e-07,  0.0000e+00, -1.0741e-06,  ...,  0.0000e+00,
          0.0000e+00, -9.4881e-07],
        [-8.3344e-08,  0.0000e+00, -1.5039e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.0221e-07],
        [-1.0669e-06,  0.0000e+00, -1.1554e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.8818e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.7893e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0480e-04],
        [-7.4130e-08,  0.0000e+00,  1.5976e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.4563e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.1327e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9213e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6805e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4272e-05],
        ...,
        [-9.3236e-05,  0.0000e+00, -1.3239e-03,  ...,  0.0000e+00,
          0.0000e+00, -8.8654e-05],
        [-1.7696e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6768e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3432e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.9109e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7125e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7826e-07],
        [-1.8628e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.5047e-06,  0.0000e+00, -2.7128e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.3158e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.1316e-05,  0.0000e+00, -6.9446e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.3073e-05],
        ...,
        [ 1.0206e-07,  0.0000e+00,  1.3555e-06,  ...,  0.0000e+00,
          0.0000e+00,  7.2982e-08],
        [ 2.4376e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.4309e-10,  0.0000e+00, -7.8441e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.1238e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.1146e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0336e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5384e-

Gradients values: tensor([[-6.8174e-05,  0.0000e+00, -8.4580e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.2720e-05],
        [-4.6003e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.0623e-06,  0.0000e+00,  6.2571e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.8525e-06],
        ...,
        [ 7.1624e-08,  0.0000e+00, -4.1725e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.1904e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4454e-04],
        [-7.1433e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0776e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.1144e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.6607e-07,  0.0000e+00, -6.4272e-07,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3688e-06,  0.0000e+00,  1.5969e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.3074e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3142e-05],
        [-1.8213e-04,  0.0000e+00, -9.8660e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.1267e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.6566e-06],
        [ 7.0250e-05,  0.0000e+00, -1.8459e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.5074e-07,  0.0000e+00, -2.2185e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.6807e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  3.6526e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.6951e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0631e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7302e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3722e-06],
        [-1.0209e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.4904e-07],
        [-1.5609e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.4735e-06],
        ...,
        [-6.2820e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1817e-04],
        [ 3.4137e-06,  0.0000e+00, -2.5403e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6524e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.5269e-09,  0.0000e+00,  1.2100e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4318e-07],
        [ 2.9073e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.6448e-07,  0.0000e+00,  2.7326e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.0367e-06],
        [ 3.2955e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.4120e-08,  0.0000e+00, -5.5128e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -6.6931e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.3553e-08],
        [-1.6549e-08,  0.0000e+00, -8.3949e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.5258e-08],
        [ 2.5904e-08,  0.0000e+00, -3.2646e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.8106e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4804e-05,  0.0000e+00, -1.1329e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.6418e-05],
        [ 6.5514e-06,  0.0000e+00,  1.5222e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9289e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.2434e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7790e-06],
        [ 1.6205e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.9087e-07,  0.0000e+00,  5.6216e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.0098e-07],
        ...,
        [-1.2206e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  8.0282e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.2828e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1631e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.3343e-06],
        [ 7.5023e-07,  0.0000e+00, -5.8175e-07,  ...,  0.0000e+00,
          0.0000e+00,  8.0535e-07],
        [-5.0448e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7515e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.6219e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1308e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.3935e-06,  0.0000e+00,  1.1049e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.4406e-07],
        [ 1.4696e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1685e-06],
        [ 1.7295e-06,  0.0000e+00, -3.5249e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.0248e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.2482e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9786e-07],
        [ 1.9699e-09,  0.0000e+00, -1.4251e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.1943e-09],
        [ 9.4883e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.0762e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.0702e-06,  0.0000e+00,  4.3715e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.7457e-06],
        ...,
        [-2.2584e-06,  0.0000e+00,  4.7507e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.7423e-05],
        [-7.4913e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.1004e-05,  0.0000e+00, -8.4080e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.4836e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8937e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0224e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7471e-05],
        [ 3.5608e-06,  0.0000e+00,  4.9201e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.5649e-06],
        [ 6.3817e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8546e-04],
        ...,
        [-1.3142e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0291e-05],
        [-1.2634e-06,  0.0000e+00, -2.6703e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7208e-06],
        [ 0.0000e+00,  0.0000e+00,  9.2080e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.7081e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -5.3247e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.4909e-06],
        [ 0.0000e+00,  0.0000e+00, -3.0022e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.7927e-04],
        [ 3.0939e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5952e-05],
        [ 1.2988e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.8010e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5789e-05],
        ...,
        [-5.0183e-06,  0.0000e+00,  1.1670e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0553e-05],
        [ 1.8828e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5467e-07],
        [ 1.8858e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1336e-03,  0.0000e+00,  9.0142e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.6308e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9151e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.9449e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.4923e-05],
        [ 1.3224e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3267e-06],
        [ 0.0000e+00,  0.0000e+00, -1.1411e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.7683e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.9404e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.2393e-07],
        [-1.3720e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3582e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0642e-07,  0.0000e+00, -7.0830e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.1397e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.3866e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.6921e-06],
        [-6.9187e-07,  0.0000e+00, -1.0745e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3802e-06],
        [-1.1799e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3298e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -2.5149e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.1062e-08],
        [ 1.8922e-04,  0.0000e+00,  2.3251e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.9905e-04],
        [-1.2435e-07,  0.0000e+00, -5.6819e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.6697e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7033e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0069e-06],
        [-4.3120e-07,  0.0000e+00, -2.9818e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.9322e-07],
        [-2.3595e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3167e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5356e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.3900e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.8643e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7181e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4207e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.7285e-10,  0.0000e+00,  7.1231e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.5935e-08],
        [-1.1053e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3985e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 3.1321e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3464e-05],
        [ 3.9848e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.1897e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0420e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.0582e-05],
        [ 4.6592e-08,  0.0000e+00, -4.7667e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.8672e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6788e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.4751e-07],
        [ 1.2879e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4413e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.8276e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9418e-09],
        [ 1.6587e-09,  0.0000e+00, -1.2117e-08,  ...,  0.0000e+00,
          0.0000e+00,  2.7160e-09],
        [-6.3421e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.2781e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.5181e-05,  0.0000e+00, -6.6174e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.0350e-04],
        [-1.6262e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7877e-03],
        [-1.9360e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7054e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.8995e-07],
        [-1.8258e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.9436e-06,  0.0000e+00, -5.2790e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.9818e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3317e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.0616e-05],
        [ 0.0000e+00,  0.0000e+00, -7.1475e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.2630e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4017e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.8023e-07],
        [-1.7527e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0806e-

Gradients values: tensor([[-3.9536e-07,  0.0000e+00, -7.1995e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.5555e-07],
        [ 1.6660e-04,  0.0000e+00,  3.4031e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.0671e-04],
        [ 0.0000e+00,  0.0000e+00, -2.0511e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.7770e-07],
        ...,
        [-2.4779e-05,  0.0000e+00, -3.7850e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.7686e-05],
        [ 0.0000e+00,  0.0000e+00,  6.8697e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.1384e-09,  0.0000e+00, -2.3331e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.4606e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.2027e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.7612e-10,  0.0000e+00,  4.3222e-09,  ...,  0.0000e+00,
          0.0000e+00,  2.8733e-08],
        [ 0.0000e+00,  0.0000e+00, -3.5230e-04,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.1235e-08,  0.0000e+00, -5.1485e-07,  ...,  0.0000e+00,
          0.0000e+00, -7.2119e-08],
        ...,
        [-3.2492e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1476e-06],
        [-3.5994e-08,  0.0000e+00, -1.2670e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.0931e-08],
        [ 2.7724e-06,  0.0000e+00, -4.7555e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7117e-07,  0.0000e+00, -1.4744e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2040e-06,  0.0000e+00, -4.6287e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1308e-06],
        [ 9.2248e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4283e-07,  0.0000e+00, -1.0433e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.8767e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.4839e-06],
        [-3.2922e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.4407e-05],
        ...,
        [-2.5943e-06,  0.0000e+00, -1.4909e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.7321e-05],
        [-6.8442e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8535e-07],
        [-7.1446e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2431e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.0195e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.1394e-05],
        [ 4.8577e-08,  0.0000e+00,  8.9937e-08,  ...,  0.0000e+00,
          0.0000e+00,  5.3402e-08],
        [ 1.0401e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.5207e-06,  0.0000e+00, -1.2921e-05,  ...,  0.0000e+00,
          0.0000e+00,  4.7636e-06],
        [ 4.6646e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.7874e-08,  0.0000e+00, -1.1025e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1557e-07],
        ...,
        [ 5.5108e-10,  0.0000e+00, -4.2287e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.2356e-09],
        [ 5.9728e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8182e-04],
        [-6.4376e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.3552e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5026e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.9732e-07,  0.0000e+00, -1.6426e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.9451e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.1800e-06],
        [-5.0872e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3388e-07],
        [-6.8156e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.9029e-07],
        ...,
        [-2.4230e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7409e-06],
        [-1.8702e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0206e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8839e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2486e-08],
        [ 6.7871e-07,  0.0000e+00, -5.9411e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.7947e-07],
        [-2.1943e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.8520e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4709e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.7057e-05],
        ...,
        [-1.7634e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2518e-07],
        [ 6.6564e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0819e-06],
        [ 2.0942e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.9538e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4937e-06],
        [ 7.6146e-06,  0.0000e+00, -1.5851e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.3785e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8236e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.8707e-06],
        [ 0.0000e+00,  0.0000e+00,  9.0553e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.1210e-07],
        [-3.8133e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-8.5650e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2307e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.7290e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.6566e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.7590e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7761e-06],
        [-6.1690e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7334e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1833e-07],
        [ 1.5068e-07,  0.0000e+00,  1.8872e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.2295e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.0270e-07,  0.0000e+00, -2.7783e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.5855e-06],
        [ 3.6446e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.6820e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -6.9022e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1168e-09,  0.0000e+00, -2.1794e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.4258e-09],
        [-9.3567e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7832e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2859e-08],
        [-5.7129e-06,  0.0000e+00, -3.9719e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9257e-05],
        [-9.3480e-04,  0.0000e+00, -4.0932e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.1139e-03],
        ...,
        [ 9.8288e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3952e-05,  0.0000e+00, -1.3163e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.2329e-05],
        [-3.6523e-07,  0.0000e+00,  1.4065e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.0291e-08,  0.0000e+00, -8.2567e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.8487e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3845e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.4849e-07,  0.0000e+00, -5.4434e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.0351e-07],
        [ 0.0000e+00,  0.0000e+00, -7.9684e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0312e-08,  0.0000e+00, -3.9712e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.8651e-08],
        ...,
        [-7.2439e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  4.3690e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4181e-05,  0.0000e+00,  7.5036e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.7068e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.7135e-06,  0.0000e+00,  6.4143e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.3997e-06],
        [ 0.0000e+00,  0.0000e+00,  4.2583e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.1065e-07],
        [ 2.0627e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0964e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.2050e-07],
        [ 1.0494e-05,  0.0000e+00, -1.7260e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.1450e-05],
        [ 0.0000e+00,  0.0000e+00, -7.5289e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.7166e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00,  6.0003e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.0896e-03],
        [ 4.0608e-07,  0.0000e+00, -8.6305e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.2519e-07],
        [ 9.8720e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7194e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1887e-10,  0.0000e+00,  4.4258e-10,  ...,  0.0000e+00,
          0.0000e+00,  3.9408e-10],
        [-5.7372e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3812e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3712e-05,  0.0000e+00, -5.2087e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3348e-05],
        [ 0.0000e+00,  0.0000e+00, -5.3364e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.8384e-06,  0.0000e+00, -4.9829e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5243e-06],
        ...,
        [ 2.6032e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9696e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0341e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7933e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1233e-07],
        [-1.3458e-03,  0.0000e+00, -3.2894e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.2410e-03],
        [-5.1545e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.4252e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7474e-04],
        [-1.1764e-08,  0.0000e+00, -2.2640e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.0306e-09],
        [ 5.4554e-04,  0.0000e+00, -8.6180e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.3515e-04],
        ...,
        [-3.9841e-08,  0.0000e+00, -2.4046e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.8460e-08],
        [-5.2747e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8567e-06],
        [ 2.3476e-07,  0.0000e+00, -5.6002e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.2579e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0151e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.4097e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4541e-06],
        [ 8.1184e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6136e-08],
        [ 0.0000e+00,  0.0000e+00,  2.3274e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.2417e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.9670e-07],
        [ 0.0000e+00,  0.0000e+00,  2.4165e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.0727e-08],
        [ 1.5658e-08,  0.0000e+00,  2.1094e-08,  ...,  0.0000e+00,
          0.0000e+00,  8.1592e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.5866e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.0750e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2134e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.7477e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7444e-07],
        [ 9.9550e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2483e-07],
        [ 0.0000e+00,  0.0000e+00, -3.6425e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1081e-05],
        ...,
        [ 2.3284e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3178e-06],
        [ 0.0000e+00,  0.0000e+00, -4.4511e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.4460e-05],
        [-4.2187e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1612e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.3595e-08,  0.0000e+00, -2.0755e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5663e-07],
        [ 0.0000e+00,  0.0000e+00,  5.2574e-07,  ...,  0.0000e+00,
          0.0000e+00,  6.8487e-05],
        [-1.7342e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0925e-06],
        [-9.2684e-08,  0.0000e+00, -5.2096e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.3734e-07],
        [ 7.8562e-09,  0.0000e+00, -2.3886e-07,  ...,  0.0000e+00,
          0.0000e+00,  5.3235e-08],
        ...,
        [-8.5076e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8936e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5910e-07,  0.0000e+00, -8.4266e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.2595e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.8552e-03,  0.0000e+00,  5.3433e-03,  ...,  0.0000e+00,
          0.0000e+00, -2.1912e-03],
        [-7.7226e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3895e-06],
        [ 8.5103e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.2278e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1578e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.8776e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-3.8405e-04,  0.0000e+00,  2.1682e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.8661e-04],
        [-2.0996e-06,  0.0000e+00,  1.3313e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.0955e-06],
        [ 1.9838e-09,  0.0000e+00,  3.1431e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.8618e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.5877e-09,  0.0000e+00, -4.1712e-08,  ...,  0.0000e+00,
          0.0000e+00,  3.3585e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8883e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8476e-09,  0.0000e+00,  5.7076e-09,  ...,  0.0000e+00,
          0.0000e+00,  1.1163e-08],
        [-6.5656e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.7482e-04],
        [ 3.4526e-08,  0.0000e+00,  1.1917e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.4770e-08],
        ...,
        [-8.2519e-06,  0.0000e+00, -3.1247e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4527e-05],
        [ 1.3114e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1435e-04,  0.0000e+00, -5.8958e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.8494e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0617e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.5490e-06],
        [ 2.0451e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8658e-06],
        [-2.4023e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7379e-07,  0.0000e+00, -2.5496e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.0416e-07],
        [ 1.5607e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8485e-06],
        [ 0.0000e+00,  0.0000e+00,  6.9988e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.7203e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.5110e-08],
        [-5.7747e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.0939e-07],
        [-7.2067e-06,  0.0000e+00, -6.1094e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.3452e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2717e-06,  0.0000e+00, -2.6040e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3577e-06],
        [-1.0075e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9041e-04],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.4300e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.8377e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.5523e-08],
        [ 0.0000e+00,  0.0000e+00,  2.1138e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.9536e-06],
        ...,
        [ 1.3698e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.2132e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9326e-07],
        [-4.8148e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0014e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.2099e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3623e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.4620e-05,  0.0000e+00, -2.3372e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0356e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.4385e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.0154e-06],
        ...,
        [ 4.9442e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8964e-05],
        [-3.5347e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7316e-06],
        [-1.9337e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4139e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4561e-07],
        [ 2.7680e-07,  0.0000e+00,  1.0333e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.1861e-07],
        [ 1.4430e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.5182e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1990e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5800e-06],
        [ 1.0833e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.4325e-08],
        ...,
        [ 6.6126e-09,  0.0000e+00,  9.4763e-08,  ...,  0.0000e+00,
          0.0000e+00,  4.6168e-08],
        [ 1.1852e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9884e-06],
        [-8.6913e-07,  0.0000e+00,  1.7416e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.6401e-09,  0.0000e+00,  1.3865e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.8162e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.5361e-06],
        [ 2.7306e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.9596e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.5048e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7458e-07],
        ...,
        [ 9.2210e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.0038e-05],
        [ 1.7903e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1578e-10],
        [-1.6457e-07,  0.0000e+00,  7.8953e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.8975e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.0828e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.8234e-07],
        [ 4.0496e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2982e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0760e-06],
        [-8.7466e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1204e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0679e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0516e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5886e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3890e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  2.2927e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.9893e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.4295e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.4259e-05],
        [ 0.0000e+00,  0.0000e+00, -5.8681e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.9759e-07],
        [ 2.1992e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.8341e-06,  0.0000e+00, -3.8773e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.1974e-06],
        [ 0.0000e+00,  0.0000e+00, -4.2612e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.3917e-08],
        [ 8.1374e-08,  0.0000e+00, -1.0755e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.5818e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.0736e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0231e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.8784e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.5520e-07,  ...,  0.0000e+00,
          0.0000e+00,  5.8514e-08],
        [-1.2906e-06,  0.0000e+00,  2.2077e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.7801e-07],
        [ 3.2825e-05,  0.0000e+00,  2.7033e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.6470e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  3.5343e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5143e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2010e-07],
        [ 1.7517e-05,  0.0000e+00,  1.5743e-06,  ...,  0.0000e+00,
          0.0000e+00,  9.3275e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7747e-06,  0.0000e+00,  2.4063e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3458e-06],
        [-5.1557e-10,  0.0000e+00, -1.4034e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.1938e-09],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4952e-07,  0.0000e+00, -7.3466e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  7.2573e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9554e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8676e-09],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.2531e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.5306e-08],
        [-1.4407e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7205e-07],
        [-5.1739e-08,  0.0000e+00, -3.2474e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.7357e-06],
        [-8.4462e-11,  0.0000e+00, -9.9785e-10,  ...,  0.0000e+00,
          0.0000e+00, -3.3134e-10],
        [ 7.4392e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0766e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1529e-07],
        [ 2.3443e-08,  0.0000e+00, -6.4949e-06,  ...,  0.0000e+00,
          0.0000e+00, -9.0350e-07],
        ...,
        [ 2.9219e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5189e-07],
        [-1.7434e-06,  0.0000e+00,  2.2104e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.7034e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.4383e-06,  0.0000e+00, -1.9256e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2399e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.4146e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6931e-07],
        [-3.2329e-05,  0.0000e+00, -2.2259e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.0228e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.2898e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7074e-05],
        [-1.8449e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3943e-05],
        [-7.8746e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6844e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1055e-06],
        [-8.6425e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Gradients values: tensor([[-4.8248e-07,  0.0000e+00,  1.5415e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.8406e-07],
        [-8.1893e-07,  0.0000e+00,  9.6856e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.5357e-07],
        [-1.3008e-05,  0.0000e+00,  1.3855e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.6997e-06],
        ...,
        [-6.8008e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.6158e-07],
        [-7.8013e-06,  0.0000e+00, -2.9838e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.4421e-08,  0.0000e+00,  4.9263e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.7614e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.7331e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.9922e-08],
        [ 0.0000e+00,  0.0000e+00,  5.7906e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.2538e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6772e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5682e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4706e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4164e-05,  0.0000e+00, -2.7717e-03,  ...,  0.0000e+00,
          0.0000e+00, -2.4068e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.2844e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -4.0206e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.6223e-06],
        [-1.7222e-06,  0.0000e+00, -1.9778e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.3548e-06],
        [-1.2852e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.2602e-05],
        ...,
        [ 3.9049e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2136e-09],
        [ 6.2760e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.8308e-07,  0.0000e+00, -6.3550e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.2890e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.9475e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.8588e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.8469e-08],
        [-1.1438e-05,  0.0000e+00,  8.2658e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.1709e-06],
        [ 0.0000e+00,  0.0000e+00, -1.7981e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.2011e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9548e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4105e-07],
        [ 0.0000e+00,  0.0000e+00,  1.2622e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1002e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.1599e-06,  0.0000e+00, -1.9746e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.0119e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2177e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2536e-05],
        [ 3.6333e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1814e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.3647e-04],
        ...,
        [-2.6297e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2171e-06],
        [ 1.2337e-06,  0.0000e+00, -3.3822e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1561e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7613e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.3684e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.0956e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2384e-08,  0.0000e+00,  6.1090e-08,  ...,  0.0000e+00,
          0.0000e+00,  3.1298e-08],
        [-1.1696e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.0481e-06],
        [-9.0727e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.6079e-05],
        ...,
        [-2.0056e-06,  0.0000e+00, -5.0002e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.4498e-06],
        [-6.6453e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0610e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3062e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8549e-06,  0.0000e+00, -6.5666e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5748e-06],
        [-2.9291e-06,  0.0000e+00,  5.5345e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9844e-06],
        [-5.2611e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.0413e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8346e-04],
        [ 0.0000e+00,  0.0000e+00, -2.1785e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.3029e-04],
        [ 0.0000e+00,  0.0000e+00,  1.5806e-08,  ...,  0.0000e+00,
          0.0000e+00,  6.4546e-09],
        ...,
        [ 2.6572e-09,  0.0000e+00,  1.1064e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.2234e-09],
        [ 4.6894e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3882e-05],
        [ 3.1320e-06,  0.0000e+00, -2.2516e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9334e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6818e-08,  0.0000e+00, -1.7701e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.0204e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4384e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.1033e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9585e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  1.0753e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.1587e-05],
        [ 4.3918e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.8108e-07,  0.0000e+00, -8.9323e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4326e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.5131e-07],
        [-2.5267e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4160e-07,  0.0000e+00, -2.8539e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.1067e-07],
        [ 2.5802e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8459e-06],
        [ 0.0000e+00,  0.0000e+00, -1.5507e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.2657e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4636e-06],
        [ 6.3260e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0695e-08],
        [ 8.9450e-09,  0.0000e+00,  1.5310e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.4593e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3960e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5940e-05],
        [ 1.0148e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.4931e-08,  0.0000e+00, -2.6953e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.1350e-08],
        [-1.9731e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2983e-07],
        [-4.6860e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.9843e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1703e-05],
        [-2.1951e-08,  0.0000e+00, -4.4366e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.9072e-08],
        [ 0.0000e+00,  0.0000e+00, -2.1821e-04,  ...,  0.0000e+00,
          0.0000e+00,  6.4612e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.0091e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5358e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  4.2655e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1448e-09,  0.0000e+00,  9.2402e-10,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9721e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.6398e-07,  0.0000e+00, -4.1391e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.2772e-06],
        [ 0.0000e+00,  0.0000e+00,  1.2828e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.5967e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.1419e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.8388e-06],
        [ 5.1047e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.3972e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.2705e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.1804e-05],
        [ 2.5144e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3749e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.1936e-05],
        ...,
        [-1.5257e-07,  0.0000e+00, -4.9802e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.0548e-07],
        [ 1.0102e-05,  0.0000e+00, -1.8532e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.7007e-05],
        [-6.9826e-04,  0.0000e+00,  1.4131e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.3530e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.2738e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.5691e-07],
        [ 0.0000e+00,  0.0000e+00,  2.2488e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.6404e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.0614e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9666e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1777e-05],
        [ 1.1707e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.6467e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.9143e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2825e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4669e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.6407e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8672e-06],
        [ 2.2127e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2125e-05,  0.0000e+00,  5.1589e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.4670e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8243e-05],
        [ 3.4745e-08,  0.0000e+00, -2.8072e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.9810e-08],
        ...,
        [-5.0538e-06,  0.0000e+00, -9.8229e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.3097e-05],
        [-4.0545e-06,  0.0000e+00, -1.0507e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.6693e-06],
        [ 1.8301e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8048e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1454e-05,  0.0000e+00,  1.9952e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.8405e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3096e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  1.1338e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.7807e-07],
        [-2.3844e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8864e-05],
        ...,
        [-8.8549e-06,  0.0000e+00,  6.3281e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.1030e-06],
        [-7.3980e-06,  0.0000e+00,  3.1774e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.5537e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.8479e-07,  0.0000e+00, -8.4892e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.8890e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1577e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.8227e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7683e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.7713e-07],
        [-2.2323e-09,  0.0000e+00, -1.5124e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.8718e-09],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.4374e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5435e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2988e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.1924e-06],
        [ 5.1185e-06,  0.0000e+00,  1.2361e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.5432e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2345e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1812e-04],
        [ 0.0000e+00,  0.0000e+00, -1.4048e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5517e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.1033e-05,  0.0000e+00, -2.5717e-05,  ...,  0.0000e+00,
          0.0000e+00, -9.6251e-06],
        [-4.1213e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  2.4655e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.7718e-04,  0.0000e+00, -9.1866e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.2458e-04],
        [-4.0380e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.7253e-05],
        [-8.9359e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.3578e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.9968e-07],
        [ 1.6993e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.1173e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.7564e-06,  0.0000e+00,  1.1982e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.4614e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.9570e-04],
        [-1.6971e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0527e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7314e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.2589e-06],
        [-6.1313e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.0378e-09,  0.0000e+00, -1.9978e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.5349e-08],
        [ 1.6123e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5005e-07],
        [-5.1318e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.3042e-06],
        ...,
        [-9.9423e-06,  0.0000e+00,  7.1437e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.8929e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5162e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6648e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0580e-07,  0.0000e+00, -8.7240e-07,  ...,  0.0000e+00,
          0.0000e+00,  4.1378e-08],
        [-1.1629e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.0212e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4597e-08,  0.0000e+00, -8.2167e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.0045e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1181e-05],
        ...,
        [ 9.4390e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4260e-07],
        [ 0.0000e+00,  0.0000e+00, -1.7439e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.1702e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5740e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7438e-09,  0.0000e+00, -3.5054e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.7401e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.0986e-10,  ...,  0.0000e+00,
          0.0000e+00, -1.0809e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.3722e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.8117e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  8.9847e-07,  ...,  0.0000e+00,
          0.0000e+00,  4.1475e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0892e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.9797e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3710e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.5974e-07,  0.0000e+00, -3.4058e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.3290e-07],
        [-3.6165e-06,  0.0000e+00, -1.4186e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.5127e-08,  0.0000e+00, -1.9802e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.7742e-07],
        ...,
        [-1.4379e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.9424e-08],
        [-3.4107e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1978e-07],
        [-3.5709e-06,  0.0000e+00, -3.8308e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.6419e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.6330e-03,  ...,  0.0000e+00,
          0.0000e+00,  9.6482e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -5.7098e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -5.0653e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  7.1495e-10,  ...,  0.0000e+00,
          0.0000e+00, -1.2596e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.5341e-05,  0.0000e+00,  3.6247e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.4470e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  3.7772e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0485e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  4.9208e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -5.9033e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9484e-06],
        [ 0.0000e+00,  0.0000e+00, -5.9475e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.3901e-08,  0.0000e+00, -1.3861e-07,  ...,  0.0000e+00,
          0.0000e+00, -7.6356e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.3915e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.3975e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0392e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.8926e-07],
        [-1.5798e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5944e-08],
        [ 3.8934e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9669e-06,  0.0000e+00,  2.5323e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9555e-06],
        [ 2.8215e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0510e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2629e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -7.3893e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.0804e-04],
        [-4.4496e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.1266e-06],
        [-1.9088e-07,  0.0000e+00,  8.3519e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.9602e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9516e-06,  0.0000e+00,  2.5632e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3225e-06],
        [ 1.6008e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.5720e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0897e-09],
        [ 2.0204e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 3.1585e-10,  0.0000e+00, -4.1520e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.8734e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8682e-04],
        [ 3.0257e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1818e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6957e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.7804e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.3124e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.1918e-04],
        [ 5.5287e-06,  0.0000e+00, -2.9420e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.9653e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3429e-05],
        ...,
        [-5.0038e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.2696e-07,  0.0000e+00, -3.1997e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5317e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.2631e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1551e-06],
        [-9.3346e-07,  0.0000e+00,  4.3525e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.6627e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3712e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3173e-06],
        [ 1.2603e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2618e-07],
        [-3.6098e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.1086e-07],
        ...,
        [-4.8700e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1809e-05],
        [ 1.1547e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.1323e-06],
        [-1.6005e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7044e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1665e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1398e-07],
        [-4.0529e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1393e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4899e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7036e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7994e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.8951e-03,  ...,  0.0000e+00,
          0.0000e+00, -4.8898e-04],
        [-8.2499e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.1747e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5250e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3163e-06],
        [-1.2953e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.8372e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.9427e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.7668e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0204e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7248e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  2.3072e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.2516e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.1631e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9574e-05],
        ...,
        [-2.3517e-08,  0.0000e+00, -1.5530e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.4044e-08],
        [-2.3337e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3142e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6035e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0914e-06],
        [ 8.5715e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.4661e-04,  0.0000e+00,  4.9963e-03,  ...,  0.0000e+00,
          0.0000e+00,  5.2723e-04],
        [-7.2353e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3947e-06,  0.0000e+00,  8.4837e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.4228e-06],
        ...,
        [ 4.1238e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.9478e-06],
        [ 4.0099e-09,  0.0000e+00, -1.7550e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.9377e-08],
        [ 0.0000e+00,  0.0000e+00, -1.1741e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.0183e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1165e-09,  0.0000e+00, -1.7345e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.1152e-11],
        [ 0.0000e+00,  0.0000e+00, -9.2523e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.7547e-07],
        [-1.6186e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8498e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3120e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.7598e-08,  0.0000e+00,  5.8897e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.2713e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.8666e-10],
        [-1.2593e-07,  0.0000e+00, -1.6391e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5797e-06],
        [-9.5358e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.9475e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.3359e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.3549e-05],
        [-1.2391e-06,  0.0000e+00, -3.1984e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.3551e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.8153e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.6256e-07,  0.0000e+00,  4.3964e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.6914e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7020e-06],
        ...,
        [ 1.6681e-07,  0.0000e+00, -4.0915e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.0673e-07],
        [ 3.5835e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.6836e-07],
        [ 2.6776e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6287e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.1515e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8007e-07,  0.0000e+00,  1.2376e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.7244e-07],
        [-1.6327e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.6514e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.6498e-08,  0.0000e+00,  3.2093e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.3470e-07],
        [ 2.2012e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0957e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6304e-07,  0.0000e+00, -2.1206e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.2786e-07],
        [ 1.1674e-06,  0.0000e+00, -6.5114e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.8512e-06],
        [ 0.0000e+00,  0.0000e+00, -2.2129e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1093e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.5170e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6889e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.4118e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.4365e-06],
        ...,
        [-1.7343e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6025e-06,  0.0000e+00,  6.1691e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1331e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.1435e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.4553e-07],
        [-3.9931e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5109e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.4399e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2796e-10],
        [-3.4403e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4381e-06],
        [ 1.9657e-05,  0.0000e+00, -1.6281e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.9684e-05],
        ...,
        [-7.7178e-08,  0.0000e+00, -1.3247e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.2715e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.5243e-10,  0.0000e+00, -7.3680e-08,  ...,  0.0000e+00,
          0.0000e+00,  4.6207e-09],
        [ 2.9152e-05,  0.0000e+00, -1.8274e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.0788e-05],
        [-1.6506e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7569e-06,  0.0000e+00,  9.1184e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.9086e-06],
        [-8.5219e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.1660e-07,  0.0000e+00, -1.5320e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.9043e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.5516e-03],
        [ 2.4360e-05,  0.0000e+00, -2.4517e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.9902e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.0990e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7881e-06],
        [ 5.9234e-09,  0.0000e+00, -5.8657e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.1625e-08],
        [-3.3417e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0877e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9683e-06],
        [ 2.5301e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8251e-05],
        [-4.5237e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2093e-06],
        ...,
        [ 3.3486e-07,  0.0000e+00, -5.4976e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.1086e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6620e-07],
        [-2.3113e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0916e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.9298e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.0723e-11,  0.0000e+00, -2.9152e-09,  ...,  0.0000e+00,
          0.0000e+00, -8.3988e-11],
        [-1.3028e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.3721e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3828e-08],
        [ 3.0424e-09,  0.0000e+00, -2.1512e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.4523e-08],
        [ 1.4522e-07,  0.0000e+00, -4.0583e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.6989e-04,  0.0000e+00,  1.8907e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2847e-04],
        [ 9.3233e-08,  0.0000e+00, -2.6520e-07,  ...,  0.0000e+00,
          0.0000e+00,  9.1592e-08],
        [ 9.5123e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0654e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.7490e-06],
        [ 3.8243e-05,  0.0000e+00, -2.8250e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1076e-05,  0.0000e+00,  8.1555e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.5476e-10,  0.0000e+00, -2.0510e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.1287e-09],
        ...,
        [ 6.6186e-09,  0.0000e+00, -6.1566e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.6049e-08],
        [-1.0417e-06,  0.0000e+00, -7.6288e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.9375e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5577e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2575e-09],
        [-4.0395e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.5058e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.1598e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.9335e-05],
        [ 1.2175e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0499e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.4347e-06,  0.0000e+00, -5.9169e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4057e-08,  0.0000e+00, -1.0533e-07,  ...,  0.0000e+00,
          0.0000e+00,  8.5111e-08],
        [ 4.1019e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0681e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.3902e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.8588e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.5475e-11],
        [-5.9934e-06,  0.0000e+00,  4.6886e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.5677e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-3.0460e-08,  0.0000e+00, -3.2955e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.4795e-12,  0.0000e+00, -2.3583e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.7747e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.0158e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.0268e-07,  0.0000e+00,  2.3548e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.0863e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0096e-04,  0.0000e+00, -1.0390e-03,  ...,  0.0000e+00,
          0.0000e+00, -5.6990e-04],
        [-1.0136e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.5058e-06],
        [-7.2197e-07,  0.0000e+00,  5.6832e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.6468e-07],
        ...,
        [ 1.8689e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5811e-09,  0.0000e+00, -6.2396e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.0875e-08],
        [-9.7442e-08,  0.0000e+00, -2.4682e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2576e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.4523e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.2510e-06],
        [ 4.0147e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.7675e-07,  0.0000e+00, -5.9678e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.0148e-06],
        [ 1.5186e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.7442e-08],
        ...,
        [-1.8077e-10,  0.0000e+00, -4.9298e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.1614e-09],
        [-8.1892e-06,  0.0000e+00, -5.8567e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.1906e-06],
        [-6.0199e-07,  0.0000e+00, -5.2964e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.0289e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.3830e-07,  0.0000e+00, -7.2096e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.7094e-07],
        [-4.8203e-08,  0.0000e+00, -8.9660e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.4859e-07],
        [-7.6547e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.7745e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  1.7156e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.2001e-08,  0.0000e+00, -1.4891e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.2168e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.6996e-07,  0.0000e+00,  1.7083e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.4682e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.2279e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0837e-05],
        [ 1.2944e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4526e-09],
        [ 2.6729e-07,  0.0000e+00,  4.9364e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.0287e-06],
        [ 3.7431e-05,  0.0000e+00, -1.0840e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.0656e-04],
        ...,
        [ 2.1560e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2303e-05],
        [ 4.4371e-05,  0.0000e+00, -2.2102e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.4912e-05],
        [ 0.0000e+00,  0.0000e+00, -1.6323e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.2159e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.9418e-08,  0.0000e+00,  1.4587e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.2761e-06,  0.0000e+00, -4.2403e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.5249e-06],
        [-8.5177e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7510e-11,  0.0000e+00, -2.0422e-10,  ...,  0.0000e+00,
          0.0000e+00,  8.4654e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1555e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7319e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3594e-09],
        [-2.8959e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5081e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7746e-05,  0.0000e+00, -1.6145e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.2109e-09,  0.0000e+00, -1.9557e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0253e-07,  0.0000e+00, -3.0970e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.8901e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4684e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9994e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7699e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6044e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4955e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5726e-06],
        [ 2.7796e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -4.9337e-09,  ...,  0.0000e+00,
          0.0000e+00, -5.6544e-09],
        ...,
        [ 3.0839e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5720e-07,  0.0000e+00, -7.8424e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.0811e-06],
        [ 1.4261e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2259e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.3753e-07,  0.0000e+00, -6.2951e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.7314e-07],
        [ 0.0000e+00,  0.0000e+00, -4.6303e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.9919e-05],
        [ 1.2630e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -4.6259e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -1.2035e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.6482e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.4454e-06],
        ...,
        [-8.8398e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1350e-06],
        [-1.2471e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.0273e-07],
        [ 3.8640e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1309e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5426e-07],
        [ 0.0000e+00,  0.0000e+00,  4.7213e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.5053e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -1.2110e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1581e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2033e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2184e-05],
        ...,
        [ 8.2512e-07,  0.0000e+00, -2.4223e-06,  ...,  0.0000e+00,
          0.0000e+00,  4.0207e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7481e-05],
        [ 6.8137e-08,  0.0000e+00,  2.3335e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.4371e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.0184e-08,  0.0000e+00, -2.4023e-07,  ...,  0.0000e+00,
          0.0000e+00,  4.8071e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.0438e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.4333e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.0844e-08],
        ...,
        [-1.7186e-06,  0.0000e+00, -3.3818e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.5138e-06],
        [-2.3014e-04,  0.0000e+00, -5.5541e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.3127e-06,  0.0000e+00,  1.4288e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.7318e-07,  0.0000e+00, -3.8620e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7436e-06],
        [ 3.5564e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8434e-03,  0.0000e+00, -1.5703e-03,  ...,  0.0000e+00,
          0.0000e+00,  4.9428e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2345e-03],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.2504e-06,  0.0000e+00,  7.4507e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.0100e-06],
        [ 6.0190e-07,  0.0000e+00, -2.0542e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7010e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1600e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.3337e-09,  0.0000e+00, -1.3122e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.7931e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.9502e-06],
        [ 3.1469e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.9810e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.4839e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.5011e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00, -2.7998e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.1087e-03],
        [-1.9460e-06,  0.0000e+00, -2.7184e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2028e-05],
        [ 2.4044e-06,  0.0000e+00, -7.7230e-04,  ...,  0.0000e+00,
          0.0000e+00, -7.2504e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.3971e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7922e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6186e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.5146e-07,  0.0000e+00, -8.3320e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.6650e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7233e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0713e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4008e-06],
        [-1.0279e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.4647e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -9.7573e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4247e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2319e-05,  0.0000e+00, -1.5346e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.0544e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2130e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8227e-08],
        [-6.1653e-06,  0.0000e+00, -1.1964e-05,  ...,  0.0000e+00,
          0.0000e+00, -8.4100e-06],
        [ 5.3754e-08,  0.0000e+00, -8.2769e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.5710e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7373e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.2687e-04],
        [ 0.0000e+00,  0.0000e+00, -3.6675e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.5533e-06],
        [ 1.7600e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.1897e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1171e-06],
        [ 0.0000e+00,  0.0000e+00,  4.9001e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.7885e-03],
        ...,
        [ 0.0000e+00,  0.0000e+00, -6.8687e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.5028e-05],
        [ 6.4773e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7605e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7587e-06,  0.0000e+00,  4.1037e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9076e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.6503e-10],
        [ 8.4336e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0458e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3798e-03],
        [-3.9800e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.8889e-09,  0.0000e+00,  1.1634e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.0129e-08],
        [ 0.0000e+00,  0.0000e+00, -8.3518e-06,  ...,  0.0000e+00,
          0.0000e+00, -4.6971e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.1239e-07],
        [-1.8685e-06,  0.0000e+00,  2.0635e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.4384e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.4718e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.1455e-06],
        [-2.6077e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5312e-07],
        [ 9.7572e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 5.0430e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6953e-08,  0.0000e+00, -2.9930e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.6879e-08],
        [ 1.0311e-05,  0.0000e+00,  9.0060e-04,  ...,  0.0000e+00,
          0.0000e+00, -8.9029e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.1154e-05,  0.0000e+00,  2.0695e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.3541e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.3265e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0258e-04],
        [-1.4094e-07,  0.0000e+00,  1.9649e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.0710e-07],
        ...,
        [ 5.6341e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  2.9308e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.7398e-05],
        [ 0.0000e+00,  0.0000e+00,  3.1952e-03,  ...,  0.0000e+00,
          0.0000e+00, -2.5317e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9488e-07,  0.0000e+00,  6.4263e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.7929e-07],
        [ 0.0000e+00,  0.0000e+00, -7.3105e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6878e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8441e-08,  0.0000e+00, -1.2799e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.6341e-07],
        [ 1.2915e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7263e-06],
        [ 7.3528e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.1110e-05],
        ...,
        [-1.2915e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.9195e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.1820e-10,  0.0000e+00,  2.0184e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.8410e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3494e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8180e-06],
        [ 2.2085e-04,  0.0000e+00, -2.9937e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.9367e-05],
        [ 3.6184e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.0698e-07,  0.0000e+00, -6.2722e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.1835e-06],
        [ 0.0000e+00,  0.0000e+00,  3.8730e-09,  ...,  0.0000e+00,
          0.0000e+00,  2.3448e-09],
        [-9.2774e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-4.5282e-05,  0.0000e+00,  1.9817e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.2747e-04],
        [ 3.7307e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.7770e-08],
        [-3.3276e-08,  0.0000e+00, -8.1271e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.3939e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1027e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.0744e-06,  0.0000e+00,  7.3505e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.5054e-06],
        [-6.7888e-

Gradients values: tensor([[-1.4714e-06,  0.0000e+00,  1.0085e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3240e-06],
        [-6.4762e-07,  0.0000e+00,  3.0897e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.6035e-07],
        [ 2.8395e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -9.1739e-11,  ...,  0.0000e+00,
          0.0000e+00, -1.5525e-09],
        [ 0.0000e+00,  0.0000e+00,  1.2619e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3110e-06],
        [ 2.9171e-06,  0.0000e+00, -6.7937e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.5574e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.1119e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6921e-03],
        [-4.0097e-07,  0.0000e+00, -1.4770e-06,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.5600e-05],
        [-2.0096e-11,  0.0000e+00, -2.5482e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.2457e-09],
        [ 9.7862e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2961e-06],
        ...,
        [-1.4111e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5642e-05],
        [ 0.0000e+00,  0.0000e+00, -9.3340e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.2350e-09,  0.0000e+00, -1.3013e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.7703e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1928e-05],
        [ 0.0000e+00,  0.0000e+00, -4.0703e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.9490e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1639e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.4287e-06,  0.0000e+00,  3.3517e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9877e-06],
        ...,
        [-1.0194e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.6313e-08],
        [-3.1707e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4483e-05],
        [ 0.0000e+00,  0.0000e+00,  2.0454e-10,  ...,  0.0000e+00,
          0.0000e+00, -3.9056e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.5627e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2472e-04,  0.0000e+00,  5.7703e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.4817e-03],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.3584e-04],
        [-6.8782e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.0589e-07],
        [ 2.9270e-07,  0.0000e+00, -7.2988e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.3451e-07],
        ...,
        [ 4.7714e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0564e-05],
        [ 1.2008e-07,  0.0000e+00, -3.3406e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.5922e-07],
        [-2.9860e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.0714e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0902e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7678e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4699e-05],
        [-3.1608e-

Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2531e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.2112e-07],
        [ 9.5916e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2711e-08],
        ...,
        [-2.4869e-04,  0.0000e+00, -3.0442e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.5061e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9880e-07],
        [ 2.4135e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.2639e-06],
        [-2.4347e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0497e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.8965e-10,  0.0000e+00, -2.9241e-09,  ...,  0.0000e+00,
          0.0000e+00, -4.1097e-09],
        [-1.2751e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.9213e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2561e-06],
        ...,
        [-4.7024e-07,  0.0000e+00,  4.6308e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8332e-07],
        [-1.1309e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8795e-10,  0.0000e+00, -1.1412e-08,  ...,  0.0000e+00,
          0.0000e+00, -9.8982e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2412e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0501e-09,  0.0000e+00, -3.1030e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.0380e-06],
        [-1.1460e-07,  0.0000e+00,  1.3459e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.5929e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.9112e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2053e-09],
        [ 0.0000e+00,  0.0000e+00, -1.8317e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.2937e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.4306e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.7206e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.3528e-07],
        [-2.5268e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1500e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.4148e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5563e-03],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3724e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.3797e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8030e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.6673e-09,  0.0000e+00, -5.8508e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.1230e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.7760e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.7427e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  5.1376e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.2444e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1684e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2108e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2622e-07],
        [-1.1295e-07,  0.0000e+00, -4.7861e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.8100e-07],
        [-1.1885e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4069e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0041e-08],
        [ 9.9448e-09,  0.0000e+00, -4.6705e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.4923e-08,  0.0000e+00, -1.2716e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8667e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0048e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0597e-07],
        [-1.4153e-04,  0.0000e+00,  8.5669e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3476e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.6586e-06,  0.0000e+00, -1.6200e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2455e-05],
        [-2.0918e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.5302e-07],
        [ 1.8646e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7152e-07],
        [ 1.7597e-08,  0.0000e+00, -4.7336e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7475e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.7048e-08],
        ...,
        [ 7.3321e-11,  0.0000e+00, -8.3059e-10,  ...,  0.0000e+00,
          0.0000e+00,  6.7096e-11],
        [ 0.0000e+00,  0.0000e+00,  2.4674e-09,  ...,  0.0000e+00,
          0.0000e+00,  1.7808e-09],
        [ 1.1147e-05,  0.0000e+00, -2.9212e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0089e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9676e-04,  0.0000e+00,  2.2768e-03,  ...,  0.0000e+00,
          0.0000e+00,  1.2820e-03],
        [ 0.0000e+00,  0.0000e+00, -1.9148e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.1609e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.1927e-06,  0.0000e+00,  5.5095e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.6632e-05],
        [ 0.0000e+00,  0.0000e+00, -3.2127e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.6780e-05],
        [ 1.0627e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2030e-07],
        ...,
        [-7.4647e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7227e-06,  0.0000e+00, -1.2262e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.3954e-06],
        [ 3.5636e-04,  0.0000e+00, -5.9983e-03,  ...,  0.0000e+00,
          0.0000e+00, -1.0378e-03]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.8545e-07],
        [ 2.8463e-07,  0.0000e+00, -8.5917e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.0407e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.1214e-08,  0.0000e+00, -4.4576e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.6055e-07],
        [-3.1744e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.9400e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.7145e-04,  0.0000e+00,  1.6610e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.5900e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.7655e-09,  0.0000e+00, -9.4608e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.7965e-09],
        [ 1.3864e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.8572e-08,  ...,  0.0000e+00,
          0.0000e+00, -8.6223e-09],
        [ 1.9896e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7316e-06],
        ...,
        [ 9.9157e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1189e-03],
        [ 6.0390e-11,  0.0000e+00, -1.4352e-10,  ...,  0.0000e+00,
          0.0000e+00, -7.5275e-11],
        [ 2.0580e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9367e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.9574e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4284e-05],
        [-6.1439e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2617e-09],
        [-1.8925e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8787e-07,  0.0000e+00, -3.9501e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0433e-07],
        [ 1.0528e-05,  0.0000e+00, -3.4153e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.1054e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0438e-06],
        [-2.4942e-07,  0.0000e+00,  5.9633e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.5114e-05],
        [ 0.0000e+00,  0.0000e+00,  2.6897e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.9213e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.1431e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.6895e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0238e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5358e-06],
        [-7.4951e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.7700e-06],
        [-2.3733e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.0227e-09,  0.0000e+00, -2.3412e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.5803e-09],
        [ 1.0190e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1292e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3759e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.5422e-08,  0.0000e+00, -7.3002e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.1700e-08],
        [ 6.8748e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.4980e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8036e-05],
        ...,
        [-1.0720e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4123e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0276e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.4087e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.9531e-06],
        [ 2.7435e-06,  0.0000e+00, -7.6845e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.0613e-06],
        [ 1.5256e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7705e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7166e-07],
        [ 1.1032e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.1324e-08],
        [ 5.2679e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.8722e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -3.2831e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.6416e-03],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7616e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4434e-06],
        [-5.8304e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.4963e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6132e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9544e-06],
        [-5.0336e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2982e-07],
        [-1.1599e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-4.5922e-08,  0.0000e+00, -1.9119e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.2541e-09,  0.0000e+00, -2.6055e-08,  ...,  0.0000e+00,
          0.0000e+00,  3.1621e-09],
        [-1.8763e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.0910e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5222e-08],
        [-2.7386e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9152e-06,  0.0000e+00, -3.8145e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.0531e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8084e-07],
        [ 3.5018e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7299e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0705e-05],
        [ 6.6916e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.9431e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.0933e-08,  0.0000e+00,  3.3048e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.1744e-07],
        [-1.1129e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.2825e-07,  0.0000e+00, -6.1234e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.9517e-06],
        [ 1.6740e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2255e-06],
        [ 1.4017e-04,  0.0000e+00, -2.6861e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.2870e-06,  0.0000e+00, -1.1401e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0797e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0677e-05],
        [ 2.4752e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.8841e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3482e-07],
        [ 4.8389e-08,  0.0000e+00, -3.0871e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.2464e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.4503e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.4776e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2206e-06],
        ...,
        [-3.3160e-11,  0.0000e+00,  1.3385e-10,  ...,  0.0000e+00,
          0.0000e+00,  2.6407e-11],
        [ 2.2205e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1841e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3590e-07],
        [ 1.0042e-10,  0.0000e+00, -2.2753e-10,  ...,  0.0000e+00,
          0.0000e+00,  5.8651e-10],
        [-1.3274e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8944e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.0093e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  3.5191e-04,  ...,  0.0000e+00,
          0.0000e+00,  2.4804e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.1905e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5047e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.1114e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9243e-09],
        [-3.5731e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.1912e-06,  0.0000e+00,  2.3303e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.9596e-06],
        ...,
        [ 8.6594e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8801e-08],
        [-2.9200e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5773e-07,  0.0000e+00, -8.3788e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2085e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3221e-05],
        [ 3.0043e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.2822e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.1890e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 8.8208e-04,  0.0000e+00, -1.6141e-04,  ...,  0.0000e+00,
          0.0000e+00,  7.9306e-04],
        [ 2.7055e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.6280e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.3557e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.2930e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.7582e-08,  0.0000e+00, -1.1175e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.1276e-06],
        [ 2.8307e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.5604e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2260e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.8143e-08],
        [ 4.6370e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7937e-05],
        ...,
        [ 1.8036e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.7080e-10,  0.0000e+00,  5.7443e-08,  ...,  0.0000e+00,
          0.0000e+00,  4.7137e-09],
        [-4.4159e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2757e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.0212e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8357e-07],
        [-1.1491e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2178e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4637e-06,  0.0000e+00, -4.3339e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.8168e-07],
        [-1.3845e-05,  0.0000e+00,  4.2162e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.5851e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.2359e-06,  0.0000e+00, -8.2252e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.2882e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7865e-04,  0.0000e+00, -1.7439e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.6781e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3517e-05],
        [-5.5535e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.7252e-05,  0.0000e+00,  2.1143e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.3006e-05],
        [ 1.6178e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5550e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.2960e-08],
        [-2.7776e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3096e-06],
        [ 2.2781e-06,  0.0000e+00, -3.3415e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.3504e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7173e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9895e-07],
        [ 1.4967e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6367e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4728e-08,  0.0000e+00, -1.3089e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.4688e-04],
        [-2.5688e-09,  0.0000e+00,  3.4006e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.8539e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9215e-07],
        [ 3.7569e-07,  0.0000e+00, -2.3515e-07,  ...,  0.0000e+00,
          0.0000e+00,  9.5321e-07],
        [-2.7336e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.2449e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.1195e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-8.1566e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8745e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -5.7646e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.5345e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.5442e-07,  0.0000e+00, -5.5538e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.4256e-06],
        [-4.9145e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.6425e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.1214e-07],
        ...,
        [ 1.9109e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.5628e-08],
        [-2.5313e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7285e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0890e-10,  0.0000e+00, -7.7276e-09,  ...,  0.0000e+00,
          0.0000e+00, -3.1519e-10],
        [-5.1686e-04,  0.0000e+00, -2.6523e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2787e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.1309e-04],
        [-2.6838e-07,  0.0000e+00,  7.0195e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.3354e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.5387e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5974e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.2347e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.0450e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.0561e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0192e-05,  0.0000e+00, -3.0917e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.8113e-06,  0.0000e+00,  2.0824e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4471e-05,  0.0000e+00,  4.1892e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 4.0200e-09,  0.0000e+00, -6.5766e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.1355e-08],
        [ 4.8960e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.2104e-05],
        [ 4.2413e-06,  0.0000e+00, -5.4449e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0737e-09,  0.0000e+00, -6.6066e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.1807e-07],
        [ 0.0000e+00,  0.0000e+00,  5.7273e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1903e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.3272e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.6584e-07,  0.0000e+00,  2.2237e-06,  ...,  0.0000e+00,
          0.0000e+00,  6.7064e-07],
        [ 1.3373e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.7276e-05],
        ...,
        [ 1.0814e-08,  0.0000e+00, -1.0158e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.9359e-08],
        [-9.8341e-09,  0.0000e+00, -4.1513e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.7076e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.1553e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.4552e-07],
        [ 6.4009e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.3531e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8719e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1833e-06],
        [ 1.3497e-05,  0.0000e+00, -9.2712e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.9941e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.6894e-06],
        ...,
        [ 4.8441e-05,  0.0000e+00, -5.3337e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.7722e-05,  0.0000e+00, -5.0962e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.5729e-05],
        [ 1.5442e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.2188e-06,  0.0000e+00, -1.7383e-05,  ...,  0.0000e+00,
          0.0000e+00, -7.6248e-06],
        [ 1.2313e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.0525e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  2.8831e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.3641e-07],
        [-4.6356e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.2506e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5307e-04],
        ...,
        [-4.2400e-11,  0.0000e+00, -5.8381e-10,  ...,  0.0000e+00,
          0.0000e+00, -2.1851e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2990e-10],
        [ 0.0000e+00,  0.0000e+00,  1.5193e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.1129e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5918e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7224e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6435e-07],
        [-4.1374e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.2864e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6705e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.2788e-08,  0.0000e+00, -1.0569e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2187e-07],
        [ 0.0000e+00,  0.0000e+00,  4.0967e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7645e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3392e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.2944e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0732e-06],
        [ 0.0000e+00,  0.0000e+00, -5.5525e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.6006e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5582e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3232e-07,  0.0000e+00, -7.8095e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.6517e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.5760e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.2886e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5586e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4326e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5633e-06],
        [ 6.0516e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.6431e-07,  0.0000e+00, -9.3219e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.8346e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.0674e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4883e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7353e-07,  0.0000e+00, -4.0367e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.5824e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.3764e-08,  0.0000e+00,  3.0043e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4873e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6881e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.3981e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.8864e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0427e-05],
        [-5.3576e-04,  0.0000e+00,  1.8539e-03,  ...,  0.0000e+00,
          0.0000e+00, -8.1362e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7264e-06],
        [ 1.4924e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0805e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5854e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4062e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2417e-09],
        [ 0.0000e+00,  0.0000e+00, -4.2249e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1233e-05],
        [-9.1387e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.4162e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8035e-09,  0.0000e+00, -3.1999e-09,  ...,  0.0000e+00,
          0.0000e+00, -3.6576e-09],
        [-6.0916e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.8697e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8856e-08,  0.0000e+00, -1.0505e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.3127e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.1192e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.4610e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8776e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6042e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.3738e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9695e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 4.0136e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.8037e-08],
        [ 4.3114e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8878e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  3.4929e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0428e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.1182e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.1122e-08,  0.0000e+00, -2.2828e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.1568e-08],
        [-1.5841e-07,  0.0000e+00,  1.6831e-05,  ...,  0.0000e+00,
          0.0000e+00,  7.8518e-06],
        [ 3.4832e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.5219e-04],
        ...,
        [-1.3226e-06,  0.0000e+00,  1.1803e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2161e-06],
        [-1.7732e-07,  0.0000e+00,  3.3698e-06,  ...,  0.0000e+00,
          0.0000e+00,  5.2070e-07],
        [-3.1771e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0881e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.7428e-09,  0.0000e+00, -1.1859e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.7903e-08],
        [ 8.9143e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9029e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7085e-07],
        [ 3.4097e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5985e-05],
        [ 0.0000e+00,  0.0000e+00, -3.2003e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.8001e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.4759e-08,  0.0000e+00, -2.1109e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3494e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.9110e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.3722e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.8297e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.9236e-06],
        ...,
        [-9.2363e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1395e-05],
        [-2.2847e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6198e-08],
        [ 1.9482e-06,  0.0000e+00,  1.7251e-05,  ...,  0.0000e+00,
          0.0000e+00,  8.2046e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.2284e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.2690e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.8439e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.9612e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.2018e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0305e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0856e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7774e-07],
        [-3.6459e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.5263e-

Gradients values: tensor([[-1.6726e-06,  0.0000e+00, -3.1737e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4355e-07,  0.0000e+00,  1.7062e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.2179e-07],
        [ 8.9771e-10,  0.0000e+00,  2.3595e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.3464e-09,  0.0000e+00, -4.1884e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.1255e-09],
        [-4.8954e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.2482e-06],
        [-2.8203e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.0409e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.4269e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.2290e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0535e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5236e-10,  0.0000e+00, -5.3338e-09,  ...,  0.0000e+00,
          0.0000e+00, -9.1204e-10],
        ...,
        [-3.1246e-07,  0.0000e+00, -7.7829e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.0092e-07],
        [ 1.5726e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1647e-08],
        [ 0.0000e+00,  0.0000e+00, -1.2931e-06,  ...,  0.0000e+00,
          0.0000e+00, -8.8129e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.4975e-07,  0.0000e+00,  1.1800e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.1915e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.9643e-06],
        [ 8.6804e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3033e-09],
        [-4.6144e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8271e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 4.7822e-09,  0.0000e+00, -3.6242e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.9967e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2248e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.6046e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6070e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9701e-03],
        [-6.6351e-

Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.9662e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.7522e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 5.7380e-10,  0.0000e+00,  4.4833e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.4902e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7599e-09,  0.0000e+00, -1.3133e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.4041e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.5848e-08,  0.0000e+00,  3.1551e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1566e-05,  0.0000e+00, -1.3870e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0102e-07,  0.0000e+00, -1.2608e-07,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.4603e-10,  0.0000e+00, -6.5001e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.9056e-10],
        [-1.2717e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1490e-05],
        [-1.1145e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2733e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.1769e-09],
        [-3.6143e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.6451e-08],
        [-1.4001e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.2295e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1856e-07],
        [ 2.7189e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4309e-06],
        [ 2.5133e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.9979e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5423e-06],
        [-6.6040e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.7990e-07],
        [-1.0706e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7786e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5173e-08],
        [-2.6300e-06,  0.0000e+00,  1.9962e-05,  ...,  0.0000e+00,
          0.0000e+00, -6.4230e-06],
        [ 2.2035e-08,  0.0000e+00, -5.3116e-08,  ...,  0.0000e+00,
          0.0000e+00,  2.7897e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.6865e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7319e-05],
        [ 1.0010e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9848e-05],
        [-8.1976e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2299e-07],
        [-1.3450e-08,  0.0000e+00, -3.2835e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.7369e-08],
        ...,
        [ 1.5435e-06,  0.0000e+00,  8.5649e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.7393e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3047e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4575e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.7511e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0200e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5133e-06,  0.0000e+00,  1.4048e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.4421e-06],
        [ 3.2353e-09,  0.0000e+00, -6.4311e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-8.5361e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.0392e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6226e-07],
        [ 5.6374e-06,  0.0000e+00,  1.4450e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4406e-06],
        [ 0.0000e+00,  0.0000e+00, -3.9744e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.8862e-07],
        [-6.6796e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4923e-09,  0.0000e+00, -6.3682e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2486e-08],
        ...,
        [-1.3352e-06,  0.0000e+00,  2.3446e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.1894e-06],
        [ 0.0000e+00,  0.0000e+00,  5.9363e-07,  ...,  0.0000e+00,
          0.0000e+00,  7.6169e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.6073e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0000e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3409e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4261e-07],
        [-1.0910e-11,  0.0000e+00, -6.2499e-13,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1389e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1204e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2395e-09,  0.0000e+00, -2.3189e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.4193e-09],
        [-1.3162e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.8350e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.3419e-10,  ...,  0.0000e+00,
          0.0000e+00,  6.6752e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7154e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.5804e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7158e-09],
        [-2.0970e-07,  0.0000e+00, -1.7924e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9553e-10,  0.0000e+00, -1.1708e-09,  ...,  0.0000e+00,
          0.0000e+00, -3.4908e-10],
        [-2.9436e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6681e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.5657e-05,  0.0000e+00, -7.5474e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.1133e-05],
        [-1.4324e-08,  0.0000e+00, -1.7676e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  3.7582e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.6773e-06,  0.0000e+00, -1.2254e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.5471e-06],
        [-4.6977e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4792e-04,  0.0000e+00,  1.7106e-03,  ...,  0.0000e+00,
          0.0000e+00,  5.0146e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6525e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.4451e-05],
        ...,
        [-2.9009e-06,  0.0000e+00,  8.9179e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.3951e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.0246e-06],
        [ 3.8043e-09,  0.0000e+00, -6.4644e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.4611e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6677e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1045e-07],
        [ 0.0000e+00,  0.0000e+00, -1.8398e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.9490e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5700e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5181e-06,  0.0000e+00,  5.6446e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.2247e-06],
        ...,
        [ 4.6930e-10,  0.0000e+00, -3.1397e-09,  ...,  0.0000e+00,
          0.0000e+00,  2.1353e-10],
        [ 1.0367e-08,  0.0000e+00, -3.9652e-07,  ...,  0.0000e+00,
          0.0000e+00, -7.0600e-08],
        [ 0.0000e+00,  0.0000e+00, -8.0624e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6525e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9115e-09],
        [-1.4144e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.7956e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.7693e-04,  0.0000e+00, -2.4569e-03,  ...,  0.0000e+00,
          0.0000e+00,  2.1523e-03],
        [-6.5503e-09,  0.0000e+00, -5.0126e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.6732e-08],
        [-8.6082e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.6633e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.1636e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0285e-13,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.8343e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.1956e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.9121e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -8.3893e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.3403e-08,  0.0000e+00,  1.4860e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2831e-09,  0.0000e+00, -1.2945e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.2192e-07,  0.0000e+00,  2.2901e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.4181e-06],
        [ 1.4452e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3390e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9711e-06,  0.0000e+00, -4.2012e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2212e-06,  0.0000e+00,  6.3298e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.1561e-06],
        [ 6.4549e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6899e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.6419e-09],
        [ 5.2590e-11,  0.0000e+00, -1.3251e-10,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.4170e-08,  0.0000e+00,  2.5697e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1011e-05,  0.0000e+00,  3.0812e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.7987e-05],
        [-1.4148e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1565e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4315e-06,  0.0000e+00, -9.0295e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.3789e-06],
        [ 4.1165e-08,  0.0000e+00, -7.3692e-08,  ...,  0.0000e+00,
          0.0000e+00,  5.8445e-08],
        [-4.1648e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0410e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.6324e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.6258e-05],
        [ 8.4533e-09,  0.0000e+00, -1.4121e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.4569e-08],
        ...,
        [-3.9004e-07,  0.0000e+00, -8.2045e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4310e-05,  0.0000e+00, -3.0140e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.4368e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.7180e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.9758e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.1804e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.8469e-07],
        [-1.1269e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.4100e-06],
        [-2.5459e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 2.1456e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8489e-06],
        [-1.8615e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.1265e-08,  0.0000e+00, -3.7546e-08,  ...,  0.0000e+00,
          0.0000e+00,  8.2673e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.3766e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.4659e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0603e-

Gradients values: tensor([[-2.6979e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9157e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0197e-06],
        [-2.5175e-08,  0.0000e+00,  5.1113e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.2412e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.4507e-07],
        [-5.3518e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.8323e-06,  0.0000e+00,  7.0028e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6009e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.9148e-09,  0.0000e+00,  1.1412e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1108e-07,  0.0000e+00, -1.1257e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.8652e-09,  0.0000e+00,  1.2765e-07,  ...,  0.0000e+00,
          0.0000e+00,  5.4204e-09],
        [-1.6953e-09,  0.0000e+00, -4.4863e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.5902e-09],
        [-4.6058e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7105e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.5795e-08,  0.0000e+00,  3.6848e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.0395e-08],
        [-4.3306e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.6248e-09,  0.0000e+00, -3.7179e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.7009e-09],
        [-2.4492e-09,  0.0000e+00, -5.9999e-09,  ...,  0.0000e+00,
          0.0000e+00, -5.0138e-09],
        [ 1.4088e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1664e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.5567e-10,  0.0000e+00, -6.0212e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.7394e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7542e-04],
        [-1.9995e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.5257e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  1.3824e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.9276e-09],
        [-2.4991e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9527e-05],
        ...,
        [-9.9644e-07,  0.0000e+00, -1.5541e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1461e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8420e-07],
        [ 0.0000e+00,  0.0000e+00, -3.0913e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.5621e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.9658e-06,  0.0000e+00,  2.9378e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.3176e-09,  0.0000e+00,  9.0194e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2264e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5853e-06],
        [ 0.0000e+00,  0.0000e+00, -2.4818e-05,  ...,  0.0000e+00,
          0.0000e+00, -4.8148e-06],
        [-2.6921e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.2935e-07,  0.0000e+00, -2.1951e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.8435e-07],
        [ 0.0000e+00,  0.0000e+00, -9.4643e-07,  ...,  0.0000e+00,
          0.0000e+00,  3.1399e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.6754e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.6839e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3201e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0566e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -2.3121e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.1124e-06],
        ...,
        [-5.6443e-08,  0.0000e+00,  5.3288e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.4180e-07],
        [ 2.7757e-09,  0.0000e+00, -5.2407e-08,  ...,  0.0000e+00,
          0.0000e+00, -8.4505e-09],
        [-8.2935e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6250e-06],
        [-4.4750e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.8053e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.0599e-05],
        [-1.0682e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.9985e-11],
        [ 1.4542e-07,  0.0000e+00, -4.1042e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.2459e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.1102e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.4014e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9811e-09,  0.0000e+00, -1.2465e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.2524e-09],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.9535e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0454e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.8088e-05],
        [-6.6261e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.2416e-06,  0.0000e+00,  6.1304e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.8102e-06],
        [ 5.2156e-06,  0.0000e+00, -5.9766e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.1635e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4032e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.1081e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3476e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.9079e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.1227e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2315e-07],
        [ 0.0000e+00,  0.0000e+00, -6.4688e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.4088e-06],
        ...,
        [-2.0571e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7388e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7481e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3595e-07],
        [-2.3632e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.1285e-10,  0.0000e+00, -2.5343e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.9347e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6608e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-6.9321e-05,  0.0000e+00,  2.5284e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.1671e-04],
        [ 0.0000e+00,  0.0000e+00, -2.2843e-07,  ...,  0.0000e+00,
          0.0000e+00,  5.3344e-08],
        [ 2.1206e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7084e-03],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0335e-04],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-9.8303e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1276e-08,  0.0000e+00, -5.6678e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.2141e-08],
        [ 0.0000e+00,  0.0000e+00,  5.3580e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0112e-06],
        [ 0.0000e+00,  0.0000e+00, -3.3583e-06,  ...,  0.0000e+00,
          0.0000e+00,  7.1747e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.2715e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.2068e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.4260e-07],
        [ 1.7416e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.2647e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.7123e-06],
        [-8.6630e-08,  0.0000e+00, -4.2851e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.2870e-07],
        [ 1.6615e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-7.4959e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6371e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7575e-06],
        [ 1.8706e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.7116e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.5412e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.2070e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.4695e-06],
        [ 1.0365e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9006e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6433e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.6559e-08,  0.0000e+00, -1.4942e-07,  ...,  0.0000e+00,
          0.0000e+00,  7.0656e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  3.4956e-08,  ...,  0.0000e+00,
          0.0000e+00,  2.8064e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4358e-05],
        [-3.9092e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8832e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.9300e-05],
        [-1.4397e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.3143e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1154e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.9888e-08],
        [ 4.5607e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0790e-06],
        [ 7.1047e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.8927e-10],
        ...,
        [-2.9722e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.4084e-05],
        [-1.5719e-07,  0.0000e+00, -1.7959e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.6525e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.2818e-08,  0.0000e+00,  3.8738e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.2132e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8721e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5882e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7324e-08],
        [ 0.0000e+00,  0.0000e+00, -8.5418e-08,  ...,  0.0000e+00,
          0.0000e+00, -9.9672e-08],
        ...,
        [ 2.2240e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8956e-06],
        [ 0.0000e+00,  0.0000e+00, -9.9557e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.9435e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9150e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1725e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.5631e-06],
        [-1.5537e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8827e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7915e-05],
        [-4.7458e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.4574e-08],
        [ 6.2218e-08,  0.0000e+00,  1.5726e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.4490e-07],
        ...,
        [ 3.8670e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7410e-07],
        [-3.0863e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6694e-07],
        [ 1.0370e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2838e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6471e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0010e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.7124e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.9457e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1924e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5607e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1940e-07],
        ...,
        [ 8.1508e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.4484e-09],
        [-3.5836e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.3158e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.9757e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.3040e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4668e-05],
        [-2.0773e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.6398e-11,  0.0000e+00, -4.8515e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.8759e-09,  0.0000e+00, -1.7417e-08,  ...,  0.0000e+00,
          0.0000e+00,  2.9873e-09],
        [-1.0353e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 3.6129e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.9513e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.2883e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8416e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.2509e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1605e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.5144e-09],
        [ 5.6558e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.4769e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0374e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.6136e-06],
        [-5.1246e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2934e-04],
        [-1.7058e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6493e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.1324e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.1672e-06,  0.0000e+00, -1.4186e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.2449e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.6373e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.1311e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.2284e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.3681e-07],
        ...,
        [-1.7418e-08,  0.0000e+00,  2.6261e-08,  ...,  0.0000e+00,
          0.0000e+00,  3.1593e-08],
        [ 3.1255e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.5199e-08],
        [ 0.0000e+00,  0.0000e+00,  1.4836e-09,  ...,  0.0000e+00,
          0.0000e+00,  1.0085e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3845e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.2032e-05],
        [ 4.3071e-09,  0.0000e+00, -2.9936e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.8102e-08],
        [ 1.0136e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.4704e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.9120e-07],
        [-1.1731e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0054e-05],
        [ 1.0160e-04,  0.0000e+00, -1.5894e-04,  ...,  0.0000e+00,
          0.0000e+00, -9.3219e-05],
        ...,
        [-1.6392e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.9880e-08,  0.0000e+00, -2.2293e-07,  ...,  0.0000e+00,
          0.0000e+00,  2.0325e-08],
        [ 2.7820e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6437e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.7221e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.5432e-06,  0.0000e+00, -1.7352e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.8695e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.4225e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7674e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5056e-08,  0.0000e+00, -3.9070e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.0224e-08],
        ...,
        [-5.2565e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.8082e-05],
        [-1.9531e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1940e-03],
        [ 0.0000e+00,  0.0000e+00,  2.0293e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.1379e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3383e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.6709e-08],
        [-2.0244e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.4933e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.3086e-07,  0.0000e+00,  5.6971e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.7314e-04],
        [-6.1568e-09,  0.0000e+00, -3.2976e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.8513e-05,  0.0000e+00,  6.0102e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9928e-05],
        [-4.3243e-08,  0.0000e+00,  1.9333e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.9989e-06,  0.0000e+00,  3.0781e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1117e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.1844e-08,  0.0000e+00,  7.9957e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5044e-07,  0.0000e+00, -3.9996e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.3702e-07],
        [ 8.3292e-07,  0.0000e+00, -3.0903e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.7459e-11,  0.0000e+00, -1.8273e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.5219e-10,  0.0000e+00, -1.1849e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.5405e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.8601e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.0963e-07,  0.0000e+00,  3.1607e-06,  ...,  0.0000e+00,
          0.0000e+00,  8.8480e-08],
        [ 1.5778e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3974e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 3.3628e-11,  0.0000e+00, -6.8973e-10,  ...,  0.0000e+00,
          0.0000e+00, -2.9189e-10],
        [ 1.2725e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0844e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0319e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.0284e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.4938e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4975e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 7.9395e-07,  0.0000e+00, -6.7052e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.4368e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.0398e-10,  0.0000e+00,  2.7825e-08,  ...,  0.0000e+00,
          0.0000e+00, -9.5017e-10],
        ...,
        [-2.7153e-09,  0.0000e+00, -1.3866e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.3459e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.3032e-05],
        [ 6.2897e-07,  0.0000e+00, -1.2168e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.7178e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.1845e-04,  0.0000e+00, -6.0680e-04,  ...,  0.0000e+00,
          0.0000e+00, -4.8472e-04],
        [ 0.0000e+00,  0.0000e+00,  6.1370e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.2937e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9081e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.6926e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4686e-07],
        [-2.5396e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8584e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2895e-07],
        [ 3.3276e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0589e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6664e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.2634e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.5957e-08],
        [-8.2962e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.2638e-12,  0.0000e+00, -1.9853e-11,  ...,  0.0000e+00,
          0.0000e+00,  1.8181e-12],
        [-5.3890e-09,  0.0000e+00, -3.3505e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.2108e-08],
        [-8.3835e-09,  0.0000e+00, -2.9006e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.1031e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  5.4688e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.5048e-08],
        [ 0.0000e+00,  0.0000e+00,  6.3161e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.2430e-09,  0.0000e+00,  1.2931e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.9917e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.9212e-07],
        [-6.9728e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6046e-08,  0.0000e+00,  8.8952e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.7544e-08],
        [-3.2469e-07,  0.0000e+00, -2.1553e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.3374e-06],
        [ 3.9102e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0205e-06],
        [-3.5079e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.9640e-05,  0.0000e+00, -5.3782e-05,  ...,  0.0000e+00,
          0.0000e+00, -5.8970e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.2748e-04],
        [ 0.0000e+00,  0.0000e+00, -1.3367e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.5250e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.8017e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.7629e-07],
        [ 0.0000e+00,  0.0000e+00, -1.2365e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.6282e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0394e-06],
        ...,
        [ 1.4953e-09,  0.0000e+00, -1.0652e-07,  ...,  0.0000e+00,
          0.0000e+00, -7.1333e-08],
        [-2.8427e-06,  0.0000e+00,  6.0201e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.0982e-06],
        [ 0.0000e+00,  0.0000e+00, -4.9002e-04,  ...,  0.0000e+00,
          0.0000e+00,  3.2081e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.5093e-04,  0.0000e+00,  1.4654e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6141e-08,  0.0000e+00, -1.1887e-06,  ...,  0.0000e+00,
          0.0000e+00, -9.5940e-08],
        [-6.2124e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0418e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7735e-08],
        [-1.5096e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1462e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.3278e-06],
        ...,
        [-1.1942e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9170e-08],
        [-3.3733e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.4300e-10,  0.0000e+00, -1.8818e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.2025e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.7951e-09,  0.0000e+00, -1.8137e-08,  ...,  0.0000e+00,
          0.0000e+00,  2.1421e-09],
        [ 0.0000e+00,  0.0000e+00,  1.0977e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.9218e-08],
        [ 1.7872e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.4628e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.2804e-07],
        [ 2.4241e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.1188e-05],
        ...,
        [ 3.4321e-08,  0.0000e+00, -8.0328e-08,  ...,  0.0000e+00,
          0.0000e+00, -1.7322e-07],
        [ 3.3635e-08,  0.0000e+00,  2.4159e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.3930e-08],
        [ 4.3691e-09,  0.0000e+00,  1.6569e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.1821e-10,  0.0000e+00, -5.5046e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1896e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.9577e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2426e-05],
        [-7.5262e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.4282e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.4339e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.7856e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.7218e-08,  0.0000e+00,  8.6884e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.8767e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7147e-09]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[0.0000e+00, 0.0000e+00, 3.0229e-07,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+0

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6615e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9653e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0778e-07],
        ...,
        [-5.2217e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9600e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0502e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8247e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.6935e-08,  0.0000e+00, -1.2700e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.0638e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.3070e-09,  0.0000e+00, -4.4456e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.3901e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  3.7087e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.4679e-09,  0.0000e+00, -2.1575e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.4899e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4967e-08],
        [-3.3785e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7011e-10],
        [ 8.5672e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3726e-03],
        [-9.6304e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-3.9189e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.8333e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.7824e-06],
        [-8.0382e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0297e-07,  0.0000e+00, -1.1185e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6714e-04],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.5602e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.7967e-08],
        [-4.7795e-08,  0.0000e+00, -6.8376e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.1244e-07,  0.0000e+00, -2.7752e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5505e-06],
        [ 5.5515e-06,  0.0000e+00,  1.9021e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9661e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.9599e-05,  0.0000e+00, -1.1175e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.8376e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9944e-05],
        [ 1.2141e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3768e-06],
        [-3.8486e-06,  0.0000e+00,  3.3810e-06,  ...,  0.0000e+00,
          0.0000e+00, -3.0176e-06],
        ...,
        [ 2.6021e-07,  0.0000e+00,  4.4421e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7740e-08],
        [-4.8396e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.3053e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 9.9378e-09,  0.0000e+00, -4.3883e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1199e-05],
        [ 0.0000e+00,  0.0000e+00,  3.6719e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.3530e-05],
        [ 5.5674e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.3403e-05],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.2411e-05],
        [ 1.1969e-07,  0.0000e+00, -2.6441e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.4456e-06],
        [ 0.0000e+00,  0.0000e+00,  2.0902e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.6362e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1060e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.3787e-07,  0.0000e+00,  1.0124e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.9443e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1236e-08],
        [-5.8737e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.9747e-05,  0.0000e+00, -7.5145e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.0034e-08,  0.0000e+00,  1.0541e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.8761e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -2.1333e-03,  ...,  0.0000e+00,
          0.0000e+00, -5.1239e-03],
        [ 2.8592e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-9.0910e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -6.6737e-10,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3031e-08],
        [ 1.1680e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.4100e-07,  0.0000e+00, -7.9258e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.1765e-07],
        [ 0.0000e+00,  0.0000e+00,  6.3264e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.6122e-06],
        [-1.4345e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6678e-04],
        ...,
        [-4.2933e-08,  0.0000e+00, -2.6401e-07,  ...,  0.0000e+00,
          0.0000e+00, -8.9160e-08],
        [ 5.5590e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.6730e-06],
        [ 1.7818e-08,  0.0000e+00, -4.4996e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.5460e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5944e-09],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.2353e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3292e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6967e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.7560e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4648e-07],
        [-8.7729e-06,  0.0000e+00,  7.8912e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.0713e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4283e-11,  0.0000e+00, -1.3522e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  3.3479e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.3519e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6513e-08,  0.0000e+00,  8.2138e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.8928e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.5827e-07],
        [ 5.1887e-12,  0.0000e+00, -4.3225e-10,  ...,  0.0000e+00,
          0.0000e+00, -6.7852e-11],
        ...,
        [-5.6655e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.2244e-08,  0.0000e+00, -1.1934e-07,  ...,  0.0000e+00,
          0.0000e+00, -9.6836e-08],
        [-2.0708e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.8482e-09,  0.0000e+00,  1.7311e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.8939e-08,  0.0000e+00, -3.8644e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.3564e-08],
        [ 4.2219e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.7021e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.5054e-08],
        [ 3.5916e-07,  0.0000e+00, -2.4429e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -3.9848e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.0699e-06],
        [ 3.6748e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.3911e-08],
        [ 1.4561e-06,  0.0000e+00, -1.9363e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.2047e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.2481e-08,  0.0000e+00, -5.8367e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.0354e-07],
        [-1.8701e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.6208e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.6143e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.1779e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9916e-09],
        [-2.3964e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.2341e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5655e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.9746e-08,  0.0000e+00,  1.9788e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.1394e-07],
        [ 0.0000e+00,  0.0000e+00, -4.5458e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.8310e-06],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.7362e-07,  0.0000e+00, -2.0721e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.4383e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -4.6780e-09,  ...,  0.0000e+00,
          0.0000e+00,  3.6218e-09],
        ...,
        [-2.2364e-07,  0.0000e+00,  3.6906e-07,  ...,  0.0000e+00,
          0.0000e+00, -2.1938e-07],
        [ 1.0992e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1534e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7873e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.0633e-08,  0.0000e+00,  1.5020e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.3792e-08],
        [-8.0791e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0614e-09],
        [ 0.0000e+

Gradients values: tensor([[-1.5319e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.0646e-06,  0.0000e+00,  1.6365e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.6288e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.5384e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3704e-12],
        [-1.2937e-06,  0.0000e+00,  4.4628e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.0890e-06],
        [ 0.0000e+00,  0.0000e+00,  2.5588e-04,  ...,  0.0000e+00,
          0.0000e+00, -6.6344e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.1767e-10,  ...,  0.0000e+00,
          0.0000e+00,  3.9125e-11],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4632e-09,  0.0000e+00, -2.0660e-08,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.3305e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.7480e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6271e-10],
        [ 2.8197e-09,  0.0000e+00, -4.2417e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-2.3753e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9003e-06],
        [-8.4341e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0020e-07],
        [ 0.0000e+00,  0.0000e+00,  1.8139e-04,  ...,  0.0000e+00,
          0.0000e+00,  1.1092e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.2010e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5851e-08],
        [-8.0719e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.7245e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.6408e-09],
        [-2.3346e-08,  0.0000e+00, -1.3188e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.4117e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6901e-06],
        [ 1.0647e-06,  0.0000e+00, -1.6681e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.6192e-06],
        [ 1.2920e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.2555e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.7048e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.5505e-07],
        [ 0.0000e+00,  0.0000e+00,  1.2533e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.0840e-09],
        [ 1.4063e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.1512e-09,  0.0000e+00, -2.8276e-08,  ...,  0.0000e+00,
          0.0000e+00, -4.9316e-09],
        [-5.6711e-07,  0.0000e+00,  1.6579e-07,  ...,  0.0000e+00,
          0.0000e+00, -5.2302e-07],
        [ 0.0000e+00,  0.0000e+00, -4.6991e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.6889e-07],
        ...,
        [-4.4008e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.8355e-06,  0.0000e+00, -3.1527e-06,  ...,  0.0000e+00,
          0.0000e+00,  3.8071e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3206e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3315e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.3817e-06,  0.0000e+00,  2.3786e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.2196e-06],
        [ 1.0099e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.6521e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.3437e-07],
        [ 1.2453e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0049e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.7681e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4817e-06],
        [ 7.7029e-08,  0.0000e+00, -2.3257e-05,  ...,  0.0000e+00,
          0.0000e+00, -3.5936e-06],
        [-1.1999e-05,  0.0000e+00,  3.9841e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.7327e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.1463e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3153e-05],
        [ 2.0819e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1581e-09],
        [ 2.7586e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.4584e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8952e-08],
        [-7.1191e-04,  0.0000e+00,  5.7954e-04,  ...,  0.0000e+00,
          0.0000e+00, -1.2777e-03],
        [ 9.3443e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-3.2177e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.8931e-05],
        [ 1.2022e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.0134e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6713e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.8879e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.4544e-05],
        [-9.4508e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0478e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.2896e-04,  0.0000e+00,  9.8842e-04,  ...,  0.0000e+00,
          0.0000e+00,  5.7951e-04],
        [ 4.6709e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.1768e-08],
        [ 1.5525e-09,  0.0000e+00, -3.5745e-08,  ...,  0.0000e+00,
          0.0000e+00, -6.1470e-09],
        ...,
        [ 1.8844e-05,  0.0000e+00, -2.5178e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.7569e-05],
        [-1.2456e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.0056e-07,  0.0000e+00, -3.1104e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.9096e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3432e-07,  0.0000e+00,  1.0430e-07,  ...,  0.0000e+00,
          0.0000e+00,  9.2539e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.9102e-07,  0.0000e+00, -1.4505e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.7064e-07],
        [ 1.8911e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.5202e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.1951e-09],
        ...,
        [ 8.3472e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.9485e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.5919e-07,  0.0000e+00,  2.0539e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.0416e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.0327e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3129e-08],
        [ 3.4996e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00, -3.6173e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.6940e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1257e-06],
        [-1.4365e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 7.8828e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5476e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.7069e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.1437e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  5.5961e-05,  ...,  0.0000e+00,
          0.0000e+00, -2.5251e-05],
        [-3.5377e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2626e-05],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.1032e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-5.5583e-07,  0.0000e+00,  1.3123e-06,  ...,  0.0000e+00,
          0.0000e+00, -6.7175e-07],
        [ 0.0000e+00,  0.0000e+00,  1.2763e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2009e-05],
        [ 0.0000e+00,  0.0000e+00,  4.9353e-10,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0873e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -9.7544e-10,  ...,  0.0000e+00,
          0.0000e+00, -6.6895e-10],
        [ 0.0000e+

Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3648e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.8098e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.7097e-04,  ...,  0.0000e+00,
          0.0000e+00, -3.7437e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.0145e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.5891e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.9797e-07,  0.0000e+00, -4.3341e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.2294e-07],
        [ 0.0000e+00,  0.0000e+00, -5.7854e-07,  ...,  0.0000e+00,
     

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  9.2968e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3324e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.3110e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.0251e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.6418e-08],
        [-4.5674e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.0195e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.0003e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.0528e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.6056e-09,  0.0000e+00, -3.0532e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.8648e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.4853e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 1.3429e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 3.5838e-03,  0.0000e+00, -7.1646e-04,  ...,  0.0000e+00,
          0.0000e+00,  9.7177e-04],
        [ 9.0233e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.2506e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.4715e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.3567e-04],
        [-1.0226e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.1619e-06],
        [-8.5826e-10,  0.0000e+00, -8.9510e-09,  ...,  0.0000e+00,
          0.0000e+00, -2.1169e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 6.7715e-08,  0.0000e+00, -2.1967e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.0136e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.9660e-05,  0.0000e+00, -9.0563e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.6588e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.7884e-07],
        [ 4.4086e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.7731e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.8705e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.4425e-07],
        [ 2.0874e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.4997e-06,  0.0000e+00, -6.5104e-06,  ...,  0.0000e+00,
          0.0000e+00,  2.2921e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  5.3295e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.5158e-07],
        [ 2.9385e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.3504e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 3.6887e-06,  0.0000e+00,  8.2844e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6264e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0271e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.0984e-06,  0.0000e+00,  6.3558e-05,  ...,  0.0000e+00,
          0.0000e+00,  3.4515e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1482e-03],
        [-6.5579e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.8144e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -8.0121e-08],
        [-2.2135e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.7971e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.9283e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0905e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -9.3828e-08],
        [-1.3834e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.1379e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -7.6966e-08],
        ...,
        [-2.8899e-07,  0.0000e+00, -2.1310e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.0275e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.6330e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.9404e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.1131e-06,  0.0000e+00, -9.0492e-05,  ...,  0.0000e+00,
          0.0000e+00, -1.8353e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.4813e-10],
        [-6.1166e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.2696e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.6525e-07,  0.0000e+00,  5.6297e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.4932e-07],
        ...,
        [-1.6135e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0129e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  1.8797e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8927e-06,  0.0000e+00,  2.0531e-05,  ...,  0.0000e+00,
          0.0000e+00,  5.8947e-07],
        [-1.1137e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5064e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6080e-08],
        [-2.3273e-06,  0.0000e+00, -2.4337e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -1.4593e-08,  ...,  0.0000e+00,
          0.0000e+00, -7.0848e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.3479e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.2970e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.3707e-07],
        [-7.0073e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.0197e-08],
        ...,
        [-6.8309e-09,  0.0000e+00, -1.3836e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.1917e-08],
        [ 0.0000e+00,  0.0000e+00, -1.7811e-06,  ...,  0.0000e+00,
          0.0000e+00, -7.8585e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.3099e-04]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.3128e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-6.3027e-06,  0.0000e+00,  1.0228e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.0006e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.5668e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.3463e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9638e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.4741e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -5.5457e-08],
        [ 4.9833e-07,  0.0000e+00, -9.8657e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.1614e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.4360e-05,  0.0000e+00, -2.5054e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2316e-07,  0.0000e+00, -2.7083e-08,  ...,  0.0000e+00,
          0.0000e+00, -3.2329e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.3028e-06],
        ...,
        [-8.9211e-10,  0.0000e+00, -5.4507e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5643e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8071e-06,  0.0000e+00, -7.6733e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.4834e-06]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.0916e-10],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.6517e-07],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.2278e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  5.0179e-08],
        [ 5.3373e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.4056e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2025e-05],
        ...,
        [-5.4478e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.3717e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.8517e-08,  0.0000e+00, -6.6500e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-3.7823e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.4364e-08],
        [ 1.2924e-07,  0.0000e+00, -2.6170e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.1071e-07],
        [ 1.2856e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 5.3458e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7818e-04,  0.0000e+00, -1.1793e-03,  ...,  0.0000e+00,
          0.0000e+00, -7.6370e-04],
        [ 1.1301e-12,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.9860e-11],
        ...,
        [-1.0419e-07,  0.0000e+00,  6.7421e-07,  ...,  0.0000e+00,
          0.0000e+00, -3.7975e-06],
        [ 2.5867e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.5561e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.9641e-09,  0.0000e+00, -9.7639e-08,  ...,  0.0000e+00,
          0.0000e+00,  1.5146e-09],
        [ 3.0661e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.5235e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.7799e-04,  0.0000e+00,  3.1029e-03,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.2854e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -7.2007e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4777e-06,  0.0000e+00, -3.6991e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5714e-07,  0.0000e+00, -4.1593e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-5.8913e-09,  0.0000e+00,  1.5647e-07,  ...,  0.0000e+00,
          0.0000e+00,  1.5913e-08],
        [ 0.0000e+00,  0.0000e+00,  2.1327e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.8265e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8108e-05],
        [-1.2779e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.2567e-10],
        [ 0.0000e+00,  0.0000e+00,  8.3299e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.5566e-07],
        ...,
        [ 5.2573e-09,  0.0000e+00,  7.7035e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.7650e-08,  0.0000e+00,  4.7690e-07,  ...,  0.0000e+00,
          0.0000e+00,  9.3134e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.2613e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6764e-03,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.6905e-04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.6490e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.8241e-11,  0.0000e+00,  8.9824e-10,  ...,  0.0000e+00,
          0.0000e+00,  1.4879e-10],
        [ 1.0841e-05,  0.0000e+00, -7.6013e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  6.4975e-06],
        ...,
        [-4.6989e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  2.0648e-06],
        [ 3.1292e-06,  0.0000e+00,  6.8790e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.7507e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 9.8027e-08,  0.0000e+00, -4.7229e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.5345e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0106e-07],
        [ 8.0195e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 2.3159e-11,  0.0000e+00, -8.3189e-10,  ...,  0.0000e+00,
          0.0000e+00, -1.7751e-10],
        [ 1.0250e-08,  0.0000e+00, -3.0908e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.0654e-08],
        [-2.0345e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.6230e-07],
        ...,
        [ 1.6860e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.3326e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.7047e-08],
        [-5.8224e-07,  0.0000e+00,  1.2534e-06,  ...,  0.0000e+00,
          0.0000e+00, -5.4234e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.3394e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-4.0759e-09,  0.0000e+00, -4.5759e-08,  ...,  0.0000e+00,
          0.0000e+00, -5.6570e-08],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.8402e-09,  0.0000e+00, -1.1191e-07,  ...,  0.0000e+00,
          0.0000e+00, -1.1749e-08],
        [-2.4181e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.8391e-06],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.8244e-06],
        [-3.1838e-09,  0.0000e+00,  2.3356e-08,  ...,  0.0000e+00,
          0.0000e+00, -2.3239e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.3554e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.1738e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5452e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.5116e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9691e-03],
        ...,
        [-1.4663e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  3.0139e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.7845e-05],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 8.7660e-13,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-2.6640e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 1.3360e-05,  0.0000e+00, -3.4213e-05,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4503e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6651e-07,  0.0000e+00, -1.3278e-06,  ...,  0.0000e+00,
          0.0000e+00,  1.7473e-07],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.4556e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.0386e-07],
        [ 0.0000e+00,  0.0000e+00,  1.4129e-09,  ...,  0.0000e+00,
          0.0000e+00,  8.1923e-10]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -3.1253e-06],
        [ 4.3855e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.0044e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.3269e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  6.0576e-10,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-1.1690e-07,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.9713e-07],
        [ 0.0000e+00,  0.0000e+00, -1.0214e-05,  ...,  0.0000e+00,
          0.0000e+00,  2.8757e-06],
        [ 0.0000e+00,  0.0000e+00,  2.6083e-07,  ...,  0.0000e+00,
          0.0000e+00, -7.0315e-08]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 7.3726e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -6.7601e-09],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  5.4811e-11,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-8.8269e-10,  0.0000e+00, -1.3301e-09,  ...,  0.0000e+00,
          0.0000e+00, -1.1667e-09],
        ...,
        [ 0.0000e+00,  0.0000e+00, -3.7860e-05,  ...,  0.0000e+00,
          0.0000e+00,  1.4522e-06],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-6.2257e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.6067e-07],
        [-2.1088e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 6.6428e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.7639e-10,  0.0000e+00, -1.3826e-09,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  1.7028e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.3082e-07,  0.0000e+00, -2.2835e-08,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5687e-05,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.0386e-04],
        [-1.5234e-05,  0.0000e+00,  2.7657e-04,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-8.8199e-05,  0.0000e+00,  1.6428e-03,  ...,  0.0000e+00,
          0.0000e+00,  3.5204e-04],
        [ 0.0000e+00,  0.0000e+00, -1.8720e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-4.6497e-07,  0.0000e+00, -1.3156e-06,  ...,  0.0000e+00,
          0.0000e+00, -2.4346e-06],
        [-8.5492e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [-5.6472e-08,  0.0000e+00,  8.9711e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 6.4453e-06,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-2.6204e-06,  0.0000e+00,  1.1150e-06,  ...,  0.0000e+00,
          0.0000e+00, -1.2266e-06],
        [-1.3933e-09,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  1.1643e-10],
        [-2.5850e-

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 4.9497e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 5.5658e-08,  0.0000e+00, -8.4887e-07,  ...,  0.0000e+00,
          0.0000e+00, -4.7716e-08],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -4.0806e-07],
        ...,
        [ 2.7923e-11,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.5718e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  1.1514e-04,  ...,  0.0000e+00,
          0.0000e+00, -2.3997e-05]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-1.6746e-06,  0.0000e+00,  3.1915e-07,  ...,  0.0000e+00,
          0.0000e+00, -6.2044e-07],
        [ 1.8794e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+

Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9719e-08],
        [ 1.4300e-04,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.0222e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -1.0947e-08],
        ...,
        [ 0.0000e+00,  0.0000e+00, -2.1533e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-7.0204e-10,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00, -2.9322e-09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  7.0149e-07]])
Backward pass: Gradients shape: torch.Size([64, 128])
Gradients values: tensor([[-7.3159e-08,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  4.3085e-07],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-1.9633e-

In [15]:
# Print the `explainable` list that NeuralNetwork.__init__ builds from
# fc1's weight and bias (`self.fc1.weight.data[:]`, `self.fc1.bias.data[:]`).
# `model` must already exist from an earlier cell (not visible in this cell).
# NOTE(review): the `[:]` slices are presumably views that share storage with
# the live parameters rather than frozen copies -- confirm which was intended.
print(model.explainable)

[tensor([[ 2.3839e-02, -5.8466e-03,  5.9758e-03,  ...,  1.7368e-02,
          8.9825e-03,  2.3738e-02],
        [ 3.9045e-02,  1.5037e-02, -1.3869e-04,  ...,  3.9311e-02,
          1.7824e-02,  5.9217e-03],
        [ 8.4408e-03, -5.1010e-04,  2.5691e-02,  ...,  1.4301e-02,
          3.9605e-02,  2.8213e-02],
        ...,
        [ 9.3567e-03,  2.7215e-02,  2.3847e-02,  ..., -1.0386e-02,
          3.9111e-02, -3.2332e-05],
        [-5.5109e-03,  2.2485e-02, -6.0024e-03,  ..., -1.0203e-02,
          1.1226e-03,  4.0364e-02],
        [ 3.8047e-02,  5.5602e-03,  3.1558e-02,  ...,  2.5590e-02,
          4.1677e-02, -6.0021e-03]]), tensor([ 0.0017, -0.0186, -0.0334, -0.0201,  0.0115, -0.0041,  0.0229, -0.0456,
        -0.0130,  0.0087, -0.0133,  0.0084,  0.0027,  0.0115,  0.0151,  0.0367,
         0.0281, -0.0224, -0.0074, -0.0018, -0.0173, -0.0050,  0.0172, -0.0037,
         0.0111,  0.0030,  0.0179,  0.0126,  0.0099,  0.0349, -0.0024, -0.0256,
        -0.0425, -0.0187, -0.0155, -0.0207,  0

In [10]:
# Show the raw values of the first linear layer's weight matrix.
# `.data` already yields a plain tensor, so no extra full slice is needed
# before printing.
print(model.fc1.weight.data)

tensor([[ 0.0007, -0.0111, -0.0226,  ..., -0.0528, -0.0509, -0.0127],
        [-0.0060, -0.0219, -0.0113,  ..., -0.0063,  0.0117,  0.0366],
        [-0.0145,  0.0238, -0.0271,  ...,  0.0280,  0.0426, -0.0143],
        ...,
        [ 0.0091, -0.0142,  0.0320,  ...,  0.0352,  0.0364,  0.0087],
        [ 0.0221,  0.0335, -0.0170,  ...,  0.0320, -0.0005,  0.0402],
        [-0.0177,  0.0455,  0.0135,  ...,  0.0097,  0.0263, -0.0220]])
