In [1]:
import os
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split

from torch_geometric.nn import global_mean_pool
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch
from torch_geometric.nn import GCNConv, VGAE

import os
import kagglehub
from kagglehub import KaggleDatasetAdapter

import pandas as pd

from tqdm import tqdm
from tqdm.contrib import tmap
from tqdm.contrib.concurrent import process_map

from torchvision import transforms

from concurrent.futures import ProcessPoolExecutor

from lib.lib import SiameseSignatureDataset

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

# Data Preparation

## prepare data from mallapraveen/signature-matching
## and construct it using data.csv

In [12]:
df = pd.read_csv('data.csv')

def dataset_path():
    path = kagglehub.dataset_download("mallapraveen/signature-matching")
    return os.path.join(path, 'custom\\full')

def transform(**kwargs):
    return transforms.Compose([
        transforms.Grayscale(num_output_channels=kwargs['num_output_channels']),
        transforms.Resize(kwargs['resize']),
        transforms.ToTensor(),
    ])
    
dataset = SiameseSignatureDataset(
    root_dir=dataset_path(),
    signer_folders=df,
    transform=transform(num_output_channels=1, resize=(32, 32)
))

Loaded 85246 signature images (genuine + forged)


## split the data
### train dataset & validation dataset

In [13]:
total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
print(f"Dataset sizes - Train: {train_size}, Validation: {val_size}")

Dataset sizes - Train: 68196, Validation: 17050


In [14]:
train_dataset[0]

(Data(x=[1024, 3], edge_index=[2, 3968]),
 Data(x=[1024, 3], edge_index=[2, 3968]),
 0)

## load the data using dataloader

In [15]:
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4
)

val_loader = DataLoader(
    val_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4
)

In [16]:
next(iter(train_loader))

[DataBatch(x=[32768, 3], edge_index=[2, 126976], batch=[32768], ptr=[33]),
 DataBatch(x=[32768, 3], edge_index=[2, 126976], batch=[32768], ptr=[33]),
 tensor([1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0,
         0, 0, 0, 1, 0, 0, 1, 0])]

# Model Preparation

In [17]:
class GNNEncoder(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, latent_dim):
        super(GNNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv_mu = GCNConv(hidden_channels, latent_dim)
        self.conv_logvar = GCNConv(hidden_channels, latent_dim)

    def forward(self, x, edge_index):
        # Step 1: Aggregate node features from neighbors
        x = F.relu(self.conv1(x, edge_index))

        # Step 2: Output mean and log variance
        mu = self.conv_mu(x, edge_index)
        logvar = self.conv_logvar(x, edge_index)

        return mu, logvar

In [73]:
class SiameseNetwork(nn.Module):
    def __init__(self, fe_model, latent_dim):
        super(SiameseNetwork, self).__init__()
        self.encoder = fe_model
        self.embedding_dim = latent_dim
        
        self.projector = nn.Sequential(
            nn.Linear(self.embedding_dim, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),

            nn.Linear(128, 64),
            nn.ReLU(inplace=True),

            nn.Linear(64, 2)
        )

    def forward_once(self, x, edge_index, batch):
        mu, _ = self.encoder(x, edge_index)
        graph_emb = global_mean_pool(mu, batch) 
        # x = torch.flatten(x, 1)
        # return x
        return graph_emb

    def forward(self, x1, x2,
               edge_index1, edge_index2,
               batch):
        emb1 = self.forward_once(x1, edge_index1, batch)
        emb2 = self.forward_once(x2, edge_index2, batch)

        # Combine embeddings (abs difference works well for verification)
        combined = torch.abs(emb1 - emb2)

        # Predict same/forged
        out = self.projector(combined)
        return out

# Hyperparameters

In [74]:
w_d = 1e-5
epochs = 50
learning_rate = 1e-3
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

# Training Preparation

In [75]:
img1, _, _ = next(iter(train_loader))

input_dim = img1.x.shape[1]
hidden_dim = 64
latent_dim = 128

In [76]:
# Load your trained GNN-VAE
checkpoint = torch.load('VGAE_Model.pt', map_location=device)
vgae = VGAE(GNNEncoder(in_channels=input_dim, hidden_channels=hidden_dim, latent_dim=latent_dim)).to(device)
vgae.load_state_dict(checkpoint)
vgae.eval()

VGAE(
  (encoder): GNNEncoder(
    (conv1): GCNConv(3, 64)
    (conv_mu): GCNConv(64, 128)
    (conv_logvar): GCNConv(64, 128)
  )
  (decoder): InnerProductDecoder()
)

In [77]:
model = SiameseNetwork(vgae, latent_dim=128).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

## train steps

In [78]:
def train_step(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss, correct, total = 0.0, 0, 0

    for x1, x2, label in tqdm(dataloader, desc="Training", leave=False):
        x1, x2, label = x1.to(device), x2.to(device), label.to(device)

        # Forward
        output = model(x1.x.to(device),
                    x2.x.to(device),
                    x1.edge_index.to(device),
                    x2.edge_index.to(device),
                    x1.batch)  # logits shape: [batch, 2]

        print(output)
        
        loss = criterion(output, label)

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics
        total_loss += loss.item() * x1.size(0)
        preds = torch.argmax(output, dim=1)
        correct += (preds == label).sum().item()
        total += label.size(0)

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy

## validation steps

In [79]:
def val_step(model, dataloader, criterion, device):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    all_labels = []
    all_preds = []
    all_probs = []

    with torch.no_grad():
        for x1, x2, label in tqdm(dataloader, desc="Validating", leave=False):
            x1, x2, label = x1.to(device), x2.to(device), label.to(device)
            output = model(x1.x.to(device),
                    x2.x.to(device),
                    x1.edge_index.to(device),
                    x2.edge_index.to(device),
                    x1.batch)

            loss = criterion(output, label)
            total_loss += loss.item() * x1.size(0)

            probs = torch.softmax(output, dim=1)[:, 1]  # Probability of class 1 ("genuine")
            preds = torch.argmax(output, dim=1)

            all_labels.extend(label.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

            correct += (preds == label).sum().item()
            total += label.size(0)

    avg_loss = total_loss / total
    accuracy = correct / total
    return avg_loss, accuracy, np.array(all_labels), np.array(all_preds), np.array(all_probs)


# Training Phase

In [80]:
writer = SummaryWriter(log_dir="runs/siamese_signature_experiment")

best_auc = 0.0

for epoch in range(epochs):
    train_loss, train_acc = train_step(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, y_true, y_pred, y_prob = val_step(model, val_loader, criterion, device)

    # --- Confusion Matrix ---
    cm = confusion_matrix(y_true, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    fig_cm, ax_cm = plt.subplots(figsize=(4, 4))
    disp.plot(ax=ax_cm, cmap="Blues", colorbar=False)
    writer.add_figure("ConfusionMatrix/val", fig_cm, global_step=epoch)
    plt.close(fig_cm)

    # --- ROC Curve and AUC ---
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)
    fig_roc, ax_roc = plt.subplots()
    ax_roc.plot(fpr, tpr, color='blue', lw=2, label=f"AUC = {roc_auc:.3f}")
    ax_roc.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax_roc.set_xlabel("False Positive Rate")
    ax_roc.set_ylabel("True Positive Rate")
    ax_roc.legend(loc="lower right")
    writer.add_figure("ROC/val", fig_roc, global_step=epoch)
    plt.close(fig_roc)

    # --- Precision, Recall, F1 ---
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="binary"
    )

    # Log scalar metrics
    writer.add_scalar("Loss/train", train_loss, epoch)
    writer.add_scalar("Loss/val", val_loss, epoch)
    writer.add_scalar("Accuracy/train", train_acc, epoch)
    writer.add_scalar("Accuracy/val", val_acc, epoch)
    writer.add_scalar("AUC/val", roc_auc, epoch)
    writer.add_scalar("Precision/val", precision, epoch)
    writer.add_scalar("Recall/val", recall, epoch)
    writer.add_scalar("F1/val", f1, epoch)

    print(f"Epoch [{epoch+1}/{epochs}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} "
          f"| Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} "
          f"| AUC: {roc_auc:.4f} | F1: {f1:.4f}")

    # --- Save best model by AUC ---
    if roc_auc > best_auc:
        best_auc = roc_auc
        torch.save(model.state_dict(), "best_siamese_signature.pth")

writer.close()


Training:   0%|                                                                    | 1/2132 [00:22<13:08:45, 22.21s/it]

tensor([[-0.0717,  0.0536],
        [-0.0675,  0.0492],
        [-0.0857,  0.0579],
        [-0.0815,  0.0552],
        [-0.0734,  0.0619],
        [-0.0684,  0.0462],
        [-0.0760,  0.0601],
        [-0.0856,  0.0561],
        [-0.0746,  0.0558],
        [-0.0804,  0.0523],
        [-0.0642,  0.0634],
        [-0.0599,  0.0670],
        [-0.0648,  0.0626],
        [-0.0850,  0.0530],
        [-0.0612,  0.0661],
        [-0.0723,  0.0476],
        [-0.0750,  0.0463],
        [-0.0908,  0.0584],
        [-0.0791,  0.0569],
        [-0.0736,  0.0497],
        [-0.0593,  0.0593],
        [-0.0798,  0.0512],
        [-0.0657,  0.0462],
        [-0.0831,  0.0482],
        [-0.0763,  0.0576],
        [-0.0766,  0.0608],
        [-0.0692,  0.0648],
        [-0.0645,  0.0522],
        [-0.0642,  0.0443],
        [-0.0708,  0.0518],
        [-0.0845,  0.0677],
        [-0.0738,  0.0581]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0664,  0.0611],
        [-0.0859,  0.0604],
    

Training:   0%|▎                                                                      | 9/2132 [00:22<53:49,  1.52s/it]

tensor([[-0.0815,  0.0559],
        [-0.0892,  0.0567],
        [-0.0849,  0.0582],
        [-0.0794,  0.0624],
        [-0.0775,  0.0471],
        [-0.0813,  0.0652],
        [-0.0731,  0.0534],
        [-0.0799,  0.0536],
        [-0.0753,  0.0591],
        [-0.0690,  0.0580],
        [-0.0686,  0.0491],
        [-0.0770,  0.0617],
        [-0.0698,  0.0562],
        [-0.0868,  0.0577],
        [-0.0804,  0.0668],
        [-0.0712,  0.0578],
        [-0.0799,  0.0559],
        [-0.0883,  0.0603],
        [-0.0736,  0.0686],
        [-0.0809,  0.0628],
        [-0.0806,  0.0699],
        [-0.0830,  0.0605],
        [-0.0670,  0.0573],
        [-0.0676,  0.0347],
        [-0.0884,  0.0538],
        [-0.0802,  0.0603],
        [-0.0725,  0.0627],
        [-0.0797,  0.0697],
        [-0.0841,  0.0688],
        [-0.0781,  0.0680],
        [-0.0746,  0.0462],
        [-0.0830,  0.0619]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0783,  0.0630],
        [-0.0934,  0.0679],
    

Training:   1%|▌                                                                     | 17/2132 [00:22<19:13,  1.83it/s]

tensor([[-0.0907,  0.0559],
        [-0.0825,  0.0658],
        [-0.0753,  0.0686],
        [-0.0793,  0.0590],
        [-0.0764,  0.0681],
        [-0.0762,  0.0572],
        [-0.0925,  0.0574],
        [-0.0709,  0.0507],
        [-0.0893,  0.0657],
        [-0.0901,  0.0690],
        [-0.0648,  0.0450],
        [-0.0772,  0.0561],
        [-0.0950,  0.0633],
        [-0.0728,  0.0491],
        [-0.0813,  0.0509],
        [-0.0918,  0.0641],
        [-0.0679,  0.0565],
        [-0.0780,  0.0649],
        [-0.0735,  0.0708],
        [-0.0757,  0.0560],
        [-0.0830,  0.0559],
        [-0.0726,  0.0617],
        [-0.0821,  0.0695],
        [-0.0680,  0.0722],
        [-0.0758,  0.0563],
        [-0.0878,  0.0625],
        [-0.0941,  0.0674],
        [-0.0863,  0.0635],
        [-0.0744,  0.0519],
        [-0.0884,  0.0544],
        [-0.0650,  0.0674],
        [-0.0685,  0.0557]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0807,  0.0586],
        [-0.0832,  0.0556],
    

Training:   1%|▋                                                                     | 21/2132 [00:22<12:59,  2.71it/s]

tensor([[-0.0790,  0.0473],
        [-0.0802,  0.0663],
        [-0.0765,  0.0570],
        [-0.0816,  0.0525],
        [-0.0786,  0.0771],
        [-0.0791,  0.0521],
        [-0.0809,  0.0542],
        [-0.0816,  0.0550],
        [-0.0868,  0.0661],
        [-0.0793,  0.0518],
        [-0.0715,  0.0688],
        [-0.0742,  0.0611],
        [-0.0738,  0.0581],
        [-0.0743,  0.0566],
        [-0.0829,  0.0453],
        [-0.0820,  0.0501],
        [-0.0900,  0.0577],
        [-0.0689,  0.0521],
        [-0.0868,  0.0539],
        [-0.0975,  0.0589],
        [-0.0938,  0.0565],
        [-0.0760,  0.0608],
        [-0.0788,  0.0596],
        [-0.0825,  0.0567],
        [-0.0998,  0.0699],
        [-0.0820,  0.0550],
        [-0.0824,  0.0709],
        [-0.0844,  0.0540],
        [-0.0880,  0.0649],
        [-0.0715,  0.0578],
        [-0.0814,  0.0433],
        [-0.0837,  0.0693]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0897,  0.0604],
        [-0.0638,  0.0581],
    

Training:   1%|▉                                                                     | 27/2132 [00:23<07:33,  4.65it/s]

tensor([[-0.0754,  0.0563],
        [-0.0701,  0.0545],
        [-0.0844,  0.0584],
        [-0.0804,  0.0612],
        [-0.0636,  0.0539],
        [-0.0819,  0.0499],
        [-0.0591,  0.0490],
        [-0.0654,  0.0483],
        [-0.0752,  0.0492],
        [-0.0765,  0.0667],
        [-0.0742,  0.0498],
        [-0.0689,  0.0561],
        [-0.0805,  0.0470],
        [-0.0628,  0.0575],
        [-0.0699,  0.0642],
        [-0.0862,  0.0518],
        [-0.0843,  0.0637],
        [-0.0856,  0.0556],
        [-0.0724,  0.0699],
        [-0.0835,  0.0696],
        [-0.0815,  0.0495],
        [-0.0673,  0.0619],
        [-0.0736,  0.0361],
        [-0.0763,  0.0611],
        [-0.0819,  0.0520],
        [-0.0759,  0.0441],
        [-0.0738,  0.0469],
        [-0.0721,  0.0619],
        [-0.0795,  0.0510],
        [-0.0598,  0.0502],
        [-0.0860,  0.0548],
        [-0.0946,  0.0669]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0782,  0.0557],
        [-0.0724,  0.0636],
    

Training:   2%|█                                                                     | 33/2132 [00:23<04:31,  7.73it/s]

tensor([[-0.0737,  0.0517],
        [-0.0762,  0.0391],
        [-0.0792,  0.0654],
        [-0.0772,  0.0552],
        [-0.0826,  0.0615],
        [-0.0928,  0.0516],
        [-0.0713,  0.0442],
        [-0.0811,  0.0648],
        [-0.0810,  0.0557],
        [-0.0720,  0.0626],
        [-0.0688,  0.0649],
        [-0.0681,  0.0478],
        [-0.0553,  0.0502],
        [-0.0683,  0.0534],
        [-0.0742,  0.0573],
        [-0.0845,  0.0533],
        [-0.0778,  0.0438],
        [-0.0851,  0.0631],
        [-0.0875,  0.0557],
        [-0.0853,  0.0499],
        [-0.0888,  0.0576],
        [-0.0740,  0.0615],
        [-0.0720,  0.0597],
        [-0.0807,  0.0628],
        [-0.0889,  0.0567],
        [-0.0800,  0.0527],
        [-0.0788,  0.0540],
        [-0.0790,  0.0549],
        [-0.0685,  0.0440],
        [-0.0863,  0.0572],
        [-0.0824,  0.0619],
        [-0.0711,  0.0601]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0693,  0.0494],
        [-0.0675,  0.0593],
    

Training:   2%|█▎                                                                    | 39/2132 [00:23<03:03, 11.43it/s]

tensor([[-0.0680,  0.0483],
        [-0.0634,  0.0576],
        [-0.0669,  0.0531],
        [-0.0790,  0.0441],
        [-0.0796,  0.0531],
        [-0.0859,  0.0498],
        [-0.0741,  0.0442],
        [-0.0735,  0.0395],
        [-0.0590,  0.0511],
        [-0.0587,  0.0428],
        [-0.0704,  0.0526],
        [-0.0685,  0.0471],
        [-0.0623,  0.0634],
        [-0.0713,  0.0471],
        [-0.0692,  0.0402],
        [-0.0698,  0.0549],
        [-0.0600,  0.0648],
        [-0.0875,  0.0447],
        [-0.0712,  0.0516],
        [-0.0760,  0.0540],
        [-0.0646,  0.0613],
        [-0.0810,  0.0498],
        [-0.0734,  0.0512],
        [-0.0752,  0.0554],
        [-0.0829,  0.0603],
        [-0.0781,  0.0534],
        [-0.0736,  0.0447],
        [-0.0735,  0.0528],
        [-0.0819,  0.0471],
        [-0.0899,  0.0480],
        [-0.0716,  0.0438],
        [-0.0794,  0.0462]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0710,  0.0452],
        [-0.0611,  0.0395],
    

Training:   2%|█▍                                                                    | 45/2132 [00:23<02:14, 15.51it/s]

tensor([[-0.0630,  0.0345],
        [-0.0657,  0.0489],
        [-0.0677,  0.0532],
        [-0.0843,  0.0573],
        [-0.0609,  0.0400],
        [-0.0768,  0.0486],
        [-0.0854,  0.0418],
        [-0.0725,  0.0478],
        [-0.0530,  0.0569],
        [-0.0780,  0.0546],
        [-0.0606,  0.0451],
        [-0.0624,  0.0571],
        [-0.0627,  0.0372],
        [-0.0708,  0.0497],
        [-0.0583,  0.0559],
        [-0.0706,  0.0456],
        [-0.0805,  0.0464],
        [-0.0728,  0.0578],
        [-0.0596,  0.0448],
        [-0.0648,  0.0471],
        [-0.0637,  0.0429],
        [-0.0727,  0.0434],
        [-0.0708,  0.0527],
        [-0.0809,  0.0584],
        [-0.0676,  0.0553],
        [-0.0658,  0.0544],
        [-0.0718,  0.0570],
        [-0.0628,  0.0606],
        [-0.0788,  0.0445],
        [-0.0584,  0.0436],
        [-0.0811,  0.0629],
        [-0.0687,  0.0473]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0727,  0.0562],
        [-0.0616,  0.0502],
    

Training:   2%|█▌                                                                    | 48/2132 [00:24<02:03, 16.83it/s]

tensor([[-0.0752,  0.0640],
        [-0.0633,  0.0409],
        [-0.0780,  0.0495],
        [-0.0712,  0.0516],
        [-0.0552,  0.0491],
        [-0.0670,  0.0452],
        [-0.0682,  0.0405],
        [-0.0798,  0.0566],
        [-0.0550,  0.0519],
        [-0.0475,  0.0292],
        [-0.0642,  0.0494],
        [-0.0582,  0.0541],
        [-0.0532,  0.0521],
        [-0.0591,  0.0489],
        [-0.0606,  0.0464],
        [-0.0653,  0.0335],
        [-0.0795,  0.0642],
        [-0.0714,  0.0498],
        [-0.0712,  0.0356],
        [-0.0690,  0.0528],
        [-0.0617,  0.0439],
        [-0.0686,  0.0523],
        [-0.0784,  0.0558],
        [-0.0664,  0.0597],
        [-0.0787,  0.0548],
        [-0.0488,  0.0550],
        [-0.0487,  0.0431],
        [-0.0688,  0.0373],
        [-0.0758,  0.0538],
        [-0.0744,  0.0399],
        [-0.0654,  0.0452],
        [-0.0689,  0.0641]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0423,  0.0402],
        [-0.0674,  0.0601],
    

Training:   3%|█▊                                                                    | 54/2132 [00:24<01:45, 19.72it/s]

tensor([[-0.0619,  0.0480],
        [-0.0723,  0.0352],
        [-0.0585,  0.0547],
        [-0.0641,  0.0533],
        [-0.0851,  0.0386],
        [-0.0682,  0.0355],
        [-0.0641,  0.0670],
        [-0.0592,  0.0404],
        [-0.0627,  0.0265],
        [-0.0544,  0.0397],
        [-0.0645,  0.0555],
        [-0.0755,  0.0480],
        [-0.0625,  0.0452],
        [-0.0604,  0.0461],
        [-0.0624,  0.0474],
        [-0.0728,  0.0419],
        [-0.0650,  0.0517],
        [-0.0391,  0.0446],
        [-0.0730,  0.0505],
        [-0.0452,  0.0382],
        [-0.0547,  0.0218],
        [-0.0437,  0.0373],
        [-0.0600,  0.0413],
        [-0.0689,  0.0606],
        [-0.0672,  0.0285],
        [-0.0842,  0.0543],
        [-0.0671,  0.0482],
        [-0.0530,  0.0474],
        [-0.0728,  0.0436],
        [-0.0726,  0.0376],
        [-0.0466,  0.0291],
        [-0.0616,  0.0387]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0748,  0.0514],
        [-0.0683,  0.0336],
    

Training:   3%|█▉                                                                    | 60/2132 [00:24<01:40, 20.71it/s]

tensor([[-0.0574,  0.0232],
        [-0.0574,  0.0591],
        [-0.0648,  0.0476],
        [-0.0514,  0.0392],
        [-0.0720,  0.0453],
        [-0.0458,  0.0512],
        [-0.0627,  0.0445],
        [-0.0485,  0.0357],
        [-0.0746,  0.0521],
        [-0.0580,  0.0323],
        [-0.0402,  0.0461],
        [-0.0626,  0.0343],
        [-0.0700,  0.0519],
        [-0.0433,  0.0410],
        [-0.0850,  0.0496],
        [-0.0592,  0.0453],
        [-0.0528,  0.0417],
        [-0.0661,  0.0497],
        [-0.0562,  0.0506],
        [-0.0828,  0.0505],
        [-0.0530,  0.0534],
        [-0.0514,  0.0433],
        [-0.0543,  0.0496],
        [-0.0687,  0.0625],
        [-0.0538,  0.0334],
        [-0.0367,  0.0388],
        [-0.0774,  0.0539],
        [-0.0857,  0.0497],
        [-0.0737,  0.0444],
        [-0.0622,  0.0350],
        [-0.0654,  0.0592],
        [-0.0473,  0.0340]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0671,  0.0431],
        [-0.0806,  0.0579],
    

Training:   3%|██                                                                    | 63/2132 [00:24<01:39, 20.72it/s]

tensor([[-0.0792,  0.0476],
        [-0.0577,  0.0541],
        [-0.0716,  0.0618],
        [-0.0593,  0.0640],
        [-0.0855,  0.0523],
        [-0.0675,  0.0388],
        [-0.0639,  0.0620],
        [-0.0579,  0.0442],
        [-0.0690,  0.0505],
        [-0.0586,  0.0511],
        [-0.0672,  0.0436],
        [-0.0707,  0.0465],
        [-0.0756,  0.0445],
        [-0.0667,  0.0600],
        [-0.0544,  0.0354],
        [-0.0861,  0.0352],
        [-0.0689,  0.0527],
        [-0.0482,  0.0182],
        [-0.0607,  0.0479],
        [-0.0598,  0.0372],
        [-0.0490,  0.0486],
        [-0.0740,  0.0487],
        [-0.0324,  0.0445],
        [-0.0433,  0.0390],
        [-0.0606,  0.0498],
        [-0.0399,  0.0314],
        [-0.0582,  0.0415],
        [-0.0730,  0.0480],
        [-0.0608,  0.0386],
        [-0.0640,  0.0519],
        [-0.0519,  0.0369],
        [-0.0676,  0.0475]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0501,  0.0192],
        [-0.0656,  0.0398],
    

Training:   3%|██▏                                                                   | 67/2132 [00:24<01:36, 21.43it/s]

tensor([[-0.0389,  0.0436],
        [-0.0359,  0.0321],
        [-0.0613,  0.0372],
        [-0.0452,  0.0497],
        [-0.0650,  0.0388],
        [-0.0784,  0.0411],
        [-0.0760,  0.0442],
        [-0.0692,  0.0567],
        [-0.0719,  0.0493],
        [-0.0533,  0.0456],
        [-0.0774,  0.0541],
        [-0.0598,  0.0560],
        [-0.0604,  0.0463],
        [-0.0522,  0.0393],
        [-0.0482,  0.0478],
        [-0.0572,  0.0556],
        [-0.0589,  0.0342],
        [-0.0364,  0.0371],
        [-0.0567,  0.0492],
        [-0.0759,  0.0477],
        [-0.0563,  0.0334],
        [-0.0516,  0.0394],
        [-0.0682,  0.0382],
        [-0.0609,  0.0412],
        [-0.0721,  0.0465],
        [-0.0703,  0.0210],
        [-0.0660,  0.0515],
        [-0.0808,  0.0459],
        [-0.0482,  0.0392],
        [-0.0432,  0.0460],
        [-0.0612,  0.0415],
        [-0.0725,  0.0506]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0451,  0.0593],
        [-0.0574,  0.0273],
    

Training:   4%|██▍                                                                   | 75/2132 [00:25<01:36, 21.40it/s]

tensor([[-0.0514,  0.0175],
        [-0.0711,  0.0475],
        [-0.0599,  0.0438],
        [-0.0769,  0.0507],
        [-0.0238,  0.0229],
        [-0.0777,  0.0561],
        [-0.0499,  0.0589],
        [-0.0385,  0.0412],
        [-0.0674,  0.0461],
        [-0.0526,  0.0311],
        [-0.0654,  0.0530],
        [-0.0448,  0.0332],
        [-0.0701,  0.0563],
        [-0.0392,  0.0412],
        [-0.0631,  0.0346],
        [-0.0678,  0.0478],
        [-0.0472,  0.0386],
        [-0.0651,  0.0484],
        [-0.0705,  0.0404],
        [-0.0606,  0.0475],
        [-0.0693,  0.0526],
        [-0.0671,  0.0671],
        [-0.0586,  0.0382],
        [-0.0702,  0.0428],
        [-0.0415,  0.0400],
        [-0.0640,  0.0490],
        [-0.0368,  0.0215],
        [-0.0498,  0.0489],
        [-0.0553,  0.0352],
        [-0.0600,  0.0428],
        [-0.0627,  0.0427],
        [-0.0586,  0.0233]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0421,  0.0335],
        [-0.0551,  0.0377],
    

Training:   4%|██▌                                                                   | 79/2132 [00:25<01:35, 21.49it/s]

tensor([[-0.0755,  0.0527],
        [-0.0643,  0.0345],
        [-0.0524,  0.0499],
        [-0.0558,  0.0281],
        [-0.0514,  0.0602],
        [-0.0584,  0.0452],
        [-0.0381,  0.0489],
        [-0.0516,  0.0262],
        [-0.0632,  0.0465],
        [-0.0666,  0.0467],
        [-0.0317,  0.0384],
        [-0.0504,  0.0408],
        [-0.0587,  0.0252],
        [-0.0527,  0.0453],
        [-0.0742,  0.0433],
        [-0.0547,  0.0483],
        [-0.0794,  0.0460],
        [-0.0193,  0.0285],
        [-0.0561,  0.0508],
        [-0.0389,  0.0443],
        [-0.0465,  0.0335],
        [-0.0588,  0.0490],
        [-0.0581,  0.0523],
        [-0.0693,  0.0475],
        [-0.0743,  0.0532],
        [-0.0483,  0.0273],
        [-0.0437,  0.0319],
        [-0.0438,  0.0391],
        [-0.0564,  0.0422],
        [-0.0760,  0.0369],
        [-0.0529,  0.0407],
        [-0.0493,  0.0454]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0663,  0.0423],
        [-0.0635,  0.0417],
    

Training:   4%|██▋                                                                   | 83/2132 [00:25<01:30, 22.59it/s]

tensor([[-0.0490,  0.0120],
        [-0.0324,  0.0354],
        [-0.0569,  0.0374],
        [-0.0332,  0.0401],
        [-0.0682,  0.0335],
        [-0.0772,  0.0635],
        [-0.0333,  0.0360],
        [-0.0679,  0.0583],
        [-0.0540,  0.0574],
        [-0.0615,  0.0518],
        [-0.0692,  0.0504],
        [-0.0673,  0.0492],
        [-0.0573,  0.0372],
        [-0.0876,  0.0520],
        [-0.0713,  0.0506],
        [-0.0502,  0.0335],
        [-0.0485,  0.0444],
        [-0.0578,  0.0293],
        [-0.0690,  0.0431],
        [-0.0296,  0.0248],
        [-0.0636,  0.0389],
        [-0.0371,  0.0216],
        [-0.0691,  0.0487],
        [-0.0335,  0.0103],
        [-0.0638,  0.0460],
        [-0.0331,  0.0204],
        [-0.0685,  0.0538],
        [-0.0560,  0.0481],
        [-0.0702,  0.0458],
        [-0.0754,  0.0537],
        [-0.0666,  0.0515],
        [-0.0774,  0.0413]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0197,  0.0047],
        [-0.0603,  0.0547],
    

Training:   4%|██▊                                                                   | 87/2132 [00:25<01:29, 22.79it/s]

tensor([[-0.0338,  0.0218],
        [-0.0580,  0.0604],
        [-0.0458,  0.0260],
        [-0.0749,  0.0328],
        [-0.0604,  0.0498],
        [-0.0559,  0.0322],
        [-0.0406,  0.0377],
        [-0.0670,  0.0470],
        [-0.0580,  0.0534],
        [-0.0505,  0.0389],
        [-0.0678,  0.0324],
        [-0.0216,  0.0063],
        [-0.0636,  0.0449],
        [-0.0544,  0.0486],
        [-0.0581,  0.0502],
        [-0.0802,  0.0439],
        [-0.0542,  0.0547],
        [-0.0757,  0.0487],
        [-0.0712,  0.0428],
        [-0.0713,  0.0322],
        [-0.0463,  0.0218],
        [-0.0579,  0.0515],
        [-0.0696,  0.0511],
        [-0.0601,  0.0385],
        [-0.0323,  0.0309],
        [-0.0238,  0.0404],
        [-0.0463,  0.0347],
        [-0.0155,  0.0380],
        [-0.0569,  0.0472],
        [-0.0360,  0.0098],
        [-0.0649,  0.0413],
        [-0.0590,  0.0553]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0549,  0.0329],
        [-0.0654,  0.0312],
    

Training:   4%|███                                                                   | 95/2132 [00:26<01:28, 22.97it/s]

tensor([[-0.0343,  0.0220],
        [-0.0586,  0.0484],
        [-0.0260,  0.0132],
        [-0.0533,  0.0241],
        [-0.0120, -0.0016],
        [-0.0355,  0.0138],
        [-0.0628,  0.0428],
        [-0.0563,  0.0423],
        [-0.0329,  0.0120],
        [-0.0717,  0.0398],
        [-0.0358,  0.0029],
        [-0.0528,  0.0388],
        [-0.0344,  0.0398],
        [-0.0697,  0.0303],
        [-0.0097,  0.0147],
        [-0.0619,  0.0387],
        [-0.0561,  0.0295],
        [-0.0716,  0.0531],
        [-0.0623,  0.0396],
        [-0.0561,  0.0459],
        [-0.0391,  0.0241],
        [-0.0659,  0.0419],
        [-0.0639,  0.0339],
        [-0.0101,  0.0297],
        [-0.0025, -0.0027],
        [-0.0042,  0.0353],
        [-0.0173,  0.0066],
        [-0.0591,  0.0398],
        [-0.0719,  0.0565],
        [-0.0539,  0.0270],
        [-0.0524,  0.0317],
        [-0.0357,  0.0129]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0564,  0.0627],
        [-0.0172,  0.0040],
    

Training:   5%|███▎                                                                  | 99/2132 [00:26<01:28, 23.04it/s]

tensor([[-0.0733,  0.0415],
        [-0.0696,  0.0440],
        [-0.0222,  0.0151],
        [-0.0074,  0.0034],
        [-0.0459,  0.0408],
        [-0.0017,  0.0222],
        [-0.0707,  0.0544],
        [-0.0421,  0.0382],
        [-0.0306,  0.0040],
        [-0.0702,  0.0607],
        [-0.0478,  0.0531],
        [-0.0635,  0.0493],
        [-0.0338,  0.0203],
        [-0.0324,  0.0358],
        [-0.0298,  0.0146],
        [-0.0281,  0.0263],
        [-0.0260,  0.0183],
        [-0.0637,  0.0398],
        [-0.0611,  0.0421],
        [-0.0589,  0.0352],
        [-0.0666,  0.0403],
        [-0.0657,  0.0540],
        [-0.0345,  0.0204],
        [-0.0438,  0.0382],
        [ 0.0383, -0.0017],
        [-0.0500,  0.0407],
        [-0.0425,  0.0244],
        [-0.0287,  0.0345],
        [-0.0396,  0.0195],
        [-0.0600,  0.0603],
        [-0.0568,  0.0393],
        [-0.0460,  0.0259]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0587,  0.0401],
        [-0.0203,  0.0139],
    

Training:   5%|███▎                                                                 | 103/2132 [00:26<01:28, 22.90it/s]

tensor([[ 0.0091,  0.0168],
        [-0.0685,  0.0418],
        [ 0.0393, -0.0212],
        [-0.0576,  0.0580],
        [-0.0622,  0.0366],
        [-0.0494,  0.0475],
        [-0.0480,  0.0463],
        [-0.0538,  0.0351],
        [-0.0664,  0.0365],
        [-0.0472,  0.0378],
        [-0.0762,  0.0516],
        [-0.0465,  0.0267],
        [-0.0548,  0.0457],
        [-0.0240,  0.0250],
        [-0.0412,  0.0211],
        [-0.0246,  0.0118],
        [ 0.0113,  0.0038],
        [-0.0687,  0.0346],
        [-0.0595,  0.0464],
        [-0.0564,  0.0469],
        [-0.0499,  0.0281],
        [-0.0306,  0.0265],
        [-0.0687,  0.0454],
        [ 0.0062,  0.0044],
        [ 0.0155, -0.0182],
        [-0.0715,  0.0467],
        [-0.0689,  0.0595],
        [-0.0764,  0.0576],
        [-0.0150,  0.0147],
        [-0.0381,  0.0374],
        [-0.0198,  0.0177],
        [-0.0675,  0.0355]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[ 0.0026,  0.0250],
        [-0.0429,  0.0536],
    

Training:   5%|███▌                                                                 | 111/2132 [00:26<01:27, 23.00it/s]

tensor([[-0.0533,  0.0399],
        [-0.0026, -0.0030],
        [-0.0689,  0.0519],
        [-0.0329,  0.0072],
        [-0.0547,  0.0496],
        [-0.0593,  0.0353],
        [-0.0072, -0.0120],
        [-0.0708,  0.0428],
        [-0.0481,  0.0216],
        [-0.0610,  0.0400],
        [-0.0713,  0.0509],
        [-0.0460,  0.0380],
        [-0.0036,  0.0034],
        [-0.0166, -0.0031],
        [-0.0174, -0.0037],
        [-0.0817,  0.0341],
        [-0.0465,  0.0370],
        [-0.0389,  0.0457],
        [ 0.0019, -0.0055],
        [ 0.0021, -0.0124],
        [-0.0434,  0.0212],
        [-0.0558,  0.0426],
        [-0.0633,  0.0681],
        [-0.0733,  0.0485],
        [-0.0600,  0.0490],
        [-0.0443,  0.0278],
        [-0.0432,  0.0233],
        [-0.0596,  0.0454],
        [-0.0661,  0.0650],
        [-0.0367,  0.0194],
        [ 0.0270, -0.0026],
        [-0.0617,  0.0556]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0720,  0.0515],
        [-0.0645,  0.0562],
    

Training:   5%|███▋                                                                 | 115/2132 [00:27<01:26, 23.36it/s]

tensor([[-0.0791,  0.0601],
        [-0.0493,  0.0374],
        [-0.0489,  0.0361],
        [-0.0238,  0.0099],
        [-0.0075,  0.0062],
        [-0.0526,  0.0436],
        [-0.0553,  0.0315],
        [-0.0480,  0.0424],
        [-0.0494,  0.0299],
        [ 0.0112, -0.0176],
        [-0.0715,  0.0551],
        [-0.0336,  0.0075],
        [-0.0629,  0.0346],
        [-0.0444,  0.0335],
        [-0.0114,  0.0133],
        [ 0.0094,  0.0147],
        [ 0.0385, -0.0227],
        [-0.0759,  0.0548],
        [-0.0582,  0.0297],
        [-0.0518,  0.0407],
        [ 0.0068,  0.0032],
        [ 0.0329, -0.0228],
        [-0.0841,  0.0508],
        [-0.0622,  0.0390],
        [-0.0218,  0.0293],
        [-0.0550,  0.0427],
        [-0.0593,  0.0306],
        [-0.0745,  0.0554],
        [ 0.0156, -0.0087],
        [-0.0078,  0.0303],
        [-0.0153,  0.0343],
        [ 0.0375, -0.0076]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[ 0.0246, -0.0040],
        [-0.0378,  0.0477],
    

Training:   6%|███▊                                                                 | 119/2132 [00:27<01:24, 23.76it/s]

tensor([[-0.0844,  0.0592],
        [-0.0592,  0.0374],
        [-0.0129,  0.0055],
        [-0.0125,  0.0110],
        [-0.0072,  0.0328],
        [ 0.0106,  0.0072],
        [ 0.0438, -0.0293],
        [-0.0430,  0.0532],
        [-0.0516,  0.0515],
        [-0.0624,  0.0487],
        [-0.0630,  0.0566],
        [-0.0675,  0.0312],
        [-0.0199,  0.0211],
        [-0.0646,  0.0452],
        [ 0.0330,  0.0042],
        [-0.0521,  0.0404],
        [-0.0751,  0.0556],
        [-0.0670,  0.0372],
        [-0.0393,  0.0273],
        [ 0.0128, -0.0014],
        [ 0.0301, -0.0157],
        [-0.0514,  0.0393],
        [-0.0338,  0.0293],
        [-0.0675,  0.0446],
        [ 0.0096, -0.0123],
        [ 0.0556, -0.0238],
        [-0.0502,  0.0202],
        [-0.0405,  0.0277],
        [-0.0544,  0.0333],
        [-0.0124,  0.0369],
        [ 0.0250, -0.0359],
        [ 0.0069, -0.0022]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0621,  0.0346],
        [-0.0166,  0.0392],
    

Training:   6%|████                                                                 | 127/2132 [00:27<01:27, 22.94it/s]

tensor([[-0.0639,  0.0553],
        [-0.0161,  0.0116],
        [-0.0336,  0.0049],
        [-0.0402,  0.0401],
        [-0.0692,  0.0569],
        [ 0.0261, -0.0060],
        [-0.0752,  0.0627],
        [ 0.0160, -0.0012],
        [-0.0775,  0.0361],
        [-0.0519,  0.0471],
        [-0.0486,  0.0285],
        [-0.0522,  0.0269],
        [-0.0402,  0.0193],
        [ 0.0445, -0.0068],
        [-0.0517,  0.0261],
        [-0.0030,  0.0041],
        [-0.0084,  0.0142],
        [-0.0604,  0.0454],
        [ 0.0164, -0.0033],
        [-0.0583,  0.0402],
        [ 0.0046, -0.0333],
        [ 0.0599, -0.0374],
        [-0.0329,  0.0263],
        [-0.0216,  0.0253],
        [-0.0634,  0.0488],
        [-0.0100, -0.0135],
        [ 0.0142, -0.0108],
        [-0.0056, -0.0140],
        [-0.0439,  0.0552],
        [-0.0034,  0.0077],
        [-0.0639,  0.0415],
        [-0.0283,  0.0015]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0593,  0.0373],
        [-0.0272,  0.0229],
    

Training:   6%|████▏                                                                | 131/2132 [00:27<01:27, 22.94it/s]

tensor([[-0.0745,  0.0500],
        [ 0.0367, -0.0359],
        [ 0.0347, -0.0227],
        [-0.0464,  0.0382],
        [ 0.0239, -0.0422],
        [ 0.0824, -0.0507],
        [ 0.0345, -0.0374],
        [-0.0438,  0.0393],
        [-0.0388,  0.0161],
        [ 0.0225, -0.0282],
        [-0.0188,  0.0048],
        [ 0.0525, -0.0183],
        [-0.0419,  0.0298],
        [-0.0753,  0.0538],
        [-0.0558,  0.0448],
        [ 0.0061,  0.0074],
        [-0.0187,  0.0111],
        [ 0.0461, -0.0431],
        [-0.0687,  0.0576],
        [-0.0063, -0.0221],
        [-0.0880,  0.0546],
        [ 0.0380, -0.0315],
        [ 0.0229, -0.0194],
        [ 0.0410, -0.0061],
        [ 0.0273, -0.0249],
        [ 0.0118, -0.0186],
        [-0.0684,  0.0349],
        [ 0.1015, -0.0576],
        [ 0.0293, -0.0185],
        [-0.0827,  0.0637],
        [-0.0348,  0.0449],
        [-0.0441,  0.0457]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0705,  0.0232],
        [-0.0022,  0.0019],
    

Training:   7%|████▍                                                                | 139/2132 [00:28<01:23, 23.83it/s]

tensor([[-0.0471,  0.0267],
        [ 0.0518, -0.0098],
        [ 0.0983, -0.0660],
        [ 0.0825, -0.0616],
        [-0.0617,  0.0383],
        [-0.0067,  0.0082],
        [-0.0761,  0.0644],
        [ 0.0430, -0.0393],
        [-0.0606,  0.0514],
        [-0.0673,  0.0473],
        [-0.0544,  0.0420],
        [ 0.0343, -0.0178],
        [-0.0441,  0.0315],
        [-0.0617,  0.0440],
        [-0.0338,  0.0302],
        [-0.0645,  0.0414],
        [ 0.0366, -0.0245],
        [-0.0432,  0.0218],
        [-0.0778,  0.0675],
        [-0.0411,  0.0443],
        [-0.0343,  0.0267],
        [ 0.1063, -0.0595],
        [ 0.0259, -0.0290],
        [-0.0117,  0.0112],
        [-0.0369,  0.0448],
        [-0.0273,  0.0270],
        [-0.0509,  0.0582],
        [-0.0709,  0.0530],
        [ 0.0785, -0.0307],
        [ 0.0229, -0.0209],
        [-0.0181, -0.0010],
        [-0.0386,  0.0430]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0510,  0.0222],
        [ 0.1111, -0.0627],
    

Training:   7%|████▋                                                                | 145/2132 [00:28<01:20, 24.76it/s]

tensor([[ 0.0174, -0.0012],
        [-0.0214,  0.0176],
        [-0.0352, -0.0033],
        [-0.0832,  0.0504],
        [-0.0754,  0.0531],
        [-0.0428,  0.0277],
        [-0.0536,  0.0434],
        [-0.0306,  0.0177],
        [-0.0700,  0.0592],
        [-0.0029,  0.0128],
        [ 0.0851, -0.0293],
        [-0.0307,  0.0445],
        [ 0.0109,  0.0008],
        [-0.0130,  0.0215],
        [ 0.0911, -0.0382],
        [-0.0297,  0.0282],
        [-0.0595,  0.0460],
        [ 0.1506, -0.0945],
        [-0.0716,  0.0506],
        [ 0.1302, -0.0518],
        [ 0.0969, -0.0786],
        [-0.0607,  0.0601],
        [-0.0470,  0.0319],
        [-0.0601,  0.0581],
        [-0.0919,  0.0698],
        [-0.0502,  0.0370],
        [-0.0847,  0.0645],
        [-0.0639,  0.0549],
        [-0.0537,  0.0336],
        [-0.0413,  0.0383],
        [-0.0153,  0.0211],
        [-0.0630,  0.0453]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0432,  0.0338],
        [-0.0217, -0.0009],
    

Training:   7%|████▊                                                                | 148/2132 [00:28<01:23, 23.68it/s]

tensor([[-4.0337e-02,  2.2096e-02],
        [ 2.2993e-02, -3.3492e-02],
        [-5.0182e-02,  2.4443e-02],
        [-1.7686e-02,  8.2477e-03],
        [ 3.6492e-02, -1.9325e-02],
        [ 5.5658e-02, -5.3737e-02],
        [ 1.0516e-01, -5.9137e-02],
        [ 1.1198e-02,  1.5895e-02],
        [-3.6063e-02,  3.5786e-02],
        [ 3.0736e-02, -9.3668e-03],
        [ 1.2171e-01, -9.3463e-02],
        [ 4.4842e-02, -4.8635e-02],
        [ 2.9640e-03,  6.9629e-03],
        [-2.2970e-02,  2.2764e-02],
        [ 8.5365e-02, -6.4864e-02],
        [-2.1411e-02,  2.6347e-02],
        [ 4.8532e-02, -4.8769e-02],
        [-1.4616e-02,  4.2149e-03],
        [ 4.8172e-02, -5.3986e-02],
        [ 1.0467e-01, -5.1385e-02],
        [-5.4068e-02,  4.2914e-02],
        [-5.7412e-02,  6.2535e-02],
        [-6.3942e-02,  5.8418e-02],
        [-6.3171e-02,  4.7538e-02],
        [-5.6678e-02,  5.0552e-02],
        [-5.3356e-02,  4.2483e-02],
        [-5.5014e-02,  5.7455e-02],
        [-3.5579e-02,  4.234

Training:   7%|████▉                                                                | 154/2132 [00:28<01:29, 22.18it/s]

tensor([[-0.0186,  0.0275],
        [-0.0702,  0.0620],
        [-0.0151,  0.0161],
        [-0.0336,  0.0332],
        [-0.0543,  0.0248],
        [ 0.1838, -0.1030],
        [ 0.0885, -0.0481],
        [ 0.1052, -0.0703],
        [-0.0375,  0.0451],
        [-0.0685,  0.0514],
        [-0.0502,  0.0376],
        [ 0.1327, -0.0846],
        [-0.0425,  0.0098],
        [-0.0037, -0.0178],
        [-0.0514,  0.0427],
        [-0.0361,  0.0394],
        [-0.0296,  0.0264],
        [-0.0703,  0.0711],
        [-0.0150,  0.0285],
        [-0.0372,  0.0224],
        [ 0.0419, -0.0573],
        [ 0.0171,  0.0028],
        [-0.0532,  0.0487],
        [-0.0354,  0.0097],
        [ 0.1726, -0.1009],
        [ 0.0516, -0.0269],
        [ 0.1260, -0.0758],
        [ 0.1414, -0.1063],
        [-0.0288,  0.0221],
        [-0.0690,  0.0674],
        [ 0.0597, -0.0305],
        [-0.0668,  0.0406]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[ 0.0111, -0.0132],
        [-0.0421,  0.0357],
    

Training:   7%|█████                                                                | 158/2132 [00:28<01:28, 22.32it/s]

tensor([[-0.0366,  0.0240],
        [ 0.1225, -0.0698],
        [ 0.0662, -0.0677],
        [-0.0619,  0.0547],
        [-0.0511,  0.0422],
        [-0.0032, -0.0083],
        [-0.0783,  0.0595],
        [-0.0528,  0.0378],
        [ 0.0670, -0.0731],
        [ 0.1773, -0.1436],
        [-0.0546,  0.0473],
        [-0.0343,  0.0221],
        [ 0.0280, -0.0333],
        [ 0.1379, -0.0544],
        [ 0.1133, -0.1058],
        [ 0.0328, -0.0043],
        [-0.0674,  0.0598],
        [ 0.1530, -0.1440],
        [ 0.1973, -0.1137],
        [ 0.0707, -0.0577],
        [-0.0720,  0.0447],
        [-0.0549,  0.0462],
        [ 0.0600, -0.0509],
        [ 0.0630, -0.0283],
        [-0.0751,  0.0501],
        [ 0.0034, -0.0036],
        [-0.0713,  0.0507],
        [ 0.0147,  0.0090],
        [ 0.0707, -0.0560],
        [ 0.2112, -0.0824],
        [ 0.2346, -0.1280],
        [-0.0444,  0.0383]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([[-0.0327,  0.0191],
        [-0.0736,  0.0591],
    

                                                                                                                       

KeyboardInterrupt: 