In [1]:
# All required python standard libraries
import os
import time

In [2]:
# All torch related imports 
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from torch import nn, optim
import torchvision

In [3]:
# using cv2 to read an image
import cv2

In [4]:
# All sci-kit related imports 
import pandas as pd
import numpy as np

In [5]:
from tqdm.notebook import tqdm as tq

In [6]:
def accuracy_finder(predictions , labels):
    values, max_indices = torch.max(predictions, dim=1)
    #accuracy = ( max_indices == labels ).sum()/max_indices.size()[0]
    accuracy = ( max_indices == labels ).sum()
    return accuracy

In [7]:
def csv_preprocessor(base_dir:str, directory:str):
    return os.path.join(base_dir,directory).replace("\\","/")

In [8]:
def return_all_image_list_from_processed_csv(csv_file):
    ### This returns the entire list full of images to be loaded into cpu
    ###
    ###
    ALL_IMAGES = []
    start = time.time()
    for i, items in tq(enumerate(csv_file.iloc[:,1])):
        image = cv2.imread(items, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(image,(224,224))
        ALL_IMAGES.append(resized)
    
    print("Tt took us approximately {} seconds".format(time.time()-start))  
    return ALL_IMAGES

In [9]:
# pytorch device configurations 
BATCH_SIZE = 8
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
train_csv_directory = os.path.join(os.getcwd(),'guides\\isolated-dataset-csv\\IsolatedTrain.csv')

In [11]:
class IsolatedCharacterDataset(Dataset):
    def __init__(self, csv_dir_path,  transforms=None, custom_transform=None ):
        ### complete dataset path
        self.dataset_csv = pd.read_csv(csv_dir_path, usecols=["labels","directory"])  
        self.dataset_csv["directory"] = self.dataset_csv["directory"].map(lambda x: csv_preprocessor(base_dir=str(os.getcwd()), directory=str(x)))
        self.dataset_csv_numpy = self.dataset_csv.to_numpy()
        self.ALL_IMAGES = return_all_image_list_from_processed_csv(csv_file=self.dataset_csv)
        
        ### labels
        self.labels = self.dataset_csv_numpy[:,0]
        ### images directories
        self.image_directories = self.dataset_csv_numpy[:,1]
        ### transformations to apply on images
        self.transforms = transforms
        
    def __getitem__(self, index):
        # convert labels to tensor 
        label = torch.tensor(self.labels[index])
        
        # load single image from list of all preloaded images
        image = self.ALL_IMAGES[index]
        
        if self.transforms:
            ## apply transforms 
            image = self.transforms(image)
            image = image.float()
        label = label.long().subtract(1)
        return image, label 
    
    def __len__(self):
        rows , _ = self.dataset_csv_numpy.shape
        return rows

In [12]:
DATA_NORMALIZER = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),transforms.Resize((224,224))])

In [13]:
TRAIN_DATASET = IsolatedCharacterDataset(csv_dir_path=train_csv_directory,transforms=DATA_NORMALIZER)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…


Tt took us approximately 9.617926120758057 seconds


In [14]:
TRAIN_LOADER = DataLoader(dataset=TRAIN_DATASET,batch_size=BATCH_SIZE,shuffle= True)

In [15]:
class block(nn.Module):
    def __init__(self, in_channels, intermediate_channels, identity_downsample=None, stride=1):
        super(block, self).__init__()
        self.expansion = 4
        self.conv1 = nn.Conv2d(in_channels, intermediate_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(intermediate_channels)
        self.conv2 = nn.Conv2d(intermediate_channels, intermediate_channels, kernel_size=3, stride=stride, padding=1,)
        self.bn2 = nn.BatchNorm2d(intermediate_channels)
        self.conv3 = nn.Conv2d(intermediate_channels, intermediate_channels * self.expansion, kernel_size=1, stride=1, padding=0,)
        self.bn3 = nn.BatchNorm2d(intermediate_channels * self.expansion)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample
        self.stride = stride
    
    #Identity block
    def forward(self, x):
        identity = x.clone()

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)
        #x = self.relu(x) #custom 

        if self.identity_downsample is not None:
            identity = self.identity_downsample(identity)

        x += identity
        x = self.relu(x)
        return x

In [16]:
class ResNet(nn.Module):
    def __init__(self, block, layers, image_channels, num_classes):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Essentially the entire ResNet architecture are in these 4 lines below
        self.layer1 = self._make_layer(block, layers[0], intermediate_channels=64, stride=1)
        self.layer2 = self._make_layer(block, layers[1], intermediate_channels=128, stride=2)
        self.layer3 = self._make_layer(block, layers[2], intermediate_channels=256, stride=2)
        self.layer4 = self._make_layer(block, layers[3], intermediate_channels=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * 4, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

    def _make_layer(self, block, num_residual_blocks, intermediate_channels, stride):
        identity_downsample = None
        layers = []

        # Either if we half the input space for ex, 56x56 -> 28x28 (stride=2), or channels change
        # we need to adapt the Identity (skip connection) so it will be able to be added
        # to the layer that's ahead
        if stride != 1 or self.in_channels != intermediate_channels * 4:
            identity_downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, intermediate_channels * 4, kernel_size=1, stride=stride,),
                nn.BatchNorm2d(intermediate_channels * 4),
            )

        layers.append(block(self.in_channels, intermediate_channels, identity_downsample, stride))

        # The expansion size is always 4 for ResNet 50,101,152
        self.in_channels = intermediate_channels * 4

        # For example for first resnet layer: 256 will be mapped to 64 as intermediate layer,
        # then finally back to 256. Hence no identity downsample is needed, since stride = 1,
        # and also same amount of channels.
        for i in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, intermediate_channels))

        return nn.Sequential(*layers)

In [17]:
def ResNet50(img_channel=3, num_classes=171):
    return ResNet(block, [2, 3, 5, 2], img_channel, num_classes)
net = ResNet50()

In [18]:
optimizer =  optim.Adam(net.parameters(), lr=0.07) # learning rate 
# defining the loss function
criterion =  nn.CrossEntropyLoss()
net = net.to(DEVICE)
criterion = criterion.to(DEVICE)

In [19]:
from tqdm import tqdm 

In [22]:
def training(epochs:int):
    all_training_losses = []
    all_training_accuracy = []
    for epoch in tq(range(epochs)):
        total_epoch_loss = 0
        total_accuracy_epoch = 0
        for i, data in tqdm(enumerate(TRAIN_LOADER, 0)): 
            image,label = data
            optimizer.zero_grad(set_to_none=True)
            
            label = label.to(DEVICE)
            image = image.to(DEVICE)
            output = net(image)
            
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                print(output,label)
                total_epoch_loss += loss
                batches_training_accuracy = accuracy_finder(predictions=output, labels=label)
                total_accuracy_epoch = total_accuracy_epoch  + batches_training_accuracy   
            
            if i+1 % 1000 == 0: 
                print("Batch : {}/{}".format(i, len(TRAIN_LOADER))) 
        # total epoch loss 
        total_epoch_loss = total_epoch_loss / len(TRAIN_LOADER)
        # total epoch accuracy 
        total_accuracy_epoch = total_accuracy_epoch /len(TRAIN_LOADER)
        
        # display the epoch training loss
        print("epoch : {}/{}, loss = {:.8f}, acc = {:.8f}".format(epoch + 1, epochs, total_epoch_loss, total_accuracy_epoch ))
        all_training_losses.append(total_epoch_loss)
        all_training_accuracy.append(total_accuracy_epoch)
        
    print("Training completed")
    return all_training_accuracy, all_training_losses

In [None]:
t_acc, t_loss = training(50)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=50.0), HTML(value='')))

1it [00:00,  2.10it/s]

tensor([[ -52.8453,  -52.0761,  -52.4239,  ...,  -52.3777,  -51.6944,
          -52.6524],
        [ -52.5069,  -51.7647,  -52.0854,  ...,  -52.0446,  -51.3818,
          -52.3134],
        [-225.8761, -227.9313, -228.3135,  ..., -228.8150, -228.7944,
         -227.8481],
        ...,
        [ -52.7307,  -51.9523,  -52.3015,  ...,  -52.2616,  -51.5829,
          -52.5240],
        [ -52.8812,  -52.1021,  -52.4496,  ...,  -52.4077,  -51.7139,
          -52.6738],
        [ -52.7279,  -51.9741,  -52.2941,  ...,  -52.2733,  -51.5909,
          -52.5402]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([141,  31,  72, 150,  85,  46,  54, 102], device='cuda:0')


2it [00:00,  2.08it/s]

tensor([[ -22.1193,  -22.0255,  -21.9458,  ...,  -21.8794,  -21.8682,
          -21.9354],
        [-242.6664, -242.5898, -243.8684,  ..., -243.2708, -242.7329,
         -243.8258],
        [ -21.0820,  -21.0541,  -20.9609,  ...,  -20.9039,  -20.9158,
          -20.9185],
        ...,
        [ -36.3211,  -36.1931,  -36.1892,  ...,  -36.0558,  -36.0604,
          -36.2113],
        [ -20.8528,  -20.8486,  -20.7446,  ...,  -20.6990,  -20.7206,
          -20.7015],
        [ -20.4120,  -20.4039,  -20.2905,  ...,  -20.2415,  -20.2600,
          -20.2631]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 28, 134, 109,  95, 143, 101,  95, 145], device='cuda:0')


3it [00:01,  2.06it/s]

tensor([[ -16.1836,  -16.1423,  -16.2125,  ...,  -16.1184,  -16.1668,
          -16.1718],
        [  -6.6120,   -6.5539,   -6.6136,  ...,   -6.5482,   -6.5899,
           -6.5784],
        [  -4.6380,   -4.5667,   -4.6146,  ...,   -4.5649,   -4.5928,
           -4.5929],
        ...,
        [  -5.9708,   -5.9078,   -5.9771,  ...,   -5.9110,   -5.9434,
           -5.9429],
        [-231.2216, -231.4631, -231.8701,  ..., -231.1488, -231.4102,
         -231.6853],
        [  -4.6750,   -4.6034,   -4.6528,  ...,   -4.6018,   -4.6310,
           -4.6310]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([124, 101,  71, 123, 117, 152, 124, 122], device='cuda:0')


4it [00:01,  2.03it/s]

tensor([[-164.9739, -164.9459, -165.1649,  ..., -164.5119, -165.0007,
         -164.9942],
        [  -6.7625,   -6.7435,   -6.7899,  ...,   -6.7340,   -6.7560,
           -6.7778],
        [ -16.0200,  -16.0016,  -16.0383,  ...,  -15.9602,  -16.0011,
          -16.0448],
        ...,
        [  -4.5776,   -4.5442,   -4.5786,  ...,   -4.5406,   -4.5557,
           -4.5870],
        [  -5.2078,   -5.1620,   -5.2036,  ...,   -5.1580,   -5.1721,
           -5.2107],
        [ -90.0829,  -90.0693,  -90.2031,  ...,  -89.8208,  -90.1057,
          -90.1215]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 35,  98, 150,  63, 140,  67, 118,  18], device='cuda:0')


5it [00:02,  2.03it/s]

tensor([[ -33.2099,  -33.2214,  -33.2644,  ...,  -33.1559,  -33.2214,
          -33.2635],
        [ -47.2399,  -47.2898,  -47.2356,  ...,  -47.0781,  -47.2431,
          -47.2473],
        [ -38.4561,  -38.5130,  -38.4946,  ...,  -38.4223,  -38.5076,
          -38.5372],
        ...,
        [ -33.5247,  -33.5484,  -33.5426,  ...,  -33.4189,  -33.5051,
          -33.5659],
        [-139.5794, -139.4393, -139.5784,  ..., -139.0315, -139.3972,
         -139.1848],
        [ -36.1353,  -36.2004,  -36.2052,  ...,  -36.1034,  -36.1685,
          -36.2073]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 14, 125,  13,  89, 136,  17, 101,   5], device='cuda:0')


6it [00:02,  2.03it/s]

tensor([[ -10.1715,  -10.1826,  -10.1995,  ...,  -10.1391,  -10.1801,
          -10.1978],
        [  -9.4422,   -9.4449,   -9.4622,  ...,   -9.4213,   -9.4503,
           -9.4699],
        [ -11.0931,  -11.0968,  -11.1147,  ...,  -11.0720,  -11.1118,
          -11.1273],
        ...,
        [-180.5931, -180.4279, -180.5596,  ..., -179.7745, -180.3143,
         -179.8947],
        [ -11.9806,  -11.9881,  -11.9902,  ...,  -11.9385,  -11.9911,
          -12.0109],
        [ -10.8045,  -10.8043,  -10.8136,  ...,  -10.7737,  -10.8028,
          -10.8308]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([103, 167,  71,  82,  61,  95,  60, 113], device='cuda:0')


7it [00:03,  2.03it/s]

tensor([[ -2.3628,  -2.3507,  -2.3609,  ...,  -2.3571,  -2.3706,  -2.3873],
        [ -5.4194,  -5.3989,  -5.4040,  ...,  -5.3642,  -5.3923,  -5.4071],
        [-36.0470, -36.0076, -36.0109,  ..., -35.8540, -35.9804, -35.9089],
        ...,
        [ -2.6510,  -2.6389,  -2.6462,  ...,  -2.6353,  -2.6477,  -2.6644],
        [-13.6596, -13.6445, -13.6412,  ..., -13.5409, -13.6312, -13.6140],
        [-28.7571, -28.7343, -28.7235,  ..., -28.5713, -28.7019, -28.6416]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60, 130, 112, 117,  37,  62, 160,  27], device='cuda:0')


8it [00:03,  2.03it/s]

tensor([[ -0.8159,  -0.7938,  -0.7932,  ...,  -0.7987,  -0.8037,  -0.8287],
        [-58.1896, -58.0926, -58.0960,  ..., -57.8812, -58.0282, -57.9097],
        [-41.0442, -40.9708, -40.9740,  ..., -40.8329, -40.9376, -40.8571],
        ...,
        [ -3.6058,  -3.5820,  -3.5902,  ...,  -3.5633,  -3.5805,  -3.6006],
        [-19.0146, -18.9759, -18.9680,  ..., -18.8977, -18.9593, -18.9274],
        [ -4.9529,  -4.9285,  -4.9389,  ...,  -4.8957,  -4.9178,  -4.9379]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 61,  24,  29,  67,  65,  39, 101,  68], device='cuda:0')


9it [00:04,  2.03it/s]

tensor([[ -1.5136,  -1.4903,  -1.4970,  ...,  -1.4983,  -1.4980,  -1.5270],
        [ -0.7006,  -0.6788,  -0.6864,  ...,  -0.6909,  -0.6905,  -0.7144],
        [-14.2401, -14.2153, -14.2070,  ..., -14.1520, -14.1845, -14.1865],
        ...,
        [-58.6989, -58.6223, -58.6012,  ..., -58.4600, -58.5520, -58.4548],
        [ -0.3392,  -0.3185,  -0.3215,  ...,  -0.3312,  -0.3278,  -0.3559],
        [-35.0203, -34.9704, -35.0083,  ..., -34.8561, -34.9394, -34.8741]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([126,  88,  60, 130, 119,  66,  14, 138], device='cuda:0')


10it [00:04,  2.03it/s]

tensor([[ -3.1409,  -3.1181,  -3.1351,  ...,  -3.1210,  -3.1149,  -3.1491],
        [ -1.4383,  -1.4166,  -1.4330,  ...,  -1.4328,  -1.4220,  -1.4568],
        [ -2.2612,  -2.2388,  -2.2572,  ...,  -2.2503,  -2.2413,  -2.2764],
        ...,
        [-36.3290, -36.2881, -36.2853,  ..., -36.2082, -36.2480, -36.2033],
        [ -0.4294,  -0.4102,  -0.4219,  ...,  -0.4328,  -0.4206,  -0.4563],
        [ -0.3441,  -0.3253,  -0.3364,  ...,  -0.3475,  -0.3355,  -0.3711]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 10, 157,  16, 106,  49,  21,  45, 140], device='cuda:0')


11it [00:05,  2.03it/s]

tensor([[ -5.6142,  -5.5924,  -5.6090,  ...,  -5.6050,  -5.5940,  -5.6198],
        [-37.4593, -37.4083, -37.4369,  ..., -37.3706, -37.3926, -37.3434],
        [ -0.8738,  -0.8576,  -0.8799,  ...,  -0.8887,  -0.8671,  -0.9090],
        ...,
        [ -7.5499,  -7.5270,  -7.5460,  ...,  -7.5364,  -7.5241,  -7.5483],
        [ -2.7655,  -2.7486,  -2.7722,  ...,  -2.7675,  -2.7473,  -2.7907],
        [ -0.3554,  -0.3392,  -0.3599,  ...,  -0.3731,  -0.3501,  -0.3953]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([129,  85,  35, 169, 165,  91, 124,  80], device='cuda:0')


12it [00:05,  2.03it/s]

tensor([[ -0.3963,  -0.3838,  -0.4146,  ...,  -0.4301,  -0.3239,  -0.4507],
        [-10.3214, -10.2964, -10.3129,  ..., -10.3235, -10.1092, -10.3226],
        [ -3.6584,  -3.6462,  -3.6697,  ...,  -3.6798,  -3.4493,  -3.6931],
        ...,
        [ -0.3902,  -0.3777,  -0.4085,  ...,  -0.4238,  -0.3180,  -0.4445],
        [-19.5990, -19.5666, -19.5960,  ..., -19.5879, -19.3694, -19.5797],
        [-10.7545, -10.7298, -10.7571,  ..., -10.7647, -10.5850, -10.7645]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 66, 126,  41,  11,   0, 124,  67,  11], device='cuda:0')


13it [00:06,  2.03it/s]

tensor([[-0.8005, -0.8638, -0.9058,  ..., -0.9212, -0.6543, -0.9393],
        [-1.2137, -1.2797, -1.3177,  ..., -1.3331, -0.9946, -1.3496],
        [-0.3589, -0.4212, -0.4622,  ..., -0.4796, -0.2901, -0.4988],
        ...,
        [-0.6908, -0.7556, -0.7940,  ..., -0.8104, -0.5588, -0.8294],
        [-0.8671, -0.9318, -0.9722,  ..., -0.9876, -0.7093, -1.0058],
        [-1.9700, -2.0332, -2.0658,  ..., -2.0845, -1.7960, -2.1004]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([138, 104, 146, 132, 118,  68,   1,  76], device='cuda:0')


14it [00:06,  2.03it/s]

tensor([[-0.3584, -0.4143, -0.5473,  ..., -0.5663, -0.2849, -0.5848],
        [-0.3317, -0.3901, -0.5210,  ..., -0.5402, -0.2661, -0.5581],
        [-0.3317, -0.3901, -0.5210,  ..., -0.5402, -0.2661, -0.5581],
        ...,
        [-0.3317, -0.3901, -0.5210,  ..., -0.5402, -0.2661, -0.5581],
        [-0.4202, -0.4710, -0.6088,  ..., -0.6275, -0.3289, -0.6466],
        [-1.8222, -1.8485, -2.0025,  ..., -2.0237, -1.6455, -2.0438]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 12, 113, 100, 124,  24, 106,  24,  53], device='cuda:0')


15it [00:07,  2.03it/s]

tensor([[-0.3276, -0.3788, -0.6030,  ..., -0.6235, -0.2582, -0.6411],
        [-0.3300, -0.3809, -0.6053,  ..., -0.6258, -0.2598, -0.6435],
        [-0.3241, -0.3763, -0.5996,  ..., -0.6201, -0.2558, -0.6375],
        ...,
        [-0.3084, -0.3632, -0.5840,  ..., -0.6046, -0.2458, -0.6214],
        [-0.3084, -0.3632, -0.5840,  ..., -0.6046, -0.2458, -0.6214],
        [-8.1569, -8.1460, -8.4086,  ..., -8.4411, -7.9334, -8.4515]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([127, 170,  40, 164,  87,  14, 121,  27], device='cuda:0')


16it [00:07,  2.03it/s]

tensor([[-2.0616, -2.0987, -2.4184,  ..., -2.4419, -1.9709, -2.3572],
        [-1.8677, -1.8967, -2.2241,  ..., -2.2480, -1.7702, -2.1600],
        [-0.2886, -0.3402, -0.6502,  ..., -0.6720, -0.2289, -0.5909],
        ...,
        [-0.2886, -0.3402, -0.6502,  ..., -0.6720, -0.2289, -0.5909],
        [-0.2886, -0.3402, -0.6502,  ..., -0.6720, -0.2289, -0.5909],
        [-0.2886, -0.3402, -0.6502,  ..., -0.6720, -0.2289, -0.5909]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 91,   3,  28,  35,  95, 165,   1, 168], device='cuda:0')


17it [00:08,  2.03it/s]

tensor([[-1.3737, -1.3734, -1.8147,  ..., -1.7405, -1.2781, -1.6522],
        [-0.2721, -0.3011, -0.7187,  ..., -0.6419, -0.2150, -0.5642],
        [-0.2721, -0.3011, -0.7187,  ..., -0.6419, -0.2150, -0.5642],
        ...,
        [-0.4300, -0.4537, -0.8753,  ..., -0.7980, -0.3654, -0.7182],
        [-0.2721, -0.3011, -0.7187,  ..., -0.6419, -0.2150, -0.5642],
        [-0.2721, -0.3011, -0.7187,  ..., -0.6419, -0.2150, -0.5642]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  4,  75,  65,  62,  27,  44, 167,  59], device='cuda:0')


18it [00:08,  2.04it/s]

tensor([[-0.2587, -0.2668, -0.7893,  ..., -0.6157, -0.2042, -0.5412],
        [-0.2587, -0.2668, -0.7893,  ..., -0.6157, -0.2042, -0.5412],
        [-0.2587, -0.2668, -0.7893,  ..., -0.6157, -0.2042, -0.5412],
        ...,
        [-0.2587, -0.2668, -0.7893,  ..., -0.6157, -0.2042, -0.5412],
        [-0.5407, -0.5314, -1.0689,  ..., -0.8965, -0.4645, -0.8124],
        [-0.2587, -0.2668, -0.7893,  ..., -0.6157, -0.2042, -0.5412]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 41,   2,  38, 130,  35,  10, 145,  41], device='cuda:0')


19it [00:09,  2.03it/s]

tensor([[-0.2484, -0.2369, -0.7597,  ..., -0.5932, -0.1963, -0.5218],
        [-0.3331, -0.3216, -0.8438,  ..., -0.6792, -0.2803, -0.6071],
        [-0.2484, -0.2369, -0.7597,  ..., -0.5932, -0.1963, -0.5218],
        ...,
        [-0.2527, -0.2413, -0.7640,  ..., -0.5977, -0.2007, -0.5262],
        [-0.2484, -0.2369, -0.7597,  ..., -0.5932, -0.1963, -0.5218],
        [-0.2484, -0.2369, -0.7597,  ..., -0.5932, -0.1963, -0.5218]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 64, 141,  82,  42,  55, 125, 169, 164], device='cuda:0')


20it [00:09,  2.04it/s]

tensor([[-0.5421, -0.5136, -1.0379,  ..., -0.8816, -0.4611, -0.8092],
        [-0.2409, -0.2111, -0.7339,  ..., -0.5740, -0.1666, -0.5055],
        [-0.2409, -0.2111, -0.7339,  ..., -0.5740, -0.1666, -0.5055],
        ...,
        [-0.2409, -0.2111, -0.7339,  ..., -0.5740, -0.1666, -0.5055],
        [-0.2409, -0.2111, -0.7339,  ..., -0.5740, -0.1666, -0.5055],
        [-0.2409, -0.2111, -0.7339,  ..., -0.5740, -0.1666, -0.5055]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 43, 119,  24,  54, 100,   8, 127,  27], device='cuda:0')


21it [00:10,  2.03it/s]

tensor([[-0.2360, -0.1890, -0.7116,  ..., -0.5580, -0.1410, -0.4922],
        [-0.2360, -0.1890, -0.7116,  ..., -0.5580, -0.1410, -0.4922],
        [-0.2360, -0.1890, -0.7116,  ..., -0.5580, -0.1410, -0.4922],
        ...,
        [-0.2360, -0.1890, -0.7116,  ..., -0.5580, -0.1410, -0.4922],
        [-0.8442, -0.7958, -1.3253,  ..., -1.1739, -0.7353, -1.1055],
        [-0.2360, -0.1890, -0.7116,  ..., -0.5580, -0.1410, -0.4922]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([119,  86,  78,  77, 144,  13,  69,  14], device='cuda:0')


22it [00:10,  2.03it/s]

tensor([[-0.2334, -0.1704, -0.6926,  ..., -0.5448, -0.1193, -0.4816],
        [-0.2334, -0.1704, -0.6926,  ..., -0.5448, -0.1193, -0.4816],
        [-0.2946, -0.2321, -0.7540,  ..., -0.6073, -0.1809, -0.5435],
        ...,
        [-0.2434, -0.1805, -0.7027,  ..., -0.5552, -0.1294, -0.4916],
        [-0.2334, -0.1704, -0.6926,  ..., -0.5448, -0.1193, -0.4816],
        [-2.7361, -2.6652, -3.2139,  ..., -3.0672, -2.5738, -2.9987]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([114, 148,  11, 144,  22, 105, 121,  25], device='cuda:0')


23it [00:11,  2.04it/s]

tensor([[-0.2329, -0.1549, -0.6764,  ..., -0.5342, -0.1010, -0.4734],
        [-2.1648, -2.0802, -2.6186,  ..., -2.4813, -1.9944, -2.4141],
        [-0.9152, -0.8373, -1.3605,  ..., -1.2243, -0.7733, -1.1584],
        ...,
        [-0.2329, -0.1549, -0.6764,  ..., -0.5342, -0.1010, -0.4734],
        [-0.2329, -0.1549, -0.6764,  ..., -0.5342, -0.1010, -0.4734],
        [-0.3075, -0.2294, -0.7494,  ..., -0.6091, -0.1751, -0.5479]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 99, 131,  41, 140,  31, 102, 158,  40], device='cuda:0')


24it [00:11,  2.04it/s]

tensor([[-0.3016, -0.2097, -0.7294,  ..., -0.5939, -0.1532, -0.5351],
        [-0.2343, -0.1422, -0.6629,  ..., -0.5259, -0.0859, -0.4674],
        [-0.2343, -0.1422, -0.6629,  ..., -0.5259, -0.0859, -0.4674],
        ...,
        [-0.2343, -0.1422, -0.6629,  ..., -0.5259, -0.0859, -0.4674],
        [-0.2343, -0.1422, -0.6629,  ..., -0.5259, -0.0859, -0.4674],
        [-3.6114, -3.5026, -4.0548,  ..., -3.9234, -3.3956, -3.8573]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([169,  28, 136, 138,  10, 145, 143, 165], device='cuda:0')


25it [00:12,  2.03it/s]

tensor([[-0.4217, -0.3179, -0.8364,  ..., -0.7076, -0.2355, -0.6482],
        [-0.2375, -0.1321, -0.6518,  ..., -0.5197, -0.0533, -0.4635],
        [-1.9969, -1.8834, -2.4197,  ..., -2.2920, -1.7757, -2.2274],
        ...,
        [-0.2375, -0.1321, -0.6518,  ..., -0.5197, -0.0533, -0.4635],
        [-0.2390, -0.1337, -0.6533,  ..., -0.5213, -0.0548, -0.4650],
        [-0.3081, -0.2026, -0.7219,  ..., -0.5911, -0.1221, -0.5345]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([136, 159,  46, 169,  57, 104,  59,  14], device='cuda:0')


26it [00:12,  2.04it/s]

tensor([[-0.2426, -0.1248, -0.6433,  ..., -0.5160, -0.0091, -0.4618],
        [-2.0904, -1.9620, -2.4996,  ..., -2.3738, -1.8119, -2.3139],
        [-0.2495, -0.1318, -0.6500,  ..., -0.5230, -0.0155, -0.4687],
        ...,
        [-0.3218, -0.2051, -0.7227,  ..., -0.5968, -0.0868, -0.5409],
        [-0.2423, -0.1245, -0.6430,  ..., -0.5157, -0.0088, -0.4615],
        [-0.4612, -0.3434, -0.8631,  ..., -0.7376, -0.2211, -0.6810]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([131, 122, 115,   6,  55,   0,  78,  73], device='cuda:0')


27it [00:13,  2.04it/s]

tensor([[-0.2403, -0.1405, -0.6573,  ..., -0.5350,  0.0109, -0.4825],
        [-0.4402, -0.3428, -0.8595,  ..., -0.7380, -0.1866, -0.6833],
        [-1.6478, -1.5383, -2.0756,  ..., -1.9527, -1.3585, -1.8962],
        ...,
        [-0.4496, -0.3516, -0.8694,  ..., -0.7482, -0.1938, -0.6930],
        [-0.2189, -0.1192, -0.6364,  ..., -0.5135,  0.0304, -0.4613],
        [-0.2429, -0.1431, -0.6599,  ..., -0.5376,  0.0085, -0.4850]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 23, 147, 116,  26,   9,   8, 108,  10], device='cuda:0')


28it [00:13,  2.04it/s]

tensor([[-0.1991, -0.1160, -0.6318,  ..., -0.5130,  0.0647, -0.4627],
        [-0.2347, -0.1525, -0.6676,  ..., -0.5498,  0.0309, -0.4987],
        [-0.2633, -0.1802, -0.6957,  ..., -0.5777,  0.0053, -0.5272],
        ...,
        [-0.1991, -0.1160, -0.6318,  ..., -0.5130,  0.0647, -0.4627],
        [-0.1991, -0.1160, -0.6318,  ..., -0.5130,  0.0647, -0.4627],
        [-3.5255, -3.4305, -3.9867,  ..., -3.8681, -3.1797, -3.8060]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([170,  86, 128, 125, 114, 160, 133, 108], device='cuda:0')


29it [00:14,  2.03it/s]

tensor([[-1.2368, -1.1675, -1.6942,  ..., -1.5795, -0.9301, -1.4974],
        [-0.1826, -0.1148, -0.6289,  ..., -0.5142,  0.0946, -0.4361],
        [-0.1826, -0.1148, -0.6289,  ..., -0.5142,  0.0946, -0.4361],
        ...,
        [-0.1838, -0.1160, -0.6301,  ..., -0.5154,  0.0934, -0.4373],
        [-0.1826, -0.1148, -0.6289,  ..., -0.5142,  0.0946, -0.4361],
        [-0.9250, -0.8580, -1.3803,  ..., -1.2674, -0.6232, -1.1862]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([151, 107,  25,  84,  37,  75, 137,   9], device='cuda:0')


30it [00:14,  2.03it/s]

tensor([[-0.1834, -0.1301, -0.6423,  ..., -0.5314,  0.1064, -0.4274],
        [-0.1949, -0.1412, -0.6538,  ..., -0.5427,  0.0955, -0.4392],
        [-1.8062, -1.7478, -2.2859,  ..., -2.1735, -1.4752, -2.0596],
        ...,
        [-0.1712, -0.1173, -0.6298,  ..., -0.5187,  0.1190, -0.4151],
        [-0.1692, -0.1154, -0.6278,  ..., -0.5168,  0.1202, -0.4132],
        [-0.1692, -0.1154, -0.6278,  ..., -0.5168,  0.1202, -0.4132]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 26, 111,  42, 144,  35,  98,  42, 106], device='cuda:0')


31it [00:15,  2.03it/s]

tensor([[-0.1880, -0.1468, -0.6575,  ..., -0.5501,  0.1143, -0.4236],
        [-0.2059, -0.1652, -0.6758,  ..., -0.5682,  0.0974, -0.4415],
        [-0.1793, -0.1383, -0.6486,  ..., -0.5416,  0.1232, -0.4144],
        ...,
        [-0.2153, -0.1741, -0.6851,  ..., -0.5779,  0.0890, -0.4504],
        [-0.1587, -0.1177, -0.6282,  ..., -0.5209,  0.1419, -0.3936],
        [-2.6407, -2.5840, -3.1344,  ..., -3.0228, -2.2806, -2.8849]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([135,  64,  97,  37, 137,  36,  85,  13], device='cuda:0')


32it [00:15,  2.03it/s]

tensor([[-0.1508, -0.1215, -0.6301,  ..., -0.5263,  0.1600, -0.3771],
        [-0.1508, -0.1215, -0.6301,  ..., -0.5263,  0.1600, -0.3771],
        [-0.1527, -0.1235, -0.6321,  ..., -0.5283,  0.1582, -0.3790],
        ...,
        [-0.2108, -0.1832, -0.6922,  ..., -0.5884,  0.1011, -0.4389],
        [-1.1958, -1.1609, -1.6866,  ..., -1.5801, -0.8619, -1.4259],
        [-0.1915, -0.1629, -0.6717,  ..., -0.5677,  0.1206, -0.4190]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 48,  41,  59, 151,  88,  21,  98, 124], device='cuda:0')


33it [00:16,  2.03it/s]

tensor([[-0.1454, -0.1269, -0.6334,  ..., -0.5329,  0.1747, -0.3635],
        [-0.2426, -0.2249, -0.7324,  ..., -0.6309,  0.0814, -0.4629],
        [-0.1538, -0.1354, -0.6420,  ..., -0.5414,  0.1668, -0.3719],
        ...,
        [-0.3674, -0.3530, -0.8605,  ..., -0.7596, -0.0427, -0.5903],
        [-0.1723, -0.1542, -0.6611,  ..., -0.5602,  0.1491, -0.3909],
        [-0.1495, -0.1310, -0.6376,  ..., -0.5371,  0.1708, -0.3675]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60,  45, 134, 114,  46, 112,   0,  51], device='cuda:0')


34it [00:16,  2.03it/s]

tensor([[-0.5584, -0.5741, -1.0834,  ..., -0.9858, -0.2468, -0.7943],
        [-0.1740, -0.1902, -0.6957,  ..., -0.5983,  0.1305, -0.4101],
        [-0.1539, -0.1710, -0.6758,  ..., -0.5776,  0.1501, -0.3913],
        ...,
        [-0.1537, -0.1709, -0.6759,  ..., -0.5782,  0.1509, -0.3902],
        [-0.1204, -0.1364, -0.6407,  ..., -0.5434,  0.1837, -0.3552],
        [-0.1188, -0.1347, -0.6390,  ..., -0.5417,  0.1853, -0.3536]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 17, 120, 133,   3, 119,  33, 158, 138], device='cuda:0')


35it [00:17,  2.03it/s]

tensor([[-9.4587e-02, -1.4191e-01, -6.4404e-01,  ..., -5.4981e-01,
          1.9468e-01, -3.4445e-01],
        [-9.4587e-02, -1.4191e-01, -6.4404e-01,  ..., -5.4981e-01,
          1.9468e-01, -3.4445e-01],
        [-1.1685e-01, -1.6602e-01, -6.6828e-01,  ..., -5.7374e-01,
          1.7151e-01, -3.6882e-01],
        ...,
        [-1.6465e-01, -2.1421e-01, -7.1734e-01,  ..., -6.2250e-01,
          1.2544e-01, -4.1849e-01],
        [-9.6559e-02, -1.4412e-01, -6.4623e-01,  ..., -5.5202e-01,
          1.9262e-01, -3.4659e-01],
        [-2.9378e-01, -3.4503e-01, -8.4836e-01,  ..., -7.5348e-01,
         -6.0789e-04, -5.4999e-01]], device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 96,  83, 168, 137,  12, 161,  93,  77], device='cuda:0')


36it [00:17,  2.03it/s]

tensor([[-0.0750, -0.1513, -0.6511,  ..., -0.5259,  0.2004, -0.3386],
        [-0.1746, -0.2544, -0.7553,  ..., -0.6289,  0.1009, -0.4425],
        [-0.1801, -0.2617, -0.7627,  ..., -0.6337,  0.0950, -0.4509],
        ...,
        [-0.1378, -0.2181, -0.7185,  ..., -0.5907,  0.1365, -0.4064],
        [-1.4997, -1.5675, -2.0883,  ..., -1.9601, -1.2005, -1.7658],
        [-0.0769, -0.1532, -0.6531,  ..., -0.5277,  0.1993, -0.3404]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([120,  24, 155,  47,   0,  65, 122, 147], device='cuda:0')


37it [00:18,  2.03it/s]

tensor([[-0.2317, -0.3621, -0.8602,  ..., -0.7051,  0.0107, -0.5355],
        [-0.1063, -0.2356, -0.7337,  ..., -0.5756,  0.1329, -0.4096],
        [-0.0384, -0.1618, -0.6592,  ..., -0.5054,  0.2037, -0.3348],
        ...,
        [-0.0398, -0.1634, -0.6608,  ..., -0.5068,  0.2021, -0.3364],
        [-1.0298, -1.1624, -1.6703,  ..., -1.5082, -0.7812, -1.3385],
        [-0.0384, -0.1618, -0.6592,  ..., -0.5054,  0.2037, -0.3348]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([55, 66, 66, 90, 26, 73, 67, 78], device='cuda:0')


38it [00:18,  2.03it/s]

tensor([[-0.0562, -0.2312, -0.7268,  ..., -0.5401,  0.1490, -0.3926],
        [-0.0208, -0.1891, -0.6840,  ..., -0.5028,  0.1887, -0.3493],
        [-0.0067, -0.1732, -0.6682,  ..., -0.4882,  0.2046, -0.3330],
        ...,
        [-0.0477, -0.2214, -0.7172,  ..., -0.5319,  0.1589, -0.3826],
        [-0.1283, -0.2947, -0.7911,  ..., -0.6114,  0.0836, -0.4555],
        [-0.1209, -0.2888, -0.7845,  ..., -0.6035,  0.0905, -0.4489]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  5,  15, 102,  77, 108,  42,  37,  72], device='cuda:0')


39it [00:19,  2.03it/s]

tensor([[ 0.0160, -0.1915, -0.6839,  ..., -0.4783,  0.1979, -0.3389],
        [-0.0030, -0.2149, -0.7078,  ..., -0.4992,  0.1761, -0.3631],
        [-0.0447, -0.2530, -0.7467,  ..., -0.5425,  0.1369, -0.4010],
        ...,
        [ 0.0204, -0.1857, -0.6781,  ..., -0.4741,  0.2033, -0.3331],
        [-0.0543, -0.2761, -0.7697,  ..., -0.5529,  0.1179, -0.4250],
        [-0.0400, -0.2460, -0.7399,  ..., -0.5363,  0.1426, -0.3946]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([160, 151, 123, 135,  96, 111,  30,  11], device='cuda:0')


40it [00:19,  2.03it/s]

tensor([[-0.4751, -0.7202, -1.2132,  ..., -0.9869, -0.3177, -0.8586],
        [ 0.0339, -0.2112, -0.7013,  ..., -0.4735,  0.1886, -0.3478],
        [ 0.0241, -0.2213, -0.7113,  ..., -0.4835,  0.1777, -0.3580],
        ...,
        [ 0.0413, -0.2016, -0.6915,  ..., -0.4644,  0.1976, -0.3376],
        [ 0.0432, -0.1991, -0.6889,  ..., -0.4627,  0.2000, -0.3350],
        [-0.0290, -0.2864, -0.7771,  ..., -0.5386,  0.1154, -0.4244]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 13,  37,  34, 166, 132,   0,  63,  43], device='cuda:0')


41it [00:20,  2.03it/s]

tensor([[ 0.0815, -0.2132, -0.7005,  ..., -0.4540,  0.1948, -0.3385],
        [-0.1135, -0.4393, -0.9291,  ..., -0.6580, -0.0265, -0.5696],
        [ 0.0815, -0.2132, -0.7005,  ..., -0.4540,  0.1948, -0.3385],
        ...,
        [ 0.0815, -0.2132, -0.7005,  ..., -0.4540,  0.1948, -0.3385],
        [-0.0067, -0.3157, -0.8050,  ..., -0.5456,  0.1013, -0.4429],
        [-0.0095, -0.3212, -0.8100,  ..., -0.5499,  0.0895, -0.4493]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([134,  50,  71,  10,  98, 104,  96, 128], device='cuda:0')


42it [00:20,  2.03it/s]

tensor([[ 0.0788, -0.2740, -0.7593,  ..., -0.4850,  0.1431, -0.3910],
        [ 0.0800, -0.2658, -0.7507,  ..., -0.4832,  0.1508, -0.3816],
        [ 0.0732, -0.2854, -0.7706,  ..., -0.4933,  0.1321, -0.4039],
        ...,
        [ 0.1143, -0.2282, -0.7129,  ..., -0.4478,  0.1879, -0.3436],
        [ 0.1122, -0.2311, -0.7158,  ..., -0.4502,  0.1847, -0.3467],
        [-0.3960, -0.7403, -1.2282,  ..., -0.9614, -0.3227, -0.8583]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 69,  90,  24, 141,  79,  68,  29,  30], device='cuda:0')


43it [00:21,  2.03it/s]

tensor([[-0.2993, -0.6859, -1.1708,  ..., -0.8911, -0.2622, -0.7939],
        [ 0.1428, -0.2434, -0.7255,  ..., -0.4433,  0.1796, -0.3496],
        [ 0.1374, -0.2509, -0.7329,  ..., -0.4480,  0.1724, -0.3571],
        ...,
        [ 0.1191, -0.2762, -0.7585,  ..., -0.4685,  0.1479, -0.3836],
        [ 0.1163, -0.2819, -0.7642,  ..., -0.4729,  0.1419, -0.3904],
        [ 0.1427, -0.2436, -0.7256,  ..., -0.4434,  0.1796, -0.3497]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 37, 136, 135,  51, 145,  81,  45, 156], device='cuda:0')


44it [00:21,  2.03it/s]

tensor([[ 0.1501, -0.2830, -0.7626,  ..., -0.4588,  0.1464, -0.3820],
        [ 0.1332, -0.3102, -0.7898,  ..., -0.4757,  0.1196, -0.4107],
        [-0.1187, -0.5421, -1.0255,  ..., -0.7291, -0.1131, -0.6436],
        ...,
        [ 0.1636, -0.2637, -0.7431,  ..., -0.4438,  0.1657, -0.3614],
        [ 0.1540, -0.2790, -0.7585,  ..., -0.4558,  0.1502, -0.3783],
        [ 0.1606, -0.2685, -0.7479,  ..., -0.4481,  0.1605, -0.3669]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([156, 127,  20,  73,  91, 158, 110,  11], device='cuda:0')


45it [00:22,  2.03it/s]

tensor([[ 0.1860, -0.2760, -0.7528,  ..., -0.4409,  0.1587, -0.3657],
        [ 0.1433, -0.3392, -0.8166,  ..., -0.4867,  0.0976, -0.4322],
        [ 0.1702, -0.2972, -0.7743,  ..., -0.4586,  0.1375, -0.3880],
        ...,
        [-0.0282, -0.4890, -0.9698,  ..., -0.6603, -0.0539, -0.5829],
        [ 0.1860, -0.2760, -0.7528,  ..., -0.4409,  0.1587, -0.3657],
        [ 0.1179, -0.3795, -0.8570,  ..., -0.5136,  0.0564, -0.4754]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([168,  89,  22,  65, 131,  33,  14,   2], device='cuda:0')


46it [00:22,  2.03it/s]

tensor([[ 0.1912, -0.3103, -0.7440,  ..., -0.4237,  0.1292, -0.3936],
        [ 0.1660, -0.3281, -0.7648,  ..., -0.4511,  0.1104, -0.4113],
        [ 0.1996, -0.2966, -0.7317,  ..., -0.4154,  0.1430, -0.3789],
        ...,
        [ 0.1784, -0.3289, -0.7617,  ..., -0.4392,  0.1116, -0.4135],
        [ 0.1713, -0.3372, -0.7697,  ..., -0.4470,  0.1035, -0.4219],
        [ 0.2017, -0.2931, -0.7286,  ..., -0.4137,  0.1463, -0.3754]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([154, 123,   5,  50, 129, 128,  48,  64], device='cuda:0')


47it [00:23,  2.03it/s]

tensor([[ 0.2136, -0.3107, -0.7080,  ..., -0.3906,  0.1328, -0.3861],
        [ 0.2004, -0.3299, -0.7243,  ..., -0.4049,  0.1132, -0.4066],
        [ 0.1999, -0.3295, -0.7241,  ..., -0.4044,  0.1136, -0.4058],
        ...,
        [ 0.1046, -0.4182, -0.8176,  ..., -0.5025,  0.0246, -0.4963],
        [ 0.2040, -0.3271, -0.7215,  ..., -0.4013,  0.1165, -0.4040],
        [ 0.2125, -0.3127, -0.7096,  ..., -0.3915,  0.1309, -0.3882]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  9,  27, 122,  72, 134,  91,  58, 116], device='cuda:0')


48it [00:23,  2.03it/s]

tensor([[ 0.2196, -0.3332, -0.6938,  ..., -0.3733,  0.1138, -0.4025],
        [ 0.1872, -0.3846, -0.7335,  ..., -0.4112,  0.0624, -0.4584],
        [ 0.2223, -0.3287, -0.6906,  ..., -0.3711,  0.1182, -0.3977],
        ...,
        [ 0.2223, -0.3287, -0.6906,  ..., -0.3711,  0.1182, -0.3977],
        [ 0.2019, -0.3601, -0.7149,  ..., -0.3939,  0.0867, -0.4316],
        [ 0.2197, -0.3326, -0.6936,  ..., -0.3733,  0.1144, -0.4018]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([156,  35, 128,  45,  75,  34,   8,  24], device='cuda:0')


49it [00:24,  2.03it/s]

tensor([[ 0.2219, -0.3534, -0.6823,  ..., -0.3614,  0.0964, -0.4167],
        [ 0.2087, -0.3781, -0.6982,  ..., -0.3774,  0.0719, -0.4441],
        [ 0.2119, -0.3724, -0.6940,  ..., -0.3733,  0.0768, -0.4380],
        ...,
        [ 0.2278, -0.3471, -0.6763,  ..., -0.3550,  0.1028, -0.4102],
        [ 0.2269, -0.3479, -0.6772,  ..., -0.3560,  0.1019, -0.4111],
        [ 0.2186, -0.3629, -0.6869,  ..., -0.3663,  0.0865, -0.4278]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 51, 169, 119,  27,  33, 157, 119,  84], device='cuda:0')


50it [00:24,  2.03it/s]

tensor([[ 0.2281, -0.3700, -0.6673,  ..., -0.3454,  0.1067, -0.4282],
        [ 0.2179, -0.3878, -0.6778,  ..., -0.3554,  0.0894, -0.4478],
        [ 0.2030, -0.4118, -0.6936,  ..., -0.3743,  0.0660, -0.4742],
        ...,
        [ 0.2305, -0.3657, -0.6647,  ..., -0.3421,  0.1111, -0.4234],
        [ 0.2305, -0.3657, -0.6647,  ..., -0.3421,  0.1111, -0.4234],
        [ 0.2305, -0.3657, -0.6647,  ..., -0.3421,  0.1111, -0.4234]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 63,  57,  84,  14, 147,  93,  23,   1], device='cuda:0')


51it [00:25,  2.03it/s]

tensor([[ 0.2235, -0.3637, -0.6629,  ..., -0.3413,  0.1051, -0.4506],
        [ 0.2222, -0.3664, -0.6635,  ..., -0.3411,  0.1025, -0.4535],
        [ 0.2281, -0.3547, -0.6583,  ..., -0.3348,  0.1137, -0.4404],
        ...,
        [ 0.2233, -0.3645, -0.6625,  ..., -0.3399,  0.1043, -0.4514],
        [ 0.2306, -0.3519, -0.6557,  ..., -0.3321,  0.1165, -0.4373],
        [ 0.2222, -0.3648, -0.6641,  ..., -0.3414,  0.1042, -0.4515]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([126, 123, 143,  85, 155, 127,   7,  16], device='cuda:0')


52it [00:25,  2.03it/s]

tensor([[ 0.2235, -0.3458, -0.6541,  ..., -0.3299,  0.1145, -0.4569],
        [ 0.2282, -0.3410, -0.6490,  ..., -0.3247,  0.1192, -0.4519],
        [ 0.2266, -0.3425, -0.6507,  ..., -0.3263,  0.1183, -0.4535],
        ...,
        [ 0.2259, -0.3433, -0.6516,  ..., -0.3272,  0.1167, -0.4543],
        [ 0.1881, -0.4027, -0.6872,  ..., -0.3721,  0.0626, -0.5194],
        [ 0.2115, -0.3662, -0.6640,  ..., -0.3445,  0.0947, -0.4799]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 51, 118, 161, 142,  35, 132,  10, 143], device='cuda:0')


53it [00:26,  2.03it/s]

tensor([[ 0.2048, -0.3637, -0.6614,  ..., -0.3440,  0.0907, -0.5019],
        [ 0.2157, -0.3450, -0.6510,  ..., -0.3296,  0.1076, -0.4807],
        [ 0.2237, -0.3328, -0.6446,  ..., -0.3198,  0.1196, -0.4670],
        ...,
        [ 0.0828, -0.5041, -0.7807,  ..., -0.4756, -0.0445, -0.6480],
        [ 0.2237, -0.3328, -0.6446,  ..., -0.3198,  0.1196, -0.4670],
        [ 0.2167, -0.3449, -0.6504,  ..., -0.3277,  0.1087, -0.4805]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60, 162,  53, 149, 148,  49,   2,  72], device='cuda:0')


54it [00:26,  2.03it/s]

tensor([[ 0.1459, -0.4361, -0.6736,  ..., -0.4016,  0.0215, -0.6023],
        [ 0.2157, -0.3298, -0.6133,  ..., -0.3189,  0.1155, -0.4855],
        [ 0.1683, -0.4067, -0.6522,  ..., -0.3748,  0.0476, -0.5709],
        ...,
        [ 0.2171, -0.3272, -0.6121,  ..., -0.3172,  0.1177, -0.4826],
        [ 0.1817, -0.3824, -0.6427,  ..., -0.3622,  0.0670, -0.5448],
        [ 0.2009, -0.3545, -0.6256,  ..., -0.3380,  0.0930, -0.5137]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 32,  83, 137, 161,  98,  18, 150,   3], device='cuda:0')


55it [00:27,  2.03it/s]

tensor([[ 0.2086, -0.3239, -0.5840,  ..., -0.3166,  0.1138, -0.4986],
        [ 0.1925, -0.3482, -0.5967,  ..., -0.3359,  0.0915, -0.5258],
        [ 0.1374, -0.4087, -0.6504,  ..., -0.3933,  0.0347, -0.5879],
        ...,
        [ 0.2000, -0.3384, -0.5896,  ..., -0.3268,  0.1005, -0.5151],
        [ 0.2086, -0.3239, -0.5840,  ..., -0.3166,  0.1138, -0.4986],
        [ 0.2083, -0.3246, -0.5843,  ..., -0.3172,  0.1132, -0.4994]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 46,  88, 100,  66, 115, 122, 166,  56], device='cuda:0')


56it [00:27,  2.03it/s]

tensor([[ 0.1626, -0.3754, -0.5880,  ..., -0.3628,  0.0604, -0.5740],
        [ 0.1985, -0.3227, -0.5599,  ..., -0.3180,  0.1081, -0.5150],
        [ 0.1985, -0.3227, -0.5599,  ..., -0.3180,  0.1081, -0.5150],
        ...,
        [ 0.1972, -0.3240, -0.5613,  ..., -0.3193,  0.1067, -0.5163],
        [ 0.1839, -0.3467, -0.5706,  ..., -0.3361,  0.0876, -0.5421],
        [ 0.1985, -0.3227, -0.5599,  ..., -0.3180,  0.1081, -0.5150]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 69,  54,   8,  59, 170,  13,  66,  47], device='cuda:0')


57it [00:28,  2.03it/s]

tensor([[ 0.1008, -0.4470, -0.6072,  ..., -0.4247, -0.0096, -0.6315],
        [ 0.1461, -0.3826, -0.5710,  ..., -0.3692,  0.0486, -0.5615],
        [ 0.1379, -0.3930, -0.5777,  ..., -0.3801,  0.0368, -0.5733],
        ...,
        [ 0.1836, -0.3289, -0.5412,  ..., -0.3250,  0.0956, -0.5036],
        [ 0.1869, -0.3235, -0.5396,  ..., -0.3211,  0.1006, -0.4975],
        [ 0.1744, -0.3359, -0.5522,  ..., -0.3342,  0.0886, -0.5102]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 34, 110,  84, 163,  89,  33, 100,  92], device='cuda:0')


58it [00:28,  2.03it/s]

tensor([[ 0.1666, -0.3366, -0.5275,  ..., -0.3349,  0.0817, -0.4946],
        [ 0.1738, -0.3260, -0.5226,  ..., -0.3258,  0.0916, -0.4832],
        [ 0.1732, -0.3271, -0.5229,  ..., -0.3267,  0.0906, -0.4844],
        ...,
        [ 0.1646, -0.3419, -0.5281,  ..., -0.3375,  0.0781, -0.5005],
        [ 0.1449, -0.3650, -0.5434,  ..., -0.3606,  0.0569, -0.5246],
        [ 0.1660, -0.3387, -0.5272,  ..., -0.3352,  0.0807, -0.4969]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([153, 154,  97, 116, 145, 169,  38, 158], device='cuda:0')


59it [00:29,  2.03it/s]

tensor([[ 0.1470, -0.3515, -0.5154,  ..., -0.3493,  0.0866, -0.4946],
        [ 0.1470, -0.3500, -0.5163,  ..., -0.3489,  0.0881, -0.4927],
        [ 0.1510, -0.3390, -0.5178,  ..., -0.3413,  0.0964, -0.4808],
        ...,
        [ 0.1596, -0.3303, -0.5088,  ..., -0.3320,  0.1048, -0.4719],
        [ 0.1586, -0.3318, -0.5092,  ..., -0.3331,  0.1034, -0.4735],
        [ 0.1596, -0.3303, -0.5088,  ..., -0.3320,  0.1048, -0.4719]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 22,  84, 167,  58, 148,  43, 165, 137], device='cuda:0')


60it [00:29,  2.04it/s]

tensor([[ 0.1419, -0.3395, -0.4988,  ..., -0.3425,  0.1115, -0.4669],
        [ 0.1433, -0.3371, -0.4985,  ..., -0.3406,  0.1136, -0.4645],
        [ 0.1421, -0.3392, -0.4987,  ..., -0.3422,  0.1118, -0.4666],
        ...,
        [ 0.1016, -0.4056, -0.5209,  ..., -0.3928,  0.0583, -0.5359],
        [ 0.1442, -0.3360, -0.4979,  ..., -0.3395,  0.1147, -0.4633],
        [ 0.1432, -0.3375, -0.4982,  ..., -0.3408,  0.1133, -0.4648]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([160,  82,  38,  73,  74,  65,   0,  80], device='cuda:0')


61it [00:30,  2.04it/s]

tensor([[ 0.1421, -0.3575, -0.4965,  ..., -0.3612,  0.1090, -0.4718],
        [ 0.1507, -0.3456, -0.4902,  ..., -0.3504,  0.1193, -0.4596],
        [ 0.1447, -0.3551, -0.4930,  ..., -0.3584,  0.1113, -0.4692],
        ...,
        [ 0.1523, -0.3431, -0.4896,  ..., -0.3483,  0.1214, -0.4571],
        [ 0.1415, -0.3605, -0.4948,  ..., -0.3630,  0.1073, -0.4748],
        [ 0.1481, -0.3496, -0.4912,  ..., -0.3537,  0.1159, -0.4635]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  6,  68, 144,  14, 144,  87,  26, 115], device='cuda:0')


62it [00:30,  2.03it/s]

tensor([[ 0.1573, -0.3515, -0.4838,  ..., -0.3581,  0.1254, -0.4533],
        [ 0.1573, -0.3515, -0.4838,  ..., -0.3581,  0.1254, -0.4533],
        [ 0.1169, -0.4148, -0.5033,  ..., -0.4129,  0.0742, -0.5165],
        ...,
        [ 0.1549, -0.3539, -0.4864,  ..., -0.3607,  0.1230, -0.4558],
        [ 0.1559, -0.3529, -0.4853,  ..., -0.3597,  0.1240, -0.4548],
        [ 0.1573, -0.3515, -0.4838,  ..., -0.3581,  0.1254, -0.4533]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 20, 126, 123,  37,  82,  27,  60, 148], device='cuda:0')


63it [00:31,  2.03it/s]

tensor([[ 0.1494, -0.3767, -0.4854,  ..., -0.3824,  0.1141, -0.4672],
        [ 0.1596, -0.3610, -0.4803,  ..., -0.3690,  0.1267, -0.4516],
        [ 0.1272, -0.3977, -0.5073,  ..., -0.4052,  0.0925, -0.4880],
        ...,
        [ 0.1495, -0.3772, -0.4843,  ..., -0.3824,  0.1139, -0.4675],
        [ 0.1596, -0.3610, -0.4803,  ..., -0.3690,  0.1267, -0.4516],
        [ 0.1596, -0.3610, -0.4803,  ..., -0.3690,  0.1267, -0.4516]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 80, 123,  63, 145, 110, 101,  19, 157], device='cuda:0')


64it [00:31,  2.03it/s]

tensor([[ 0.1593, -0.3716, -0.4788,  ..., -0.3808,  0.1256, -0.4518],
        [ 0.1410, -0.4012, -0.4878,  ..., -0.4041,  0.1037, -0.4815],
        [ 0.1593, -0.3716, -0.4788,  ..., -0.3808,  0.1256, -0.4518],
        ...,
        [ 0.1549, -0.3784, -0.4806,  ..., -0.3868,  0.1201, -0.4584],
        [ 0.0688, -0.4781, -0.5536,  ..., -0.4815,  0.0308, -0.5581],
        [ 0.1325, -0.4120, -0.4923,  ..., -0.4146,  0.0936, -0.4917]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 85,  74, 124, 132,  95, 161,  62, 117], device='cuda:0')


65it [00:31,  2.03it/s]

tensor([[ 0.1386, -0.4108, -0.4885,  ..., -0.4157,  0.1018, -0.4812],
        [ 0.1153, -0.4472, -0.5004,  ..., -0.4458,  0.0748, -0.5174],
        [ 0.1243, -0.4243, -0.5046,  ..., -0.4320,  0.0878, -0.4948],
        ...,
        [ 0.1566, -0.3831, -0.4792,  ..., -0.3934,  0.1223, -0.4538],
        [ 0.1566, -0.3831, -0.4792,  ..., -0.3934,  0.1223, -0.4538],
        [ 0.1490, -0.3957, -0.4830,  ..., -0.4032,  0.1133, -0.4665]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 88,   1,  74,  30, 135,  40, 169, 156], device='cuda:0')


66it [00:32,  2.03it/s]

tensor([[ 0.1474, -0.3713, -0.4832,  ..., -0.4129,  0.1346, -0.4639],
        [ 0.1421, -0.3743, -0.4915,  ..., -0.4170,  0.1303, -0.4670],
        [ 0.1518, -0.3648, -0.4814,  ..., -0.4068,  0.1398, -0.4574],
        ...,
        [ 0.1457, -0.3708, -0.4877,  ..., -0.4131,  0.1338, -0.4632],
        [ 0.1352, -0.3895, -0.4894,  ..., -0.4279,  0.1209, -0.4832],
        [ 0.1404, -0.3815, -0.4872,  ..., -0.4215,  0.1270, -0.4748]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60,  17, 170, 102, 167,  73, 104,  19], device='cuda:0')


67it [00:32,  2.03it/s]

tensor([[ 0.1361, -0.3587, -0.4944,  ..., -0.4301,  0.1446, -0.4411],
        [ 0.1450, -0.3499, -0.4851,  ..., -0.4208,  0.1535, -0.4324],
        [ 0.1450, -0.3499, -0.4851,  ..., -0.4208,  0.1535, -0.4324],
        ...,
        [ 0.1450, -0.3499, -0.4851,  ..., -0.4208,  0.1535, -0.4324],
        [ 0.1450, -0.3499, -0.4851,  ..., -0.4208,  0.1535, -0.4324],
        [ 0.1223, -0.3812, -0.4961,  ..., -0.4504,  0.1280, -0.4661]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([162,  26, 162,  84,  20, 104, 124,  77], device='cuda:0')


68it [00:33,  2.03it/s]

tensor([[ 0.1311, -0.3435, -0.4959,  ..., -0.4411,  0.1583, -0.4167],
        [ 0.1365, -0.3383, -0.4903,  ..., -0.4354,  0.1636, -0.4114],
        [ 0.1361, -0.3387, -0.4904,  ..., -0.4359,  0.1631, -0.4119],
        ...,
        [ 0.1365, -0.3383, -0.4903,  ..., -0.4354,  0.1636, -0.4114],
        [ 0.1352, -0.3395, -0.4914,  ..., -0.4367,  0.1623, -0.4126],
        [ 0.1365, -0.3383, -0.4904,  ..., -0.4354,  0.1635, -0.4114]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 96, 141, 165,  44,  19, 108, 151,  47], device='cuda:0')


69it [00:33,  2.03it/s]

tensor([[ 0.1216, -0.3342, -0.5011,  ..., -0.4559,  0.1656, -0.3991],
        [ 0.1264, -0.3295, -0.4968,  ..., -0.4506,  0.1704, -0.3941],
        [ 0.1264, -0.3295, -0.4968,  ..., -0.4506,  0.1704, -0.3941],
        ...,
        [ 0.1173, -0.3382, -0.5063,  ..., -0.4601,  0.1614, -0.4030],
        [ 0.1264, -0.3295, -0.4968,  ..., -0.4506,  0.1704, -0.3941],
        [ 0.1141, -0.3452, -0.5025,  ..., -0.4659,  0.1570, -0.4127]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 46, 128,  16,  24,  56,  38,   5, 113], device='cuda:0')


70it [00:34,  2.03it/s]

tensor([[ 0.1147, -0.3234, -0.5045,  ..., -0.4662,  0.1742, -0.3801],
        [ 0.1147, -0.3234, -0.5045,  ..., -0.4662,  0.1742, -0.3801],
        [ 0.1147, -0.3234, -0.5045,  ..., -0.4662,  0.1742, -0.3801],
        ...,
        [ 0.1143, -0.3238, -0.5050,  ..., -0.4666,  0.1738, -0.3805],
        [ 0.1079, -0.3310, -0.5083,  ..., -0.4746,  0.1668, -0.3888],
        [ 0.1147, -0.3234, -0.5045,  ..., -0.4662,  0.1742, -0.3801]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 74, 150,  29, 157,  70, 128, 146, 167], device='cuda:0')


71it [00:34,  2.03it/s]

tensor([[ 0.1018, -0.3197, -0.5133,  ..., -0.4822,  0.1753, -0.3692],
        [ 0.0873, -0.3364, -0.5204,  ..., -0.5002,  0.1599, -0.3901],
        [ 0.0952, -0.3272, -0.5160,  ..., -0.4913,  0.1679, -0.3785],
        ...,
        [ 0.0968, -0.3247, -0.5184,  ..., -0.4872,  0.1704, -0.3741],
        [ 0.0985, -0.3229, -0.5162,  ..., -0.4857,  0.1723, -0.3726],
        [ 0.1018, -0.3197, -0.5133,  ..., -0.4822,  0.1753, -0.3692]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 69, 116,  62,   9, 141, 144,  28, 130], device='cuda:0')


72it [00:35,  2.03it/s]

tensor([[ 0.0877, -0.3183, -0.5231,  ..., -0.4986,  0.1739, -0.3612],
        [ 0.0877, -0.3183, -0.5231,  ..., -0.4986,  0.1739, -0.3612],
        [ 0.0867, -0.3193, -0.5241,  ..., -0.4996,  0.1729, -0.3622],
        ...,
        [ 0.0877, -0.3183, -0.5231,  ..., -0.4986,  0.1739, -0.3612],
        [ 0.0877, -0.3183, -0.5231,  ..., -0.4986,  0.1739, -0.3612],
        [ 0.0877, -0.3183, -0.5231,  ..., -0.4986,  0.1739, -0.3612]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 14,  42,  60,  90, 166, 130, 155, 118], device='cuda:0')


73it [00:35,  2.03it/s]

tensor([[ 0.0725, -0.3190, -0.5338,  ..., -0.5153,  0.1702, -0.3558],
        [ 0.0719, -0.3196, -0.5343,  ..., -0.5161,  0.1696, -0.3566],
        [ 0.0725, -0.3190, -0.5338,  ..., -0.5153,  0.1702, -0.3558],
        ...,
        [ 0.0725, -0.3190, -0.5338,  ..., -0.5153,  0.1702, -0.3558],
        [ 0.0725, -0.3190, -0.5338,  ..., -0.5153,  0.1702, -0.3558],
        [ 0.0725, -0.3190, -0.5338,  ..., -0.5153,  0.1702, -0.3558]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([158,  88, 167, 167, 120,  92, 145,  43], device='cuda:0')


74it [00:36,  2.03it/s]

tensor([[ 0.0564, -0.3216, -0.5453,  ..., -0.5323,  0.1644, -0.3528],
        [ 0.0564, -0.3216, -0.5453,  ..., -0.5323,  0.1644, -0.3528],
        [ 0.0564, -0.3216, -0.5453,  ..., -0.5323,  0.1644, -0.3528],
        ...,
        [ 0.0544, -0.3234, -0.5472,  ..., -0.5342,  0.1625, -0.3547],
        [ 0.0529, -0.3253, -0.5480,  ..., -0.5368,  0.1614, -0.3571],
        [ 0.0482, -0.3300, -0.5509,  ..., -0.5429,  0.1568, -0.3634]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 30,  75,  19, 119,  53,  92,  72,  79], device='cuda:0')


75it [00:36,  2.03it/s]

tensor([[ 0.0394, -0.3258, -0.5575,  ..., -0.5495,  0.1566, -0.3520],
        [ 0.0394, -0.3258, -0.5575,  ..., -0.5495,  0.1566, -0.3520],
        [ 0.0394, -0.3258, -0.5575,  ..., -0.5495,  0.1566, -0.3520],
        ...,
        [ 0.0394, -0.3258, -0.5575,  ..., -0.5495,  0.1566, -0.3520],
        [ 0.0390, -0.3263, -0.5579,  ..., -0.5500,  0.1562, -0.3524],
        [ 0.0394, -0.3258, -0.5575,  ..., -0.5495,  0.1566, -0.3520]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 13,  33,  89, 163, 139,  43,  33, 160], device='cuda:0')


76it [00:37,  2.03it/s]

tensor([[ 0.0217, -0.3317, -0.5703,  ..., -0.5669,  0.1472, -0.3531],
        [ 0.0217, -0.3317, -0.5703,  ..., -0.5669,  0.1472, -0.3531],
        [ 0.0217, -0.3317, -0.5703,  ..., -0.5669,  0.1472, -0.3531],
        ...,
        [ 0.0155, -0.3386, -0.5773,  ..., -0.5742,  0.1427, -0.3603],
        [ 0.0217, -0.3317, -0.5703,  ..., -0.5669,  0.1472, -0.3531],
        [ 0.0162, -0.3376, -0.5733,  ..., -0.5733,  0.1414, -0.3613]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60, 147,  40, 164, 166, 162, 104,  57], device='cuda:0')


77it [00:37,  2.02it/s]

tensor([[ 0.0011, -0.3413, -0.5862,  ..., -0.5873,  0.1348, -0.3589],
        [ 0.0034, -0.3390, -0.5838,  ..., -0.5845,  0.1362, -0.3562],
        [ 0.0034, -0.3390, -0.5838,  ..., -0.5845,  0.1362, -0.3562],
        ...,
        [ 0.0034, -0.3390, -0.5838,  ..., -0.5845,  0.1362, -0.3562],
        [ 0.0015, -0.3409, -0.5850,  ..., -0.5870,  0.1346, -0.3584],
        [ 0.0018, -0.3407, -0.5853,  ..., -0.5866,  0.1351, -0.3583]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 30,  33, 163,   9, 110, 147,  39,   6], device='cuda:0')


78it [00:38,  2.03it/s]

tensor([[-0.0155, -0.3475, -0.5978,  ..., -0.6022,  0.1238, -0.3608],
        [-0.0155, -0.3475, -0.5978,  ..., -0.6022,  0.1238, -0.3608],
        [-0.0174, -0.3494, -0.5987,  ..., -0.6045,  0.1220, -0.3632],
        ...,
        [-0.0155, -0.3475, -0.5978,  ..., -0.6022,  0.1238, -0.3608],
        [-0.0155, -0.3475, -0.5978,  ..., -0.6022,  0.1238, -0.3608],
        [-0.0155, -0.3475, -0.5978,  ..., -0.6022,  0.1238, -0.3608]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  9, 125, 155,  18, 141,  97,  72,  23], device='cuda:0')


79it [00:38,  2.03it/s]

tensor([[-0.0350, -0.3573, -0.6122,  ..., -0.6200,  0.1101, -0.3671],
        [-0.0350, -0.3573, -0.6122,  ..., -0.6200,  0.1101, -0.3671],
        [-0.0350, -0.3573, -0.6122,  ..., -0.6200,  0.1101, -0.3671],
        ...,
        [-0.0433, -0.3655, -0.6181,  ..., -0.6308,  0.1034, -0.3770],
        [-0.0350, -0.3573, -0.6122,  ..., -0.6200,  0.1101, -0.3671],
        [-0.0398, -0.3623, -0.6162,  ..., -0.6262,  0.1064, -0.3729]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 33,  83,  80,  59, 142,  52,  70,  62], device='cuda:0')


80it [00:39,  2.02it/s]

tensor([[-0.0548, -0.3681, -0.6270,  ..., -0.6379,  0.0952, -0.3746],
        [-0.0590, -0.3726, -0.6315,  ..., -0.6425,  0.0914, -0.3792],
        [-0.0548, -0.3681, -0.6270,  ..., -0.6379,  0.0952, -0.3746],
        ...,
        [-0.0548, -0.3681, -0.6270,  ..., -0.6379,  0.0952, -0.3746],
        [-0.0548, -0.3681, -0.6270,  ..., -0.6379,  0.0952, -0.3746],
        [-0.0551, -0.3685, -0.6274,  ..., -0.6383,  0.0951, -0.3750]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([164, 107, 121,  80,   1,  38, 133,  66], device='cuda:0')


81it [00:39,  2.03it/s]

tensor([[-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835],
        [-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835],
        [-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835],
        ...,
        [-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835],
        [-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835],
        [-0.0750, -0.3498, -0.6422,  ..., -0.6558,  0.0794, -0.3835]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([117,  37,  52, 165, 127, 168, 121, 124], device='cuda:0')


82it [00:40,  2.03it/s]

tensor([[-0.0955, -0.3351, -0.6578,  ..., -0.6391,  0.0626, -0.3935],
        [-0.0955, -0.3351, -0.6578,  ..., -0.6391,  0.0626, -0.3935],
        [-0.0955, -0.3351, -0.6578,  ..., -0.6391,  0.0626, -0.3935],
        ...,
        [-0.0971, -0.3365, -0.6581,  ..., -0.6412,  0.0609, -0.3955],
        [-0.0955, -0.3351, -0.6578,  ..., -0.6391,  0.0626, -0.3935],
        [-0.0955, -0.3351, -0.6578,  ..., -0.6391,  0.0626, -0.3935]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 28, 136, 157, 135, 142,  12, 126,  66], device='cuda:0')


83it [00:40,  2.03it/s]

tensor([[-0.1187, -0.3266, -0.6763,  ..., -0.6286,  0.0430, -0.4075],
        [-0.1162, -0.3237, -0.6736,  ..., -0.6255,  0.0451, -0.4045],
        [-0.1162, -0.3237, -0.6736,  ..., -0.6255,  0.0451, -0.4045],
        ...,
        [-0.1184, -0.3260, -0.6758,  ..., -0.6278,  0.0428, -0.4068],
        [-0.1162, -0.3237, -0.6736,  ..., -0.6255,  0.0451, -0.4045],
        [-0.1162, -0.3237, -0.6736,  ..., -0.6255,  0.0451, -0.4045]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([47, 81, 82, 16, 85, 42,  5, 36], device='cuda:0')


84it [00:41,  2.03it/s]

tensor([[-0.1394, -0.3178, -0.6920,  ..., -0.6174,  0.0245, -0.4189],
        [-0.1376, -0.3159, -0.6901,  ..., -0.6154,  0.0263, -0.4170],
        [-0.1373, -0.3155, -0.6897,  ..., -0.6151,  0.0267, -0.4167],
        ...,
        [-0.1371, -0.3153, -0.6896,  ..., -0.6148,  0.0269, -0.4165],
        [-0.1371, -0.3153, -0.6896,  ..., -0.6148,  0.0269, -0.4165],
        [-0.1371, -0.3153, -0.6896,  ..., -0.6148,  0.0269, -0.4165]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 80, 127,   1,   1,  89, 120,  55, 128], device='cuda:0')


85it [00:41,  2.03it/s]

tensor([[-0.1582, -0.2673, -0.7058,  ..., -0.6067,  0.0080, -0.4292],
        [-0.1617, -0.2701, -0.7071,  ..., -0.6115,  0.0042, -0.4333],
        [-0.1595, -0.2688, -0.7074,  ..., -0.6084,  0.0070, -0.4309],
        ...,
        [-0.1582, -0.2673, -0.7058,  ..., -0.6067,  0.0080, -0.4292],
        [-0.1586, -0.2677, -0.7063,  ..., -0.6072,  0.0076, -0.4297],
        [-0.1582, -0.2673, -0.7058,  ..., -0.6067,  0.0080, -0.4292]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([115, 117,   3,  73,  83,  15, 109,  95], device='cuda:0')


86it [00:42,  2.03it/s]

tensor([[-0.1793, -0.2253, -0.7222,  ..., -0.6010, -0.0113, -0.4427],
        [-0.1793, -0.2253, -0.7222,  ..., -0.6010, -0.0113, -0.4427],
        [-0.1797, -0.2258, -0.7227,  ..., -0.6016, -0.0115, -0.4433],
        ...,
        [-0.1921, -0.2382, -0.7349,  ..., -0.6138, -0.0242, -0.4555],
        [-0.1793, -0.2253, -0.7222,  ..., -0.6010, -0.0113, -0.4427],
        [-0.1793, -0.2253, -0.7222,  ..., -0.6010, -0.0113, -0.4427]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([150,  25,  83,  48,  79,  27,  85,  40], device='cuda:0')


87it [00:42,  2.03it/s]

tensor([[-0.2094, -0.1965, -0.7434,  ..., -0.6090, -0.0403, -0.4669],
        [-0.2005, -0.1889, -0.7388,  ..., -0.5975, -0.0311, -0.4569],
        [-0.2032, -0.1916, -0.7414,  ..., -0.6003, -0.0338, -0.4597],
        ...,
        [-0.2005, -0.1889, -0.7388,  ..., -0.5975, -0.0311, -0.4569],
        [-0.2005, -0.1889, -0.7388,  ..., -0.5975, -0.0311, -0.4569],
        [-0.2005, -0.1889, -0.7388,  ..., -0.5975, -0.0311, -0.4569]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  8, 160,  15,  92, 117, 101, 108, 117], device='cuda:0')


88it [00:43,  2.03it/s]

tensor([[-0.2218, -0.1577, -0.7554,  ..., -0.5961, -0.0513, -0.4717],
        [-0.2294, -0.1655, -0.7629,  ..., -0.6037, -0.0591, -0.4794],
        [-0.2218, -0.1577, -0.7554,  ..., -0.5961, -0.0513, -0.4717],
        ...,
        [-0.2230, -0.1586, -0.7557,  ..., -0.5977, -0.0526, -0.4731],
        [-0.2218, -0.1577, -0.7554,  ..., -0.5961, -0.0513, -0.4717],
        [-0.2289, -0.1650, -0.7624,  ..., -0.6032, -0.0586, -0.4789]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([158, 122, 159, 164,   6,  77,  34, 110], device='cuda:0')


89it [00:43,  2.03it/s]

tensor([[-0.2430, -0.1312, -0.7721,  ..., -0.5964, -0.0718, -0.4869],
        [-0.2430, -0.1312, -0.7721,  ..., -0.5964, -0.0718, -0.4869],
        [-0.2430, -0.1312, -0.7721,  ..., -0.5964, -0.0718, -0.4869],
        ...,
        [-0.2430, -0.1312, -0.7721,  ..., -0.5964, -0.0718, -0.4869],
        [-0.2446, -0.1323, -0.7726,  ..., -0.5985, -0.0735, -0.4887],
        [-0.2653, -0.1536, -0.7940,  ..., -0.6185, -0.0944, -0.5091]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 45,  46,  66,  35, 167,  10,  48,  51], device='cuda:0')


90it [00:44,  2.03it/s]

tensor([[-0.2643, -0.1090, -0.7889,  ..., -0.5984, -0.0925, -0.5026],
        [-0.2643, -0.1090, -0.7889,  ..., -0.5984, -0.0925, -0.5026],
        [-0.2643, -0.1090, -0.7889,  ..., -0.5984, -0.0925, -0.5026],
        ...,
        [-0.2643, -0.1090, -0.7889,  ..., -0.5984, -0.0925, -0.5026],
        [-0.2643, -0.1090, -0.7889,  ..., -0.5984, -0.0925, -0.5026],
        [-0.2654, -0.1101, -0.7900,  ..., -0.5996, -0.0937, -0.5038]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([110,  42, 153, 104, 112, 133,  67, 153], device='cuda:0')


91it [00:44,  2.03it/s]

tensor([[-0.2854, -0.0907, -0.8057,  ..., -0.6019, -0.1134, -0.5187],
        [-0.2907, -0.0948, -0.8078,  ..., -0.6088, -0.1188, -0.5246],
        [-0.2854, -0.0907, -0.8057,  ..., -0.6019, -0.1134, -0.5187],
        ...,
        [-0.2854, -0.0907, -0.8057,  ..., -0.6019, -0.1134, -0.5187],
        [-0.3112, -0.1169, -0.8313,  ..., -0.6280, -0.1390, -0.5448],
        [-0.2854, -0.0907, -0.8057,  ..., -0.6019, -0.1134, -0.5187]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 58,  97,  31,  46,  84,  39, 130, 119], device='cuda:0')


92it [00:45,  2.03it/s]

tensor([[-0.3110, -0.0792, -0.8235,  ..., -0.6129, -0.1392, -0.5402],
        [-0.3166, -0.0866, -0.8326,  ..., -0.6171, -0.1450, -0.5456],
        [-0.3065, -0.0761, -0.8225,  ..., -0.6068, -0.1345, -0.5352],
        ...,
        [-0.3065, -0.0761, -0.8225,  ..., -0.6068, -0.1345, -0.5352],
        [-0.3121, -0.0799, -0.8238,  ..., -0.6144, -0.1403, -0.5415],
        [-0.3166, -0.0830, -0.8248,  ..., -0.6203, -0.1450, -0.5465]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([144,  62,  82,  25, 109,  53,  82,  62], device='cuda:0')


93it [00:45,  2.03it/s]

tensor([[-0.3275, -0.0649, -0.8393,  ..., -0.6130, -0.1556, -0.5519],
        [-0.3309, -0.0684, -0.8428,  ..., -0.6164, -0.1592, -0.5553],
        [-0.3275, -0.0649, -0.8393,  ..., -0.6130, -0.1556, -0.5519],
        ...,
        [-0.3275, -0.0649, -0.8393,  ..., -0.6130, -0.1556, -0.5519],
        [-0.3275, -0.0649, -0.8393,  ..., -0.6130, -0.1556, -0.5519],
        [-0.3275, -0.0649, -0.8393,  ..., -0.6130, -0.1556, -0.5519]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([158,  68,  48,  40, 104, 100,  45,  47], device='cuda:0')


94it [00:46,  2.03it/s]

tensor([[-0.3483, -0.0567, -0.8562,  ..., -0.6203, -0.1769, -0.5689],
        [-0.3483, -0.0567, -0.8562,  ..., -0.6203, -0.1769, -0.5689],
        [-0.3583, -0.0634, -0.8584,  ..., -0.6337, -0.1873, -0.5801],
        ...,
        [-0.3483, -0.0567, -0.8562,  ..., -0.6203, -0.1769, -0.5689],
        [-0.3483, -0.0567, -0.8562,  ..., -0.6203, -0.1769, -0.5689],
        [-0.3483, -0.0567, -0.8562,  ..., -0.6203, -0.1769, -0.5689]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([104,  96,  94, 143,  56, 113,   7, 149], device='cuda:0')


95it [00:46,  2.03it/s]

tensor([[-0.3938, -0.0679, -0.8785,  ..., -0.6619, -0.2240, -0.6138],
        [-0.3902, -0.0659, -0.8785,  ..., -0.6569, -0.2203, -0.6098],
        [-0.3714, -0.0536, -0.8752,  ..., -0.6309, -0.2004, -0.5883],
        ...,
        [-0.3691, -0.0513, -0.8729,  ..., -0.6286, -0.1981, -0.5860],
        [-0.3691, -0.0513, -0.8729,  ..., -0.6286, -0.1981, -0.5860],
        [-0.3691, -0.0513, -0.8729,  ..., -0.6286, -0.1981, -0.5860]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 48, 115,   8, 170, 101, 134,  14, 145], device='cuda:0')


96it [00:47,  2.03it/s]

tensor([[-0.3920, -0.0508, -0.8919,  ..., -0.6402, -0.2217, -0.5726],
        [-0.3980, -0.0555, -0.8950,  ..., -0.6477, -0.2281, -0.5788],
        [-0.4157, -0.0658, -0.8956,  ..., -0.6728, -0.2466, -0.5976],
        ...,
        [-0.3897, -0.0485, -0.8897,  ..., -0.6379, -0.2194, -0.5702],
        [-0.3897, -0.0485, -0.8897,  ..., -0.6379, -0.2194, -0.5702],
        [-0.4251, -0.0724, -0.8983,  ..., -0.6855, -0.2564, -0.6077]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 75,  13, 151, 142,  53, 162, 157,  57], device='cuda:0')


97it [00:47,  2.03it/s]

tensor([[-0.4260, -0.0586, -0.9100,  ..., -0.6693, -0.2572, -0.5734],
        [-0.4336, -0.0641, -0.9126,  ..., -0.6792, -0.2652, -0.5810],
        [-0.4738, -0.1019, -0.9479,  ..., -0.7219, -0.3069, -0.6210],
        ...,
        [-0.4360, -0.0738, -0.9326,  ..., -0.6731, -0.2665, -0.5827],
        [-0.4197, -0.0552, -0.9102,  ..., -0.6602, -0.2506, -0.5672],
        [-0.4222, -0.0561, -0.9091,  ..., -0.6644, -0.2533, -0.5697]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([110, 140,  50,  44, 136, 110, 142,  76], device='cuda:0')


98it [00:48,  2.03it/s]

tensor([[-0.4330, -0.0514, -0.9236,  ..., -0.6624, -0.2645, -0.5503],
        [-0.4527, -0.0666, -0.9328,  ..., -0.6869, -0.2849, -0.5692],
        [-0.5109, -0.1050, -0.9449,  ..., -0.7659, -0.3460, -0.6242],
        ...,
        [-0.4303, -0.0497, -0.9230,  ..., -0.6588, -0.2617, -0.5478],
        [-0.4476, -0.0644, -0.9344,  ..., -0.6787, -0.2794, -0.5646],
        [-0.4303, -0.0497, -0.9230,  ..., -0.6588, -0.2617, -0.5478]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 39, 118, 127,  41,  97,  34, 127, 170], device='cuda:0')


99it [00:48,  2.03it/s]

tensor([[-0.4837, -0.0757, -0.9474,  ..., -0.7153, -0.3178, -0.5423],
        [-0.4862, -0.0775, -0.9480,  ..., -0.7188, -0.3205, -0.5446],
        [-0.4504, -0.0532, -0.9395,  ..., -0.6703, -0.2827, -0.5122],
        ...,
        [-0.5037, -0.1021, -0.9827,  ..., -0.7285, -0.3381, -0.5638],
        [-0.4952, -0.0836, -0.9501,  ..., -0.7310, -0.3300, -0.5527],
        [-0.5080, -0.0985, -0.9684,  ..., -0.7411, -0.3429, -0.5659]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([103,   2,  12,  54,  89, 144,  78, 129], device='cuda:0')


100it [00:49,  2.03it/s]

tensor([[-0.5316, -0.1200, -0.9801,  ..., -0.7436, -0.3668, -0.5423],
        [-0.5318, -0.1011, -0.9280,  ..., -0.7658, -0.3688, -0.5353],
        [-0.4702, -0.0585, -0.9183,  ..., -0.6823, -0.3037, -0.4816],
        ...,
        [-0.5359, -0.1037, -0.9280,  ..., -0.7716, -0.3732, -0.5387],
        [-0.4751, -0.0633, -0.9228,  ..., -0.6873, -0.3087, -0.4865],
        [-0.4734, -0.0607, -0.9187,  ..., -0.6866, -0.3070, -0.4844]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  5,  63,   6,  38,  77,  48, 105, 169], device='cuda:0')


101it [00:49,  2.03it/s]

tensor([[-0.5711, -0.1477, -0.9837,  ..., -0.7755, -0.3810, -0.5364],
        [-0.4899, -0.0654, -0.9004,  ..., -0.6948, -0.2975, -0.4557],
        [-0.4899, -0.0654, -0.9004,  ..., -0.6948, -0.2975, -0.4557],
        ...,
        [-0.5677, -0.1286, -0.9325,  ..., -0.7905, -0.3774, -0.5253],
        [-0.5318, -0.0950, -0.9041,  ..., -0.7518, -0.3415, -0.4911],
        [-0.5265, -0.0917, -0.9050,  ..., -0.7440, -0.3358, -0.4868]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([145,  39,  50, 158,  65,   7, 155, 132], device='cuda:0')


102it [00:50,  2.03it/s]

tensor([[-0.5619, -0.1114, -0.8860,  ..., -0.7802, -0.3487, -0.4773],
        [-0.5166, -0.0789, -0.8856,  ..., -0.7178, -0.3013, -0.4399],
        [-0.5686, -0.1165, -0.8875,  ..., -0.7887, -0.3555, -0.4830],
        ...,
        [-0.6783, -0.2353, -1.0250,  ..., -0.8877, -0.4670, -0.5958],
        [-0.5094, -0.0737, -0.8856,  ..., -0.7078, -0.2937, -0.4339],
        [-0.5837, -0.1275, -0.8876,  ..., -0.8098, -0.3712, -0.4956]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 79, 111,  20,  44,  15,  37, 129,  39], device='cuda:0')


103it [00:50,  2.03it/s]

tensor([[-0.6724, -0.1893, -0.8677,  ..., -0.9204, -0.4421, -0.5337],
        [-0.5286, -0.0833, -0.8736,  ..., -0.7212, -0.2921, -0.4160],
        [-0.6037, -0.1387, -0.8704,  ..., -0.8253, -0.3705, -0.4775],
        ...,
        [-0.7611, -0.2969, -1.0230,  ..., -0.9854, -0.5323, -0.6332],
        [-0.5823, -0.1229, -0.8713,  ..., -0.7956, -0.3481, -0.4600],
        [-0.5286, -0.0833, -0.8736,  ..., -0.7212, -0.2921, -0.4160]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 60, 110,  27,  64,   7, 138,   3,  17], device='cuda:0')


104it [00:51,  2.01it/s]

tensor([[-0.6716, -0.1925, -0.8698,  ..., -0.9027, -0.4218, -0.5063],
        [-0.6242, -0.1525, -0.8581,  ..., -0.8420, -0.3725, -0.4647],
        [-0.5540, -0.0990, -0.8637,  ..., -0.7439, -0.2991, -0.4070],
        ...,
        [-0.6499, -0.1722, -0.8560,  ..., -0.8780, -0.3995, -0.4859],
        [-0.5654, -0.1077, -0.8628,  ..., -0.7599, -0.3110, -0.4163],
        [-0.5808, -0.1194, -0.8616,  ..., -0.7813, -0.3270, -0.4289]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 37, 100, 108, 140,  85,  88, 100,  13], device='cuda:0')


105it [00:51,  2.01it/s]

tensor([[-0.6836, -0.2175, -0.9369,  ..., -0.8814, -0.4160, -0.5026],
        [-0.6463, -0.1797, -0.8990,  ..., -0.8439, -0.3777, -0.4653],
        [-0.5664, -0.1058, -0.8573,  ..., -0.7488, -0.2944, -0.3905],
        ...,
        [-0.5664, -0.1058, -0.8573,  ..., -0.7488, -0.2944, -0.3905],
        [-0.6902, -0.2068, -0.8569,  ..., -0.9195, -0.4249, -0.4965],
        [-0.5905, -0.1250, -0.8547,  ..., -0.7829, -0.3198, -0.4107]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  0, 111,  77, 133, 104, 149,  46,  11], device='cuda:0')


106it [00:52,  2.00it/s]

tensor([[-0.6736, -0.2510, -0.9843,  ..., -0.8932, -0.4308, -0.5131],
        [-0.5937, -0.1527, -0.8472,  ..., -0.8223, -0.3422, -0.4179],
        [-0.6175, -0.1746, -0.8463,  ..., -0.8585, -0.3693, -0.4404],
        ...,
        [-0.6240, -0.1831, -0.8606,  ..., -0.8630, -0.3767, -0.4485],
        [-0.5780, -0.1386, -0.8494,  ..., -0.7978, -0.3239, -0.4032],
        [-0.5715, -0.1327, -0.8503,  ..., -0.7876, -0.3164, -0.3971]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([163, 115,  76,  41,  61,   8, 128, 120], device='cuda:0')


107it [00:52,  2.01it/s]

tensor([[-0.5995, -0.1846, -0.8410,  ..., -0.8662, -0.3693, -0.4309],
        [-0.5526, -0.1372, -0.8488,  ..., -0.7861, -0.3096, -0.3822],
        [-0.5475, -0.1320, -0.8496,  ..., -0.7774, -0.3031, -0.3769],
        ...,
        [-0.5922, -0.1893, -0.9067,  ..., -0.8335, -0.3603, -0.4337],
        [-0.5871, -0.1802, -0.8844,  ..., -0.8325, -0.3540, -0.4251],
        [-0.5857, -0.1706, -0.8433,  ..., -0.8426, -0.3517, -0.4166]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 40, 167,  96,  23,  90,  20, 131, 142], device='cuda:0')


108it [00:53,  2.01it/s]

tensor([[-0.5754, -0.1842, -0.8422,  ..., -0.8548, -0.3563, -0.4127],
        [-0.5935, -0.2046, -0.8388,  ..., -0.8883, -0.3814, -0.4334],
        [-0.6950, -0.3605, -1.0614,  ..., -1.0025, -0.5234, -0.5875],
        ...,
        [-0.5416, -0.1462, -0.8486,  ..., -0.7920, -0.3095, -0.3738],
        [-0.5416, -0.1462, -0.8486,  ..., -0.7920, -0.3095, -0.3738],
        [-0.5641, -0.1715, -0.8443,  ..., -0.8337, -0.3406, -0.3996]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([37, 75, 13, 21, 45, 96, 40, 12], device='cuda:0')


109it [00:53,  2.02it/s]

tensor([[-0.5378, -0.1610, -0.8491,  ..., -0.8067, -0.3171, -0.3730],
        [-0.5799, -0.2131, -0.8403,  ..., -0.8910, -0.3800, -0.4260],
        [-0.5406, -0.1644, -0.8485,  ..., -0.8123, -0.3213, -0.3766],
        ...,
        [-0.5639, -0.1998, -0.8872,  ..., -0.8447, -0.3555, -0.4118],
        [-0.6119, -0.2737, -0.9601,  ..., -0.9168, -0.4286, -0.4855],
        [-0.5462, -0.1720, -0.8584,  ..., -0.8183, -0.3283, -0.3840]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([104,  28,  56, 158, 127,  17,  67, 110], device='cuda:0')


110it [00:54,  2.01it/s]

tensor([[-0.5870, -0.2437, -0.8699,  ..., -0.9131, -0.4009, -0.4420],
        [-0.5360, -0.1763, -0.8511,  ..., -0.8215, -0.3257, -0.3743],
        [-0.5360, -0.1763, -0.8511,  ..., -0.8215, -0.3257, -0.3743],
        ...,
        [-0.5459, -0.1896, -0.8488,  ..., -0.8427, -0.3416, -0.3878],
        [-0.5360, -0.1763, -0.8511,  ..., -0.8215, -0.3257, -0.3743],
        [-0.5360, -0.1763, -0.8511,  ..., -0.8215, -0.3257, -0.3743]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  9,  61, 142,   2, 162,  90, 160, 170], device='cuda:0')


111it [00:54,  2.02it/s]

tensor([[-0.5386, -0.1960, -0.8199,  ..., -0.8426, -0.3400, -0.3526],
        [-0.5437, -0.2036, -0.8317,  ..., -0.8478, -0.3468, -0.3601],
        [-0.5932, -0.2897, -0.9133,  ..., -0.9338, -0.4324, -0.4474],
        ...,
        [-0.6089, -0.3157, -0.9416,  ..., -0.9581, -0.4580, -0.4737],
        [-0.5359, -0.1921, -0.8206,  ..., -0.8364, -0.3354, -0.3486],
        [-0.5826, -0.2746, -0.9011,  ..., -0.9172, -0.4168, -0.4322]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([155, 169,  46,  46, 117, 115,   7,  34], device='cuda:0')


112it [00:55,  2.02it/s]

tensor([[-0.5374, -0.2083, -0.7943,  ..., -0.8514, -0.3198, -0.3272],
        [-0.5631, -0.2446, -0.8296,  ..., -0.8879, -0.3551, -0.3640],
        [-0.7644, -0.5983, -1.1745,  ..., -1.2371, -0.7016, -0.7250],
        ...,
        [-0.5374, -0.2083, -0.7943,  ..., -0.8514, -0.3198, -0.3272],
        [-0.5374, -0.2083, -0.7943,  ..., -0.8514, -0.3198, -0.3272],
        [-0.5374, -0.2083, -0.7943,  ..., -0.8514, -0.3198, -0.3272]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([122,  59, 143, 145,  89, 156,   6, 136], device='cuda:0')


113it [00:55,  2.02it/s]

tensor([[-0.5403, -0.2248, -0.7720,  ..., -0.8665, -0.3074, -0.3098],
        [-0.5621, -0.2549, -0.8015,  ..., -0.8976, -0.3369, -0.3411],
        [-0.5403, -0.2248, -0.7720,  ..., -0.8665, -0.3074, -0.3098],
        ...,
        [-0.7439, -0.5819, -1.1208,  ..., -1.2234, -0.6557, -0.6784],
        [-0.5403, -0.2248, -0.7720,  ..., -0.8665, -0.3074, -0.3098],
        [-0.5465, -0.2312, -0.7783,  ..., -0.8733, -0.3138, -0.3164]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([157, 116,  34,  71,  72,  69,  51, 114], device='cuda:0')


114it [00:56,  2.02it/s]

tensor([[-0.5446, -0.2416, -0.7533,  ..., -0.8816, -0.2980, -0.2961],
        [-0.5622, -0.2618, -0.7732,  ..., -0.9031, -0.3176, -0.3172],
        [-0.5596, -0.2627, -0.7740,  ..., -0.9035, -0.3183, -0.3183],
        ...,
        [-0.5446, -0.2416, -0.7533,  ..., -0.8816, -0.2980, -0.2961],
        [-0.5446, -0.2416, -0.7533,  ..., -0.8816, -0.2980, -0.2961],
        [-0.6290, -0.3927, -0.9012,  ..., -1.0346, -0.4451, -0.4548]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([  1,   2, 161, 162,  75, 126, 159,  89], device='cuda:0')


115it [00:56,  2.02it/s]

tensor([[-0.5611, -0.2449, -0.7174,  ..., -0.9087, -0.3018, -0.2974],
        [-0.5500, -0.2337, -0.7080,  ..., -0.8967, -0.2913, -0.2856],
        [-0.5616, -0.2454, -0.7181,  ..., -0.9093, -0.3020, -0.2979],
        ...,
        [-0.5598, -0.2435, -0.7163,  ..., -0.9072, -0.3004, -0.2959],
        [-0.5529, -0.2366, -0.7105,  ..., -0.8999, -0.2940, -0.2887],
        [-0.5500, -0.2337, -0.7080,  ..., -0.8967, -0.2913, -0.2856]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([133,  40, 102, 123,  89, 127,  84,  36], device='cuda:0')


116it [00:57,  2.03it/s]

tensor([[-0.5597, -0.2315, -0.6709,  ..., -0.9153, -0.2899, -0.2817],
        [-0.5565, -0.2283, -0.6686,  ..., -0.9118, -0.2869, -0.2782],
        [-0.5565, -0.2283, -0.6686,  ..., -0.9118, -0.2869, -0.2782],
        ...,
        [-0.7545, -0.5884, -0.9879,  ..., -1.2760, -0.6275, -0.6633],
        [-0.5565, -0.2283, -0.6686,  ..., -0.9118, -0.2869, -0.2782],
        [-0.5565, -0.2283, -0.6686,  ..., -0.9118, -0.2869, -0.2782]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 64,   5,  76, 136, 135, 109, 162, 114], device='cuda:0')


117it [00:57,  2.03it/s]

tensor([[-0.5884, -0.2503, -0.6517,  ..., -0.9536, -0.3069, -0.3007],
        [-0.6357, -0.3615, -0.7471,  ..., -1.0653, -0.4131, -0.4223],
        [-0.6183, -0.2821, -0.6704,  ..., -0.9872, -0.3350, -0.3352],
        ...,
        [-0.5639, -0.2251, -0.6344,  ..., -0.9269, -0.2849, -0.2735],
        [-0.7217, -0.4759, -0.8321,  ..., -1.1844, -0.5196, -0.5465],
        [-0.5693, -0.2307, -0.6377,  ..., -0.9328, -0.2899, -0.2796]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 97,  86,   8,  96,  60, 117, 133,  96], device='cuda:0')


118it [00:58,  2.03it/s]

tensor([[-0.5750, -0.2270, -0.6065,  ..., -0.9450, -0.2874, -0.2746],
        [-0.6137, -0.2677, -0.6300,  ..., -0.9879, -0.3223, -0.3194],
        [-0.5978, -0.2520, -0.6194,  ..., -0.9713, -0.3096, -0.3026],
        ...,
        [-0.6906, -0.4404, -0.7697,  ..., -1.1620, -0.4862, -0.5114],
        [-0.6055, -0.2589, -0.6228,  ..., -0.9787, -0.3155, -0.3102],
        [-0.5854, -0.2379, -0.6132,  ..., -0.9565, -0.2968, -0.2865]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([130,  19,  58,  38,  61, 153, 135, 154], device='cuda:0')


119it [00:58,  2.03it/s]

tensor([[-0.5880, -0.2321, -0.5832,  ..., -0.9644, -0.2927, -0.2795],
        [-0.6692, -0.3295, -0.6313,  ..., -1.0657, -0.3787, -0.3911],
        [-0.5813, -0.2250, -0.5802,  ..., -0.9569, -0.2865, -0.2714],
        ...,
        [-0.5813, -0.2250, -0.5802,  ..., -0.9569, -0.2865, -0.2714],
        [-0.5813, -0.2250, -0.5802,  ..., -0.9569, -0.2865, -0.2714],
        [-0.9887, -0.9015, -1.0573,  ..., -1.6453, -0.9027, -1.0321]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 61,  73, 133,  10, 110,  84, 169,   2], device='cuda:0')


120it [00:59,  2.03it/s]

tensor([[-0.6320, -0.2833, -0.5522,  ..., -1.0293, -0.3144, -0.3388],
        [-0.5911, -0.2276, -0.5314,  ..., -0.9718, -0.2647, -0.2734],
        [-0.6106, -0.2488, -0.5381,  ..., -0.9939, -0.2830, -0.2981],
        ...,
        [-0.6719, -0.3778, -0.6147,  ..., -1.1241, -0.4025, -0.4465],
        [-0.5911, -0.2276, -0.5314,  ..., -0.9718, -0.2647, -0.2734],
        [-0.6893, -0.3863, -0.6074,  ..., -1.1340, -0.4089, -0.4576]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([132,  67, 157,  25,  61,  48,  41, 140], device='cuda:0')


121it [00:59,  2.03it/s]

tensor([[-0.6014, -0.2317, -0.4889,  ..., -0.9866, -0.2467, -0.2774],
        [-0.6014, -0.2317, -0.4889,  ..., -0.9866, -0.2467, -0.2774],
        [-0.6014, -0.2317, -0.4889,  ..., -0.9866, -0.2467, -0.2774],
        ...,
        [-0.7266, -0.4098, -0.5414,  ..., -1.1685, -0.4052, -0.4880],
        [-0.7603, -0.4770, -0.5675,  ..., -1.2365, -0.4673, -0.5679],
        [-0.6832, -0.3638, -0.5314,  ..., -1.1213, -0.3660, -0.4344]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 92,  23,  95,  38,  68, 154,   9, 140], device='cuda:0')


122it [01:00,  2.03it/s]

tensor([[-0.6123, -0.2372, -0.4523,  ..., -1.0013, -0.2323, -0.2830],
        [-0.6779, -0.3393, -0.4719,  ..., -1.1049, -0.3239, -0.4066],
        [-0.6123, -0.2372, -0.4523,  ..., -1.0013, -0.2323, -0.2830],
        ...,
        [-0.8155, -0.5117, -0.4939,  ..., -1.2807, -0.4738, -0.6146],
        [-0.6123, -0.2372, -0.4523,  ..., -1.0013, -0.2323, -0.2830],
        [-0.6871, -0.3322, -0.4617,  ..., -1.0981, -0.3158, -0.3987]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([119,  21,  19,  58,  54, 117,  55, 160], device='cuda:0')


123it [01:00,  2.02it/s]

tensor([[-0.6904, -0.3281, -0.4231,  ..., -1.1010, -0.2942, -0.3936],
        [-0.6963, -0.3277, -0.4178,  ..., -1.1009, -0.2940, -0.3942],
        [-0.6237, -0.2440, -0.4210,  ..., -1.0159, -0.2212, -0.2901],
        ...,
        [-0.7334, -0.4127, -0.4402,  ..., -1.1860, -0.3717, -0.4964],
        [-0.6237, -0.2440, -0.4210,  ..., -1.0159, -0.2212, -0.2901],
        [-0.6408, -0.2637, -0.4203,  ..., -1.0359, -0.2383, -0.3145]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 53,   4, 162, 120, 127,  85, 162, 170], device='cuda:0')


124it [01:01,  2.00it/s]

tensor([[-0.6377, -0.2546, -0.3945,  ..., -1.0331, -0.2153, -0.2728],
        [-0.6656, -0.2930, -0.3937,  ..., -1.0717, -0.2494, -0.3178],
        [-0.7225, -0.4002, -0.4050,  ..., -1.1789, -0.3465, -0.4445],
        ...,
        [-0.7418, -0.3980, -0.3917,  ..., -1.1771, -0.3425, -0.4408],
        [-0.6355, -0.2520, -0.3947,  ..., -1.0305, -0.2130, -0.2697],
        [-0.6853, -0.3154, -0.3912,  ..., -1.0942, -0.2688, -0.3438]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([139,  67, 155, 100,  22,  91, 142,  16], device='cuda:0')


125it [01:01,  2.01it/s]

tensor([[-0.6564, -0.2714, -0.3715,  ..., -1.0553, -0.2166, -0.2648],
        [-0.6560, -0.2709, -0.3715,  ..., -1.0549, -0.2162, -0.2643],
        [-0.7594, -0.4459, -0.3749,  ..., -1.2293, -0.3749, -0.4642],
        ...,
        [-0.6477, -0.2610, -0.3729,  ..., -1.0449, -0.2075, -0.2534],
        [-0.6477, -0.2610, -0.3729,  ..., -1.0449, -0.2075, -0.2534],
        [-0.6477, -0.2610, -0.3729,  ..., -1.0449, -0.2075, -0.2534]],
       device='cuda:0', grad_fn=<AddmmBackward>) tensor([ 67,  14,  43,  68, 137, 140, 141,  91], device='cuda:0')


In [35]:
DEVICE

device(type='cuda')