# Dataloading 01

In this notebook, we'll figure out how to use PyTorch's DataLoader class to load our massive files without reading the entirety of them into memory

In [3]:
import comet_ml
import dask.dataframe as dd
import pandas as pd 
import torch
import linecache 
import csv
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import torch.nn.functional as F
import sys, os
from pathlib import Path
import plotly.express as px 
from sklearn.utils.class_weight import compute_class_weight
import torch

sys.path.append('../src')
sys.path.append('../src/models/lib')
here = Path().cwd()

We'll first design a custom dataset to use with PyTorch's `DataLoader` class

In [5]:
from models.lib.neural import *

In [6]:
primary = GeneExpressionData(
    filename='../data/processed/primary.csv',
    labelname='../data/processed/meta_primary_labels.csv',
    class_label='Subtype'
)

In [8]:
class Model(nn.Module):
    def __init__(self, inputs, outputs):
        super(Model, self).__init__()

        layers = [
            nn.Linear(1024, 1024),
            nn.ReLU(),
            # nn.Dropout(0.5),
            nn.BatchNorm1d(1024, 1024),
        ]

        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(inputs, 1024),
            *layers,
            nn.Linear(1024, outputs),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [11]:
model = Model(
    inputs = primary.num_features(),
    outputs = primary.num_labels(),
)

In [19]:
train_size = int(0.80 * len(primary))
test_size = len(primary) - train_size

train, test = torch.utils.data.random_split(primary, [train_size, test_size])
trainloader = DataLoader(
    train, 
    batch_size=2,
)

validloader = DataLoader(
    test, 
    batch_size=2, 
)

In [23]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)


In [25]:
epochs = 5
min_valid_loss = np.inf

for e in range(epochs):
    train_loss = 0.0
    model.train()     # Optional when not using Model Specific layer
    for data, labels in trainloader:
        optimizer.zero_grad()
        target = model(data)
        loss = criterion(target,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    valid_loss = 0.0
    model.eval() # Optional when not using Model Specific layer
    for data, labels in validloader:
        target = model(data)
        loss = criterion(target,labels)
        valid_loss = loss.item() * data.size(0)

    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainloader)} \t\t Validation Loss: {valid_loss / len(validloader)}')
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model.pth')

tensor(3.3497, grad_fn=<NllLossBackward0>)
tensor(3.4142, grad_fn=<NllLossBackward0>)
tensor(3.3813, grad_fn=<NllLossBackward0>)
tensor(3.3865, grad_fn=<NllLossBackward0>)
tensor(3.4073, grad_fn=<NllLossBackward0>)
tensor(3.3748, grad_fn=<NllLossBackward0>)
tensor(3.3703, grad_fn=<NllLossBackward0>)
tensor(3.4311, grad_fn=<NllLossBackward0>)
tensor(3.3937, grad_fn=<NllLossBackward0>)
tensor(3.3895, grad_fn=<NllLossBackward0>)
tensor(3.3704, grad_fn=<NllLossBackward0>)
tensor(3.3707, grad_fn=<NllLossBackward0>)
tensor(3.4057, grad_fn=<NllLossBackward0>)
tensor(3.3983, grad_fn=<NllLossBackward0>)
tensor(3.3866, grad_fn=<NllLossBackward0>)
tensor(3.3999, grad_fn=<NllLossBackward0>)
tensor(3.4105, grad_fn=<NllLossBackward0>)
tensor(3.3919, grad_fn=<NllLossBackward0>)
tensor(3.3900, grad_fn=<NllLossBackward0>)
tensor(3.3581, grad_fn=<NllLossBackward0>)
tensor(3.3828, grad_fn=<NllLossBackward0>)
tensor(3.3731, grad_fn=<NllLossBackward0>)
tensor(3.3738, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(3.3541, grad_fn=<NllLossBackward0>)
tensor(3.4143, grad_fn=<NllLossBackward0>)
tensor(3.4109, grad_fn=<NllLossBackward0>)
tensor(3.3859, grad_fn=<NllLossBackward0>)
tensor(3.3671, grad_fn=<NllLossBackward0>)
tensor(3.3136, grad_fn=<NllLossBackward0>)
tensor(3.3513, grad_fn=<NllLossBackward0>)
tensor(3.3585, grad_fn=<NllLossBackward0>)
tensor(3.3640, grad_fn=<NllLossBackward0>)
tensor(3.3557, grad_fn=<NllLossBackward0>)
tensor(3.3499, grad_fn=<NllLossBackward0>)
tensor(3.3748, grad_fn=<NllLossBackward0>)
tensor(3.3777, grad_fn=<NllLossBackward0>)
tensor(3.3579, grad_fn=<NllLossBackward0>)
tensor(3.3933, grad_fn=<NllLossBackward0>)
tensor(3.3531, grad_fn=<NllLossBackward0>)
tensor(3.3352, grad_fn=<NllLossBackward0>)
tensor(3.3612, grad_fn=<NllLossBackward0>)
tensor(3.3883, grad_fn=<NllLossBackward0>)
tensor(3.3565, grad_fn=<NllLossBackward0>)
tensor(3.3556, grad_fn=<NllLossBackward0>)
tensor(3.3393, grad_fn=<NllLossBackward0>)
tensor(3.4001, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(3.3493, grad_fn=<NllLossBackward0>)
tensor(3.3040, grad_fn=<NllLossBackward0>)
tensor(3.3192, grad_fn=<NllLossBackward0>)
tensor(3.3744, grad_fn=<NllLossBackward0>)
tensor(3.3718, grad_fn=<NllLossBackward0>)
tensor(3.3504, grad_fn=<NllLossBackward0>)
tensor(3.3348, grad_fn=<NllLossBackward0>)
tensor(3.3419, grad_fn=<NllLossBackward0>)
tensor(3.3102, grad_fn=<NllLossBackward0>)
tensor(3.3990, grad_fn=<NllLossBackward0>)
tensor(3.3984, grad_fn=<NllLossBackward0>)
tensor(3.3017, grad_fn=<NllLossBackward0>)
tensor(3.3512, grad_fn=<NllLossBackward0>)
tensor(3.3917, grad_fn=<NllLossBackward0>)
tensor(3.3378, grad_fn=<NllLossBackward0>)
tensor(3.3010, grad_fn=<NllLossBackward0>)
tensor(3.3826, grad_fn=<NllLossBackward0>)
tensor(3.3303, grad_fn=<NllLossBackward0>)
tensor(3.3993, grad_fn=<NllLossBackward0>)
tensor(3.3419, grad_fn=<NllLossBackward0>)
tensor(3.3955, grad_fn=<NllLossBackward0>)
tensor(3.3770, grad_fn=<NllLossBackward0>)
tensor(3.3952, grad_fn=<NllLossBackward0>)
tensor(3.33

tensor(3.3945, grad_fn=<NllLossBackward0>)
tensor(3.2781, grad_fn=<NllLossBackward0>)
tensor(3.4013, grad_fn=<NllLossBackward0>)
tensor(3.3872, grad_fn=<NllLossBackward0>)
tensor(3.3891, grad_fn=<NllLossBackward0>)
tensor(3.3787, grad_fn=<NllLossBackward0>)
tensor(3.3559, grad_fn=<NllLossBackward0>)
tensor(3.3117, grad_fn=<NllLossBackward0>)
tensor(3.3114, grad_fn=<NllLossBackward0>)
tensor(3.4067, grad_fn=<NllLossBackward0>)
tensor(3.3396, grad_fn=<NllLossBackward0>)
tensor(3.4316, grad_fn=<NllLossBackward0>)
tensor(3.3444, grad_fn=<NllLossBackward0>)
tensor(3.3273, grad_fn=<NllLossBackward0>)
tensor(3.3514, grad_fn=<NllLossBackward0>)
tensor(3.3722, grad_fn=<NllLossBackward0>)
tensor(3.3954, grad_fn=<NllLossBackward0>)
tensor(3.3790, grad_fn=<NllLossBackward0>)
tensor(3.2757, grad_fn=<NllLossBackward0>)
tensor(3.3900, grad_fn=<NllLossBackward0>)
tensor(3.2428, grad_fn=<NllLossBackward0>)
tensor(3.3161, grad_fn=<NllLossBackward0>)
tensor(3.3403, grad_fn=<NllLossBackward0>)
tensor(3.40

tensor(3.3477, grad_fn=<NllLossBackward0>)
tensor(3.2958, grad_fn=<NllLossBackward0>)
tensor(3.3789, grad_fn=<NllLossBackward0>)
tensor(3.3174, grad_fn=<NllLossBackward0>)
tensor(3.3697, grad_fn=<NllLossBackward0>)
tensor(3.2638, grad_fn=<NllLossBackward0>)
tensor(3.3427, grad_fn=<NllLossBackward0>)
tensor(3.3468, grad_fn=<NllLossBackward0>)
tensor(3.4099, grad_fn=<NllLossBackward0>)
tensor(3.3309, grad_fn=<NllLossBackward0>)
tensor(3.3572, grad_fn=<NllLossBackward0>)
tensor(3.3260, grad_fn=<NllLossBackward0>)
tensor(3.3087, grad_fn=<NllLossBackward0>)
tensor(3.3166, grad_fn=<NllLossBackward0>)
tensor(3.3692, grad_fn=<NllLossBackward0>)
tensor(3.3991, grad_fn=<NllLossBackward0>)
tensor(3.2896, grad_fn=<NllLossBackward0>)
tensor(3.3009, grad_fn=<NllLossBackward0>)
tensor(3.3515, grad_fn=<NllLossBackward0>)
tensor(3.3835, grad_fn=<NllLossBackward0>)
tensor(3.3855, grad_fn=<NllLossBackward0>)
tensor(3.4067, grad_fn=<NllLossBackward0>)
tensor(3.3769, grad_fn=<NllLossBackward0>)
tensor(3.32

tensor(3.3902, grad_fn=<NllLossBackward0>)
tensor(3.3713, grad_fn=<NllLossBackward0>)
tensor(3.4261, grad_fn=<NllLossBackward0>)
tensor(3.2910, grad_fn=<NllLossBackward0>)
tensor(3.3756, grad_fn=<NllLossBackward0>)
tensor(3.3397, grad_fn=<NllLossBackward0>)
tensor(3.3275, grad_fn=<NllLossBackward0>)
tensor(3.4425, grad_fn=<NllLossBackward0>)
tensor(3.3258, grad_fn=<NllLossBackward0>)
tensor(3.3332, grad_fn=<NllLossBackward0>)
tensor(3.3835, grad_fn=<NllLossBackward0>)
tensor(3.2380, grad_fn=<NllLossBackward0>)
tensor(3.3347, grad_fn=<NllLossBackward0>)
tensor(3.3685, grad_fn=<NllLossBackward0>)
tensor(3.3213, grad_fn=<NllLossBackward0>)
tensor(3.2867, grad_fn=<NllLossBackward0>)
tensor(3.4089, grad_fn=<NllLossBackward0>)
tensor(3.3727, grad_fn=<NllLossBackward0>)
tensor(3.3581, grad_fn=<NllLossBackward0>)
tensor(3.3466, grad_fn=<NllLossBackward0>)
tensor(3.2778, grad_fn=<NllLossBackward0>)
tensor(3.3943, grad_fn=<NllLossBackward0>)
tensor(3.3381, grad_fn=<NllLossBackward0>)
tensor(3.40

tensor(3.3209, grad_fn=<NllLossBackward0>)
tensor(3.3997, grad_fn=<NllLossBackward0>)
tensor(3.2618, grad_fn=<NllLossBackward0>)
tensor(3.3057, grad_fn=<NllLossBackward0>)
tensor(3.3764, grad_fn=<NllLossBackward0>)
tensor(3.2427, grad_fn=<NllLossBackward0>)
tensor(3.2657, grad_fn=<NllLossBackward0>)
tensor(3.3772, grad_fn=<NllLossBackward0>)
tensor(3.3248, grad_fn=<NllLossBackward0>)
tensor(3.3735, grad_fn=<NllLossBackward0>)
tensor(3.3241, grad_fn=<NllLossBackward0>)
tensor(3.4016, grad_fn=<NllLossBackward0>)
tensor(3.1536, grad_fn=<NllLossBackward0>)
tensor(3.4080, grad_fn=<NllLossBackward0>)
tensor(3.3760, grad_fn=<NllLossBackward0>)
tensor(3.3522, grad_fn=<NllLossBackward0>)
tensor(3.3580, grad_fn=<NllLossBackward0>)
tensor(3.3935, grad_fn=<NllLossBackward0>)
tensor(3.4353, grad_fn=<NllLossBackward0>)
tensor(3.3084, grad_fn=<NllLossBackward0>)
tensor(3.4161, grad_fn=<NllLossBackward0>)
tensor(3.2668, grad_fn=<NllLossBackward0>)
tensor(3.2400, grad_fn=<NllLossBackward0>)
tensor(3.37

tensor(3.1164, grad_fn=<NllLossBackward0>)
tensor(3.3732, grad_fn=<NllLossBackward0>)
tensor(3.2739, grad_fn=<NllLossBackward0>)
tensor(3.3456, grad_fn=<NllLossBackward0>)
tensor(3.2393, grad_fn=<NllLossBackward0>)
tensor(3.3232, grad_fn=<NllLossBackward0>)
tensor(3.3194, grad_fn=<NllLossBackward0>)
tensor(3.3226, grad_fn=<NllLossBackward0>)
tensor(3.2462, grad_fn=<NllLossBackward0>)
tensor(3.3141, grad_fn=<NllLossBackward0>)
tensor(3.3525, grad_fn=<NllLossBackward0>)
tensor(3.3837, grad_fn=<NllLossBackward0>)
tensor(3.3231, grad_fn=<NllLossBackward0>)
tensor(3.2175, grad_fn=<NllLossBackward0>)
tensor(3.3516, grad_fn=<NllLossBackward0>)
tensor(3.3106, grad_fn=<NllLossBackward0>)
tensor(3.1132, grad_fn=<NllLossBackward0>)
tensor(3.3541, grad_fn=<NllLossBackward0>)
tensor(3.3133, grad_fn=<NllLossBackward0>)
tensor(3.2561, grad_fn=<NllLossBackward0>)
tensor(3.3644, grad_fn=<NllLossBackward0>)
tensor(3.2716, grad_fn=<NllLossBackward0>)
tensor(3.1115, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(3.2190, grad_fn=<NllLossBackward0>)
tensor(3.4221, grad_fn=<NllLossBackward0>)
tensor(3.2193, grad_fn=<NllLossBackward0>)
tensor(3.2436, grad_fn=<NllLossBackward0>)
tensor(3.2830, grad_fn=<NllLossBackward0>)
tensor(3.4511, grad_fn=<NllLossBackward0>)
tensor(3.3999, grad_fn=<NllLossBackward0>)
tensor(3.3859, grad_fn=<NllLossBackward0>)
tensor(3.3970, grad_fn=<NllLossBackward0>)
tensor(3.3843, grad_fn=<NllLossBackward0>)
tensor(3.3918, grad_fn=<NllLossBackward0>)
tensor(3.2193, grad_fn=<NllLossBackward0>)
tensor(3.3025, grad_fn=<NllLossBackward0>)
tensor(3.3170, grad_fn=<NllLossBackward0>)
tensor(3.2169, grad_fn=<NllLossBackward0>)
tensor(3.4047, grad_fn=<NllLossBackward0>)
tensor(3.1620, grad_fn=<NllLossBackward0>)
tensor(3.3140, grad_fn=<NllLossBackward0>)
tensor(3.2430, grad_fn=<NllLossBackward0>)
tensor(3.2767, grad_fn=<NllLossBackward0>)
tensor(3.2172, grad_fn=<NllLossBackward0>)
tensor(3.3872, grad_fn=<NllLossBackward0>)
tensor(3.4096, grad_fn=<NllLossBackward0>)
tensor(3.41

tensor(3.2139, grad_fn=<NllLossBackward0>)
tensor(3.3716, grad_fn=<NllLossBackward0>)
tensor(3.3827, grad_fn=<NllLossBackward0>)
tensor(3.2910, grad_fn=<NllLossBackward0>)
tensor(3.3114, grad_fn=<NllLossBackward0>)
tensor(3.3343, grad_fn=<NllLossBackward0>)
tensor(3.2053, grad_fn=<NllLossBackward0>)
tensor(3.4184, grad_fn=<NllLossBackward0>)
tensor(3.4257, grad_fn=<NllLossBackward0>)
tensor(3.4102, grad_fn=<NllLossBackward0>)
tensor(3.2553, grad_fn=<NllLossBackward0>)
tensor(3.4164, grad_fn=<NllLossBackward0>)
tensor(3.2128, grad_fn=<NllLossBackward0>)
tensor(3.3770, grad_fn=<NllLossBackward0>)
tensor(3.2960, grad_fn=<NllLossBackward0>)
tensor(3.3657, grad_fn=<NllLossBackward0>)
tensor(3.3932, grad_fn=<NllLossBackward0>)
tensor(3.2446, grad_fn=<NllLossBackward0>)
tensor(3.2409, grad_fn=<NllLossBackward0>)
tensor(3.4170, grad_fn=<NllLossBackward0>)
tensor(3.4123, grad_fn=<NllLossBackward0>)
tensor(3.1948, grad_fn=<NllLossBackward0>)
tensor(3.4131, grad_fn=<NllLossBackward0>)
tensor(3.36

tensor(3.4247, grad_fn=<NllLossBackward0>)
tensor(3.2973, grad_fn=<NllLossBackward0>)
tensor(3.1865, grad_fn=<NllLossBackward0>)
tensor(3.0115, grad_fn=<NllLossBackward0>)
tensor(3.4303, grad_fn=<NllLossBackward0>)
tensor(3.4204, grad_fn=<NllLossBackward0>)
tensor(3.2330, grad_fn=<NllLossBackward0>)
tensor(3.3469, grad_fn=<NllLossBackward0>)
tensor(3.3006, grad_fn=<NllLossBackward0>)
tensor(3.3235, grad_fn=<NllLossBackward0>)
tensor(3.4190, grad_fn=<NllLossBackward0>)
tensor(3.4267, grad_fn=<NllLossBackward0>)
tensor(3.3724, grad_fn=<NllLossBackward0>)
tensor(3.0100, grad_fn=<NllLossBackward0>)
tensor(3.1956, grad_fn=<NllLossBackward0>)
tensor(3.1145, grad_fn=<NllLossBackward0>)
tensor(3.4274, grad_fn=<NllLossBackward0>)
tensor(3.3384, grad_fn=<NllLossBackward0>)
tensor(3.4143, grad_fn=<NllLossBackward0>)
tensor(3.0076, grad_fn=<NllLossBackward0>)
tensor(3.0063, grad_fn=<NllLossBackward0>)
tensor(3.4080, grad_fn=<NllLossBackward0>)
tensor(3.3332, grad_fn=<NllLossBackward0>)
tensor(3.22

tensor(3.3503, grad_fn=<NllLossBackward0>)
tensor(3.0853, grad_fn=<NllLossBackward0>)
tensor(3.3135, grad_fn=<NllLossBackward0>)
tensor(3.2753, grad_fn=<NllLossBackward0>)
tensor(3.3264, grad_fn=<NllLossBackward0>)
tensor(3.3183, grad_fn=<NllLossBackward0>)
tensor(3.1592, grad_fn=<NllLossBackward0>)
tensor(3.3403, grad_fn=<NllLossBackward0>)
tensor(3.2910, grad_fn=<NllLossBackward0>)
tensor(3.2137, grad_fn=<NllLossBackward0>)
tensor(3.2737, grad_fn=<NllLossBackward0>)
tensor(3.4834, grad_fn=<NllLossBackward0>)
tensor(3.2872, grad_fn=<NllLossBackward0>)
tensor(3.1940, grad_fn=<NllLossBackward0>)
tensor(3.2452, grad_fn=<NllLossBackward0>)
tensor(3.3520, grad_fn=<NllLossBackward0>)
tensor(3.3318, grad_fn=<NllLossBackward0>)
tensor(3.1813, grad_fn=<NllLossBackward0>)
tensor(3.3536, grad_fn=<NllLossBackward0>)
tensor(3.4201, grad_fn=<NllLossBackward0>)
tensor(3.4141, grad_fn=<NllLossBackward0>)
tensor(3.1761, grad_fn=<NllLossBackward0>)
tensor(3.4092, grad_fn=<NllLossBackward0>)
tensor(3.21

tensor(3.3604, grad_fn=<NllLossBackward0>)
tensor(3.1634, grad_fn=<NllLossBackward0>)
tensor(3.3674, grad_fn=<NllLossBackward0>)
tensor(3.3323, grad_fn=<NllLossBackward0>)
tensor(3.2155, grad_fn=<NllLossBackward0>)
tensor(3.4267, grad_fn=<NllLossBackward0>)
tensor(3.3443, grad_fn=<NllLossBackward0>)
tensor(3.3852, grad_fn=<NllLossBackward0>)
tensor(3.1429, grad_fn=<NllLossBackward0>)
tensor(3.0672, grad_fn=<NllLossBackward0>)
tensor(3.2842, grad_fn=<NllLossBackward0>)
tensor(3.4179, grad_fn=<NllLossBackward0>)
tensor(3.3527, grad_fn=<NllLossBackward0>)
tensor(3.3624, grad_fn=<NllLossBackward0>)
tensor(3.4313, grad_fn=<NllLossBackward0>)
tensor(3.2591, grad_fn=<NllLossBackward0>)
tensor(3.2715, grad_fn=<NllLossBackward0>)
tensor(3.2148, grad_fn=<NllLossBackward0>)
tensor(3.3487, grad_fn=<NllLossBackward0>)
tensor(3.4881, grad_fn=<NllLossBackward0>)
tensor(3.0580, grad_fn=<NllLossBackward0>)
tensor(3.1813, grad_fn=<NllLossBackward0>)
tensor(3.1443, grad_fn=<NllLossBackward0>)
tensor(3.14

tensor(3.4149, grad_fn=<NllLossBackward0>)
tensor(3.3426, grad_fn=<NllLossBackward0>)
tensor(3.1517, grad_fn=<NllLossBackward0>)
tensor(3.0444, grad_fn=<NllLossBackward0>)
tensor(3.2276, grad_fn=<NllLossBackward0>)
tensor(3.0431, grad_fn=<NllLossBackward0>)
tensor(3.1370, grad_fn=<NllLossBackward0>)
tensor(3.3277, grad_fn=<NllLossBackward0>)
tensor(3.1175, grad_fn=<NllLossBackward0>)
tensor(3.1292, grad_fn=<NllLossBackward0>)
tensor(3.2549, grad_fn=<NllLossBackward0>)
tensor(3.3406, grad_fn=<NllLossBackward0>)
tensor(3.1174, grad_fn=<NllLossBackward0>)
tensor(3.2170, grad_fn=<NllLossBackward0>)
tensor(3.1888, grad_fn=<NllLossBackward0>)
tensor(3.2656, grad_fn=<NllLossBackward0>)
tensor(3.4489, grad_fn=<NllLossBackward0>)
tensor(3.1713, grad_fn=<NllLossBackward0>)
tensor(3.4473, grad_fn=<NllLossBackward0>)
tensor(3.2787, grad_fn=<NllLossBackward0>)
tensor(3.1236, grad_fn=<NllLossBackward0>)
tensor(3.2666, grad_fn=<NllLossBackward0>)
tensor(3.0412, grad_fn=<NllLossBackward0>)
tensor(3.28

tensor(3.1774, grad_fn=<NllLossBackward0>)
tensor(3.4118, grad_fn=<NllLossBackward0>)
tensor(3.2069, grad_fn=<NllLossBackward0>)
tensor(3.2830, grad_fn=<NllLossBackward0>)
tensor(3.2324, grad_fn=<NllLossBackward0>)
tensor(3.1308, grad_fn=<NllLossBackward0>)
tensor(3.3090, grad_fn=<NllLossBackward0>)
tensor(3.3293, grad_fn=<NllLossBackward0>)
tensor(3.1770, grad_fn=<NllLossBackward0>)
tensor(3.0967, grad_fn=<NllLossBackward0>)
tensor(3.4665, grad_fn=<NllLossBackward0>)
tensor(3.3690, grad_fn=<NllLossBackward0>)
tensor(3.0979, grad_fn=<NllLossBackward0>)
tensor(3.2487, grad_fn=<NllLossBackward0>)
tensor(3.0952, grad_fn=<NllLossBackward0>)
tensor(3.0960, grad_fn=<NllLossBackward0>)
tensor(3.3503, grad_fn=<NllLossBackward0>)
tensor(3.2610, grad_fn=<NllLossBackward0>)
tensor(3.1070, grad_fn=<NllLossBackward0>)
tensor(3.1211, grad_fn=<NllLossBackward0>)
tensor(3.2471, grad_fn=<NllLossBackward0>)
tensor(3.3682, grad_fn=<NllLossBackward0>)
tensor(3.4265, grad_fn=<NllLossBackward0>)
tensor(3.34

tensor(3.1021, grad_fn=<NllLossBackward0>)
tensor(3.4231, grad_fn=<NllLossBackward0>)
tensor(2.9744, grad_fn=<NllLossBackward0>)
tensor(3.1012, grad_fn=<NllLossBackward0>)
tensor(3.4043, grad_fn=<NllLossBackward0>)
tensor(2.9734, grad_fn=<NllLossBackward0>)
tensor(3.3851, grad_fn=<NllLossBackward0>)
tensor(3.3565, grad_fn=<NllLossBackward0>)
tensor(3.3458, grad_fn=<NllLossBackward0>)
tensor(3.0994, grad_fn=<NllLossBackward0>)
tensor(3.3533, grad_fn=<NllLossBackward0>)
tensor(3.2333, grad_fn=<NllLossBackward0>)
tensor(3.2388, grad_fn=<NllLossBackward0>)
tensor(3.3784, grad_fn=<NllLossBackward0>)
tensor(3.0725, grad_fn=<NllLossBackward0>)
tensor(3.4227, grad_fn=<NllLossBackward0>)
tensor(2.9726, grad_fn=<NllLossBackward0>)
tensor(3.0920, grad_fn=<NllLossBackward0>)
tensor(3.3024, grad_fn=<NllLossBackward0>)
tensor(3.1755, grad_fn=<NllLossBackward0>)
tensor(2.9909, grad_fn=<NllLossBackward0>)
tensor(3.3800, grad_fn=<NllLossBackward0>)
tensor(2.8251, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(3.3139, grad_fn=<NllLossBackward0>)
tensor(3.3241, grad_fn=<NllLossBackward0>)
tensor(3.3406, grad_fn=<NllLossBackward0>)
tensor(3.3798, grad_fn=<NllLossBackward0>)
tensor(3.3560, grad_fn=<NllLossBackward0>)
tensor(3.2314, grad_fn=<NllLossBackward0>)
tensor(3.1455, grad_fn=<NllLossBackward0>)
tensor(3.2253, grad_fn=<NllLossBackward0>)
tensor(3.0672, grad_fn=<NllLossBackward0>)
tensor(3.3515, grad_fn=<NllLossBackward0>)
tensor(3.3555, grad_fn=<NllLossBackward0>)
tensor(3.2353, grad_fn=<NllLossBackward0>)
tensor(2.7946, grad_fn=<NllLossBackward0>)
tensor(3.0672, grad_fn=<NllLossBackward0>)
tensor(3.2007, grad_fn=<NllLossBackward0>)
tensor(3.2306, grad_fn=<NllLossBackward0>)
tensor(3.2563, grad_fn=<NllLossBackward0>)
tensor(3.1671, grad_fn=<NllLossBackward0>)
tensor(3.3103, grad_fn=<NllLossBackward0>)
tensor(3.3191, grad_fn=<NllLossBackward0>)
tensor(3.2933, grad_fn=<NllLossBackward0>)
tensor(3.0227, grad_fn=<NllLossBackward0>)
tensor(3.3142, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(3.4534, grad_fn=<NllLossBackward0>)
tensor(3.2767, grad_fn=<NllLossBackward0>)
tensor(3.1872, grad_fn=<NllLossBackward0>)
tensor(3.3573, grad_fn=<NllLossBackward0>)
tensor(3.0473, grad_fn=<NllLossBackward0>)
tensor(2.9980, grad_fn=<NllLossBackward0>)
tensor(3.3322, grad_fn=<NllLossBackward0>)
tensor(3.4569, grad_fn=<NllLossBackward0>)
tensor(3.3320, grad_fn=<NllLossBackward0>)
tensor(3.4367, grad_fn=<NllLossBackward0>)
tensor(3.3880, grad_fn=<NllLossBackward0>)
tensor(3.4365, grad_fn=<NllLossBackward0>)
tensor(2.7527, grad_fn=<NllLossBackward0>)
tensor(3.2416, grad_fn=<NllLossBackward0>)
tensor(3.3976, grad_fn=<NllLossBackward0>)
tensor(3.4087, grad_fn=<NllLossBackward0>)
tensor(3.2978, grad_fn=<NllLossBackward0>)
tensor(3.0695, grad_fn=<NllLossBackward0>)
tensor(3.4298, grad_fn=<NllLossBackward0>)
tensor(3.1051, grad_fn=<NllLossBackward0>)
tensor(3.2959, grad_fn=<NllLossBackward0>)
tensor(3.2821, grad_fn=<NllLossBackward0>)
tensor(3.1868, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(3.1540, grad_fn=<NllLossBackward0>)
tensor(3.4399, grad_fn=<NllLossBackward0>)
tensor(3.1770, grad_fn=<NllLossBackward0>)
tensor(3.1202, grad_fn=<NllLossBackward0>)
tensor(3.4524, grad_fn=<NllLossBackward0>)
tensor(3.3721, grad_fn=<NllLossBackward0>)
tensor(3.1032, grad_fn=<NllLossBackward0>)
tensor(3.2562, grad_fn=<NllLossBackward0>)
tensor(3.3679, grad_fn=<NllLossBackward0>)
tensor(2.9710, grad_fn=<NllLossBackward0>)
tensor(3.2166, grad_fn=<NllLossBackward0>)
tensor(3.1238, grad_fn=<NllLossBackward0>)
tensor(3.3001, grad_fn=<NllLossBackward0>)
tensor(3.2431, grad_fn=<NllLossBackward0>)
tensor(2.8941, grad_fn=<NllLossBackward0>)
tensor(3.2136, grad_fn=<NllLossBackward0>)
tensor(3.3385, grad_fn=<NllLossBackward0>)
tensor(2.9706, grad_fn=<NllLossBackward0>)
tensor(3.3400, grad_fn=<NllLossBackward0>)
tensor(3.2216, grad_fn=<NllLossBackward0>)
tensor(3.0348, grad_fn=<NllLossBackward0>)
tensor(3.0231, grad_fn=<NllLossBackward0>)
tensor(3.2365, grad_fn=<NllLossBackward0>)
tensor(3.10

tensor(3.2382, grad_fn=<NllLossBackward0>)
tensor(2.9478, grad_fn=<NllLossBackward0>)
tensor(3.4264, grad_fn=<NllLossBackward0>)
tensor(3.4529, grad_fn=<NllLossBackward0>)
tensor(3.3555, grad_fn=<NllLossBackward0>)
tensor(3.1442, grad_fn=<NllLossBackward0>)
tensor(3.0855, grad_fn=<NllLossBackward0>)
tensor(3.3133, grad_fn=<NllLossBackward0>)
tensor(3.5411, grad_fn=<NllLossBackward0>)
tensor(3.5152, grad_fn=<NllLossBackward0>)
tensor(3.2666, grad_fn=<NllLossBackward0>)
tensor(2.9990, grad_fn=<NllLossBackward0>)
tensor(3.1668, grad_fn=<NllLossBackward0>)
tensor(3.3702, grad_fn=<NllLossBackward0>)
tensor(3.3241, grad_fn=<NllLossBackward0>)
tensor(2.9465, grad_fn=<NllLossBackward0>)
tensor(3.3705, grad_fn=<NllLossBackward0>)
tensor(3.0337, grad_fn=<NllLossBackward0>)
tensor(3.2303, grad_fn=<NllLossBackward0>)
tensor(3.3687, grad_fn=<NllLossBackward0>)
tensor(3.2929, grad_fn=<NllLossBackward0>)
tensor(3.1864, grad_fn=<NllLossBackward0>)
tensor(2.6660, grad_fn=<NllLossBackward0>)
tensor(2.88

tensor(2.9995, grad_fn=<NllLossBackward0>)
tensor(3.2228, grad_fn=<NllLossBackward0>)
tensor(3.0761, grad_fn=<NllLossBackward0>)
tensor(3.4792, grad_fn=<NllLossBackward0>)
tensor(2.6283, grad_fn=<NllLossBackward0>)
tensor(3.0727, grad_fn=<NllLossBackward0>)
tensor(3.0537, grad_fn=<NllLossBackward0>)
tensor(2.9792, grad_fn=<NllLossBackward0>)
tensor(3.2019, grad_fn=<NllLossBackward0>)
tensor(3.3474, grad_fn=<NllLossBackward0>)
tensor(3.4636, grad_fn=<NllLossBackward0>)
tensor(3.3046, grad_fn=<NllLossBackward0>)
tensor(3.4405, grad_fn=<NllLossBackward0>)
tensor(3.3331, grad_fn=<NllLossBackward0>)
tensor(3.0737, grad_fn=<NllLossBackward0>)
tensor(3.2936, grad_fn=<NllLossBackward0>)
tensor(3.0135, grad_fn=<NllLossBackward0>)
tensor(3.2791, grad_fn=<NllLossBackward0>)
tensor(2.9972, grad_fn=<NllLossBackward0>)
tensor(3.1720, grad_fn=<NllLossBackward0>)
tensor(3.2901, grad_fn=<NllLossBackward0>)
tensor(3.3357, grad_fn=<NllLossBackward0>)
tensor(2.9211, grad_fn=<NllLossBackward0>)
tensor(3.26

tensor(2.9986, grad_fn=<NllLossBackward0>)
tensor(3.0637, grad_fn=<NllLossBackward0>)
tensor(2.8425, grad_fn=<NllLossBackward0>)
tensor(3.0204, grad_fn=<NllLossBackward0>)
tensor(3.2246, grad_fn=<NllLossBackward0>)
tensor(3.2294, grad_fn=<NllLossBackward0>)
tensor(3.4884, grad_fn=<NllLossBackward0>)
tensor(2.8071, grad_fn=<NllLossBackward0>)
tensor(3.3311, grad_fn=<NllLossBackward0>)
tensor(3.1593, grad_fn=<NllLossBackward0>)
tensor(3.4009, grad_fn=<NllLossBackward0>)
tensor(3.0579, grad_fn=<NllLossBackward0>)
tensor(2.9762, grad_fn=<NllLossBackward0>)
tensor(3.3707, grad_fn=<NllLossBackward0>)
tensor(3.2736, grad_fn=<NllLossBackward0>)
tensor(3.2858, grad_fn=<NllLossBackward0>)
tensor(3.1985, grad_fn=<NllLossBackward0>)
tensor(3.3300, grad_fn=<NllLossBackward0>)
tensor(2.5922, grad_fn=<NllLossBackward0>)
tensor(3.3172, grad_fn=<NllLossBackward0>)
tensor(2.9002, grad_fn=<NllLossBackward0>)
tensor(3.2528, grad_fn=<NllLossBackward0>)
tensor(3.3479, grad_fn=<NllLossBackward0>)
tensor(3.09

tensor(2.9652, grad_fn=<NllLossBackward0>)
tensor(2.9445, grad_fn=<NllLossBackward0>)
tensor(3.4621, grad_fn=<NllLossBackward0>)
tensor(3.0455, grad_fn=<NllLossBackward0>)
tensor(3.1866, grad_fn=<NllLossBackward0>)
tensor(3.3473, grad_fn=<NllLossBackward0>)
tensor(3.3404, grad_fn=<NllLossBackward0>)
tensor(3.5627, grad_fn=<NllLossBackward0>)
tensor(2.5620, grad_fn=<NllLossBackward0>)
tensor(2.7794, grad_fn=<NllLossBackward0>)
tensor(3.3258, grad_fn=<NllLossBackward0>)
tensor(3.3646, grad_fn=<NllLossBackward0>)
tensor(2.5600, grad_fn=<NllLossBackward0>)
tensor(3.0363, grad_fn=<NllLossBackward0>)
tensor(2.5585, grad_fn=<NllLossBackward0>)
tensor(3.3126, grad_fn=<NllLossBackward0>)
tensor(3.1363, grad_fn=<NllLossBackward0>)
tensor(3.4125, grad_fn=<NllLossBackward0>)
tensor(3.2830, grad_fn=<NllLossBackward0>)
tensor(2.7778, grad_fn=<NllLossBackward0>)
tensor(3.4425, grad_fn=<NllLossBackward0>)
tensor(3.3259, grad_fn=<NllLossBackward0>)
tensor(2.8791, grad_fn=<NllLossBackward0>)
tensor(3.46

tensor(3.2964, grad_fn=<NllLossBackward0>)
tensor(3.2432, grad_fn=<NllLossBackward0>)
tensor(3.4871, grad_fn=<NllLossBackward0>)
tensor(3.1730, grad_fn=<NllLossBackward0>)
tensor(2.9109, grad_fn=<NllLossBackward0>)
tensor(3.3683, grad_fn=<NllLossBackward0>)
tensor(2.5208, grad_fn=<NllLossBackward0>)
tensor(2.7514, grad_fn=<NllLossBackward0>)
tensor(3.4204, grad_fn=<NllLossBackward0>)
tensor(3.3260, grad_fn=<NllLossBackward0>)
tensor(2.7511, grad_fn=<NllLossBackward0>)
tensor(3.5511, grad_fn=<NllLossBackward0>)
tensor(2.9460, grad_fn=<NllLossBackward0>)
tensor(2.9588, grad_fn=<NllLossBackward0>)
tensor(3.3098, grad_fn=<NllLossBackward0>)
tensor(3.2441, grad_fn=<NllLossBackward0>)
tensor(3.4034, grad_fn=<NllLossBackward0>)
tensor(3.4402, grad_fn=<NllLossBackward0>)
tensor(2.9378, grad_fn=<NllLossBackward0>)
tensor(3.1298, grad_fn=<NllLossBackward0>)
tensor(2.9380, grad_fn=<NllLossBackward0>)
tensor(3.0660, grad_fn=<NllLossBackward0>)
tensor(3.1912, grad_fn=<NllLossBackward0>)
tensor(3.25

tensor(3.4658, grad_fn=<NllLossBackward0>)
tensor(2.9067, grad_fn=<NllLossBackward0>)
tensor(3.2576, grad_fn=<NllLossBackward0>)
tensor(2.9444, grad_fn=<NllLossBackward0>)
tensor(2.8915, grad_fn=<NllLossBackward0>)
tensor(3.3404, grad_fn=<NllLossBackward0>)
tensor(3.2810, grad_fn=<NllLossBackward0>)
tensor(3.1405, grad_fn=<NllLossBackward0>)
tensor(3.2122, grad_fn=<NllLossBackward0>)
tensor(2.8986, grad_fn=<NllLossBackward0>)
tensor(3.5556, grad_fn=<NllLossBackward0>)
tensor(2.9430, grad_fn=<NllLossBackward0>)
tensor(3.4048, grad_fn=<NllLossBackward0>)
tensor(3.3524, grad_fn=<NllLossBackward0>)
tensor(3.1268, grad_fn=<NllLossBackward0>)
tensor(2.7227, grad_fn=<NllLossBackward0>)
tensor(3.4481, grad_fn=<NllLossBackward0>)
tensor(3.0038, grad_fn=<NllLossBackward0>)
tensor(2.4835, grad_fn=<NllLossBackward0>)
tensor(3.1701, grad_fn=<NllLossBackward0>)
tensor(3.2507, grad_fn=<NllLossBackward0>)
tensor(2.9035, grad_fn=<NllLossBackward0>)
tensor(3.2488, grad_fn=<NllLossBackward0>)
tensor(3.30

tensor(2.9154, grad_fn=<NllLossBackward0>)
tensor(3.3994, grad_fn=<NllLossBackward0>)
tensor(2.8889, grad_fn=<NllLossBackward0>)
tensor(3.4354, grad_fn=<NllLossBackward0>)
tensor(3.3123, grad_fn=<NllLossBackward0>)
tensor(3.3128, grad_fn=<NllLossBackward0>)
tensor(3.2333, grad_fn=<NllLossBackward0>)
tensor(2.8825, grad_fn=<NllLossBackward0>)
tensor(3.2685, grad_fn=<NllLossBackward0>)
tensor(3.1210, grad_fn=<NllLossBackward0>)
tensor(2.8733, grad_fn=<NllLossBackward0>)
tensor(3.3074, grad_fn=<NllLossBackward0>)
tensor(2.9844, grad_fn=<NllLossBackward0>)
tensor(3.3637, grad_fn=<NllLossBackward0>)
tensor(3.3562, grad_fn=<NllLossBackward0>)
tensor(3.3707, grad_fn=<NllLossBackward0>)
tensor(3.4206, grad_fn=<NllLossBackward0>)
tensor(3.2139, grad_fn=<NllLossBackward0>)
tensor(3.1200, grad_fn=<NllLossBackward0>)
tensor(2.4547, grad_fn=<NllLossBackward0>)
tensor(2.8721, grad_fn=<NllLossBackward0>)
tensor(2.9077, grad_fn=<NllLossBackward0>)
tensor(3.2371, grad_fn=<NllLossBackward0>)
tensor(2.69

tensor(3.3699, grad_fn=<NllLossBackward0>)
tensor(2.4159, grad_fn=<NllLossBackward0>)
tensor(3.0432, grad_fn=<NllLossBackward0>)
tensor(3.1543, grad_fn=<NllLossBackward0>)
tensor(3.1439, grad_fn=<NllLossBackward0>)
tensor(3.2240, grad_fn=<NllLossBackward0>)
tensor(2.8560, grad_fn=<NllLossBackward0>)
tensor(3.3693, grad_fn=<NllLossBackward0>)
tensor(2.9225, grad_fn=<NllLossBackward0>)
tensor(3.2446, grad_fn=<NllLossBackward0>)
tensor(2.7149, grad_fn=<NllLossBackward0>)
tensor(3.2903, grad_fn=<NllLossBackward0>)
tensor(3.3232, grad_fn=<NllLossBackward0>)
tensor(3.2115, grad_fn=<NllLossBackward0>)
tensor(2.7888, grad_fn=<NllLossBackward0>)
tensor(3.1501, grad_fn=<NllLossBackward0>)
tensor(3.3683, grad_fn=<NllLossBackward0>)
tensor(3.3720, grad_fn=<NllLossBackward0>)
tensor(3.2957, grad_fn=<NllLossBackward0>)
tensor(2.8524, grad_fn=<NllLossBackward0>)
tensor(3.3039, grad_fn=<NllLossBackward0>)
tensor(2.4134, grad_fn=<NllLossBackward0>)
tensor(3.5036, grad_fn=<NllLossBackward0>)
tensor(3.08

tensor(3.0112, grad_fn=<NllLossBackward0>)
tensor(3.3719, grad_fn=<NllLossBackward0>)
tensor(2.6402, grad_fn=<NllLossBackward0>)
tensor(2.6398, grad_fn=<NllLossBackward0>)
tensor(3.0955, grad_fn=<NllLossBackward0>)
tensor(3.1375, grad_fn=<NllLossBackward0>)
tensor(3.2892, grad_fn=<NllLossBackward0>)
tensor(3.4802, grad_fn=<NllLossBackward0>)
tensor(3.6698, grad_fn=<NllLossBackward0>)
tensor(3.1600, grad_fn=<NllLossBackward0>)
tensor(3.1850, grad_fn=<NllLossBackward0>)
tensor(2.7692, grad_fn=<NllLossBackward0>)
tensor(3.2249, grad_fn=<NllLossBackward0>)
tensor(3.1833, grad_fn=<NllLossBackward0>)
tensor(3.2825, grad_fn=<NllLossBackward0>)
tensor(3.4371, grad_fn=<NllLossBackward0>)
tensor(3.4604, grad_fn=<NllLossBackward0>)
tensor(2.3800, grad_fn=<NllLossBackward0>)
tensor(3.5811, grad_fn=<NllLossBackward0>)
tensor(3.4279, grad_fn=<NllLossBackward0>)
tensor(3.0755, grad_fn=<NllLossBackward0>)
tensor(3.4803, grad_fn=<NllLossBackward0>)
tensor(2.8417, grad_fn=<NllLossBackward0>)
tensor(3.36

tensor(3.4320, grad_fn=<NllLossBackward0>)
tensor(3.1355, grad_fn=<NllLossBackward0>)
tensor(3.1295, grad_fn=<NllLossBackward0>)
tensor(2.7454, grad_fn=<NllLossBackward0>)
tensor(2.8075, grad_fn=<NllLossBackward0>)
tensor(2.8137, grad_fn=<NllLossBackward0>)
tensor(3.1355, grad_fn=<NllLossBackward0>)
tensor(3.0821, grad_fn=<NllLossBackward0>)
tensor(2.8135, grad_fn=<NllLossBackward0>)
tensor(3.0197, grad_fn=<NllLossBackward0>)
tensor(2.9645, grad_fn=<NllLossBackward0>)
tensor(3.2803, grad_fn=<NllLossBackward0>)
tensor(3.2383, grad_fn=<NllLossBackward0>)
tensor(2.8859, grad_fn=<NllLossBackward0>)
tensor(3.4001, grad_fn=<NllLossBackward0>)
tensor(2.3373, grad_fn=<NllLossBackward0>)
tensor(3.4711, grad_fn=<NllLossBackward0>)
tensor(3.1732, grad_fn=<NllLossBackward0>)
tensor(2.9278, grad_fn=<NllLossBackward0>)
tensor(2.6088, grad_fn=<NllLossBackward0>)
tensor(3.1272, grad_fn=<NllLossBackward0>)
tensor(3.2381, grad_fn=<NllLossBackward0>)
tensor(3.4449, grad_fn=<NllLossBackward0>)
tensor(3.12

tensor(2.9904, grad_fn=<NllLossBackward0>)
tensor(2.9562, grad_fn=<NllLossBackward0>)
tensor(3.4311, grad_fn=<NllLossBackward0>)
tensor(3.4644, grad_fn=<NllLossBackward0>)
tensor(3.2730, grad_fn=<NllLossBackward0>)
tensor(3.2685, grad_fn=<NllLossBackward0>)
tensor(3.3178, grad_fn=<NllLossBackward0>)
tensor(3.1292, grad_fn=<NllLossBackward0>)
tensor(3.2320, grad_fn=<NllLossBackward0>)
tensor(3.2306, grad_fn=<NllLossBackward0>)
tensor(3.1592, grad_fn=<NllLossBackward0>)
tensor(3.3003, grad_fn=<NllLossBackward0>)
tensor(3.2271, grad_fn=<NllLossBackward0>)
tensor(2.6312, grad_fn=<NllLossBackward0>)
tensor(3.2994, grad_fn=<NllLossBackward0>)
tensor(3.0473, grad_fn=<NllLossBackward0>)
tensor(2.5894, grad_fn=<NllLossBackward0>)
tensor(3.2645, grad_fn=<NllLossBackward0>)
tensor(3.5580, grad_fn=<NllLossBackward0>)
tensor(2.5860, grad_fn=<NllLossBackward0>)
tensor(3.1270, grad_fn=<NllLossBackward0>)
tensor(2.8705, grad_fn=<NllLossBackward0>)
tensor(3.1680, grad_fn=<NllLossBackward0>)
tensor(3.29

tensor(2.9423, grad_fn=<NllLossBackward0>)
tensor(3.4897, grad_fn=<NllLossBackward0>)
tensor(3.3227, grad_fn=<NllLossBackward0>)
tensor(3.3068, grad_fn=<NllLossBackward0>)
tensor(2.7074, grad_fn=<NllLossBackward0>)
tensor(3.4509, grad_fn=<NllLossBackward0>)
tensor(3.2275, grad_fn=<NllLossBackward0>)
tensor(3.2705, grad_fn=<NllLossBackward0>)
tensor(3.2312, grad_fn=<NllLossBackward0>)
tensor(3.3484, grad_fn=<NllLossBackward0>)
tensor(2.7811, grad_fn=<NllLossBackward0>)
tensor(3.1538, grad_fn=<NllLossBackward0>)
tensor(2.7809, grad_fn=<NllLossBackward0>)
tensor(3.3726, grad_fn=<NllLossBackward0>)
tensor(2.7704, grad_fn=<NllLossBackward0>)
tensor(3.3467, grad_fn=<NllLossBackward0>)
tensor(3.2505, grad_fn=<NllLossBackward0>)
tensor(3.4725, grad_fn=<NllLossBackward0>)
tensor(3.4493, grad_fn=<NllLossBackward0>)
tensor(3.1171, grad_fn=<NllLossBackward0>)
tensor(2.9176, grad_fn=<NllLossBackward0>)
tensor(3.2754, grad_fn=<NllLossBackward0>)
tensor(2.8349, grad_fn=<NllLossBackward0>)
tensor(3.11

tensor(2.2419, grad_fn=<NllLossBackward0>)
tensor(3.4398, grad_fn=<NllLossBackward0>)
tensor(3.1108, grad_fn=<NllLossBackward0>)
tensor(2.8198, grad_fn=<NllLossBackward0>)
tensor(3.4674, grad_fn=<NllLossBackward0>)
tensor(3.5299, grad_fn=<NllLossBackward0>)
tensor(3.1067, grad_fn=<NllLossBackward0>)
tensor(3.1533, grad_fn=<NllLossBackward0>)
tensor(3.2091, grad_fn=<NllLossBackward0>)
tensor(3.2214, grad_fn=<NllLossBackward0>)
tensor(2.7507, grad_fn=<NllLossBackward0>)
tensor(3.3770, grad_fn=<NllLossBackward0>)
tensor(3.2908, grad_fn=<NllLossBackward0>)
tensor(2.8196, grad_fn=<NllLossBackward0>)
tensor(2.5335, grad_fn=<NllLossBackward0>)
tensor(3.0913, grad_fn=<NllLossBackward0>)
tensor(3.4391, grad_fn=<NllLossBackward0>)
tensor(3.2587, grad_fn=<NllLossBackward0>)
tensor(2.6861, grad_fn=<NllLossBackward0>)
tensor(3.0605, grad_fn=<NllLossBackward0>)
tensor(3.1535, grad_fn=<NllLossBackward0>)
tensor(3.4211, grad_fn=<NllLossBackward0>)
tensor(2.5312, grad_fn=<NllLossBackward0>)
tensor(3.27

tensor(3.2720, grad_fn=<NllLossBackward0>)
tensor(3.6040, grad_fn=<NllLossBackward0>)
tensor(2.5084, grad_fn=<NllLossBackward0>)
tensor(2.8642, grad_fn=<NllLossBackward0>)
tensor(2.8645, grad_fn=<NllLossBackward0>)
tensor(2.8640, grad_fn=<NllLossBackward0>)
tensor(3.4185, grad_fn=<NllLossBackward0>)
tensor(3.1324, grad_fn=<NllLossBackward0>)
tensor(3.3044, grad_fn=<NllLossBackward0>)
tensor(2.9681, grad_fn=<NllLossBackward0>)
tensor(2.6612, grad_fn=<NllLossBackward0>)
tensor(2.9134, grad_fn=<NllLossBackward0>)
tensor(2.7983, grad_fn=<NllLossBackward0>)
tensor(3.3869, grad_fn=<NllLossBackward0>)
tensor(3.4573, grad_fn=<NllLossBackward0>)
tensor(3.3548, grad_fn=<NllLossBackward0>)
tensor(3.6671, grad_fn=<NllLossBackward0>)
tensor(3.4864, grad_fn=<NllLossBackward0>)
tensor(3.1797, grad_fn=<NllLossBackward0>)
tensor(2.9687, grad_fn=<NllLossBackward0>)
tensor(2.7240, grad_fn=<NllLossBackward0>)
tensor(3.5135, grad_fn=<NllLossBackward0>)
tensor(3.2179, grad_fn=<NllLossBackward0>)
tensor(2.50

tensor(3.3752, grad_fn=<NllLossBackward0>)
tensor(3.0055, grad_fn=<NllLossBackward0>)
tensor(2.7272, grad_fn=<NllLossBackward0>)
tensor(2.8965, grad_fn=<NllLossBackward0>)
tensor(3.0228, grad_fn=<NllLossBackward0>)
tensor(3.5111, grad_fn=<NllLossBackward0>)
tensor(2.9521, grad_fn=<NllLossBackward0>)
tensor(2.5340, grad_fn=<NllLossBackward0>)
tensor(2.9511, grad_fn=<NllLossBackward0>)
tensor(3.0414, grad_fn=<NllLossBackward0>)
tensor(2.8996, grad_fn=<NllLossBackward0>)
tensor(3.2508, grad_fn=<NllLossBackward0>)
tensor(3.0193, grad_fn=<NllLossBackward0>)
tensor(2.5326, grad_fn=<NllLossBackward0>)
tensor(2.7427, grad_fn=<NllLossBackward0>)
tensor(3.6766, grad_fn=<NllLossBackward0>)
tensor(3.2091, grad_fn=<NllLossBackward0>)
tensor(3.1104, grad_fn=<NllLossBackward0>)
tensor(2.7255, grad_fn=<NllLossBackward0>)
tensor(2.7068, grad_fn=<NllLossBackward0>)
tensor(2.5312, grad_fn=<NllLossBackward0>)
tensor(3.1987, grad_fn=<NllLossBackward0>)
tensor(3.4234, grad_fn=<NllLossBackward0>)
tensor(2.79

tensor(3.0299, grad_fn=<NllLossBackward0>)
tensor(2.8804, grad_fn=<NllLossBackward0>)
tensor(3.0805, grad_fn=<NllLossBackward0>)
tensor(2.9003, grad_fn=<NllLossBackward0>)
tensor(3.3462, grad_fn=<NllLossBackward0>)
tensor(3.2578, grad_fn=<NllLossBackward0>)
tensor(2.9411, grad_fn=<NllLossBackward0>)
tensor(2.6250, grad_fn=<NllLossBackward0>)
tensor(3.4435, grad_fn=<NllLossBackward0>)
tensor(3.2559, grad_fn=<NllLossBackward0>)
tensor(3.3786, grad_fn=<NllLossBackward0>)
tensor(2.8764, grad_fn=<NllLossBackward0>)
tensor(3.4586, grad_fn=<NllLossBackward0>)
tensor(3.2870, grad_fn=<NllLossBackward0>)
tensor(3.2120, grad_fn=<NllLossBackward0>)
tensor(2.8249, grad_fn=<NllLossBackward0>)
tensor(3.2594, grad_fn=<NllLossBackward0>)
tensor(2.7622, grad_fn=<NllLossBackward0>)
tensor(2.6280, grad_fn=<NllLossBackward0>)
tensor(2.5105, grad_fn=<NllLossBackward0>)
tensor(3.3754, grad_fn=<NllLossBackward0>)
tensor(2.5094, grad_fn=<NllLossBackward0>)
tensor(2.5088, grad_fn=<NllLossBackward0>)
tensor(3.48

tensor(3.5069, grad_fn=<NllLossBackward0>)
tensor(2.7647, grad_fn=<NllLossBackward0>)
tensor(3.1863, grad_fn=<NllLossBackward0>)
tensor(2.7663, grad_fn=<NllLossBackward0>)
tensor(2.8901, grad_fn=<NllLossBackward0>)
tensor(2.8096, grad_fn=<NllLossBackward0>)
tensor(3.2342, grad_fn=<NllLossBackward0>)
tensor(3.1231, grad_fn=<NllLossBackward0>)
tensor(3.5085, grad_fn=<NllLossBackward0>)
tensor(3.1302, grad_fn=<NllLossBackward0>)
tensor(3.3281, grad_fn=<NllLossBackward0>)
tensor(3.4198, grad_fn=<NllLossBackward0>)
tensor(3.3528, grad_fn=<NllLossBackward0>)
tensor(2.8098, grad_fn=<NllLossBackward0>)
tensor(2.7635, grad_fn=<NllLossBackward0>)
tensor(2.7616, grad_fn=<NllLossBackward0>)
tensor(2.9304, grad_fn=<NllLossBackward0>)
tensor(3.0670, grad_fn=<NllLossBackward0>)
tensor(2.7968, grad_fn=<NllLossBackward0>)
tensor(3.0694, grad_fn=<NllLossBackward0>)
tensor(2.7454, grad_fn=<NllLossBackward0>)
tensor(3.2417, grad_fn=<NllLossBackward0>)
tensor(3.3844, grad_fn=<NllLossBackward0>)
tensor(2.12

tensor(3.3560, grad_fn=<NllLossBackward0>)
tensor(2.7946, grad_fn=<NllLossBackward0>)
tensor(3.2319, grad_fn=<NllLossBackward0>)
tensor(3.1141, grad_fn=<NllLossBackward0>)
tensor(2.7297, grad_fn=<NllLossBackward0>)
tensor(3.1228, grad_fn=<NllLossBackward0>)
tensor(2.7450, grad_fn=<NllLossBackward0>)
tensor(2.6585, grad_fn=<NllLossBackward0>)
tensor(3.3461, grad_fn=<NllLossBackward0>)
tensor(2.8775, grad_fn=<NllLossBackward0>)
tensor(3.5585, grad_fn=<NllLossBackward0>)
tensor(2.8807, grad_fn=<NllLossBackward0>)
tensor(2.9435, grad_fn=<NllLossBackward0>)
tensor(2.9196, grad_fn=<NllLossBackward0>)
tensor(3.0603, grad_fn=<NllLossBackward0>)
tensor(2.9314, grad_fn=<NllLossBackward0>)
tensor(3.0190, grad_fn=<NllLossBackward0>)
tensor(3.5333, grad_fn=<NllLossBackward0>)
tensor(3.6759, grad_fn=<NllLossBackward0>)
tensor(2.4186, grad_fn=<NllLossBackward0>)
tensor(2.9129, grad_fn=<NllLossBackward0>)
tensor(3.4954, grad_fn=<NllLossBackward0>)
tensor(2.4164, grad_fn=<NllLossBackward0>)
tensor(3.03

tensor(2.3985, grad_fn=<NllLossBackward0>)
tensor(3.1778, grad_fn=<NllLossBackward0>)
tensor(3.3182, grad_fn=<NllLossBackward0>)
tensor(3.3239, grad_fn=<NllLossBackward0>)
tensor(3.3226, grad_fn=<NllLossBackward0>)
tensor(3.2484, grad_fn=<NllLossBackward0>)
tensor(3.4483, grad_fn=<NllLossBackward0>)
tensor(3.0564, grad_fn=<NllLossBackward0>)
tensor(3.6666, grad_fn=<NllLossBackward0>)
tensor(3.3229, grad_fn=<NllLossBackward0>)
tensor(3.2065, grad_fn=<NllLossBackward0>)
tensor(3.1071, grad_fn=<NllLossBackward0>)
tensor(3.3220, grad_fn=<NllLossBackward0>)
tensor(2.9602, grad_fn=<NllLossBackward0>)
tensor(2.0629, grad_fn=<NllLossBackward0>)
tensor(3.4715, grad_fn=<NllLossBackward0>)
tensor(2.0612, grad_fn=<NllLossBackward0>)
tensor(3.0514, grad_fn=<NllLossBackward0>)
tensor(3.0765, grad_fn=<NllLossBackward0>)
tensor(3.6193, grad_fn=<NllLossBackward0>)
tensor(2.5720, grad_fn=<NllLossBackward0>)
tensor(3.4575, grad_fn=<NllLossBackward0>)
tensor(2.3968, grad_fn=<NllLossBackward0>)
tensor(2.72

tensor(3.0973, grad_fn=<NllLossBackward0>)
tensor(3.6578, grad_fn=<NllLossBackward0>)
tensor(3.0484, grad_fn=<NllLossBackward0>)
tensor(2.7045, grad_fn=<NllLossBackward0>)
tensor(3.2626, grad_fn=<NllLossBackward0>)
tensor(3.3630, grad_fn=<NllLossBackward0>)
tensor(3.1534, grad_fn=<NllLossBackward0>)
tensor(3.5352, grad_fn=<NllLossBackward0>)
tensor(3.6024, grad_fn=<NllLossBackward0>)
tensor(3.0473, grad_fn=<NllLossBackward0>)
tensor(3.2505, grad_fn=<NllLossBackward0>)
tensor(2.7583, grad_fn=<NllLossBackward0>)
tensor(3.1529, grad_fn=<NllLossBackward0>)
tensor(2.6798, grad_fn=<NllLossBackward0>)
tensor(2.3777, grad_fn=<NllLossBackward0>)
tensor(3.3323, grad_fn=<NllLossBackward0>)
tensor(3.5166, grad_fn=<NllLossBackward0>)
tensor(3.2039, grad_fn=<NllLossBackward0>)
tensor(2.3775, grad_fn=<NllLossBackward0>)
tensor(2.7171, grad_fn=<NllLossBackward0>)
tensor(2.6347, grad_fn=<NllLossBackward0>)
tensor(2.6624, grad_fn=<NllLossBackward0>)
tensor(2.9055, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(3.0458, grad_fn=<NllLossBackward0>)
tensor(3.7484, grad_fn=<NllLossBackward0>)
tensor(3.3831, grad_fn=<NllLossBackward0>)
tensor(3.3799, grad_fn=<NllLossBackward0>)
tensor(2.9548, grad_fn=<NllLossBackward0>)
tensor(2.9127, grad_fn=<NllLossBackward0>)
tensor(3.3995, grad_fn=<NllLossBackward0>)
tensor(2.6970, grad_fn=<NllLossBackward0>)
tensor(2.3565, grad_fn=<NllLossBackward0>)
tensor(2.6488, grad_fn=<NllLossBackward0>)
tensor(2.5391, grad_fn=<NllLossBackward0>)
tensor(3.4840, grad_fn=<NllLossBackward0>)
tensor(2.7050, grad_fn=<NllLossBackward0>)
tensor(2.6155, grad_fn=<NllLossBackward0>)
tensor(2.7059, grad_fn=<NllLossBackward0>)
tensor(2.8753, grad_fn=<NllLossBackward0>)
tensor(2.5351, grad_fn=<NllLossBackward0>)
tensor(2.6943, grad_fn=<NllLossBackward0>)
tensor(2.3540, grad_fn=<NllLossBackward0>)
tensor(2.5341, grad_fn=<NllLossBackward0>)
tensor(3.0936, grad_fn=<NllLossBackward0>)
tensor(2.6952, grad_fn=<NllLossBackward0>)
tensor(2.3517, grad_fn=<NllLossBackward0>)
tensor(3.05

tensor(3.6269, grad_fn=<NllLossBackward0>)
tensor(2.3284, grad_fn=<NllLossBackward0>)
tensor(3.2051, grad_fn=<NllLossBackward0>)
tensor(2.6520, grad_fn=<NllLossBackward0>)
tensor(2.6848, grad_fn=<NllLossBackward0>)
tensor(3.4930, grad_fn=<NllLossBackward0>)
tensor(3.6247, grad_fn=<NllLossBackward0>)
tensor(3.3133, grad_fn=<NllLossBackward0>)
tensor(2.8455, grad_fn=<NllLossBackward0>)
tensor(3.0382, grad_fn=<NllLossBackward0>)
tensor(3.0049, grad_fn=<NllLossBackward0>)
tensor(3.0833, grad_fn=<NllLossBackward0>)
tensor(2.6331, grad_fn=<NllLossBackward0>)
tensor(2.6340, grad_fn=<NllLossBackward0>)
tensor(2.9240, grad_fn=<NllLossBackward0>)
tensor(3.3870, grad_fn=<NllLossBackward0>)
tensor(3.3611, grad_fn=<NllLossBackward0>)
tensor(2.3849, grad_fn=<NllLossBackward0>)
tensor(2.3836, grad_fn=<NllLossBackward0>)
tensor(3.1920, grad_fn=<NllLossBackward0>)
tensor(2.6326, grad_fn=<NllLossBackward0>)
tensor(3.0347, grad_fn=<NllLossBackward0>)
tensor(2.3828, grad_fn=<NllLossBackward0>)
tensor(3.27

tensor(2.7919, grad_fn=<NllLossBackward0>)
tensor(3.5977, grad_fn=<NllLossBackward0>)
tensor(3.2660, grad_fn=<NllLossBackward0>)
tensor(3.2986, grad_fn=<NllLossBackward0>)
tensor(2.3144, grad_fn=<NllLossBackward0>)
tensor(2.8074, grad_fn=<NllLossBackward0>)
tensor(3.5952, grad_fn=<NllLossBackward0>)
tensor(3.5710, grad_fn=<NllLossBackward0>)
tensor(2.9106, grad_fn=<NllLossBackward0>)
tensor(3.2628, grad_fn=<NllLossBackward0>)
tensor(3.3513, grad_fn=<NllLossBackward0>)
tensor(3.5999, grad_fn=<NllLossBackward0>)
tensor(3.4365, grad_fn=<NllLossBackward0>)
tensor(2.7199, grad_fn=<NllLossBackward0>)
tensor(3.3491, grad_fn=<NllLossBackward0>)
tensor(3.2649, grad_fn=<NllLossBackward0>)
tensor(3.0375, grad_fn=<NllLossBackward0>)
tensor(2.8546, grad_fn=<NllLossBackward0>)
tensor(3.2451, grad_fn=<NllLossBackward0>)
tensor(2.9749, grad_fn=<NllLossBackward0>)
tensor(2.6141, grad_fn=<NllLossBackward0>)
tensor(3.3295, grad_fn=<NllLossBackward0>)
tensor(2.9100, grad_fn=<NllLossBackward0>)
tensor(3.44

tensor(3.3093, grad_fn=<NllLossBackward0>)
tensor(2.7125, grad_fn=<NllLossBackward0>)
tensor(3.2955, grad_fn=<NllLossBackward0>)
tensor(3.7772, grad_fn=<NllLossBackward0>)
tensor(2.2958, grad_fn=<NllLossBackward0>)
tensor(2.4876, grad_fn=<NllLossBackward0>)
tensor(3.5146, grad_fn=<NllLossBackward0>)
tensor(2.6652, grad_fn=<NllLossBackward0>)
tensor(3.5497, grad_fn=<NllLossBackward0>)
tensor(3.5505, grad_fn=<NllLossBackward0>)
tensor(2.7099, grad_fn=<NllLossBackward0>)
tensor(3.6807, grad_fn=<NllLossBackward0>)
tensor(2.8999, grad_fn=<NllLossBackward0>)
tensor(3.2828, grad_fn=<NllLossBackward0>)
tensor(3.1716, grad_fn=<NllLossBackward0>)
tensor(3.5541, grad_fn=<NllLossBackward0>)
tensor(3.3631, grad_fn=<NllLossBackward0>)
tensor(3.3898, grad_fn=<NllLossBackward0>)
tensor(3.2583, grad_fn=<NllLossBackward0>)
tensor(2.6119, grad_fn=<NllLossBackward0>)
tensor(2.6193, grad_fn=<NllLossBackward0>)
tensor(3.2519, grad_fn=<NllLossBackward0>)
tensor(2.4866, grad_fn=<NllLossBackward0>)
tensor(2.83

tensor(2.5676, grad_fn=<NllLossBackward0>)
tensor(3.2136, grad_fn=<NllLossBackward0>)
tensor(2.8419, grad_fn=<NllLossBackward0>)
tensor(3.1540, grad_fn=<NllLossBackward0>)
tensor(3.4654, grad_fn=<NllLossBackward0>)
tensor(1.9273, grad_fn=<NllLossBackward0>)
tensor(2.4686, grad_fn=<NllLossBackward0>)
tensor(3.3833, grad_fn=<NllLossBackward0>)
tensor(2.9540, grad_fn=<NllLossBackward0>)
tensor(3.5943, grad_fn=<NllLossBackward0>)
tensor(2.9786, grad_fn=<NllLossBackward0>)
tensor(3.0290, grad_fn=<NllLossBackward0>)
tensor(3.3631, grad_fn=<NllLossBackward0>)
tensor(3.5632, grad_fn=<NllLossBackward0>)
tensor(2.9118, grad_fn=<NllLossBackward0>)
tensor(2.4677, grad_fn=<NllLossBackward0>)
tensor(3.2673, grad_fn=<NllLossBackward0>)
tensor(3.4809, grad_fn=<NllLossBackward0>)
tensor(3.3612, grad_fn=<NllLossBackward0>)
tensor(2.6542, grad_fn=<NllLossBackward0>)
tensor(2.5613, grad_fn=<NllLossBackward0>)
tensor(3.4541, grad_fn=<NllLossBackward0>)
tensor(2.6932, grad_fn=<NllLossBackward0>)
tensor(2.66

tensor(3.1648, grad_fn=<NllLossBackward0>)
tensor(3.1342, grad_fn=<NllLossBackward0>)
tensor(3.3968, grad_fn=<NllLossBackward0>)
tensor(3.1816, grad_fn=<NllLossBackward0>)
tensor(3.1803, grad_fn=<NllLossBackward0>)
tensor(2.2575, grad_fn=<NllLossBackward0>)
tensor(2.9766, grad_fn=<NllLossBackward0>)
tensor(3.0273, grad_fn=<NllLossBackward0>)
tensor(2.8904, grad_fn=<NllLossBackward0>)
tensor(3.4532, grad_fn=<NllLossBackward0>)
tensor(3.3499, grad_fn=<NllLossBackward0>)
tensor(3.6786, grad_fn=<NllLossBackward0>)
tensor(2.6418, grad_fn=<NllLossBackward0>)
tensor(3.0259, grad_fn=<NllLossBackward0>)
tensor(2.6403, grad_fn=<NllLossBackward0>)
tensor(2.6380, grad_fn=<NllLossBackward0>)
tensor(3.3524, grad_fn=<NllLossBackward0>)
tensor(3.1896, grad_fn=<NllLossBackward0>)
tensor(3.1586, grad_fn=<NllLossBackward0>)
tensor(2.5443, grad_fn=<NllLossBackward0>)
tensor(2.8338, grad_fn=<NllLossBackward0>)
tensor(2.9514, grad_fn=<NllLossBackward0>)
tensor(2.9122, grad_fn=<NllLossBackward0>)
tensor(2.80

tensor(2.5959, grad_fn=<NllLossBackward0>)
tensor(2.6530, grad_fn=<NllLossBackward0>)
tensor(2.5960, grad_fn=<NllLossBackward0>)
tensor(2.5962, grad_fn=<NllLossBackward0>)
tensor(3.1876, grad_fn=<NllLossBackward0>)
tensor(3.0287, grad_fn=<NllLossBackward0>)
tensor(3.1853, grad_fn=<NllLossBackward0>)
tensor(2.8693, grad_fn=<NllLossBackward0>)
tensor(1.8921, grad_fn=<NllLossBackward0>)
tensor(2.3158, grad_fn=<NllLossBackward0>)
tensor(3.3441, grad_fn=<NllLossBackward0>)
tensor(1.8894, grad_fn=<NllLossBackward0>)
tensor(1.8873, grad_fn=<NllLossBackward0>)
tensor(3.1866, grad_fn=<NllLossBackward0>)
tensor(2.6682, grad_fn=<NllLossBackward0>)
tensor(3.7477, grad_fn=<NllLossBackward0>)
tensor(3.5717, grad_fn=<NllLossBackward0>)
tensor(3.4136, grad_fn=<NllLossBackward0>)
tensor(3.3887, grad_fn=<NllLossBackward0>)
tensor(3.1750, grad_fn=<NllLossBackward0>)
tensor(2.3136, grad_fn=<NllLossBackward0>)
tensor(2.5988, grad_fn=<NllLossBackward0>)
tensor(2.4393, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(1.8704, grad_fn=<NllLossBackward0>)
tensor(3.3525, grad_fn=<NllLossBackward0>)
tensor(2.5739, grad_fn=<NllLossBackward0>)
tensor(2.8235, grad_fn=<NllLossBackward0>)
tensor(3.1900, grad_fn=<NllLossBackward0>)
tensor(3.4799, grad_fn=<NllLossBackward0>)
tensor(2.4327, grad_fn=<NllLossBackward0>)
tensor(3.2610, grad_fn=<NllLossBackward0>)
tensor(2.6202, grad_fn=<NllLossBackward0>)
tensor(3.8543, grad_fn=<NllLossBackward0>)
tensor(3.1256, grad_fn=<NllLossBackward0>)
tensor(2.7883, grad_fn=<NllLossBackward0>)
tensor(3.8431, grad_fn=<NllLossBackward0>)
tensor(2.6212, grad_fn=<NllLossBackward0>)
tensor(3.2471, grad_fn=<NllLossBackward0>)
tensor(3.0539, grad_fn=<NllLossBackward0>)
tensor(3.7702, grad_fn=<NllLossBackward0>)
tensor(3.6576, grad_fn=<NllLossBackward0>)
tensor(2.4329, grad_fn=<NllLossBackward0>)
tensor(2.5265, grad_fn=<NllLossBackward0>)
tensor(3.3718, grad_fn=<NllLossBackward0>)
tensor(2.7868, grad_fn=<NllLossBackward0>)
tensor(3.1240, grad_fn=<NllLossBackward0>)
tensor(3.04

tensor(2.4185, grad_fn=<NllLossBackward0>)
tensor(3.1370, grad_fn=<NllLossBackward0>)
tensor(3.5381, grad_fn=<NllLossBackward0>)
tensor(3.3244, grad_fn=<NllLossBackward0>)
tensor(2.9562, grad_fn=<NllLossBackward0>)
tensor(3.3720, grad_fn=<NllLossBackward0>)
tensor(3.1697, grad_fn=<NllLossBackward0>)
tensor(3.4406, grad_fn=<NllLossBackward0>)
tensor(2.7890, grad_fn=<NllLossBackward0>)
tensor(3.5830, grad_fn=<NllLossBackward0>)
tensor(3.3960, grad_fn=<NllLossBackward0>)
tensor(2.9958, grad_fn=<NllLossBackward0>)
tensor(2.8747, grad_fn=<NllLossBackward0>)
tensor(3.2740, grad_fn=<NllLossBackward0>)
tensor(2.8576, grad_fn=<NllLossBackward0>)
tensor(3.5866, grad_fn=<NllLossBackward0>)
tensor(3.1679, grad_fn=<NllLossBackward0>)
tensor(2.5643, grad_fn=<NllLossBackward0>)
tensor(2.2865, grad_fn=<NllLossBackward0>)
tensor(3.2487, grad_fn=<NllLossBackward0>)
tensor(3.5489, grad_fn=<NllLossBackward0>)
tensor(3.5432, grad_fn=<NllLossBackward0>)
tensor(3.1360, grad_fn=<NllLossBackward0>)
tensor(2.96

tensor(2.1992, grad_fn=<NllLossBackward0>)
tensor(2.9101, grad_fn=<NllLossBackward0>)
tensor(2.6815, grad_fn=<NllLossBackward0>)
tensor(2.9114, grad_fn=<NllLossBackward0>)
tensor(3.2992, grad_fn=<NllLossBackward0>)
tensor(3.1747, grad_fn=<NllLossBackward0>)
tensor(3.1739, grad_fn=<NllLossBackward0>)
tensor(2.5724, grad_fn=<NllLossBackward0>)
tensor(3.2370, grad_fn=<NllLossBackward0>)
tensor(3.3138, grad_fn=<NllLossBackward0>)
tensor(3.1292, grad_fn=<NllLossBackward0>)
tensor(3.5335, grad_fn=<NllLossBackward0>)
tensor(2.9248, grad_fn=<NllLossBackward0>)
tensor(2.9228, grad_fn=<NllLossBackward0>)
tensor(2.2827, grad_fn=<NllLossBackward0>)
tensor(2.6332, grad_fn=<NllLossBackward0>)
tensor(3.3493, grad_fn=<NllLossBackward0>)
tensor(2.4215, grad_fn=<NllLossBackward0>)
tensor(2.4207, grad_fn=<NllLossBackward0>)
tensor(3.4042, grad_fn=<NllLossBackward0>)
tensor(3.5386, grad_fn=<NllLossBackward0>)
tensor(2.9686, grad_fn=<NllLossBackward0>)
tensor(3.0485, grad_fn=<NllLossBackward0>)
tensor(3.58

tensor(3.4591, grad_fn=<NllLossBackward0>)
tensor(3.2587, grad_fn=<NllLossBackward0>)
tensor(2.9638, grad_fn=<NllLossBackward0>)
tensor(3.5877, grad_fn=<NllLossBackward0>)
tensor(3.4558, grad_fn=<NllLossBackward0>)
tensor(3.3799, grad_fn=<NllLossBackward0>)
tensor(3.3794, grad_fn=<NllLossBackward0>)
tensor(2.6191, grad_fn=<NllLossBackward0>)
tensor(2.9761, grad_fn=<NllLossBackward0>)
tensor(3.5391, grad_fn=<NllLossBackward0>)
tensor(3.1035, grad_fn=<NllLossBackward0>)
tensor(2.1942, grad_fn=<NllLossBackward0>)
tensor(2.5440, grad_fn=<NllLossBackward0>)
tensor(3.7621, grad_fn=<NllLossBackward0>)
tensor(2.6809, grad_fn=<NllLossBackward0>)
tensor(2.2685, grad_fn=<NllLossBackward0>)
tensor(2.9746, grad_fn=<NllLossBackward0>)
tensor(2.6812, grad_fn=<NllLossBackward0>)
tensor(3.3433, grad_fn=<NllLossBackward0>)
tensor(3.6105, grad_fn=<NllLossBackward0>)
tensor(2.9467, grad_fn=<NllLossBackward0>)
tensor(2.6152, grad_fn=<NllLossBackward0>)
tensor(2.6140, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(3.3693, grad_fn=<NllLossBackward0>)
tensor(3.3979, grad_fn=<NllLossBackward0>)
tensor(3.7906, grad_fn=<NllLossBackward0>)
tensor(3.4303, grad_fn=<NllLossBackward0>)
tensor(2.5563, grad_fn=<NllLossBackward0>)
tensor(3.0965, grad_fn=<NllLossBackward0>)
tensor(3.2719, grad_fn=<NllLossBackward0>)
tensor(3.1727, grad_fn=<NllLossBackward0>)
tensor(3.0140, grad_fn=<NllLossBackward0>)
tensor(3.2626, grad_fn=<NllLossBackward0>)
tensor(2.8496, grad_fn=<NllLossBackward0>)
tensor(3.1798, grad_fn=<NllLossBackward0>)
tensor(2.5074, grad_fn=<NllLossBackward0>)
tensor(2.6821, grad_fn=<NllLossBackward0>)
tensor(2.6797, grad_fn=<NllLossBackward0>)
tensor(2.5580, grad_fn=<NllLossBackward0>)
tensor(3.2080, grad_fn=<NllLossBackward0>)
tensor(2.8463, grad_fn=<NllLossBackward0>)
tensor(3.1355, grad_fn=<NllLossBackward0>)
tensor(3.2476, grad_fn=<NllLossBackward0>)
tensor(3.3789, grad_fn=<NllLossBackward0>)
tensor(3.5435, grad_fn=<NllLossBackward0>)
tensor(1.8428, grad_fn=<NllLossBackward0>)
tensor(2.75

tensor(2.5870, grad_fn=<NllLossBackward0>)
tensor(2.8358, grad_fn=<NllLossBackward0>)
tensor(2.5040, grad_fn=<NllLossBackward0>)
tensor(3.0844, grad_fn=<NllLossBackward0>)
tensor(3.3768, grad_fn=<NllLossBackward0>)
tensor(2.1716, grad_fn=<NllLossBackward0>)
tensor(2.5040, grad_fn=<NllLossBackward0>)
tensor(3.5022, grad_fn=<NllLossBackward0>)
tensor(2.4194, grad_fn=<NllLossBackward0>)
tensor(3.5064, grad_fn=<NllLossBackward0>)
tensor(2.9440, grad_fn=<NllLossBackward0>)
tensor(3.2889, grad_fn=<NllLossBackward0>)
tensor(2.5410, grad_fn=<NllLossBackward0>)
tensor(2.7493, grad_fn=<NllLossBackward0>)
tensor(2.1693, grad_fn=<NllLossBackward0>)
tensor(3.3912, grad_fn=<NllLossBackward0>)
tensor(2.5566, grad_fn=<NllLossBackward0>)
tensor(2.5891, grad_fn=<NllLossBackward0>)
tensor(2.5662, grad_fn=<NllLossBackward0>)
tensor(3.0891, grad_fn=<NllLossBackward0>)
tensor(3.1456, grad_fn=<NllLossBackward0>)
tensor(3.2054, grad_fn=<NllLossBackward0>)
tensor(2.8243, grad_fn=<NllLossBackward0>)
tensor(2.55

tensor(2.5904, grad_fn=<NllLossBackward0>)
tensor(2.1528, grad_fn=<NllLossBackward0>)
tensor(2.9917, grad_fn=<NllLossBackward0>)
tensor(3.1562, grad_fn=<NllLossBackward0>)
tensor(2.5437, grad_fn=<NllLossBackward0>)
tensor(3.9498, grad_fn=<NllLossBackward0>)
tensor(3.5331, grad_fn=<NllLossBackward0>)
tensor(3.1997, grad_fn=<NllLossBackward0>)
tensor(3.6468, grad_fn=<NllLossBackward0>)
tensor(2.5559, grad_fn=<NllLossBackward0>)
tensor(3.8408, grad_fn=<NllLossBackward0>)
tensor(3.9753, grad_fn=<NllLossBackward0>)
tensor(3.6583, grad_fn=<NllLossBackward0>)
tensor(1.8386, grad_fn=<NllLossBackward0>)
tensor(3.2922, grad_fn=<NllLossBackward0>)
tensor(3.6241, grad_fn=<NllLossBackward0>)
tensor(3.2249, grad_fn=<NllLossBackward0>)
tensor(3.2261, grad_fn=<NllLossBackward0>)
tensor(2.8258, grad_fn=<NllLossBackward0>)
tensor(1.8371, grad_fn=<NllLossBackward0>)
tensor(3.3821, grad_fn=<NllLossBackward0>)
tensor(2.1547, grad_fn=<NllLossBackward0>)
tensor(1.8343, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(2.6603, grad_fn=<NllLossBackward0>)
tensor(2.9113, grad_fn=<NllLossBackward0>)
tensor(3.2566, grad_fn=<NllLossBackward0>)
tensor(2.1448, grad_fn=<NllLossBackward0>)
tensor(3.2989, grad_fn=<NllLossBackward0>)
tensor(2.1442, grad_fn=<NllLossBackward0>)
tensor(2.3956, grad_fn=<NllLossBackward0>)
tensor(2.2299, grad_fn=<NllLossBackward0>)
tensor(3.6747, grad_fn=<NllLossBackward0>)
tensor(3.4654, grad_fn=<NllLossBackward0>)
tensor(2.3956, grad_fn=<NllLossBackward0>)
tensor(3.3208, grad_fn=<NllLossBackward0>)
tensor(3.6661, grad_fn=<NllLossBackward0>)
tensor(2.9694, grad_fn=<NllLossBackward0>)
tensor(3.2415, grad_fn=<NllLossBackward0>)
tensor(3.2731, grad_fn=<NllLossBackward0>)
tensor(2.1440, grad_fn=<NllLossBackward0>)
tensor(3.6658, grad_fn=<NllLossBackward0>)
tensor(2.6478, grad_fn=<NllLossBackward0>)
tensor(2.8117, grad_fn=<NllLossBackward0>)
tensor(3.3092, grad_fn=<NllLossBackward0>)
tensor(2.2281, grad_fn=<NllLossBackward0>)
tensor(3.2211, grad_fn=<NllLossBackward0>)
tensor(3.71

tensor(2.8522, grad_fn=<NllLossBackward0>)
tensor(2.4875, grad_fn=<NllLossBackward0>)
tensor(2.5465, grad_fn=<NllLossBackward0>)
tensor(3.6566, grad_fn=<NllLossBackward0>)
tensor(3.1528, grad_fn=<NllLossBackward0>)
tensor(2.5368, grad_fn=<NllLossBackward0>)
tensor(3.2008, grad_fn=<NllLossBackward0>)
tensor(2.8461, grad_fn=<NllLossBackward0>)
tensor(2.8367, grad_fn=<NllLossBackward0>)
tensor(1.8226, grad_fn=<NllLossBackward0>)
tensor(2.9330, grad_fn=<NllLossBackward0>)
tensor(2.7982, grad_fn=<NllLossBackward0>)
tensor(2.4014, grad_fn=<NllLossBackward0>)
tensor(2.8930, grad_fn=<NllLossBackward0>)
tensor(2.8186, grad_fn=<NllLossBackward0>)
tensor(3.5501, grad_fn=<NllLossBackward0>)
tensor(3.2485, grad_fn=<NllLossBackward0>)
tensor(1.8205, grad_fn=<NllLossBackward0>)
tensor(3.3794, grad_fn=<NllLossBackward0>)
tensor(3.2817, grad_fn=<NllLossBackward0>)
tensor(3.2446, grad_fn=<NllLossBackward0>)
tensor(2.4530, grad_fn=<NllLossBackward0>)
tensor(3.2479, grad_fn=<NllLossBackward0>)
tensor(1.82

tensor(3.6162, grad_fn=<NllLossBackward0>)
tensor(2.8457, grad_fn=<NllLossBackward0>)
tensor(2.5706, grad_fn=<NllLossBackward0>)
tensor(3.3316, grad_fn=<NllLossBackward0>)
tensor(2.8346, grad_fn=<NllLossBackward0>)
tensor(3.2500, grad_fn=<NllLossBackward0>)
tensor(2.8423, grad_fn=<NllLossBackward0>)
tensor(3.5556, grad_fn=<NllLossBackward0>)
tensor(3.2848, grad_fn=<NllLossBackward0>)
tensor(3.0628, grad_fn=<NllLossBackward0>)
tensor(4.0220, grad_fn=<NllLossBackward0>)
tensor(2.8043, grad_fn=<NllLossBackward0>)
tensor(2.5278, grad_fn=<NllLossBackward0>)
tensor(3.2385, grad_fn=<NllLossBackward0>)
tensor(2.9284, grad_fn=<NllLossBackward0>)
tensor(3.2913, grad_fn=<NllLossBackward0>)
tensor(2.7915, grad_fn=<NllLossBackward0>)
tensor(3.2453, grad_fn=<NllLossBackward0>)
tensor(2.1270, grad_fn=<NllLossBackward0>)
tensor(2.4777, grad_fn=<NllLossBackward0>)
tensor(3.2383, grad_fn=<NllLossBackward0>)
tensor(2.8454, grad_fn=<NllLossBackward0>)
tensor(2.8301, grad_fn=<NllLossBackward0>)
tensor(2.93

tensor(2.4229, grad_fn=<NllLossBackward0>)
tensor(2.9903, grad_fn=<NllLossBackward0>)
tensor(2.9310, grad_fn=<NllLossBackward0>)
tensor(3.2205, grad_fn=<NllLossBackward0>)
tensor(3.2022, grad_fn=<NllLossBackward0>)
tensor(3.3424, grad_fn=<NllLossBackward0>)
tensor(3.5589, grad_fn=<NllLossBackward0>)
tensor(3.7115, grad_fn=<NllLossBackward0>)
tensor(3.1066, grad_fn=<NllLossBackward0>)
tensor(2.6157, grad_fn=<NllLossBackward0>)
tensor(2.8333, grad_fn=<NllLossBackward0>)
tensor(2.6606, grad_fn=<NllLossBackward0>)
tensor(2.9170, grad_fn=<NllLossBackward0>)
tensor(2.3842, grad_fn=<NllLossBackward0>)
tensor(3.1094, grad_fn=<NllLossBackward0>)
tensor(3.6320, grad_fn=<NllLossBackward0>)
tensor(3.4022, grad_fn=<NllLossBackward0>)
tensor(3.1432, grad_fn=<NllLossBackward0>)
tensor(1.8001, grad_fn=<NllLossBackward0>)
tensor(2.5233, grad_fn=<NllLossBackward0>)
tensor(2.6095, grad_fn=<NllLossBackward0>)
tensor(3.5641, grad_fn=<NllLossBackward0>)
tensor(2.6587, grad_fn=<NllLossBackward0>)
tensor(3.50

tensor(2.9284, grad_fn=<NllLossBackward0>)
tensor(3.0133, grad_fn=<NllLossBackward0>)
tensor(2.6527, grad_fn=<NllLossBackward0>)
tensor(2.1888, grad_fn=<NllLossBackward0>)
tensor(2.8956, grad_fn=<NllLossBackward0>)
tensor(3.3409, grad_fn=<NllLossBackward0>)
tensor(2.8348, grad_fn=<NllLossBackward0>)
tensor(2.8282, grad_fn=<NllLossBackward0>)
tensor(2.7769, grad_fn=<NllLossBackward0>)
tensor(2.9208, grad_fn=<NllLossBackward0>)
tensor(3.6845, grad_fn=<NllLossBackward0>)
tensor(2.3681, grad_fn=<NllLossBackward0>)
tensor(2.1870, grad_fn=<NllLossBackward0>)
tensor(3.8013, grad_fn=<NllLossBackward0>)
tensor(3.1956, grad_fn=<NllLossBackward0>)
tensor(3.3269, grad_fn=<NllLossBackward0>)
tensor(2.8256, grad_fn=<NllLossBackward0>)
tensor(3.2422, grad_fn=<NllLossBackward0>)
tensor(3.2708, grad_fn=<NllLossBackward0>)
tensor(2.8260, grad_fn=<NllLossBackward0>)
tensor(2.9781, grad_fn=<NllLossBackward0>)
tensor(3.6646, grad_fn=<NllLossBackward0>)
tensor(3.3328, grad_fn=<NllLossBackward0>)
tensor(3.57

tensor(3.3792, grad_fn=<NllLossBackward0>)
tensor(3.1041, grad_fn=<NllLossBackward0>)
tensor(3.2213, grad_fn=<NllLossBackward0>)
tensor(2.1705, grad_fn=<NllLossBackward0>)
tensor(3.9228, grad_fn=<NllLossBackward0>)
tensor(3.2731, grad_fn=<NllLossBackward0>)
tensor(3.0958, grad_fn=<NllLossBackward0>)
tensor(3.2064, grad_fn=<NllLossBackward0>)
tensor(2.6478, grad_fn=<NllLossBackward0>)
tensor(3.2725, grad_fn=<NllLossBackward0>)
tensor(2.9429, grad_fn=<NllLossBackward0>)
tensor(3.3322, grad_fn=<NllLossBackward0>)
tensor(2.8005, grad_fn=<NllLossBackward0>)
tensor(3.2431, grad_fn=<NllLossBackward0>)
tensor(3.7669, grad_fn=<NllLossBackward0>)
tensor(3.0937, grad_fn=<NllLossBackward0>)
tensor(2.8277, grad_fn=<NllLossBackward0>)
tensor(2.9653, grad_fn=<NllLossBackward0>)
tensor(3.6981, grad_fn=<NllLossBackward0>)
tensor(3.4894, grad_fn=<NllLossBackward0>)
tensor(3.9664, grad_fn=<NllLossBackward0>)
tensor(3.5128, grad_fn=<NllLossBackward0>)
tensor(2.8295, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(2.8246, grad_fn=<NllLossBackward0>)
tensor(3.5435, grad_fn=<NllLossBackward0>)
tensor(3.4244, grad_fn=<NllLossBackward0>)
tensor(3.3763, grad_fn=<NllLossBackward0>)
tensor(3.3782, grad_fn=<NllLossBackward0>)
tensor(2.5022, grad_fn=<NllLossBackward0>)
tensor(3.3147, grad_fn=<NllLossBackward0>)
tensor(2.7931, grad_fn=<NllLossBackward0>)
tensor(2.5009, grad_fn=<NllLossBackward0>)
tensor(3.2336, grad_fn=<NllLossBackward0>)
tensor(1.7764, grad_fn=<NllLossBackward0>)
tensor(3.1944, grad_fn=<NllLossBackward0>)
tensor(3.1623, grad_fn=<NllLossBackward0>)
tensor(2.4486, grad_fn=<NllLossBackward0>)
tensor(2.8914, grad_fn=<NllLossBackward0>)
tensor(2.3570, grad_fn=<NllLossBackward0>)
tensor(3.5053, grad_fn=<NllLossBackward0>)
tensor(2.3566, grad_fn=<NllLossBackward0>)
tensor(3.7208, grad_fn=<NllLossBackward0>)
tensor(2.7742, grad_fn=<NllLossBackward0>)
tensor(2.9225, grad_fn=<NllLossBackward0>)
tensor(3.2260, grad_fn=<NllLossBackward0>)
tensor(2.8414, grad_fn=<NllLossBackward0>)
tensor(3.68

tensor(3.1694, grad_fn=<NllLossBackward0>)
tensor(3.1891, grad_fn=<NllLossBackward0>)
tensor(3.8759, grad_fn=<NllLossBackward0>)
tensor(3.5510, grad_fn=<NllLossBackward0>)
tensor(3.2950, grad_fn=<NllLossBackward0>)
tensor(1.7741, grad_fn=<NllLossBackward0>)
tensor(3.2115, grad_fn=<NllLossBackward0>)
tensor(3.6058, grad_fn=<NllLossBackward0>)
tensor(3.1649, grad_fn=<NllLossBackward0>)
tensor(3.3714, grad_fn=<NllLossBackward0>)
tensor(3.4830, grad_fn=<NllLossBackward0>)
tensor(3.5576, grad_fn=<NllLossBackward0>)
tensor(2.9199, grad_fn=<NllLossBackward0>)
tensor(2.9205, grad_fn=<NllLossBackward0>)
tensor(2.5134, grad_fn=<NllLossBackward0>)
tensor(2.9033, grad_fn=<NllLossBackward0>)
tensor(3.1159, grad_fn=<NllLossBackward0>)
tensor(3.9334, grad_fn=<NllLossBackward0>)
tensor(3.3711, grad_fn=<NllLossBackward0>)
tensor(2.5721, grad_fn=<NllLossBackward0>)
tensor(3.2925, grad_fn=<NllLossBackward0>)
tensor(2.4344, grad_fn=<NllLossBackward0>)
tensor(3.2938, grad_fn=<NllLossBackward0>)
tensor(2.96

tensor(2.6403, grad_fn=<NllLossBackward0>)
tensor(3.6249, grad_fn=<NllLossBackward0>)
tensor(3.3052, grad_fn=<NllLossBackward0>)
tensor(3.2642, grad_fn=<NllLossBackward0>)
tensor(2.4946, grad_fn=<NllLossBackward0>)
tensor(2.6610, grad_fn=<NllLossBackward0>)
tensor(2.3308, grad_fn=<NllLossBackward0>)
tensor(2.6572, grad_fn=<NllLossBackward0>)
tensor(3.6466, grad_fn=<NllLossBackward0>)
tensor(3.3215, grad_fn=<NllLossBackward0>)
tensor(2.9824, grad_fn=<NllLossBackward0>)
tensor(3.6086, grad_fn=<NllLossBackward0>)
tensor(3.0035, grad_fn=<NllLossBackward0>)
tensor(3.1655, grad_fn=<NllLossBackward0>)
tensor(2.9841, grad_fn=<NllLossBackward0>)
tensor(3.1120, grad_fn=<NllLossBackward0>)
tensor(2.8513, grad_fn=<NllLossBackward0>)
tensor(2.9413, grad_fn=<NllLossBackward0>)
tensor(2.8951, grad_fn=<NllLossBackward0>)
tensor(2.8000, grad_fn=<NllLossBackward0>)
tensor(3.1783, grad_fn=<NllLossBackward0>)
tensor(2.4239, grad_fn=<NllLossBackward0>)
tensor(2.0936, grad_fn=<NllLossBackward0>)
tensor(3.15

tensor(2.8097, grad_fn=<NllLossBackward0>)
tensor(2.5915, grad_fn=<NllLossBackward0>)
tensor(2.4954, grad_fn=<NllLossBackward0>)
tensor(2.9128, grad_fn=<NllLossBackward0>)
tensor(2.8812, grad_fn=<NllLossBackward0>)
tensor(3.3047, grad_fn=<NllLossBackward0>)
tensor(2.5503, grad_fn=<NllLossBackward0>)
tensor(3.6440, grad_fn=<NllLossBackward0>)
tensor(2.8235, grad_fn=<NllLossBackward0>)
tensor(2.4938, grad_fn=<NllLossBackward0>)
tensor(3.2896, grad_fn=<NllLossBackward0>)
tensor(2.6444, grad_fn=<NllLossBackward0>)
tensor(3.0646, grad_fn=<NllLossBackward0>)
tensor(2.5658, grad_fn=<NllLossBackward0>)
tensor(3.8845, grad_fn=<NllLossBackward0>)
tensor(3.9264, grad_fn=<NllLossBackward0>)
tensor(3.3110, grad_fn=<NllLossBackward0>)
tensor(2.5668, grad_fn=<NllLossBackward0>)
tensor(3.3022, grad_fn=<NllLossBackward0>)
tensor(3.0785, grad_fn=<NllLossBackward0>)
tensor(2.4400, grad_fn=<NllLossBackward0>)
tensor(3.6978, grad_fn=<NllLossBackward0>)
tensor(2.8375, grad_fn=<NllLossBackward0>)
tensor(2.09

tensor(3.0618, grad_fn=<NllLossBackward0>)
tensor(3.5913, grad_fn=<NllLossBackward0>)
tensor(3.2053, grad_fn=<NllLossBackward0>)
tensor(3.1163, grad_fn=<NllLossBackward0>)
tensor(2.8762, grad_fn=<NllLossBackward0>)
tensor(2.3198, grad_fn=<NllLossBackward0>)
tensor(3.0298, grad_fn=<NllLossBackward0>)
tensor(3.6574, grad_fn=<NllLossBackward0>)
tensor(2.8561, grad_fn=<NllLossBackward0>)
tensor(2.3195, grad_fn=<NllLossBackward0>)
tensor(3.2358, grad_fn=<NllLossBackward0>)
tensor(2.8475, grad_fn=<NllLossBackward0>)
tensor(3.4986, grad_fn=<NllLossBackward0>)
tensor(3.4984, grad_fn=<NllLossBackward0>)
tensor(2.9377, grad_fn=<NllLossBackward0>)
tensor(3.5445, grad_fn=<NllLossBackward0>)
tensor(2.0800, grad_fn=<NllLossBackward0>)
tensor(2.4732, grad_fn=<NllLossBackward0>)
tensor(3.3080, grad_fn=<NllLossBackward0>)
tensor(2.3192, grad_fn=<NllLossBackward0>)
tensor(2.5866, grad_fn=<NllLossBackward0>)
tensor(3.2401, grad_fn=<NllLossBackward0>)
tensor(2.9583, grad_fn=<NllLossBackward0>)
tensor(2.91

tensor(2.7986, grad_fn=<NllLossBackward0>)
tensor(3.3410, grad_fn=<NllLossBackward0>)
tensor(3.2395, grad_fn=<NllLossBackward0>)
tensor(2.3187, grad_fn=<NllLossBackward0>)
tensor(3.4985, grad_fn=<NllLossBackward0>)
tensor(2.6420, grad_fn=<NllLossBackward0>)
tensor(3.5716, grad_fn=<NllLossBackward0>)
tensor(2.8829, grad_fn=<NllLossBackward0>)
tensor(2.4206, grad_fn=<NllLossBackward0>)
tensor(2.1438, grad_fn=<NllLossBackward0>)
tensor(3.4919, grad_fn=<NllLossBackward0>)
tensor(2.3193, grad_fn=<NllLossBackward0>)
tensor(2.5644, grad_fn=<NllLossBackward0>)
tensor(2.0804, grad_fn=<NllLossBackward0>)
tensor(3.6533, grad_fn=<NllLossBackward0>)
tensor(2.4691, grad_fn=<NllLossBackward0>)
tensor(3.7611, grad_fn=<NllLossBackward0>)
tensor(3.3955, grad_fn=<NllLossBackward0>)
tensor(3.3003, grad_fn=<NllLossBackward0>)
tensor(3.2243, grad_fn=<NllLossBackward0>)
tensor(3.6791, grad_fn=<NllLossBackward0>)
tensor(1.7573, grad_fn=<NllLossBackward0>)
tensor(3.5662, grad_fn=<NllLossBackward0>)
tensor(1.75

tensor(3.1866, grad_fn=<NllLossBackward0>)
tensor(2.8383, grad_fn=<NllLossBackward0>)
tensor(2.1347, grad_fn=<NllLossBackward0>)
tensor(3.3641, grad_fn=<NllLossBackward0>)
tensor(3.2641, grad_fn=<NllLossBackward0>)
tensor(3.2936, grad_fn=<NllLossBackward0>)
tensor(3.1390, grad_fn=<NllLossBackward0>)
tensor(3.6067, grad_fn=<NllLossBackward0>)
tensor(2.8636, grad_fn=<NllLossBackward0>)
tensor(3.3993, grad_fn=<NllLossBackward0>)
tensor(2.6936, grad_fn=<NllLossBackward0>)
tensor(3.9111, grad_fn=<NllLossBackward0>)
tensor(2.8544, grad_fn=<NllLossBackward0>)
tensor(2.9703, grad_fn=<NllLossBackward0>)
tensor(2.1355, grad_fn=<NllLossBackward0>)
tensor(4.0701, grad_fn=<NllLossBackward0>)
tensor(2.8905, grad_fn=<NllLossBackward0>)
tensor(2.4604, grad_fn=<NllLossBackward0>)
tensor(3.4152, grad_fn=<NllLossBackward0>)
tensor(2.4767, grad_fn=<NllLossBackward0>)
tensor(3.3625, grad_fn=<NllLossBackward0>)
tensor(2.8141, grad_fn=<NllLossBackward0>)
tensor(2.9710, grad_fn=<NllLossBackward0>)
tensor(2.91

tensor(2.7310, grad_fn=<NllLossBackward0>)
tensor(2.8658, grad_fn=<NllLossBackward0>)
tensor(2.5338, grad_fn=<NllLossBackward0>)
tensor(2.7296, grad_fn=<NllLossBackward0>)
tensor(3.1176, grad_fn=<NllLossBackward0>)
tensor(3.2419, grad_fn=<NllLossBackward0>)
tensor(3.8745, grad_fn=<NllLossBackward0>)
tensor(1.7515, grad_fn=<NllLossBackward0>)
tensor(3.1447, grad_fn=<NllLossBackward0>)
tensor(3.8893, grad_fn=<NllLossBackward0>)
tensor(2.4062, grad_fn=<NllLossBackward0>)
tensor(3.5638, grad_fn=<NllLossBackward0>)
tensor(2.8574, grad_fn=<NllLossBackward0>)
tensor(2.8423, grad_fn=<NllLossBackward0>)
tensor(2.9522, grad_fn=<NllLossBackward0>)
tensor(4.1658, grad_fn=<NllLossBackward0>)
tensor(2.4532, grad_fn=<NllLossBackward0>)
tensor(2.1295, grad_fn=<NllLossBackward0>)
tensor(2.9411, grad_fn=<NllLossBackward0>)
tensor(2.4492, grad_fn=<NllLossBackward0>)
tensor(2.8794, grad_fn=<NllLossBackward0>)
tensor(2.4766, grad_fn=<NllLossBackward0>)
tensor(4.0599, grad_fn=<NllLossBackward0>)
tensor(3.16

tensor(2.8546, grad_fn=<NllLossBackward0>)
tensor(3.3805, grad_fn=<NllLossBackward0>)
tensor(2.8248, grad_fn=<NllLossBackward0>)
tensor(2.9896, grad_fn=<NllLossBackward0>)
tensor(3.3880, grad_fn=<NllLossBackward0>)
tensor(3.1284, grad_fn=<NllLossBackward0>)
tensor(2.1253, grad_fn=<NllLossBackward0>)
tensor(2.1257, grad_fn=<NllLossBackward0>)
tensor(4.1156, grad_fn=<NllLossBackward0>)
tensor(3.4450, grad_fn=<NllLossBackward0>)
tensor(2.6763, grad_fn=<NllLossBackward0>)
tensor(3.2903, grad_fn=<NllLossBackward0>)
tensor(3.0315, grad_fn=<NllLossBackward0>)
tensor(3.6207, grad_fn=<NllLossBackward0>)
tensor(2.9393, grad_fn=<NllLossBackward0>)
tensor(2.9654, grad_fn=<NllLossBackward0>)
tensor(4.0471, grad_fn=<NllLossBackward0>)
tensor(2.1241, grad_fn=<NllLossBackward0>)
tensor(3.7276, grad_fn=<NllLossBackward0>)
tensor(2.7324, grad_fn=<NllLossBackward0>)
tensor(2.6721, grad_fn=<NllLossBackward0>)
tensor(2.4775, grad_fn=<NllLossBackward0>)
tensor(2.1228, grad_fn=<NllLossBackward0>)
tensor(3.15

tensor(3.7340, grad_fn=<NllLossBackward0>)
tensor(3.1605, grad_fn=<NllLossBackward0>)
tensor(2.1265, grad_fn=<NllLossBackward0>)
tensor(4.0632, grad_fn=<NllLossBackward0>)
tensor(3.2477, grad_fn=<NllLossBackward0>)
tensor(2.9419, grad_fn=<NllLossBackward0>)
tensor(3.1065, grad_fn=<NllLossBackward0>)
tensor(2.8647, grad_fn=<NllLossBackward0>)
tensor(2.8098, grad_fn=<NllLossBackward0>)
tensor(3.2445, grad_fn=<NllLossBackward0>)
tensor(3.2373, grad_fn=<NllLossBackward0>)
tensor(3.7114, grad_fn=<NllLossBackward0>)
tensor(2.7988, grad_fn=<NllLossBackward0>)
tensor(3.7288, grad_fn=<NllLossBackward0>)
tensor(2.8156, grad_fn=<NllLossBackward0>)
tensor(3.5790, grad_fn=<NllLossBackward0>)
tensor(3.5782, grad_fn=<NllLossBackward0>)
tensor(1.7423, grad_fn=<NllLossBackward0>)
tensor(2.8482, grad_fn=<NllLossBackward0>)
tensor(2.7221, grad_fn=<NllLossBackward0>)
tensor(2.2906, grad_fn=<NllLossBackward0>)
tensor(2.2903, grad_fn=<NllLossBackward0>)
tensor(2.8406, grad_fn=<NllLossBackward0>)
tensor(2.86

tensor(2.5062, grad_fn=<NllLossBackward0>)
tensor(3.3623, grad_fn=<NllLossBackward0>)
tensor(3.9875, grad_fn=<NllLossBackward0>)
tensor(2.2794, grad_fn=<NllLossBackward0>)
tensor(2.7164, grad_fn=<NllLossBackward0>)
tensor(2.5718, grad_fn=<NllLossBackward0>)
tensor(2.5689, grad_fn=<NllLossBackward0>)
tensor(1.7340, grad_fn=<NllLossBackward0>)
tensor(3.5437, grad_fn=<NllLossBackward0>)
tensor(3.2955, grad_fn=<NllLossBackward0>)
tensor(2.0528, grad_fn=<NllLossBackward0>)
tensor(2.8577, grad_fn=<NllLossBackward0>)
tensor(2.5048, grad_fn=<NllLossBackward0>)
tensor(3.5932, grad_fn=<NllLossBackward0>)
tensor(2.8064, grad_fn=<NllLossBackward0>)
tensor(3.7314, grad_fn=<NllLossBackward0>)
tensor(3.3785, grad_fn=<NllLossBackward0>)
tensor(3.3153, grad_fn=<NllLossBackward0>)
tensor(2.8175, grad_fn=<NllLossBackward0>)
tensor(2.7935, grad_fn=<NllLossBackward0>)
tensor(2.5695, grad_fn=<NllLossBackward0>)
tensor(2.4783, grad_fn=<NllLossBackward0>)
tensor(3.3333, grad_fn=<NllLossBackward0>)
tensor(3.28

tensor(3.2886, grad_fn=<NllLossBackward0>)
tensor(3.3110, grad_fn=<NllLossBackward0>)
tensor(3.7630, grad_fn=<NllLossBackward0>)
tensor(2.9118, grad_fn=<NllLossBackward0>)
tensor(3.9532, grad_fn=<NllLossBackward0>)
tensor(3.0581, grad_fn=<NllLossBackward0>)
tensor(2.5694, grad_fn=<NllLossBackward0>)
tensor(3.3247, grad_fn=<NllLossBackward0>)
tensor(2.9578, grad_fn=<NllLossBackward0>)
tensor(3.2867, grad_fn=<NllLossBackward0>)
tensor(2.4699, grad_fn=<NllLossBackward0>)
tensor(2.5106, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(3.7438, grad_fn=<NllLossBackward0>)
tensor(3.0877, grad_fn=<NllLossBackward0>)
tensor(3.5751, grad_fn=<NllLossBackward0>)
tensor(2.8573, grad_fn=<NllLossBackward0>)
tensor(3.0055, grad_fn=<NllLossBackward0>)
tensor(2.5700, grad_fn=<NllLossBackward0>)
tensor(4.0568, grad_fn=<NllLossBackward0>)
tensor(2.5854, grad_fn=<NllLossBackward0>)
tensor(2.8564, grad_fn=<NllLossBackward0>)
tensor(2.9998, grad_fn=<NllLossBackward0>)
tensor(3.21

tensor(2.5532, grad_fn=<NllLossBackward0>)
tensor(2.8363, grad_fn=<NllLossBackward0>)
tensor(2.3969, grad_fn=<NllLossBackward0>)
tensor(3.9203, grad_fn=<NllLossBackward0>)
tensor(3.1716, grad_fn=<NllLossBackward0>)
tensor(2.6179, grad_fn=<NllLossBackward0>)
tensor(2.5008, grad_fn=<NllLossBackward0>)
tensor(3.7200, grad_fn=<NllLossBackward0>)
tensor(2.0524, grad_fn=<NllLossBackward0>)
tensor(2.7671, grad_fn=<NllLossBackward0>)
tensor(3.0282, grad_fn=<NllLossBackward0>)
tensor(2.5780, grad_fn=<NllLossBackward0>)
tensor(2.2690, grad_fn=<NllLossBackward0>)
tensor(3.2562, grad_fn=<NllLossBackward0>)
tensor(3.2830, grad_fn=<NllLossBackward0>)
tensor(3.4029, grad_fn=<NllLossBackward0>)
tensor(3.2233, grad_fn=<NllLossBackward0>)
tensor(2.6185, grad_fn=<NllLossBackward0>)
tensor(3.1501, grad_fn=<NllLossBackward0>)
tensor(3.4630, grad_fn=<NllLossBackward0>)
tensor(3.3892, grad_fn=<NllLossBackward0>)
tensor(2.9960, grad_fn=<NllLossBackward0>)
tensor(4.1261, grad_fn=<NllLossBackward0>)
tensor(3.52

tensor(2.0471, grad_fn=<NllLossBackward0>)
tensor(4.0726, grad_fn=<NllLossBackward0>)
tensor(3.9618, grad_fn=<NllLossBackward0>)
tensor(2.8710, grad_fn=<NllLossBackward0>)
tensor(2.1181, grad_fn=<NllLossBackward0>)
tensor(2.9506, grad_fn=<NllLossBackward0>)
tensor(1.7505, grad_fn=<NllLossBackward0>)
tensor(3.2359, grad_fn=<NllLossBackward0>)
tensor(2.8102, grad_fn=<NllLossBackward0>)
tensor(3.0950, grad_fn=<NllLossBackward0>)
tensor(3.1191, grad_fn=<NllLossBackward0>)
tensor(3.2522, grad_fn=<NllLossBackward0>)
tensor(2.5544, grad_fn=<NllLossBackward0>)
tensor(2.8700, grad_fn=<NllLossBackward0>)
tensor(4.1225, grad_fn=<NllLossBackward0>)
tensor(2.1144, grad_fn=<NllLossBackward0>)
tensor(2.7817, grad_fn=<NllLossBackward0>)
tensor(2.5792, grad_fn=<NllLossBackward0>)
tensor(2.6893, grad_fn=<NllLossBackward0>)
tensor(2.8568, grad_fn=<NllLossBackward0>)
tensor(3.5079, grad_fn=<NllLossBackward0>)
tensor(3.3122, grad_fn=<NllLossBackward0>)
tensor(3.6511, grad_fn=<NllLossBackward0>)
tensor(2.52

tensor(3.0246, grad_fn=<NllLossBackward0>)
tensor(1.7329, grad_fn=<NllLossBackward0>)
tensor(2.3429, grad_fn=<NllLossBackward0>)
tensor(3.5025, grad_fn=<NllLossBackward0>)
tensor(3.3760, grad_fn=<NllLossBackward0>)
tensor(1.7319, grad_fn=<NllLossBackward0>)
tensor(2.8679, grad_fn=<NllLossBackward0>)
tensor(2.1036, grad_fn=<NllLossBackward0>)
tensor(2.8522, grad_fn=<NllLossBackward0>)
tensor(3.1738, grad_fn=<NllLossBackward0>)
tensor(3.1166, grad_fn=<NllLossBackward0>)
tensor(2.7996, grad_fn=<NllLossBackward0>)
tensor(3.7446, grad_fn=<NllLossBackward0>)
tensor(2.7939, grad_fn=<NllLossBackward0>)
tensor(2.8876, grad_fn=<NllLossBackward0>)
tensor(3.5922, grad_fn=<NllLossBackward0>)
tensor(3.2116, grad_fn=<NllLossBackward0>)
tensor(3.3854, grad_fn=<NllLossBackward0>)
tensor(3.1560, grad_fn=<NllLossBackward0>)
tensor(2.8172, grad_fn=<NllLossBackward0>)
tensor(3.1762, grad_fn=<NllLossBackward0>)
tensor(2.9430, grad_fn=<NllLossBackward0>)
tensor(2.9825, grad_fn=<NllLossBackward0>)
tensor(2.47

tensor(3.7620, grad_fn=<NllLossBackward0>)
tensor(2.5477, grad_fn=<NllLossBackward0>)
tensor(3.1999, grad_fn=<NllLossBackward0>)
tensor(2.0308, grad_fn=<NllLossBackward0>)
tensor(3.3064, grad_fn=<NllLossBackward0>)
tensor(2.6241, grad_fn=<NllLossBackward0>)
tensor(2.6236, grad_fn=<NllLossBackward0>)
tensor(2.0320, grad_fn=<NllLossBackward0>)
tensor(3.1626, grad_fn=<NllLossBackward0>)
tensor(3.7437, grad_fn=<NllLossBackward0>)
tensor(2.9177, grad_fn=<NllLossBackward0>)
tensor(3.7027, grad_fn=<NllLossBackward0>)
tensor(2.3993, grad_fn=<NllLossBackward0>)
tensor(2.0291, grad_fn=<NllLossBackward0>)
tensor(3.3205, grad_fn=<NllLossBackward0>)
tensor(3.2791, grad_fn=<NllLossBackward0>)
tensor(4.1830, grad_fn=<NllLossBackward0>)
tensor(3.1722, grad_fn=<NllLossBackward0>)
tensor(3.1755, grad_fn=<NllLossBackward0>)
tensor(2.8555, grad_fn=<NllLossBackward0>)
tensor(3.9650, grad_fn=<NllLossBackward0>)
tensor(2.3825, grad_fn=<NllLossBackward0>)
tensor(2.9882, grad_fn=<NllLossBackward0>)
tensor(2.56

tensor(1.7349, grad_fn=<NllLossBackward0>)
tensor(2.9182, grad_fn=<NllLossBackward0>)
tensor(3.7196, grad_fn=<NllLossBackward0>)
tensor(3.0289, grad_fn=<NllLossBackward0>)
tensor(3.3540, grad_fn=<NllLossBackward0>)
tensor(2.3824, grad_fn=<NllLossBackward0>)
tensor(2.3845, grad_fn=<NllLossBackward0>)
tensor(2.3803, grad_fn=<NllLossBackward0>)
tensor(3.7412, grad_fn=<NllLossBackward0>)
tensor(2.8119, grad_fn=<NllLossBackward0>)
tensor(3.3942, grad_fn=<NllLossBackward0>)
tensor(3.3834, grad_fn=<NllLossBackward0>)
tensor(2.0897, grad_fn=<NllLossBackward0>)
tensor(4.0875, grad_fn=<NllLossBackward0>)
tensor(2.7608, grad_fn=<NllLossBackward0>)
tensor(3.1491, grad_fn=<NllLossBackward0>)
tensor(3.1682, grad_fn=<NllLossBackward0>)
tensor(2.9061, grad_fn=<NllLossBackward0>)
tensor(2.9671, grad_fn=<NllLossBackward0>)
tensor(2.0267, grad_fn=<NllLossBackward0>)
tensor(2.5506, grad_fn=<NllLossBackward0>)
tensor(2.0902, grad_fn=<NllLossBackward0>)
tensor(3.6207, grad_fn=<NllLossBackward0>)
tensor(3.24

tensor(3.1629, grad_fn=<NllLossBackward0>)
tensor(2.8581, grad_fn=<NllLossBackward0>)
tensor(2.6659, grad_fn=<NllLossBackward0>)
tensor(3.1059, grad_fn=<NllLossBackward0>)
tensor(3.3105, grad_fn=<NllLossBackward0>)
tensor(2.6144, grad_fn=<NllLossBackward0>)
tensor(2.7580, grad_fn=<NllLossBackward0>)
tensor(2.8926, grad_fn=<NllLossBackward0>)
tensor(3.3405, grad_fn=<NllLossBackward0>)
tensor(3.3409, grad_fn=<NllLossBackward0>)
tensor(2.2742, grad_fn=<NllLossBackward0>)
tensor(3.3567, grad_fn=<NllLossBackward0>)
tensor(2.5821, grad_fn=<NllLossBackward0>)
tensor(3.1332, grad_fn=<NllLossBackward0>)
tensor(2.6636, grad_fn=<NllLossBackward0>)
tensor(1.7428, grad_fn=<NllLossBackward0>)
tensor(3.2374, grad_fn=<NllLossBackward0>)
tensor(2.0281, grad_fn=<NllLossBackward0>)
tensor(3.1965, grad_fn=<NllLossBackward0>)
tensor(3.3086, grad_fn=<NllLossBackward0>)
tensor(3.4272, grad_fn=<NllLossBackward0>)
tensor(3.1298, grad_fn=<NllLossBackward0>)
tensor(2.0857, grad_fn=<NllLossBackward0>)
tensor(3.45

tensor(3.7506, grad_fn=<NllLossBackward0>)
tensor(2.4695, grad_fn=<NllLossBackward0>)
tensor(3.3900, grad_fn=<NllLossBackward0>)
tensor(2.6103, grad_fn=<NllLossBackward0>)
tensor(2.8968, grad_fn=<NllLossBackward0>)
tensor(3.2438, grad_fn=<NllLossBackward0>)
tensor(1.7382, grad_fn=<NllLossBackward0>)
tensor(3.3110, grad_fn=<NllLossBackward0>)
tensor(2.9988, grad_fn=<NllLossBackward0>)
tensor(2.2672, grad_fn=<NllLossBackward0>)
tensor(2.3071, grad_fn=<NllLossBackward0>)
tensor(3.6977, grad_fn=<NllLossBackward0>)
tensor(4.1050, grad_fn=<NllLossBackward0>)
tensor(3.3980, grad_fn=<NllLossBackward0>)
tensor(3.3236, grad_fn=<NllLossBackward0>)
tensor(2.3713, grad_fn=<NllLossBackward0>)
tensor(3.2300, grad_fn=<NllLossBackward0>)
tensor(2.5553, grad_fn=<NllLossBackward0>)
tensor(4.1063, grad_fn=<NllLossBackward0>)
tensor(4.0476, grad_fn=<NllLossBackward0>)
tensor(3.1419, grad_fn=<NllLossBackward0>)
tensor(2.9043, grad_fn=<NllLossBackward0>)
tensor(2.4718, grad_fn=<NllLossBackward0>)
tensor(3.59

tensor(2.3605, grad_fn=<NllLossBackward0>)
tensor(3.5546, grad_fn=<NllLossBackward0>)
tensor(2.9575, grad_fn=<NllLossBackward0>)
tensor(3.9069, grad_fn=<NllLossBackward0>)
tensor(2.9324, grad_fn=<NllLossBackward0>)
tensor(3.0401, grad_fn=<NllLossBackward0>)
tensor(3.0890, grad_fn=<NllLossBackward0>)
tensor(3.0350, grad_fn=<NllLossBackward0>)
tensor(2.9953, grad_fn=<NllLossBackward0>)
tensor(2.5524, grad_fn=<NllLossBackward0>)
tensor(4.0570, grad_fn=<NllLossBackward0>)
tensor(3.2144, grad_fn=<NllLossBackward0>)
tensor(3.4831, grad_fn=<NllLossBackward0>)
tensor(2.9003, grad_fn=<NllLossBackward0>)
tensor(2.8022, grad_fn=<NllLossBackward0>)
tensor(3.5437, grad_fn=<NllLossBackward0>)
tensor(2.5489, grad_fn=<NllLossBackward0>)
tensor(2.0851, grad_fn=<NllLossBackward0>)
tensor(2.8986, grad_fn=<NllLossBackward0>)
tensor(3.6648, grad_fn=<NllLossBackward0>)
tensor(2.3735, grad_fn=<NllLossBackward0>)
tensor(2.6489, grad_fn=<NllLossBackward0>)
tensor(3.1774, grad_fn=<NllLossBackward0>)
tensor(2.54

tensor(2.8997, grad_fn=<NllLossBackward0>)
tensor(3.1294, grad_fn=<NllLossBackward0>)
tensor(3.1870, grad_fn=<NllLossBackward0>)
tensor(2.3706, grad_fn=<NllLossBackward0>)
tensor(3.5000, grad_fn=<NllLossBackward0>)
tensor(2.0105, grad_fn=<NllLossBackward0>)
tensor(2.0088, grad_fn=<NllLossBackward0>)
tensor(2.0755, grad_fn=<NllLossBackward0>)
tensor(3.5074, grad_fn=<NllLossBackward0>)
tensor(2.3038, grad_fn=<NllLossBackward0>)
tensor(2.4590, grad_fn=<NllLossBackward0>)
tensor(2.6070, grad_fn=<NllLossBackward0>)
tensor(2.0736, grad_fn=<NllLossBackward0>)
tensor(2.0075, grad_fn=<NllLossBackward0>)
tensor(3.0758, grad_fn=<NllLossBackward0>)
tensor(2.0077, grad_fn=<NllLossBackward0>)
tensor(2.5031, grad_fn=<NllLossBackward0>)
tensor(2.6489, grad_fn=<NllLossBackward0>)
tensor(3.4254, grad_fn=<NllLossBackward0>)
tensor(2.7779, grad_fn=<NllLossBackward0>)
tensor(3.3320, grad_fn=<NllLossBackward0>)
tensor(3.4916, grad_fn=<NllLossBackward0>)
tensor(2.4478, grad_fn=<NllLossBackward0>)
tensor(3.38

tensor(2.6446, grad_fn=<NllLossBackward0>)
tensor(2.8157, grad_fn=<NllLossBackward0>)
tensor(2.7334, grad_fn=<NllLossBackward0>)
tensor(2.3636, grad_fn=<NllLossBackward0>)
tensor(3.2319, grad_fn=<NllLossBackward0>)
tensor(2.4478, grad_fn=<NllLossBackward0>)
tensor(3.2428, grad_fn=<NllLossBackward0>)
tensor(2.5979, grad_fn=<NllLossBackward0>)
tensor(2.5754, grad_fn=<NllLossBackward0>)
tensor(3.2415, grad_fn=<NllLossBackward0>)
tensor(2.3517, grad_fn=<NllLossBackward0>)
tensor(2.0010, grad_fn=<NllLossBackward0>)
tensor(2.7856, grad_fn=<NllLossBackward0>)
tensor(3.5414, grad_fn=<NllLossBackward0>)
tensor(2.4877, grad_fn=<NllLossBackward0>)
tensor(3.9114, grad_fn=<NllLossBackward0>)
tensor(3.4192, grad_fn=<NllLossBackward0>)
tensor(3.6223, grad_fn=<NllLossBackward0>)
tensor(2.4620, grad_fn=<NllLossBackward0>)
tensor(3.6288, grad_fn=<NllLossBackward0>)
tensor(2.8928, grad_fn=<NllLossBackward0>)
tensor(2.4859, grad_fn=<NllLossBackward0>)
tensor(2.8552, grad_fn=<NllLossBackward0>)
tensor(3.13

tensor(2.8626, grad_fn=<NllLossBackward0>)
tensor(3.4621, grad_fn=<NllLossBackward0>)
tensor(1.9953, grad_fn=<NllLossBackward0>)
tensor(3.0016, grad_fn=<NllLossBackward0>)
tensor(2.4558, grad_fn=<NllLossBackward0>)
tensor(2.4830, grad_fn=<NllLossBackward0>)
tensor(2.2502, grad_fn=<NllLossBackward0>)
tensor(3.2249, grad_fn=<NllLossBackward0>)
tensor(3.7704, grad_fn=<NllLossBackward0>)
tensor(3.3052, grad_fn=<NllLossBackward0>)
tensor(3.3323, grad_fn=<NllLossBackward0>)
tensor(3.2937, grad_fn=<NllLossBackward0>)
tensor(3.9369, grad_fn=<NllLossBackward0>)
tensor(3.4936, grad_fn=<NllLossBackward0>)
tensor(1.7076, grad_fn=<NllLossBackward0>)
tensor(3.0314, grad_fn=<NllLossBackward0>)
tensor(2.8044, grad_fn=<NllLossBackward0>)
tensor(2.4422, grad_fn=<NllLossBackward0>)
tensor(1.9966, grad_fn=<NllLossBackward0>)
tensor(2.8084, grad_fn=<NllLossBackward0>)
tensor(3.2219, grad_fn=<NllLossBackward0>)
tensor(2.8603, grad_fn=<NllLossBackward0>)
tensor(3.6693, grad_fn=<NllLossBackward0>)
tensor(3.10

tensor(2.3442, grad_fn=<NllLossBackward0>)
tensor(2.4934, grad_fn=<NllLossBackward0>)
tensor(2.3404, grad_fn=<NllLossBackward0>)
tensor(3.5413, grad_fn=<NllLossBackward0>)
tensor(3.1311, grad_fn=<NllLossBackward0>)
tensor(2.5150, grad_fn=<NllLossBackward0>)
tensor(3.5874, grad_fn=<NllLossBackward0>)
tensor(2.4624, grad_fn=<NllLossBackward0>)
tensor(2.2496, grad_fn=<NllLossBackward0>)
tensor(3.0803, grad_fn=<NllLossBackward0>)
tensor(3.9551, grad_fn=<NllLossBackward0>)
tensor(2.3430, grad_fn=<NllLossBackward0>)
tensor(2.2777, grad_fn=<NllLossBackward0>)
tensor(3.6574, grad_fn=<NllLossBackward0>)
tensor(2.7679, grad_fn=<NllLossBackward0>)
tensor(2.0675, grad_fn=<NllLossBackward0>)
tensor(3.1596, grad_fn=<NllLossBackward0>)
tensor(2.4520, grad_fn=<NllLossBackward0>)
tensor(3.1302, grad_fn=<NllLossBackward0>)
tensor(1.7262, grad_fn=<NllLossBackward0>)
tensor(3.5475, grad_fn=<NllLossBackward0>)
tensor(3.6010, grad_fn=<NllLossBackward0>)
tensor(2.0675, grad_fn=<NllLossBackward0>)
tensor(2.34

tensor(3.0681, grad_fn=<NllLossBackward0>)
tensor(3.1348, grad_fn=<NllLossBackward0>)
tensor(2.9365, grad_fn=<NllLossBackward0>)
tensor(2.7963, grad_fn=<NllLossBackward0>)
tensor(1.7302, grad_fn=<NllLossBackward0>)
tensor(3.7626, grad_fn=<NllLossBackward0>)
tensor(3.0680, grad_fn=<NllLossBackward0>)
tensor(2.8068, grad_fn=<NllLossBackward0>)
tensor(2.7972, grad_fn=<NllLossBackward0>)
tensor(2.7685, grad_fn=<NllLossBackward0>)
tensor(2.0639, grad_fn=<NllLossBackward0>)
tensor(3.5414, grad_fn=<NllLossBackward0>)
tensor(2.0637, grad_fn=<NllLossBackward0>)
tensor(2.0623, grad_fn=<NllLossBackward0>)
tensor(2.4910, grad_fn=<NllLossBackward0>)
tensor(2.9691, grad_fn=<NllLossBackward0>)
tensor(3.3551, grad_fn=<NllLossBackward0>)
tensor(2.0013, grad_fn=<NllLossBackward0>)
tensor(2.6029, grad_fn=<NllLossBackward0>)
tensor(2.3377, grad_fn=<NllLossBackward0>)
tensor(2.9362, grad_fn=<NllLossBackward0>)
tensor(2.3467, grad_fn=<NllLossBackward0>)
tensor(2.7822, grad_fn=<NllLossBackward0>)
tensor(3.62

tensor(2.0042, grad_fn=<NllLossBackward0>)
tensor(2.3332, grad_fn=<NllLossBackward0>)
tensor(3.2565, grad_fn=<NllLossBackward0>)
tensor(4.0157, grad_fn=<NllLossBackward0>)
tensor(3.6914, grad_fn=<NllLossBackward0>)
tensor(2.0023, grad_fn=<NllLossBackward0>)
tensor(3.6074, grad_fn=<NllLossBackward0>)
tensor(2.5961, grad_fn=<NllLossBackward0>)
tensor(2.3324, grad_fn=<NllLossBackward0>)
tensor(2.8644, grad_fn=<NllLossBackward0>)
tensor(2.4899, grad_fn=<NllLossBackward0>)
tensor(2.5262, grad_fn=<NllLossBackward0>)
tensor(2.8734, grad_fn=<NllLossBackward0>)
tensor(3.0431, grad_fn=<NllLossBackward0>)
tensor(3.4177, grad_fn=<NllLossBackward0>)
tensor(3.0718, grad_fn=<NllLossBackward0>)
tensor(2.4864, grad_fn=<NllLossBackward0>)
tensor(3.0833, grad_fn=<NllLossBackward0>)
tensor(3.3901, grad_fn=<NllLossBackward0>)
tensor(1.7232, grad_fn=<NllLossBackward0>)
tensor(3.3982, grad_fn=<NllLossBackward0>)
tensor(2.8738, grad_fn=<NllLossBackward0>)
tensor(3.6268, grad_fn=<NllLossBackward0>)
tensor(2.76

tensor(2.4489, grad_fn=<NllLossBackward0>)
tensor(2.0599, grad_fn=<NllLossBackward0>)
tensor(2.3546, grad_fn=<NllLossBackward0>)
tensor(2.8724, grad_fn=<NllLossBackward0>)
tensor(2.4879, grad_fn=<NllLossBackward0>)
tensor(2.8087, grad_fn=<NllLossBackward0>)
tensor(2.8452, grad_fn=<NllLossBackward0>)
tensor(3.5787, grad_fn=<NllLossBackward0>)
tensor(3.9249, grad_fn=<NllLossBackward0>)
tensor(4.1841, grad_fn=<NllLossBackward0>)
tensor(3.6892, grad_fn=<NllLossBackward0>)
tensor(2.5167, grad_fn=<NllLossBackward0>)
tensor(3.7484, grad_fn=<NllLossBackward0>)
tensor(2.2582, grad_fn=<NllLossBackward0>)
tensor(2.4610, grad_fn=<NllLossBackward0>)
tensor(3.2132, grad_fn=<NllLossBackward0>)
tensor(2.2570, grad_fn=<NllLossBackward0>)
tensor(2.3548, grad_fn=<NllLossBackward0>)
tensor(2.9757, grad_fn=<NllLossBackward0>)
tensor(2.3182, grad_fn=<NllLossBackward0>)
tensor(3.3157, grad_fn=<NllLossBackward0>)
tensor(3.6252, grad_fn=<NllLossBackward0>)
tensor(2.8587, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(2.8894, grad_fn=<NllLossBackward0>)
tensor(3.3277, grad_fn=<NllLossBackward0>)
tensor(2.4476, grad_fn=<NllLossBackward0>)
tensor(2.4633, grad_fn=<NllLossBackward0>)
tensor(3.4789, grad_fn=<NllLossBackward0>)
tensor(2.9956, grad_fn=<NllLossBackward0>)
tensor(3.3404, grad_fn=<NllLossBackward0>)
tensor(2.5300, grad_fn=<NllLossBackward0>)
tensor(3.1817, grad_fn=<NllLossBackward0>)
tensor(2.2457, grad_fn=<NllLossBackward0>)
tensor(3.7674, grad_fn=<NllLossBackward0>)
tensor(2.8683, grad_fn=<NllLossBackward0>)
tensor(2.9937, grad_fn=<NllLossBackward0>)
tensor(2.5284, grad_fn=<NllLossBackward0>)
tensor(2.4844, grad_fn=<NllLossBackward0>)
tensor(3.2474, grad_fn=<NllLossBackward0>)
tensor(3.6187, grad_fn=<NllLossBackward0>)
tensor(2.7839, grad_fn=<NllLossBackward0>)
tensor(2.5393, grad_fn=<NllLossBackward0>)
tensor(2.8546, grad_fn=<NllLossBackward0>)
tensor(2.0584, grad_fn=<NllLossBackward0>)
tensor(2.8825, grad_fn=<NllLossBackward0>)
tensor(2.9714, grad_fn=<NllLossBackward0>)
tensor(3.22

tensor(1.9998, grad_fn=<NllLossBackward0>)
tensor(2.6082, grad_fn=<NllLossBackward0>)
tensor(2.9109, grad_fn=<NllLossBackward0>)
tensor(3.1298, grad_fn=<NllLossBackward0>)
tensor(2.5454, grad_fn=<NllLossBackward0>)
tensor(3.3194, grad_fn=<NllLossBackward0>)
tensor(2.5395, grad_fn=<NllLossBackward0>)
tensor(3.8506, grad_fn=<NllLossBackward0>)
tensor(1.7454, grad_fn=<NllLossBackward0>)
tensor(2.9108, grad_fn=<NllLossBackward0>)
tensor(2.3523, grad_fn=<NllLossBackward0>)
tensor(2.8493, grad_fn=<NllLossBackward0>)
tensor(2.8477, grad_fn=<NllLossBackward0>)
tensor(2.4901, grad_fn=<NllLossBackward0>)
tensor(3.1967, grad_fn=<NllLossBackward0>)
tensor(2.8295, grad_fn=<NllLossBackward0>)
tensor(2.4636, grad_fn=<NllLossBackward0>)
tensor(2.9228, grad_fn=<NllLossBackward0>)
tensor(3.2655, grad_fn=<NllLossBackward0>)
tensor(3.1943, grad_fn=<NllLossBackward0>)
tensor(2.7521, grad_fn=<NllLossBackward0>)
tensor(3.8793, grad_fn=<NllLossBackward0>)
tensor(3.6421, grad_fn=<NllLossBackward0>)
tensor(3.71

tensor(3.8199, grad_fn=<NllLossBackward0>)
tensor(2.3454, grad_fn=<NllLossBackward0>)
tensor(3.2842, grad_fn=<NllLossBackward0>)
tensor(2.4373, grad_fn=<NllLossBackward0>)
tensor(1.9991, grad_fn=<NllLossBackward0>)
tensor(3.1650, grad_fn=<NllLossBackward0>)
tensor(2.3121, grad_fn=<NllLossBackward0>)
tensor(3.5889, grad_fn=<NllLossBackward0>)
tensor(3.1067, grad_fn=<NllLossBackward0>)
tensor(2.3570, grad_fn=<NllLossBackward0>)
tensor(3.8585, grad_fn=<NllLossBackward0>)
tensor(4.0275, grad_fn=<NllLossBackward0>)
tensor(3.2785, grad_fn=<NllLossBackward0>)
tensor(2.8520, grad_fn=<NllLossBackward0>)
tensor(2.2382, grad_fn=<NllLossBackward0>)
tensor(3.3402, grad_fn=<NllLossBackward0>)
tensor(2.7438, grad_fn=<NllLossBackward0>)
tensor(3.3340, grad_fn=<NllLossBackward0>)
tensor(3.3440, grad_fn=<NllLossBackward0>)
tensor(3.3295, grad_fn=<NllLossBackward0>)
tensor(3.7675, grad_fn=<NllLossBackward0>)
tensor(3.4871, grad_fn=<NllLossBackward0>)
tensor(2.8788, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(2.9228, grad_fn=<NllLossBackward0>)
tensor(2.8259, grad_fn=<NllLossBackward0>)
tensor(3.4181, grad_fn=<NllLossBackward0>)
tensor(3.3786, grad_fn=<NllLossBackward0>)
tensor(2.6008, grad_fn=<NllLossBackward0>)
tensor(3.8959, grad_fn=<NllLossBackward0>)
tensor(3.8586, grad_fn=<NllLossBackward0>)
tensor(3.1201, grad_fn=<NllLossBackward0>)
tensor(2.0047, grad_fn=<NllLossBackward0>)
tensor(2.7302, grad_fn=<NllLossBackward0>)
tensor(2.2923, grad_fn=<NllLossBackward0>)
tensor(2.0020, grad_fn=<NllLossBackward0>)
tensor(2.6293, grad_fn=<NllLossBackward0>)
tensor(2.3551, grad_fn=<NllLossBackward0>)
tensor(2.4457, grad_fn=<NllLossBackward0>)
tensor(3.0694, grad_fn=<NllLossBackward0>)
tensor(2.4943, grad_fn=<NllLossBackward0>)
tensor(2.9817, grad_fn=<NllLossBackward0>)
tensor(2.2926, grad_fn=<NllLossBackward0>)
tensor(2.5936, grad_fn=<NllLossBackward0>)
tensor(3.4791, grad_fn=<NllLossBackward0>)
tensor(3.2340, grad_fn=<NllLossBackward0>)
tensor(2.7613, grad_fn=<NllLossBackward0>)
tensor(3.11

tensor(3.3711, grad_fn=<NllLossBackward0>)
tensor(3.6089, grad_fn=<NllLossBackward0>)
tensor(2.5158, grad_fn=<NllLossBackward0>)
tensor(3.1699, grad_fn=<NllLossBackward0>)
tensor(2.4678, grad_fn=<NllLossBackward0>)
tensor(3.0353, grad_fn=<NllLossBackward0>)
tensor(3.3583, grad_fn=<NllLossBackward0>)
tensor(3.0786, grad_fn=<NllLossBackward0>)
tensor(3.1265, grad_fn=<NllLossBackward0>)
tensor(2.5810, grad_fn=<NllLossBackward0>)
tensor(2.2394, grad_fn=<NllLossBackward0>)
tensor(2.6005, grad_fn=<NllLossBackward0>)
tensor(2.0457, grad_fn=<NllLossBackward0>)
tensor(3.6598, grad_fn=<NllLossBackward0>)
tensor(2.2473, grad_fn=<NllLossBackward0>)
tensor(2.4668, grad_fn=<NllLossBackward0>)
tensor(3.6666, grad_fn=<NllLossBackward0>)
tensor(3.5045, grad_fn=<NllLossBackward0>)
tensor(3.2795, grad_fn=<NllLossBackward0>)
tensor(2.4307, grad_fn=<NllLossBackward0>)
tensor(3.7790, grad_fn=<NllLossBackward0>)
tensor(2.0468, grad_fn=<NllLossBackward0>)
tensor(1.7454, grad_fn=<NllLossBackward0>)
tensor(2.79

tensor(2.0405, grad_fn=<NllLossBackward0>)
tensor(3.4452, grad_fn=<NllLossBackward0>)
tensor(2.4693, grad_fn=<NllLossBackward0>)
tensor(2.6661, grad_fn=<NllLossBackward0>)
tensor(2.6664, grad_fn=<NllLossBackward0>)
tensor(3.0011, grad_fn=<NllLossBackward0>)
tensor(2.3456, grad_fn=<NllLossBackward0>)
tensor(3.2402, grad_fn=<NllLossBackward0>)
tensor(2.0428, grad_fn=<NllLossBackward0>)
tensor(2.2919, grad_fn=<NllLossBackward0>)
tensor(4.1489, grad_fn=<NllLossBackward0>)
tensor(2.4978, grad_fn=<NllLossBackward0>)
tensor(2.5465, grad_fn=<NllLossBackward0>)
tensor(3.4512, grad_fn=<NllLossBackward0>)
tensor(2.6177, grad_fn=<NllLossBackward0>)
tensor(3.1215, grad_fn=<NllLossBackward0>)
tensor(3.7987, grad_fn=<NllLossBackward0>)
tensor(3.6294, grad_fn=<NllLossBackward0>)
tensor(3.6832, grad_fn=<NllLossBackward0>)
tensor(2.5711, grad_fn=<NllLossBackward0>)
tensor(2.4631, grad_fn=<NllLossBackward0>)
tensor(3.1133, grad_fn=<NllLossBackward0>)
tensor(2.4694, grad_fn=<NllLossBackward0>)
tensor(2.77

tensor(2.5939, grad_fn=<NllLossBackward0>)
tensor(2.9108, grad_fn=<NllLossBackward0>)
tensor(2.9266, grad_fn=<NllLossBackward0>)
tensor(3.6205, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(3.4531, grad_fn=<NllLossBackward0>)
tensor(2.4904, grad_fn=<NllLossBackward0>)
tensor(3.5577, grad_fn=<NllLossBackward0>)
tensor(3.7410, grad_fn=<NllLossBackward0>)
tensor(3.2773, grad_fn=<NllLossBackward0>)
tensor(2.6557, grad_fn=<NllLossBackward0>)
tensor(2.7723, grad_fn=<NllLossBackward0>)
tensor(3.5039, grad_fn=<NllLossBackward0>)
tensor(2.9021, grad_fn=<NllLossBackward0>)
tensor(2.7798, grad_fn=<NllLossBackward0>)
tensor(2.9956, grad_fn=<NllLossBackward0>)
tensor(3.4386, grad_fn=<NllLossBackward0>)
tensor(1.7328, grad_fn=<NllLossBackward0>)
tensor(2.7241, grad_fn=<NllLossBackward0>)
tensor(3.3789, grad_fn=<NllLossBackward0>)
tensor(1.7312, grad_fn=<NllLossBackward0>)
tensor(3.7626, grad_fn=<NllLossBackward0>)
tensor(2.5428, grad_fn=<NllLossBackward0>)
tensor(3.35

tensor(2.4676, grad_fn=<NllLossBackward0>)
tensor(2.8936, grad_fn=<NllLossBackward0>)
tensor(2.6515, grad_fn=<NllLossBackward0>)
tensor(3.2528, grad_fn=<NllLossBackward0>)
tensor(3.4703, grad_fn=<NllLossBackward0>)
tensor(2.6545, grad_fn=<NllLossBackward0>)
tensor(2.7512, grad_fn=<NllLossBackward0>)
tensor(3.2446, grad_fn=<NllLossBackward0>)
tensor(2.6769, grad_fn=<NllLossBackward0>)
tensor(3.1994, grad_fn=<NllLossBackward0>)
tensor(2.7154, grad_fn=<NllLossBackward0>)
tensor(2.6524, grad_fn=<NllLossBackward0>)
tensor(2.8417, grad_fn=<NllLossBackward0>)
tensor(3.0397, grad_fn=<NllLossBackward0>)
tensor(1.9877, grad_fn=<NllLossBackward0>)
tensor(3.6670, grad_fn=<NllLossBackward0>)
tensor(2.4200, grad_fn=<NllLossBackward0>)
tensor(3.1715, grad_fn=<NllLossBackward0>)
tensor(3.7877, grad_fn=<NllLossBackward0>)
tensor(2.2304, grad_fn=<NllLossBackward0>)
tensor(3.7697, grad_fn=<NllLossBackward0>)
tensor(3.1793, grad_fn=<NllLossBackward0>)
tensor(3.8245, grad_fn=<NllLossBackward0>)
tensor(3.19

tensor(1.7324, grad_fn=<NllLossBackward0>)
tensor(4.1226, grad_fn=<NllLossBackward0>)
tensor(3.2937, grad_fn=<NllLossBackward0>)
tensor(2.5940, grad_fn=<NllLossBackward0>)
tensor(3.2092, grad_fn=<NllLossBackward0>)
tensor(3.8548, grad_fn=<NllLossBackward0>)
tensor(2.2284, grad_fn=<NllLossBackward0>)
tensor(3.1751, grad_fn=<NllLossBackward0>)
tensor(4.0466, grad_fn=<NllLossBackward0>)
tensor(2.3046, grad_fn=<NllLossBackward0>)
tensor(3.0530, grad_fn=<NllLossBackward0>)
tensor(1.7323, grad_fn=<NllLossBackward0>)
tensor(3.0229, grad_fn=<NllLossBackward0>)
tensor(2.4155, grad_fn=<NllLossBackward0>)
tensor(2.4919, grad_fn=<NllLossBackward0>)
tensor(2.5416, grad_fn=<NllLossBackward0>)
tensor(2.8208, grad_fn=<NllLossBackward0>)
tensor(3.7160, grad_fn=<NllLossBackward0>)
tensor(3.7020, grad_fn=<NllLossBackward0>)
tensor(2.9468, grad_fn=<NllLossBackward0>)
tensor(2.9243, grad_fn=<NllLossBackward0>)
tensor(2.7792, grad_fn=<NllLossBackward0>)
tensor(3.0558, grad_fn=<NllLossBackward0>)
tensor(2.30

tensor(3.2452, grad_fn=<NllLossBackward0>)
tensor(2.3036, grad_fn=<NllLossBackward0>)
tensor(2.4474, grad_fn=<NllLossBackward0>)
tensor(4.2217, grad_fn=<NllLossBackward0>)
tensor(4.1167, grad_fn=<NllLossBackward0>)
tensor(1.9862, grad_fn=<NllLossBackward0>)
tensor(3.8748, grad_fn=<NllLossBackward0>)
tensor(4.0963, grad_fn=<NllLossBackward0>)
tensor(2.9472, grad_fn=<NllLossBackward0>)
tensor(2.4599, grad_fn=<NllLossBackward0>)
tensor(2.7626, grad_fn=<NllLossBackward0>)
tensor(2.4681, grad_fn=<NllLossBackward0>)
tensor(3.1659, grad_fn=<NllLossBackward0>)
tensor(2.9026, grad_fn=<NllLossBackward0>)
tensor(2.9210, grad_fn=<NllLossBackward0>)
tensor(3.3088, grad_fn=<NllLossBackward0>)
tensor(2.5393, grad_fn=<NllLossBackward0>)
tensor(1.7292, grad_fn=<NllLossBackward0>)
tensor(2.3377, grad_fn=<NllLossBackward0>)
tensor(3.3697, grad_fn=<NllLossBackward0>)
tensor(2.4931, grad_fn=<NllLossBackward0>)
tensor(2.2112, grad_fn=<NllLossBackward0>)
tensor(2.4913, grad_fn=<NllLossBackward0>)
tensor(3.21

tensor(2.5265, grad_fn=<NllLossBackward0>)
tensor(2.4728, grad_fn=<NllLossBackward0>)
tensor(1.9839, grad_fn=<NllLossBackward0>)
tensor(3.3322, grad_fn=<NllLossBackward0>)
tensor(2.1960, grad_fn=<NllLossBackward0>)
tensor(3.6589, grad_fn=<NllLossBackward0>)
tensor(3.7761, grad_fn=<NllLossBackward0>)
tensor(2.4066, grad_fn=<NllLossBackward0>)
tensor(2.4345, grad_fn=<NllLossBackward0>)
tensor(2.7530, grad_fn=<NllLossBackward0>)
tensor(1.7066, grad_fn=<NllLossBackward0>)
tensor(3.2240, grad_fn=<NllLossBackward0>)
tensor(2.4561, grad_fn=<NllLossBackward0>)
tensor(2.8755, grad_fn=<NllLossBackward0>)
tensor(3.7552, grad_fn=<NllLossBackward0>)
tensor(3.5597, grad_fn=<NllLossBackward0>)
tensor(1.9836, grad_fn=<NllLossBackward0>)
tensor(2.5240, grad_fn=<NllLossBackward0>)
tensor(3.1400, grad_fn=<NllLossBackward0>)
tensor(2.5431, grad_fn=<NllLossBackward0>)
tensor(2.7639, grad_fn=<NllLossBackward0>)
tensor(2.4711, grad_fn=<NllLossBackward0>)
tensor(2.7626, grad_fn=<NllLossBackward0>)
tensor(2.89

tensor(3.2123, grad_fn=<NllLossBackward0>)
tensor(2.5487, grad_fn=<NllLossBackward0>)
tensor(3.1023, grad_fn=<NllLossBackward0>)
tensor(3.4045, grad_fn=<NllLossBackward0>)
tensor(4.5008, grad_fn=<NllLossBackward0>)
tensor(2.4531, grad_fn=<NllLossBackward0>)
tensor(2.9252, grad_fn=<NllLossBackward0>)
tensor(1.9858, grad_fn=<NllLossBackward0>)
tensor(2.0291, grad_fn=<NllLossBackward0>)
tensor(3.5484, grad_fn=<NllLossBackward0>)
tensor(2.4842, grad_fn=<NllLossBackward0>)
tensor(1.9834, grad_fn=<NllLossBackward0>)
tensor(2.4503, grad_fn=<NllLossBackward0>)
tensor(4.0600, grad_fn=<NllLossBackward0>)
tensor(3.1282, grad_fn=<NllLossBackward0>)
tensor(2.8139, grad_fn=<NllLossBackward0>)
tensor(2.4496, grad_fn=<NllLossBackward0>)
tensor(3.0971, grad_fn=<NllLossBackward0>)
tensor(2.0300, grad_fn=<NllLossBackward0>)
tensor(2.7297, grad_fn=<NllLossBackward0>)
tensor(1.9843, grad_fn=<NllLossBackward0>)
tensor(2.8772, grad_fn=<NllLossBackward0>)
tensor(3.1217, grad_fn=<NllLossBackward0>)
tensor(3.21

tensor(1.9732, grad_fn=<NllLossBackward0>)
tensor(3.5926, grad_fn=<NllLossBackward0>)
tensor(2.9723, grad_fn=<NllLossBackward0>)
tensor(2.3323, grad_fn=<NllLossBackward0>)
tensor(2.8655, grad_fn=<NllLossBackward0>)
tensor(3.2777, grad_fn=<NllLossBackward0>)
tensor(2.5086, grad_fn=<NllLossBackward0>)
tensor(2.4595, grad_fn=<NllLossBackward0>)
tensor(2.2779, grad_fn=<NllLossBackward0>)
tensor(1.9702, grad_fn=<NllLossBackward0>)
tensor(1.9702, grad_fn=<NllLossBackward0>)
tensor(2.0313, grad_fn=<NllLossBackward0>)
tensor(4.2664, grad_fn=<NllLossBackward0>)
tensor(2.4859, grad_fn=<NllLossBackward0>)
tensor(3.2002, grad_fn=<NllLossBackward0>)
tensor(3.6600, grad_fn=<NllLossBackward0>)
tensor(3.8733, grad_fn=<NllLossBackward0>)
tensor(3.2457, grad_fn=<NllLossBackward0>)
tensor(3.1214, grad_fn=<NllLossBackward0>)
tensor(3.6962, grad_fn=<NllLossBackward0>)
tensor(3.0747, grad_fn=<NllLossBackward0>)
tensor(2.7974, grad_fn=<NllLossBackward0>)
tensor(3.1978, grad_fn=<NllLossBackward0>)
tensor(3.56

tensor(3.0637, grad_fn=<NllLossBackward0>)
tensor(2.2983, grad_fn=<NllLossBackward0>)
tensor(3.6825, grad_fn=<NllLossBackward0>)
tensor(2.8072, grad_fn=<NllLossBackward0>)
tensor(3.2387, grad_fn=<NllLossBackward0>)
tensor(2.2749, grad_fn=<NllLossBackward0>)
tensor(3.7769, grad_fn=<NllLossBackward0>)
tensor(2.8048, grad_fn=<NllLossBackward0>)
tensor(3.2650, grad_fn=<NllLossBackward0>)
tensor(1.9777, grad_fn=<NllLossBackward0>)
tensor(2.5923, grad_fn=<NllLossBackward0>)
tensor(2.9384, grad_fn=<NllLossBackward0>)
tensor(3.0987, grad_fn=<NllLossBackward0>)
tensor(2.7904, grad_fn=<NllLossBackward0>)
tensor(1.6868, grad_fn=<NllLossBackward0>)
tensor(2.9242, grad_fn=<NllLossBackward0>)
tensor(3.5897, grad_fn=<NllLossBackward0>)
tensor(2.2953, grad_fn=<NllLossBackward0>)
tensor(3.2068, grad_fn=<NllLossBackward0>)
tensor(2.4819, grad_fn=<NllLossBackward0>)
tensor(3.1563, grad_fn=<NllLossBackward0>)
tensor(1.9746, grad_fn=<NllLossBackward0>)
tensor(3.3683, grad_fn=<NllLossBackward0>)
tensor(3.20

tensor(2.3189, grad_fn=<NllLossBackward0>)
tensor(1.9806, grad_fn=<NllLossBackward0>)
tensor(2.9540, grad_fn=<NllLossBackward0>)
tensor(3.5831, grad_fn=<NllLossBackward0>)
tensor(3.1542, grad_fn=<NllLossBackward0>)
tensor(3.3489, grad_fn=<NllLossBackward0>)
tensor(3.3747, grad_fn=<NllLossBackward0>)
tensor(3.6547, grad_fn=<NllLossBackward0>)
tensor(3.0398, grad_fn=<NllLossBackward0>)
tensor(2.8180, grad_fn=<NllLossBackward0>)
tensor(2.5234, grad_fn=<NllLossBackward0>)
tensor(2.8718, grad_fn=<NllLossBackward0>)
tensor(3.6003, grad_fn=<NllLossBackward0>)
tensor(2.7951, grad_fn=<NllLossBackward0>)
tensor(3.7753, grad_fn=<NllLossBackward0>)
tensor(3.8169, grad_fn=<NllLossBackward0>)
tensor(2.6394, grad_fn=<NllLossBackward0>)
tensor(3.7115, grad_fn=<NllLossBackward0>)
tensor(2.5163, grad_fn=<NllLossBackward0>)
tensor(2.6422, grad_fn=<NllLossBackward0>)
tensor(3.1624, grad_fn=<NllLossBackward0>)
tensor(2.8475, grad_fn=<NllLossBackward0>)
tensor(3.1222, grad_fn=<NllLossBackward0>)
tensor(2.81

tensor(2.6675, grad_fn=<NllLossBackward0>)
tensor(3.3041, grad_fn=<NllLossBackward0>)
tensor(2.6661, grad_fn=<NllLossBackward0>)
tensor(2.7946, grad_fn=<NllLossBackward0>)
tensor(1.9726, grad_fn=<NllLossBackward0>)
tensor(2.6783, grad_fn=<NllLossBackward0>)
tensor(3.7734, grad_fn=<NllLossBackward0>)
tensor(3.2070, grad_fn=<NllLossBackward0>)
tensor(1.9704, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(2.9039, grad_fn=<NllLossBackward0>)
tensor(3.8110, grad_fn=<NllLossBackward0>)
tensor(3.1727, grad_fn=<NllLossBackward0>)
tensor(2.7840, grad_fn=<NllLossBackward0>)
tensor(2.0409, grad_fn=<NllLossBackward0>)
tensor(3.3202, grad_fn=<NllLossBackward0>)
tensor(2.6343, grad_fn=<NllLossBackward0>)
tensor(2.6736, grad_fn=<NllLossBackward0>)
tensor(2.3963, grad_fn=<NllLossBackward0>)
tensor(2.7915, grad_fn=<NllLossBackward0>)
tensor(3.9828, grad_fn=<NllLossBackward0>)
tensor(2.8982, grad_fn=<NllLossBackward0>)
tensor(2.4676, grad_fn=<NllLossBackward0>)
tensor(3.69

tensor(2.3680, grad_fn=<NllLossBackward0>)
tensor(2.8039, grad_fn=<NllLossBackward0>)
tensor(3.2648, grad_fn=<NllLossBackward0>)
tensor(3.4088, grad_fn=<NllLossBackward0>)
tensor(2.4167, grad_fn=<NllLossBackward0>)
tensor(3.7970, grad_fn=<NllLossBackward0>)
tensor(3.2194, grad_fn=<NllLossBackward0>)
tensor(2.9867, grad_fn=<NllLossBackward0>)
tensor(2.7751, grad_fn=<NllLossBackward0>)
tensor(2.3629, grad_fn=<NllLossBackward0>)
tensor(3.1057, grad_fn=<NllLossBackward0>)
tensor(2.2501, grad_fn=<NllLossBackward0>)
tensor(3.6516, grad_fn=<NllLossBackward0>)
tensor(3.5894, grad_fn=<NllLossBackward0>)
tensor(2.6622, grad_fn=<NllLossBackward0>)
tensor(2.7177, grad_fn=<NllLossBackward0>)
tensor(2.9078, grad_fn=<NllLossBackward0>)
tensor(2.4513, grad_fn=<NllLossBackward0>)
tensor(1.7227, grad_fn=<NllLossBackward0>)
tensor(2.0383, grad_fn=<NllLossBackward0>)
tensor(2.8201, grad_fn=<NllLossBackward0>)
tensor(3.1726, grad_fn=<NllLossBackward0>)
tensor(2.4795, grad_fn=<NllLossBackward0>)
tensor(3.06

tensor(3.0497, grad_fn=<NllLossBackward0>)
tensor(2.6719, grad_fn=<NllLossBackward0>)
tensor(2.8703, grad_fn=<NllLossBackward0>)
tensor(2.4053, grad_fn=<NllLossBackward0>)
tensor(3.8021, grad_fn=<NllLossBackward0>)
tensor(1.9718, grad_fn=<NllLossBackward0>)
tensor(3.2143, grad_fn=<NllLossBackward0>)
tensor(3.3675, grad_fn=<NllLossBackward0>)
tensor(2.5292, grad_fn=<NllLossBackward0>)
tensor(2.7437, grad_fn=<NllLossBackward0>)
tensor(3.4904, grad_fn=<NllLossBackward0>)
tensor(2.5149, grad_fn=<NllLossBackward0>)
tensor(3.0360, grad_fn=<NllLossBackward0>)
tensor(4.1010, grad_fn=<NllLossBackward0>)
tensor(2.5262, grad_fn=<NllLossBackward0>)
tensor(2.7211, grad_fn=<NllLossBackward0>)
tensor(2.9118, grad_fn=<NllLossBackward0>)
tensor(2.5253, grad_fn=<NllLossBackward0>)
tensor(3.0502, grad_fn=<NllLossBackward0>)
tensor(2.9211, grad_fn=<NllLossBackward0>)
tensor(1.9719, grad_fn=<NllLossBackward0>)
tensor(2.8071, grad_fn=<NllLossBackward0>)
tensor(2.7187, grad_fn=<NllLossBackward0>)
tensor(2.52

tensor(2.5310, grad_fn=<NllLossBackward0>)
tensor(2.6545, grad_fn=<NllLossBackward0>)
tensor(2.7320, grad_fn=<NllLossBackward0>)
tensor(3.7693, grad_fn=<NllLossBackward0>)
tensor(2.8450, grad_fn=<NllLossBackward0>)
tensor(2.7693, grad_fn=<NllLossBackward0>)
tensor(3.6419, grad_fn=<NllLossBackward0>)
tensor(3.4303, grad_fn=<NllLossBackward0>)
tensor(2.4656, grad_fn=<NllLossBackward0>)
tensor(2.9434, grad_fn=<NllLossBackward0>)
tensor(2.3105, grad_fn=<NllLossBackward0>)
tensor(2.6239, grad_fn=<NllLossBackward0>)
tensor(2.7366, grad_fn=<NllLossBackward0>)
tensor(3.4161, grad_fn=<NllLossBackward0>)
tensor(2.3874, grad_fn=<NllLossBackward0>)
tensor(3.6570, grad_fn=<NllLossBackward0>)
tensor(2.3111, grad_fn=<NllLossBackward0>)
tensor(2.5334, grad_fn=<NllLossBackward0>)
tensor(3.4045, grad_fn=<NllLossBackward0>)
tensor(1.9745, grad_fn=<NllLossBackward0>)
tensor(2.6338, grad_fn=<NllLossBackward0>)
tensor(2.7031, grad_fn=<NllLossBackward0>)
tensor(3.4028, grad_fn=<NllLossBackward0>)
tensor(2.24

tensor(2.6356, grad_fn=<NllLossBackward0>)
tensor(4.1734, grad_fn=<NllLossBackward0>)
tensor(4.6171, grad_fn=<NllLossBackward0>)
tensor(2.1972, grad_fn=<NllLossBackward0>)
tensor(2.6491, grad_fn=<NllLossBackward0>)
tensor(2.9424, grad_fn=<NllLossBackward0>)
tensor(3.3475, grad_fn=<NllLossBackward0>)
tensor(2.7155, grad_fn=<NllLossBackward0>)
tensor(3.3007, grad_fn=<NllLossBackward0>)
tensor(3.2825, grad_fn=<NllLossBackward0>)
tensor(2.5221, grad_fn=<NllLossBackward0>)
tensor(3.2896, grad_fn=<NllLossBackward0>)
tensor(3.1455, grad_fn=<NllLossBackward0>)
tensor(3.2720, grad_fn=<NllLossBackward0>)
tensor(3.9607, grad_fn=<NllLossBackward0>)
tensor(3.2231, grad_fn=<NllLossBackward0>)
tensor(2.2002, grad_fn=<NllLossBackward0>)
tensor(2.7731, grad_fn=<NllLossBackward0>)
tensor(2.4579, grad_fn=<NllLossBackward0>)
tensor(1.7113, grad_fn=<NllLossBackward0>)
tensor(2.4738, grad_fn=<NllLossBackward0>)
tensor(2.7807, grad_fn=<NllLossBackward0>)
tensor(3.2875, grad_fn=<NllLossBackward0>)
tensor(3.83

tensor(3.1738, grad_fn=<NllLossBackward0>)
tensor(2.9189, grad_fn=<NllLossBackward0>)
tensor(3.1287, grad_fn=<NllLossBackward0>)
tensor(1.6990, grad_fn=<NllLossBackward0>)
tensor(2.7115, grad_fn=<NllLossBackward0>)
tensor(3.8108, grad_fn=<NllLossBackward0>)
tensor(3.1110, grad_fn=<NllLossBackward0>)
tensor(2.2967, grad_fn=<NllLossBackward0>)
tensor(2.9416, grad_fn=<NllLossBackward0>)
tensor(3.2807, grad_fn=<NllLossBackward0>)
tensor(2.3997, grad_fn=<NllLossBackward0>)
tensor(3.5231, grad_fn=<NllLossBackward0>)
tensor(4.3865, grad_fn=<NllLossBackward0>)
tensor(2.5298, grad_fn=<NllLossBackward0>)
tensor(3.1267, grad_fn=<NllLossBackward0>)
tensor(2.9167, grad_fn=<NllLossBackward0>)
tensor(2.0347, grad_fn=<NllLossBackward0>)
tensor(2.7053, grad_fn=<NllLossBackward0>)
tensor(1.9613, grad_fn=<NllLossBackward0>)
tensor(3.2708, grad_fn=<NllLossBackward0>)
tensor(3.2747, grad_fn=<NllLossBackward0>)
tensor(2.7236, grad_fn=<NllLossBackward0>)
tensor(2.4668, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(1.9589, grad_fn=<NllLossBackward0>)
tensor(1.6946, grad_fn=<NllLossBackward0>)
tensor(3.2808, grad_fn=<NllLossBackward0>)
tensor(2.4387, grad_fn=<NllLossBackward0>)
tensor(3.1495, grad_fn=<NllLossBackward0>)
tensor(2.2985, grad_fn=<NllLossBackward0>)
tensor(2.7253, grad_fn=<NllLossBackward0>)
tensor(3.3886, grad_fn=<NllLossBackward0>)
tensor(3.1785, grad_fn=<NllLossBackward0>)
tensor(1.6928, grad_fn=<NllLossBackward0>)
tensor(2.0315, grad_fn=<NllLossBackward0>)
tensor(2.2209, grad_fn=<NllLossBackward0>)
tensor(4.3421, grad_fn=<NllLossBackward0>)
tensor(2.7780, grad_fn=<NllLossBackward0>)
tensor(2.7779, grad_fn=<NllLossBackward0>)
tensor(3.7510, grad_fn=<NllLossBackward0>)
tensor(3.4593, grad_fn=<NllLossBackward0>)
tensor(2.7020, grad_fn=<NllLossBackward0>)
tensor(2.1996, grad_fn=<NllLossBackward0>)
tensor(3.4399, grad_fn=<NllLossBackward0>)
tensor(1.9555, grad_fn=<NllLossBackward0>)
tensor(3.6487, grad_fn=<NllLossBackward0>)
tensor(1.9541, grad_fn=<NllLossBackward0>)
tensor(3.06

tensor(4.0394, grad_fn=<NllLossBackward0>)
tensor(2.0254, grad_fn=<NllLossBackward0>)
tensor(3.6901, grad_fn=<NllLossBackward0>)
tensor(3.8061, grad_fn=<NllLossBackward0>)
tensor(3.7395, grad_fn=<NllLossBackward0>)
tensor(3.2215, grad_fn=<NllLossBackward0>)
tensor(3.6411, grad_fn=<NllLossBackward0>)
tensor(2.8353, grad_fn=<NllLossBackward0>)
tensor(3.1002, grad_fn=<NllLossBackward0>)
tensor(3.7756, grad_fn=<NllLossBackward0>)
tensor(3.0218, grad_fn=<NllLossBackward0>)
tensor(3.1611, grad_fn=<NllLossBackward0>)
tensor(3.7020, grad_fn=<NllLossBackward0>)
tensor(2.7327, grad_fn=<NllLossBackward0>)
tensor(2.6306, grad_fn=<NllLossBackward0>)
tensor(3.1357, grad_fn=<NllLossBackward0>)
tensor(3.5086, grad_fn=<NllLossBackward0>)
tensor(2.7897, grad_fn=<NllLossBackward0>)
tensor(2.0322, grad_fn=<NllLossBackward0>)
tensor(2.7770, grad_fn=<NllLossBackward0>)
tensor(2.5211, grad_fn=<NllLossBackward0>)
tensor(2.0288, grad_fn=<NllLossBackward0>)
tensor(3.4092, grad_fn=<NllLossBackward0>)
tensor(2.28

tensor(2.2843, grad_fn=<NllLossBackward0>)
tensor(1.7245, grad_fn=<NllLossBackward0>)
tensor(2.7691, grad_fn=<NllLossBackward0>)
tensor(3.1338, grad_fn=<NllLossBackward0>)
tensor(2.6974, grad_fn=<NllLossBackward0>)
tensor(2.7881, grad_fn=<NllLossBackward0>)
tensor(2.8971, grad_fn=<NllLossBackward0>)
tensor(2.3089, grad_fn=<NllLossBackward0>)
tensor(1.7216, grad_fn=<NllLossBackward0>)
tensor(1.7184, grad_fn=<NllLossBackward0>)
tensor(2.3004, grad_fn=<NllLossBackward0>)
tensor(1.9663, grad_fn=<NllLossBackward0>)
tensor(2.3841, grad_fn=<NllLossBackward0>)
tensor(3.3890, grad_fn=<NllLossBackward0>)
tensor(4.3239, grad_fn=<NllLossBackward0>)
tensor(2.9503, grad_fn=<NllLossBackward0>)
tensor(2.7176, grad_fn=<NllLossBackward0>)
tensor(2.8068, grad_fn=<NllLossBackward0>)
tensor(3.1102, grad_fn=<NllLossBackward0>)
tensor(2.5242, grad_fn=<NllLossBackward0>)
tensor(2.2098, grad_fn=<NllLossBackward0>)
tensor(1.7132, grad_fn=<NllLossBackward0>)
tensor(3.2181, grad_fn=<NllLossBackward0>)
tensor(3.43

tensor(2.7274, grad_fn=<NllLossBackward0>)
tensor(2.7544, grad_fn=<NllLossBackward0>)
tensor(2.7022, grad_fn=<NllLossBackward0>)
tensor(1.9694, grad_fn=<NllLossBackward0>)
tensor(3.2231, grad_fn=<NllLossBackward0>)
tensor(1.9658, grad_fn=<NllLossBackward0>)
tensor(2.8324, grad_fn=<NllLossBackward0>)
tensor(3.7901, grad_fn=<NllLossBackward0>)
tensor(3.0781, grad_fn=<NllLossBackward0>)
tensor(2.7809, grad_fn=<NllLossBackward0>)
tensor(2.2057, grad_fn=<NllLossBackward0>)
tensor(3.1867, grad_fn=<NllLossBackward0>)
tensor(2.9127, grad_fn=<NllLossBackward0>)
tensor(3.4007, grad_fn=<NllLossBackward0>)
tensor(2.7289, grad_fn=<NllLossBackward0>)
tensor(3.2441, grad_fn=<NllLossBackward0>)
tensor(3.6964, grad_fn=<NllLossBackward0>)
tensor(2.9633, grad_fn=<NllLossBackward0>)
tensor(2.9963, grad_fn=<NllLossBackward0>)
tensor(3.3260, grad_fn=<NllLossBackward0>)
tensor(3.1106, grad_fn=<NllLossBackward0>)
tensor(3.2145, grad_fn=<NllLossBackward0>)
tensor(3.1747, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(3.6392, grad_fn=<NllLossBackward0>)
tensor(3.1995, grad_fn=<NllLossBackward0>)
tensor(3.2301, grad_fn=<NllLossBackward0>)
tensor(4.6401, grad_fn=<NllLossBackward0>)
tensor(3.1585, grad_fn=<NllLossBackward0>)
tensor(2.9496, grad_fn=<NllLossBackward0>)
tensor(3.7832, grad_fn=<NllLossBackward0>)
tensor(3.1554, grad_fn=<NllLossBackward0>)
tensor(3.2717, grad_fn=<NllLossBackward0>)
tensor(2.4330, grad_fn=<NllLossBackward0>)
tensor(3.1651, grad_fn=<NllLossBackward0>)
tensor(3.0073, grad_fn=<NllLossBackward0>)
tensor(3.0878, grad_fn=<NllLossBackward0>)
tensor(2.5620, grad_fn=<NllLossBackward0>)
tensor(2.3882, grad_fn=<NllLossBackward0>)
tensor(2.4321, grad_fn=<NllLossBackward0>)
tensor(3.2161, grad_fn=<NllLossBackward0>)
tensor(2.8872, grad_fn=<NllLossBackward0>)
tensor(2.4629, grad_fn=<NllLossBackward0>)
tensor(3.1584, grad_fn=<NllLossBackward0>)
tensor(3.2679, grad_fn=<NllLossBackward0>)
tensor(2.0150, grad_fn=<NllLossBackward0>)
tensor(3.8907, grad_fn=<NllLossBackward0>)
tensor(2.77

tensor(2.5152, grad_fn=<NllLossBackward0>)
tensor(3.8336, grad_fn=<NllLossBackward0>)
tensor(2.9285, grad_fn=<NllLossBackward0>)
tensor(3.1441, grad_fn=<NllLossBackward0>)
tensor(2.5134, grad_fn=<NllLossBackward0>)
tensor(3.8386, grad_fn=<NllLossBackward0>)
tensor(3.2062, grad_fn=<NllLossBackward0>)
tensor(3.2114, grad_fn=<NllLossBackward0>)
tensor(2.7352, grad_fn=<NllLossBackward0>)
tensor(2.3081, grad_fn=<NllLossBackward0>)
tensor(2.0350, grad_fn=<NllLossBackward0>)
tensor(1.9731, grad_fn=<NllLossBackward0>)
tensor(2.4602, grad_fn=<NllLossBackward0>)
tensor(2.5489, grad_fn=<NllLossBackward0>)
tensor(3.2115, grad_fn=<NllLossBackward0>)
tensor(3.7688, grad_fn=<NllLossBackward0>)
tensor(3.5507, grad_fn=<NllLossBackward0>)
tensor(3.2644, grad_fn=<NllLossBackward0>)
tensor(2.8256, grad_fn=<NllLossBackward0>)
tensor(2.4035, grad_fn=<NllLossBackward0>)
tensor(2.0299, grad_fn=<NllLossBackward0>)
tensor(3.1998, grad_fn=<NllLossBackward0>)
tensor(2.5226, grad_fn=<NllLossBackward0>)
tensor(3.08

tensor(2.8322, grad_fn=<NllLossBackward0>)
tensor(3.2135, grad_fn=<NllLossBackward0>)
tensor(2.6760, grad_fn=<NllLossBackward0>)
tensor(2.7457, grad_fn=<NllLossBackward0>)
tensor(4.2646, grad_fn=<NllLossBackward0>)
tensor(2.2075, grad_fn=<NllLossBackward0>)
tensor(2.4747, grad_fn=<NllLossBackward0>)
tensor(3.6103, grad_fn=<NllLossBackward0>)
tensor(3.0691, grad_fn=<NllLossBackward0>)
tensor(4.4371, grad_fn=<NllLossBackward0>)
tensor(2.2180, grad_fn=<NllLossBackward0>)
tensor(3.3214, grad_fn=<NllLossBackward0>)
tensor(2.7445, grad_fn=<NllLossBackward0>)
tensor(2.4286, grad_fn=<NllLossBackward0>)
tensor(3.6220, grad_fn=<NllLossBackward0>)
tensor(3.8058, grad_fn=<NllLossBackward0>)
tensor(2.4095, grad_fn=<NllLossBackward0>)
tensor(3.0213, grad_fn=<NllLossBackward0>)
tensor(2.6408, grad_fn=<NllLossBackward0>)
tensor(2.9098, grad_fn=<NllLossBackward0>)
tensor(2.5989, grad_fn=<NllLossBackward0>)
tensor(1.9619, grad_fn=<NllLossBackward0>)
tensor(2.8133, grad_fn=<NllLossBackward0>)
tensor(2.90

tensor(3.8685, grad_fn=<NllLossBackward0>)
tensor(3.2089, grad_fn=<NllLossBackward0>)
tensor(2.0268, grad_fn=<NllLossBackward0>)
tensor(2.0244, grad_fn=<NllLossBackward0>)
tensor(3.2516, grad_fn=<NllLossBackward0>)
tensor(2.2822, grad_fn=<NllLossBackward0>)
tensor(2.0255, grad_fn=<NllLossBackward0>)
tensor(2.7294, grad_fn=<NllLossBackward0>)
tensor(3.6363, grad_fn=<NllLossBackward0>)
tensor(4.4552, grad_fn=<NllLossBackward0>)
tensor(3.5863, grad_fn=<NllLossBackward0>)
tensor(2.9505, grad_fn=<NllLossBackward0>)
tensor(2.4313, grad_fn=<NllLossBackward0>)
tensor(2.8076, grad_fn=<NllLossBackward0>)
tensor(2.2123, grad_fn=<NllLossBackward0>)
tensor(3.6988, grad_fn=<NllLossBackward0>)
tensor(1.9666, grad_fn=<NllLossBackward0>)
tensor(2.7663, grad_fn=<NllLossBackward0>)
tensor(1.9663, grad_fn=<NllLossBackward0>)
tensor(3.2066, grad_fn=<NllLossBackward0>)
tensor(2.2100, grad_fn=<NllLossBackward0>)
tensor(2.8569, grad_fn=<NllLossBackward0>)
tensor(2.0229, grad_fn=<NllLossBackward0>)
tensor(3.19

tensor(3.4854, grad_fn=<NllLossBackward0>)
tensor(2.3278, grad_fn=<NllLossBackward0>)
tensor(1.7107, grad_fn=<NllLossBackward0>)
tensor(3.2481, grad_fn=<NllLossBackward0>)
tensor(2.5554, grad_fn=<NllLossBackward0>)
tensor(3.7669, grad_fn=<NllLossBackward0>)
tensor(2.9458, grad_fn=<NllLossBackward0>)
tensor(3.0784, grad_fn=<NllLossBackward0>)
tensor(2.3248, grad_fn=<NllLossBackward0>)
tensor(1.7079, grad_fn=<NllLossBackward0>)
tensor(2.0126, grad_fn=<NllLossBackward0>)
tensor(3.5826, grad_fn=<NllLossBackward0>)
tensor(2.5168, grad_fn=<NllLossBackward0>)
tensor(3.6573, grad_fn=<NllLossBackward0>)
tensor(2.7719, grad_fn=<NllLossBackward0>)
tensor(3.4181, grad_fn=<NllLossBackward0>)
tensor(2.3869, grad_fn=<NllLossBackward0>)
tensor(3.2022, grad_fn=<NllLossBackward0>)
tensor(3.6949, grad_fn=<NllLossBackward0>)
tensor(3.1952, grad_fn=<NllLossBackward0>)
tensor(2.9750, grad_fn=<NllLossBackward0>)
tensor(3.2059, grad_fn=<NllLossBackward0>)
tensor(3.0593, grad_fn=<NllLossBackward0>)
tensor(3.24

tensor(3.1712, grad_fn=<NllLossBackward0>)
tensor(3.1155, grad_fn=<NllLossBackward0>)
tensor(3.1992, grad_fn=<NllLossBackward0>)
tensor(1.9817, grad_fn=<NllLossBackward0>)
tensor(1.7256, grad_fn=<NllLossBackward0>)
tensor(1.7224, grad_fn=<NllLossBackward0>)
tensor(1.7192, grad_fn=<NllLossBackward0>)
tensor(2.3890, grad_fn=<NllLossBackward0>)
tensor(3.7373, grad_fn=<NllLossBackward0>)
tensor(2.4408, grad_fn=<NllLossBackward0>)
tensor(3.0194, grad_fn=<NllLossBackward0>)
tensor(2.6680, grad_fn=<NllLossBackward0>)
tensor(4.1984, grad_fn=<NllLossBackward0>)
tensor(4.1941, grad_fn=<NllLossBackward0>)
tensor(3.0869, grad_fn=<NllLossBackward0>)
tensor(3.0871, grad_fn=<NllLossBackward0>)
tensor(2.4766, grad_fn=<NllLossBackward0>)
tensor(3.6284, grad_fn=<NllLossBackward0>)
tensor(2.2789, grad_fn=<NllLossBackward0>)
tensor(2.2153, grad_fn=<NllLossBackward0>)
tensor(2.9389, grad_fn=<NllLossBackward0>)
tensor(2.4376, grad_fn=<NllLossBackward0>)
tensor(2.2121, grad_fn=<NllLossBackward0>)
tensor(2.50

tensor(2.6865, grad_fn=<NllLossBackward0>)
tensor(3.4652, grad_fn=<NllLossBackward0>)
tensor(3.4823, grad_fn=<NllLossBackward0>)
tensor(3.7071, grad_fn=<NllLossBackward0>)
tensor(4.2673, grad_fn=<NllLossBackward0>)
tensor(3.1916, grad_fn=<NllLossBackward0>)
tensor(2.7921, grad_fn=<NllLossBackward0>)
tensor(3.0936, grad_fn=<NllLossBackward0>)
tensor(2.7005, grad_fn=<NllLossBackward0>)
tensor(3.1840, grad_fn=<NllLossBackward0>)
tensor(2.8314, grad_fn=<NllLossBackward0>)
tensor(2.7648, grad_fn=<NllLossBackward0>)
tensor(2.6990, grad_fn=<NllLossBackward0>)
tensor(2.9591, grad_fn=<NllLossBackward0>)
tensor(2.6532, grad_fn=<NllLossBackward0>)
tensor(2.2704, grad_fn=<NllLossBackward0>)
tensor(2.9669, grad_fn=<NllLossBackward0>)
tensor(2.8811, grad_fn=<NllLossBackward0>)
tensor(3.5692, grad_fn=<NllLossBackward0>)
tensor(2.8886, grad_fn=<NllLossBackward0>)
tensor(3.2483, grad_fn=<NllLossBackward0>)
tensor(3.2828, grad_fn=<NllLossBackward0>)
tensor(3.1674, grad_fn=<NllLossBackward0>)
tensor(3.22

tensor(3.8384, grad_fn=<NllLossBackward0>)
tensor(3.1727, grad_fn=<NllLossBackward0>)
tensor(3.1710, grad_fn=<NllLossBackward0>)
tensor(3.2480, grad_fn=<NllLossBackward0>)
tensor(3.0935, grad_fn=<NllLossBackward0>)
tensor(2.6875, grad_fn=<NllLossBackward0>)
tensor(3.2114, grad_fn=<NllLossBackward0>)
tensor(1.7317, grad_fn=<NllLossBackward0>)
tensor(2.5926, grad_fn=<NllLossBackward0>)
tensor(2.9875, grad_fn=<NllLossBackward0>)
tensor(2.5879, grad_fn=<NllLossBackward0>)
tensor(3.4446, grad_fn=<NllLossBackward0>)
tensor(1.9892, grad_fn=<NllLossBackward0>)
tensor(3.5682, grad_fn=<NllLossBackward0>)
tensor(3.4232, grad_fn=<NllLossBackward0>)
tensor(2.5093, grad_fn=<NllLossBackward0>)
tensor(3.5855, grad_fn=<NllLossBackward0>)
tensor(4.0330, grad_fn=<NllLossBackward0>)
tensor(2.9649, grad_fn=<NllLossBackward0>)
tensor(3.5886, grad_fn=<NllLossBackward0>)
tensor(3.8988, grad_fn=<NllLossBackward0>)
tensor(3.6891, grad_fn=<NllLossBackward0>)
tensor(1.7342, grad_fn=<NllLossBackward0>)
tensor(3.66

tensor(3.1047, grad_fn=<NllLossBackward0>)
tensor(3.1802, grad_fn=<NllLossBackward0>)
tensor(2.7913, grad_fn=<NllLossBackward0>)
tensor(4.4600, grad_fn=<NllLossBackward0>)
tensor(2.0017, grad_fn=<NllLossBackward0>)
tensor(3.2167, grad_fn=<NllLossBackward0>)
tensor(1.9992, grad_fn=<NllLossBackward0>)
tensor(3.0407, grad_fn=<NllLossBackward0>)
tensor(3.6436, grad_fn=<NllLossBackward0>)
tensor(3.0695, grad_fn=<NllLossBackward0>)
tensor(2.4477, grad_fn=<NllLossBackward0>)
tensor(3.4794, grad_fn=<NllLossBackward0>)
tensor(3.2037, grad_fn=<NllLossBackward0>)
tensor(3.2150, grad_fn=<NllLossBackward0>)
tensor(2.7260, grad_fn=<NllLossBackward0>)
tensor(2.2967, grad_fn=<NllLossBackward0>)
tensor(2.5968, grad_fn=<NllLossBackward0>)
tensor(3.2859, grad_fn=<NllLossBackward0>)
tensor(2.9507, grad_fn=<NllLossBackward0>)
tensor(2.9036, grad_fn=<NllLossBackward0>)
tensor(2.9031, grad_fn=<NllLossBackward0>)
tensor(3.2447, grad_fn=<NllLossBackward0>)
tensor(2.6734, grad_fn=<NllLossBackward0>)
tensor(3.67

tensor(2.7546, grad_fn=<NllLossBackward0>)
tensor(2.2302, grad_fn=<NllLossBackward0>)
tensor(3.2262, grad_fn=<NllLossBackward0>)
tensor(3.6808, grad_fn=<NllLossBackward0>)
tensor(2.8829, grad_fn=<NllLossBackward0>)
tensor(3.4491, grad_fn=<NllLossBackward0>)
tensor(3.5107, grad_fn=<NllLossBackward0>)
tensor(2.4355, grad_fn=<NllLossBackward0>)
tensor(2.2314, grad_fn=<NllLossBackward0>)
tensor(1.7357, grad_fn=<NllLossBackward0>)
tensor(2.3879, grad_fn=<NllLossBackward0>)
tensor(2.7633, grad_fn=<NllLossBackward0>)
tensor(2.3046, grad_fn=<NllLossBackward0>)
tensor(4.1220, grad_fn=<NllLossBackward0>)
tensor(3.0826, grad_fn=<NllLossBackward0>)
tensor(3.5633, grad_fn=<NllLossBackward0>)
tensor(1.7326, grad_fn=<NllLossBackward0>)
tensor(3.6688, grad_fn=<NllLossBackward0>)
tensor(1.9971, grad_fn=<NllLossBackward0>)
tensor(2.8203, grad_fn=<NllLossBackward0>)
tensor(2.4583, grad_fn=<NllLossBackward0>)
tensor(3.2987, grad_fn=<NllLossBackward0>)
tensor(3.2292, grad_fn=<NllLossBackward0>)
tensor(2.76

tensor(3.5282, grad_fn=<NllLossBackward0>)
tensor(3.1034, grad_fn=<NllLossBackward0>)
tensor(3.8244, grad_fn=<NllLossBackward0>)
tensor(4.2691, grad_fn=<NllLossBackward0>)
tensor(3.4781, grad_fn=<NllLossBackward0>)
tensor(3.1073, grad_fn=<NllLossBackward0>)
tensor(3.6003, grad_fn=<NllLossBackward0>)
tensor(3.2438, grad_fn=<NllLossBackward0>)
tensor(4.3553, grad_fn=<NllLossBackward0>)
tensor(3.3067, grad_fn=<NllLossBackward0>)
tensor(2.3073, grad_fn=<NllLossBackward0>)
tensor(2.0379, grad_fn=<NllLossBackward0>)
tensor(2.2982, grad_fn=<NllLossBackward0>)
tensor(1.9961, grad_fn=<NllLossBackward0>)
tensor(3.6206, grad_fn=<NllLossBackward0>)
tensor(2.2275, grad_fn=<NllLossBackward0>)
tensor(3.8987, grad_fn=<NllLossBackward0>)
tensor(3.4700, grad_fn=<NllLossBackward0>)
tensor(3.7950, grad_fn=<NllLossBackward0>)
tensor(3.4653, grad_fn=<NllLossBackward0>)
tensor(2.8501, grad_fn=<NllLossBackward0>)
tensor(3.3926, grad_fn=<NllLossBackward0>)
tensor(3.0008, grad_fn=<NllLossBackward0>)
tensor(2.52

tensor(2.9906, grad_fn=<NllLossBackward0>)
tensor(3.3212, grad_fn=<NllLossBackward0>)
tensor(2.4337, grad_fn=<NllLossBackward0>)
tensor(2.6708, grad_fn=<NllLossBackward0>)
tensor(3.0780, grad_fn=<NllLossBackward0>)
tensor(4.3974, grad_fn=<NllLossBackward0>)
tensor(3.6446, grad_fn=<NllLossBackward0>)
tensor(2.7341, grad_fn=<NllLossBackward0>)
tensor(2.5689, grad_fn=<NllLossBackward0>)
tensor(2.4933, grad_fn=<NllLossBackward0>)
tensor(2.9373, grad_fn=<NllLossBackward0>)
tensor(1.7183, grad_fn=<NllLossBackward0>)
tensor(3.1414, grad_fn=<NllLossBackward0>)
tensor(3.0467, grad_fn=<NllLossBackward0>)
tensor(2.5243, grad_fn=<NllLossBackward0>)
tensor(2.8276, grad_fn=<NllLossBackward0>)
tensor(2.0323, grad_fn=<NllLossBackward0>)
tensor(3.2832, grad_fn=<NllLossBackward0>)
tensor(2.0021, grad_fn=<NllLossBackward0>)
tensor(2.7044, grad_fn=<NllLossBackward0>)
tensor(2.8022, grad_fn=<NllLossBackward0>)
tensor(2.9696, grad_fn=<NllLossBackward0>)
tensor(2.7815, grad_fn=<NllLossBackward0>)
tensor(2.49

tensor(2.7936, grad_fn=<NllLossBackward0>)
tensor(2.0402, grad_fn=<NllLossBackward0>)
tensor(2.4945, grad_fn=<NllLossBackward0>)
tensor(3.2262, grad_fn=<NllLossBackward0>)
tensor(2.5110, grad_fn=<NllLossBackward0>)
tensor(2.4192, grad_fn=<NllLossBackward0>)
tensor(3.6019, grad_fn=<NllLossBackward0>)
tensor(3.3559, grad_fn=<NllLossBackward0>)
tensor(2.5633, grad_fn=<NllLossBackward0>)
tensor(3.1046, grad_fn=<NllLossBackward0>)
tensor(2.8878, grad_fn=<NllLossBackward0>)
tensor(2.4976, grad_fn=<NllLossBackward0>)
tensor(2.8191, grad_fn=<NllLossBackward0>)
tensor(2.7730, grad_fn=<NllLossBackward0>)
tensor(3.5314, grad_fn=<NllLossBackward0>)
tensor(2.7063, grad_fn=<NllLossBackward0>)
tensor(4.1263, grad_fn=<NllLossBackward0>)
tensor(2.5119, grad_fn=<NllLossBackward0>)
tensor(3.2022, grad_fn=<NllLossBackward0>)
tensor(2.9828, grad_fn=<NllLossBackward0>)
tensor(2.9389, grad_fn=<NllLossBackward0>)
tensor(3.2093, grad_fn=<NllLossBackward0>)
tensor(2.0442, grad_fn=<NllLossBackward0>)
tensor(2.89

tensor(3.0600, grad_fn=<NllLossBackward0>)
tensor(2.6404, grad_fn=<NllLossBackward0>)
tensor(2.2263, grad_fn=<NllLossBackward0>)
tensor(2.6931, grad_fn=<NllLossBackward0>)
tensor(2.8078, grad_fn=<NllLossBackward0>)
tensor(1.9922, grad_fn=<NllLossBackward0>)
tensor(2.8055, grad_fn=<NllLossBackward0>)
tensor(1.7374, grad_fn=<NllLossBackward0>)
tensor(2.9805, grad_fn=<NllLossBackward0>)
tensor(2.5820, grad_fn=<NllLossBackward0>)
tensor(2.9283, grad_fn=<NllLossBackward0>)
tensor(2.3051, grad_fn=<NllLossBackward0>)
tensor(2.8062, grad_fn=<NllLossBackward0>)
tensor(1.7337, grad_fn=<NllLossBackward0>)
tensor(2.8937, grad_fn=<NllLossBackward0>)
tensor(3.6645, grad_fn=<NllLossBackward0>)
tensor(2.2241, grad_fn=<NllLossBackward0>)
tensor(2.6034, grad_fn=<NllLossBackward0>)
tensor(2.9397, grad_fn=<NllLossBackward0>)
tensor(2.5236, grad_fn=<NllLossBackward0>)
tensor(3.3920, grad_fn=<NllLossBackward0>)
tensor(2.7418, grad_fn=<NllLossBackward0>)
tensor(3.8301, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(3.1968, grad_fn=<NllLossBackward0>)
tensor(3.2449, grad_fn=<NllLossBackward0>)
tensor(2.2149, grad_fn=<NllLossBackward0>)
tensor(2.4274, grad_fn=<NllLossBackward0>)
tensor(2.9273, grad_fn=<NllLossBackward0>)
tensor(2.3592, grad_fn=<NllLossBackward0>)
tensor(2.3042, grad_fn=<NllLossBackward0>)
tensor(2.4769, grad_fn=<NllLossBackward0>)
tensor(2.7376, grad_fn=<NllLossBackward0>)
tensor(4.0590, grad_fn=<NllLossBackward0>)
tensor(2.8358, grad_fn=<NllLossBackward0>)
tensor(3.2150, grad_fn=<NllLossBackward0>)
tensor(3.1510, grad_fn=<NllLossBackward0>)
tensor(2.4200, grad_fn=<NllLossBackward0>)
tensor(2.6492, grad_fn=<NllLossBackward0>)
tensor(2.4734, grad_fn=<NllLossBackward0>)
tensor(3.4168, grad_fn=<NllLossBackward0>)
tensor(2.6768, grad_fn=<NllLossBackward0>)
tensor(3.6278, grad_fn=<NllLossBackward0>)
tensor(3.6883, grad_fn=<NllLossBackward0>)
tensor(2.7252, grad_fn=<NllLossBackward0>)
tensor(3.3227, grad_fn=<NllLossBackward0>)
tensor(3.1400, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(3.8432, grad_fn=<NllLossBackward0>)
tensor(3.2085, grad_fn=<NllLossBackward0>)
tensor(3.3583, grad_fn=<NllLossBackward0>)
tensor(2.4671, grad_fn=<NllLossBackward0>)
tensor(1.7438, grad_fn=<NllLossBackward0>)
tensor(2.5925, grad_fn=<NllLossBackward0>)
tensor(1.9962, grad_fn=<NllLossBackward0>)
tensor(2.6489, grad_fn=<NllLossBackward0>)
tensor(2.5254, grad_fn=<NllLossBackward0>)
tensor(3.5788, grad_fn=<NllLossBackward0>)
tensor(2.8300, grad_fn=<NllLossBackward0>)
tensor(2.3988, grad_fn=<NllLossBackward0>)
tensor(3.2576, grad_fn=<NllLossBackward0>)
tensor(3.8179, grad_fn=<NllLossBackward0>)
tensor(4.1099, grad_fn=<NllLossBackward0>)
tensor(3.1817, grad_fn=<NllLossBackward0>)
tensor(3.5692, grad_fn=<NllLossBackward0>)
tensor(2.8885, grad_fn=<NllLossBackward0>)
tensor(3.2223, grad_fn=<NllLossBackward0>)
tensor(2.5396, grad_fn=<NllLossBackward0>)
tensor(2.5946, grad_fn=<NllLossBackward0>)
tensor(2.3067, grad_fn=<NllLossBackward0>)
tensor(3.6132, grad_fn=<NllLossBackward0>)
tensor(2.53

tensor(3.0674, grad_fn=<NllLossBackward0>)
tensor(2.7301, grad_fn=<NllLossBackward0>)
tensor(3.2438, grad_fn=<NllLossBackward0>)
tensor(3.1605, grad_fn=<NllLossBackward0>)
tensor(3.7237, grad_fn=<NllLossBackward0>)
tensor(2.2997, grad_fn=<NllLossBackward0>)
tensor(3.2705, grad_fn=<NllLossBackward0>)
tensor(2.2872, grad_fn=<NllLossBackward0>)
tensor(3.0789, grad_fn=<NllLossBackward0>)
tensor(2.2189, grad_fn=<NllLossBackward0>)
tensor(3.5909, grad_fn=<NllLossBackward0>)
tensor(3.1218, grad_fn=<NllLossBackward0>)
tensor(3.9813, grad_fn=<NllLossBackward0>)
tensor(3.2112, grad_fn=<NllLossBackward0>)
tensor(2.7568, grad_fn=<NllLossBackward0>)
tensor(1.7492, grad_fn=<NllLossBackward0>)
tensor(3.5794, grad_fn=<NllLossBackward0>)
tensor(3.1325, grad_fn=<NllLossBackward0>)
tensor(3.0678, grad_fn=<NllLossBackward0>)
tensor(2.2345, grad_fn=<NllLossBackward0>)
tensor(2.2189, grad_fn=<NllLossBackward0>)
tensor(2.5969, grad_fn=<NllLossBackward0>)
tensor(2.5244, grad_fn=<NllLossBackward0>)
tensor(3.20

tensor(2.6659, grad_fn=<NllLossBackward0>)
tensor(4.4659, grad_fn=<NllLossBackward0>)
tensor(2.8903, grad_fn=<NllLossBackward0>)
tensor(3.7322, grad_fn=<NllLossBackward0>)
tensor(2.2285, grad_fn=<NllLossBackward0>)
tensor(2.3807, grad_fn=<NllLossBackward0>)
tensor(2.9066, grad_fn=<NllLossBackward0>)
tensor(2.4226, grad_fn=<NllLossBackward0>)
tensor(4.0166, grad_fn=<NllLossBackward0>)
tensor(2.9162, grad_fn=<NllLossBackward0>)
tensor(3.1450, grad_fn=<NllLossBackward0>)
tensor(2.6644, grad_fn=<NllLossBackward0>)
tensor(3.3972, grad_fn=<NllLossBackward0>)
tensor(2.9465, grad_fn=<NllLossBackward0>)
tensor(3.2701, grad_fn=<NllLossBackward0>)
tensor(2.3123, grad_fn=<NllLossBackward0>)
tensor(2.9654, grad_fn=<NllLossBackward0>)
tensor(2.5865, grad_fn=<NllLossBackward0>)
tensor(3.7243, grad_fn=<NllLossBackward0>)
tensor(3.4575, grad_fn=<NllLossBackward0>)
tensor(3.5164, grad_fn=<NllLossBackward0>)
tensor(3.0830, grad_fn=<NllLossBackward0>)
tensor(2.0669, grad_fn=<NllLossBackward0>)
tensor(4.32

tensor(3.4238, grad_fn=<NllLossBackward0>)
tensor(3.4233, grad_fn=<NllLossBackward0>)
tensor(3.7189, grad_fn=<NllLossBackward0>)
tensor(2.4016, grad_fn=<NllLossBackward0>)
tensor(4.0827, grad_fn=<NllLossBackward0>)
tensor(3.4536, grad_fn=<NllLossBackward0>)
tensor(2.6813, grad_fn=<NllLossBackward0>)
tensor(2.0234, grad_fn=<NllLossBackward0>)
tensor(3.0529, grad_fn=<NllLossBackward0>)
tensor(2.4620, grad_fn=<NllLossBackward0>)
tensor(3.1952, grad_fn=<NllLossBackward0>)
tensor(2.2382, grad_fn=<NllLossBackward0>)
tensor(2.6708, grad_fn=<NllLossBackward0>)
tensor(2.0222, grad_fn=<NllLossBackward0>)
tensor(3.9098, grad_fn=<NllLossBackward0>)
tensor(2.3082, grad_fn=<NllLossBackward0>)
tensor(2.5251, grad_fn=<NllLossBackward0>)
tensor(2.6006, grad_fn=<NllLossBackward0>)
tensor(2.6852, grad_fn=<NllLossBackward0>)
tensor(3.2591, grad_fn=<NllLossBackward0>)
tensor(2.4549, grad_fn=<NllLossBackward0>)
tensor(2.3079, grad_fn=<NllLossBackward0>)
tensor(2.3113, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(2.3188, grad_fn=<NllLossBackward0>)
tensor(2.8244, grad_fn=<NllLossBackward0>)
tensor(3.0516, grad_fn=<NllLossBackward0>)
tensor(3.6515, grad_fn=<NllLossBackward0>)
tensor(2.6474, grad_fn=<NllLossBackward0>)
tensor(3.0987, grad_fn=<NllLossBackward0>)
tensor(2.0852, grad_fn=<NllLossBackward0>)
tensor(2.9013, grad_fn=<NllLossBackward0>)
tensor(2.0242, grad_fn=<NllLossBackward0>)
tensor(2.3808, grad_fn=<NllLossBackward0>)
tensor(3.2127, grad_fn=<NllLossBackward0>)
tensor(1.7940, grad_fn=<NllLossBackward0>)
tensor(3.1643, grad_fn=<NllLossBackward0>)
tensor(2.2541, grad_fn=<NllLossBackward0>)
tensor(2.9575, grad_fn=<NllLossBackward0>)
tensor(3.4515, grad_fn=<NllLossBackward0>)
tensor(3.5132, grad_fn=<NllLossBackward0>)
tensor(3.7325, grad_fn=<NllLossBackward0>)
tensor(3.2239, grad_fn=<NllLossBackward0>)
tensor(3.3912, grad_fn=<NllLossBackward0>)
tensor(2.3139, grad_fn=<NllLossBackward0>)
tensor(3.1075, grad_fn=<NllLossBackward0>)
tensor(2.5255, grad_fn=<NllLossBackward0>)
tensor(2.24

tensor(3.7045, grad_fn=<NllLossBackward0>)
tensor(3.2108, grad_fn=<NllLossBackward0>)
tensor(3.3968, grad_fn=<NllLossBackward0>)
tensor(3.6833, grad_fn=<NllLossBackward0>)
tensor(3.5000, grad_fn=<NllLossBackward0>)
tensor(2.0224, grad_fn=<NllLossBackward0>)
tensor(2.7553, grad_fn=<NllLossBackward0>)
tensor(3.3333, grad_fn=<NllLossBackward0>)
tensor(2.4628, grad_fn=<NllLossBackward0>)
tensor(3.0639, grad_fn=<NllLossBackward0>)
tensor(2.7111, grad_fn=<NllLossBackward0>)
tensor(3.2279, grad_fn=<NllLossBackward0>)
tensor(2.4229, grad_fn=<NllLossBackward0>)
tensor(2.4193, grad_fn=<NllLossBackward0>)
tensor(2.6591, grad_fn=<NllLossBackward0>)
tensor(3.2900, grad_fn=<NllLossBackward0>)
tensor(2.2554, grad_fn=<NllLossBackward0>)
tensor(2.8726, grad_fn=<NllLossBackward0>)
tensor(2.0208, grad_fn=<NllLossBackward0>)
tensor(3.1927, grad_fn=<NllLossBackward0>)
tensor(2.4179, grad_fn=<NllLossBackward0>)
tensor(2.4709, grad_fn=<NllLossBackward0>)
tensor(3.4480, grad_fn=<NllLossBackward0>)
tensor(2.25

tensor(2.5794, grad_fn=<NllLossBackward0>)
tensor(2.7625, grad_fn=<NllLossBackward0>)
tensor(1.7932, grad_fn=<NllLossBackward0>)
tensor(3.0718, grad_fn=<NllLossBackward0>)
tensor(3.6736, grad_fn=<NllLossBackward0>)
tensor(2.4565, grad_fn=<NllLossBackward0>)
tensor(2.6184, grad_fn=<NllLossBackward0>)
tensor(3.1520, grad_fn=<NllLossBackward0>)
tensor(1.7891, grad_fn=<NllLossBackward0>)
tensor(2.2472, grad_fn=<NllLossBackward0>)
tensor(4.1920, grad_fn=<NllLossBackward0>)
tensor(2.9477, grad_fn=<NllLossBackward0>)
tensor(3.2198, grad_fn=<NllLossBackward0>)
tensor(2.9130, grad_fn=<NllLossBackward0>)
tensor(2.5702, grad_fn=<NllLossBackward0>)
tensor(2.9902, grad_fn=<NllLossBackward0>)
tensor(2.0068, grad_fn=<NllLossBackward0>)
tensor(3.1912, grad_fn=<NllLossBackward0>)
tensor(2.2965, grad_fn=<NllLossBackward0>)
tensor(3.2505, grad_fn=<NllLossBackward0>)
tensor(2.4550, grad_fn=<NllLossBackward0>)
tensor(2.8436, grad_fn=<NllLossBackward0>)
tensor(1.7761, grad_fn=<NllLossBackward0>)
tensor(3.45

tensor(2.8853, grad_fn=<NllLossBackward0>)
tensor(3.1697, grad_fn=<NllLossBackward0>)
tensor(3.0026, grad_fn=<NllLossBackward0>)
tensor(3.0911, grad_fn=<NllLossBackward0>)
tensor(3.1723, grad_fn=<NllLossBackward0>)
tensor(2.2417, grad_fn=<NllLossBackward0>)
tensor(2.2880, grad_fn=<NllLossBackward0>)
tensor(2.6183, grad_fn=<NllLossBackward0>)
tensor(1.9998, grad_fn=<NllLossBackward0>)
tensor(2.7245, grad_fn=<NllLossBackward0>)
tensor(2.9686, grad_fn=<NllLossBackward0>)
tensor(2.2834, grad_fn=<NllLossBackward0>)
tensor(2.0010, grad_fn=<NllLossBackward0>)
tensor(3.5176, grad_fn=<NllLossBackward0>)
tensor(3.0521, grad_fn=<NllLossBackward0>)
tensor(2.3787, grad_fn=<NllLossBackward0>)
tensor(2.2829, grad_fn=<NllLossBackward0>)
tensor(2.0005, grad_fn=<NllLossBackward0>)
tensor(2.6155, grad_fn=<NllLossBackward0>)
tensor(3.0439, grad_fn=<NllLossBackward0>)
tensor(2.6760, grad_fn=<NllLossBackward0>)
tensor(2.3138, grad_fn=<NllLossBackward0>)
tensor(3.3234, grad_fn=<NllLossBackward0>)
tensor(3.20

tensor(3.4558, grad_fn=<NllLossBackward0>)
tensor(2.7410, grad_fn=<NllLossBackward0>)
tensor(2.9031, grad_fn=<NllLossBackward0>)
tensor(2.8999, grad_fn=<NllLossBackward0>)
tensor(2.9020, grad_fn=<NllLossBackward0>)
tensor(2.8053, grad_fn=<NllLossBackward0>)
tensor(2.0099, grad_fn=<NllLossBackward0>)
tensor(2.8350, grad_fn=<NllLossBackward0>)
tensor(3.0972, grad_fn=<NllLossBackward0>)
tensor(3.5967, grad_fn=<NllLossBackward0>)
tensor(3.0478, grad_fn=<NllLossBackward0>)
tensor(2.8997, grad_fn=<NllLossBackward0>)
tensor(1.7744, grad_fn=<NllLossBackward0>)
tensor(3.2368, grad_fn=<NllLossBackward0>)
tensor(3.0327, grad_fn=<NllLossBackward0>)
tensor(2.9024, grad_fn=<NllLossBackward0>)
tensor(2.4425, grad_fn=<NllLossBackward0>)
tensor(3.4933, grad_fn=<NllLossBackward0>)
tensor(3.0155, grad_fn=<NllLossBackward0>)
tensor(2.5472, grad_fn=<NllLossBackward0>)
tensor(3.9096, grad_fn=<NllLossBackward0>)
tensor(3.0557, grad_fn=<NllLossBackward0>)
tensor(2.8284, grad_fn=<NllLossBackward0>)
tensor(2.83

tensor(3.0500, grad_fn=<NllLossBackward0>)
tensor(4.4185, grad_fn=<NllLossBackward0>)
tensor(2.8597, grad_fn=<NllLossBackward0>)
tensor(2.0152, grad_fn=<NllLossBackward0>)
tensor(3.2302, grad_fn=<NllLossBackward0>)
tensor(3.3671, grad_fn=<NllLossBackward0>)
tensor(3.3911, grad_fn=<NllLossBackward0>)
tensor(2.3990, grad_fn=<NllLossBackward0>)
tensor(2.6023, grad_fn=<NllLossBackward0>)
tensor(2.7032, grad_fn=<NllLossBackward0>)
tensor(2.2986, grad_fn=<NllLossBackward0>)
tensor(3.6396, grad_fn=<NllLossBackward0>)
tensor(2.3038, grad_fn=<NllLossBackward0>)
tensor(2.8593, grad_fn=<NllLossBackward0>)
tensor(3.2245, grad_fn=<NllLossBackward0>)
tensor(2.4029, grad_fn=<NllLossBackward0>)
tensor(3.7023, grad_fn=<NllLossBackward0>)
tensor(3.4286, grad_fn=<NllLossBackward0>)
tensor(2.6984, grad_fn=<NllLossBackward0>)
tensor(3.1331, grad_fn=<NllLossBackward0>)
tensor(2.4451, grad_fn=<NllLossBackward0>)
tensor(2.0174, grad_fn=<NllLossBackward0>)
tensor(2.3202, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(4.0514, grad_fn=<NllLossBackward0>)
tensor(2.9145, grad_fn=<NllLossBackward0>)
tensor(3.6301, grad_fn=<NllLossBackward0>)
tensor(3.3601, grad_fn=<NllLossBackward0>)
tensor(2.5643, grad_fn=<NllLossBackward0>)
tensor(3.1814, grad_fn=<NllLossBackward0>)
tensor(2.6402, grad_fn=<NllLossBackward0>)
tensor(2.0782, grad_fn=<NllLossBackward0>)
tensor(2.5234, grad_fn=<NllLossBackward0>)
tensor(2.2224, grad_fn=<NllLossBackward0>)
tensor(3.5484, grad_fn=<NllLossBackward0>)
tensor(3.1551, grad_fn=<NllLossBackward0>)
tensor(3.1393, grad_fn=<NllLossBackward0>)
tensor(4.6715, grad_fn=<NllLossBackward0>)
tensor(2.4269, grad_fn=<NllLossBackward0>)
tensor(3.0510, grad_fn=<NllLossBackward0>)
tensor(4.1011, grad_fn=<NllLossBackward0>)
tensor(2.6835, grad_fn=<NllLossBackward0>)
tensor(3.2018, grad_fn=<NllLossBackward0>)
tensor(3.2398, grad_fn=<NllLossBackward0>)
tensor(1.7593, grad_fn=<NllLossBackward0>)
tensor(3.2802, grad_fn=<NllLossBackward0>)
tensor(2.7371, grad_fn=<NllLossBackward0>)
tensor(2.00

tensor(3.1851, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(3.2024, grad_fn=<NllLossBackward0>)
tensor(2.9662, grad_fn=<NllLossBackward0>)
tensor(2.0757, grad_fn=<NllLossBackward0>)
tensor(3.3942, grad_fn=<NllLossBackward0>)
tensor(2.5114, grad_fn=<NllLossBackward0>)
tensor(3.1510, grad_fn=<NllLossBackward0>)
tensor(2.2669, grad_fn=<NllLossBackward0>)
tensor(3.8596, grad_fn=<NllLossBackward0>)
tensor(2.0021, grad_fn=<NllLossBackward0>)
tensor(3.4600, grad_fn=<NllLossBackward0>)
tensor(3.8527, grad_fn=<NllLossBackward0>)
tensor(2.2934, grad_fn=<NllLossBackward0>)
tensor(2.9585, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(3.2231, grad_fn=<NllLossBackward0>)
tensor(2.8760, grad_fn=<NllLossBackward0>)
tensor(3.6734, grad_fn=<NllLossBackward0>)
tensor(2.9786, grad_fn=<NllLossBackward0>)
tensor(2.9841, grad_fn=<NllLossBackward0>)
tensor(2.8930, grad_fn=<NllLossBackward0>)
tensor(2.3715, grad_fn=<NllLossBackward0>)
tensor(2.62

tensor(2.7827, grad_fn=<NllLossBackward0>)
tensor(1.7230, grad_fn=<NllLossBackward0>)
tensor(3.7042, grad_fn=<NllLossBackward0>)
tensor(2.0029, grad_fn=<NllLossBackward0>)
tensor(2.8261, grad_fn=<NllLossBackward0>)
tensor(2.8556, grad_fn=<NllLossBackward0>)
tensor(2.3608, grad_fn=<NllLossBackward0>)
tensor(3.6858, grad_fn=<NllLossBackward0>)
tensor(3.1893, grad_fn=<NllLossBackward0>)
tensor(4.0672, grad_fn=<NllLossBackward0>)
tensor(3.4382, grad_fn=<NllLossBackward0>)
tensor(2.5412, grad_fn=<NllLossBackward0>)
tensor(2.7085, grad_fn=<NllLossBackward0>)
tensor(2.8583, grad_fn=<NllLossBackward0>)
tensor(2.8437, grad_fn=<NllLossBackward0>)
tensor(2.4996, grad_fn=<NllLossBackward0>)
tensor(2.7506, grad_fn=<NllLossBackward0>)
tensor(3.6429, grad_fn=<NllLossBackward0>)
tensor(2.5638, grad_fn=<NllLossBackward0>)
tensor(3.0169, grad_fn=<NllLossBackward0>)
tensor(2.2761, grad_fn=<NllLossBackward0>)
tensor(3.5477, grad_fn=<NllLossBackward0>)
tensor(3.6933, grad_fn=<NllLossBackward0>)
tensor(2.65

tensor(2.3758, grad_fn=<NllLossBackward0>)
tensor(3.1517, grad_fn=<NllLossBackward0>)
tensor(2.3692, grad_fn=<NllLossBackward0>)
tensor(3.5168, grad_fn=<NllLossBackward0>)
tensor(2.6439, grad_fn=<NllLossBackward0>)
tensor(3.2720, grad_fn=<NllLossBackward0>)
tensor(2.9768, grad_fn=<NllLossBackward0>)
tensor(1.7488, grad_fn=<NllLossBackward0>)
tensor(2.3483, grad_fn=<NllLossBackward0>)
tensor(2.0211, grad_fn=<NllLossBackward0>)
tensor(2.5485, grad_fn=<NllLossBackward0>)
tensor(3.0928, grad_fn=<NllLossBackward0>)
tensor(2.2202, grad_fn=<NllLossBackward0>)
tensor(3.3983, grad_fn=<NllLossBackward0>)
tensor(2.7183, grad_fn=<NllLossBackward0>)
tensor(3.3608, grad_fn=<NllLossBackward0>)
tensor(2.2193, grad_fn=<NllLossBackward0>)
tensor(2.6142, grad_fn=<NllLossBackward0>)
tensor(2.4002, grad_fn=<NllLossBackward0>)
tensor(3.0522, grad_fn=<NllLossBackward0>)
tensor(2.8256, grad_fn=<NllLossBackward0>)
tensor(2.5608, grad_fn=<NllLossBackward0>)
tensor(3.1807, grad_fn=<NllLossBackward0>)
tensor(3.24

tensor(2.5069, grad_fn=<NllLossBackward0>)
tensor(4.1684, grad_fn=<NllLossBackward0>)
tensor(2.4141, grad_fn=<NllLossBackward0>)
tensor(2.2842, grad_fn=<NllLossBackward0>)
tensor(3.5046, grad_fn=<NllLossBackward0>)
tensor(3.7302, grad_fn=<NllLossBackward0>)
tensor(2.4124, grad_fn=<NllLossBackward0>)
tensor(3.5485, grad_fn=<NllLossBackward0>)
tensor(2.5438, grad_fn=<NllLossBackward0>)
tensor(2.7186, grad_fn=<NllLossBackward0>)
tensor(2.5593, grad_fn=<NllLossBackward0>)
tensor(2.2794, grad_fn=<NllLossBackward0>)
tensor(3.8642, grad_fn=<NllLossBackward0>)
tensor(3.1908, grad_fn=<NllLossBackward0>)
tensor(3.4254, grad_fn=<NllLossBackward0>)
tensor(2.4092, grad_fn=<NllLossBackward0>)
tensor(2.4122, grad_fn=<NllLossBackward0>)
tensor(3.1458, grad_fn=<NllLossBackward0>)
tensor(2.0299, grad_fn=<NllLossBackward0>)
tensor(2.6866, grad_fn=<NllLossBackward0>)
tensor(3.8808, grad_fn=<NllLossBackward0>)
tensor(2.6952, grad_fn=<NllLossBackward0>)
tensor(2.6080, grad_fn=<NllLossBackward0>)
tensor(2.02

tensor(2.8664, grad_fn=<NllLossBackward0>)
tensor(2.8893, grad_fn=<NllLossBackward0>)
tensor(3.1860, grad_fn=<NllLossBackward0>)
tensor(3.6526, grad_fn=<NllLossBackward0>)
tensor(2.2772, grad_fn=<NllLossBackward0>)
tensor(2.5347, grad_fn=<NllLossBackward0>)
tensor(2.6934, grad_fn=<NllLossBackward0>)
tensor(2.4898, grad_fn=<NllLossBackward0>)
tensor(2.9050, grad_fn=<NllLossBackward0>)
tensor(3.9629, grad_fn=<NllLossBackward0>)
tensor(3.3472, grad_fn=<NllLossBackward0>)
tensor(2.4962, grad_fn=<NllLossBackward0>)
tensor(3.6653, grad_fn=<NllLossBackward0>)
tensor(2.4890, grad_fn=<NllLossBackward0>)
tensor(3.2476, grad_fn=<NllLossBackward0>)
tensor(3.6850, grad_fn=<NllLossBackward0>)
tensor(3.0539, grad_fn=<NllLossBackward0>)
tensor(3.2213, grad_fn=<NllLossBackward0>)
tensor(2.4073, grad_fn=<NllLossBackward0>)
tensor(4.2115, grad_fn=<NllLossBackward0>)
tensor(3.5168, grad_fn=<NllLossBackward0>)
tensor(3.2808, grad_fn=<NllLossBackward0>)
tensor(2.3965, grad_fn=<NllLossBackward0>)
tensor(2.79

tensor(2.7186, grad_fn=<NllLossBackward0>)
tensor(2.0241, grad_fn=<NllLossBackward0>)
tensor(3.9136, grad_fn=<NllLossBackward0>)
tensor(2.4916, grad_fn=<NllLossBackward0>)
tensor(1.7066, grad_fn=<NllLossBackward0>)
tensor(2.4988, grad_fn=<NllLossBackward0>)
tensor(2.3693, grad_fn=<NllLossBackward0>)
tensor(3.6973, grad_fn=<NllLossBackward0>)
tensor(3.7891, grad_fn=<NllLossBackward0>)
tensor(3.3049, grad_fn=<NllLossBackward0>)
tensor(3.5540, grad_fn=<NllLossBackward0>)
tensor(2.3687, grad_fn=<NllLossBackward0>)
tensor(3.7876, grad_fn=<NllLossBackward0>)
tensor(3.1181, grad_fn=<NllLossBackward0>)
tensor(2.6849, grad_fn=<NllLossBackward0>)
tensor(3.8920, grad_fn=<NllLossBackward0>)
tensor(3.0623, grad_fn=<NllLossBackward0>)
tensor(2.9596, grad_fn=<NllLossBackward0>)
tensor(2.5297, grad_fn=<NllLossBackward0>)
tensor(2.5011, grad_fn=<NllLossBackward0>)
tensor(2.3801, grad_fn=<NllLossBackward0>)
tensor(2.6988, grad_fn=<NllLossBackward0>)
tensor(4.0371, grad_fn=<NllLossBackward0>)
tensor(2.69

tensor(2.8582, grad_fn=<NllLossBackward0>)
tensor(3.1573, grad_fn=<NllLossBackward0>)
tensor(3.5794, grad_fn=<NllLossBackward0>)
tensor(2.3703, grad_fn=<NllLossBackward0>)
tensor(3.1705, grad_fn=<NllLossBackward0>)
tensor(2.3670, grad_fn=<NllLossBackward0>)
tensor(2.8416, grad_fn=<NllLossBackward0>)
tensor(3.2138, grad_fn=<NllLossBackward0>)
tensor(2.7067, grad_fn=<NllLossBackward0>)
tensor(3.1783, grad_fn=<NllLossBackward0>)
tensor(3.7159, grad_fn=<NllLossBackward0>)
tensor(2.9454, grad_fn=<NllLossBackward0>)
tensor(1.7200, grad_fn=<NllLossBackward0>)
tensor(2.7322, grad_fn=<NllLossBackward0>)
tensor(3.1768, grad_fn=<NllLossBackward0>)
tensor(3.2108, grad_fn=<NllLossBackward0>)
tensor(3.1004, grad_fn=<NllLossBackward0>)
tensor(3.5916, grad_fn=<NllLossBackward0>)
tensor(2.7483, grad_fn=<NllLossBackward0>)
tensor(3.6244, grad_fn=<NllLossBackward0>)
tensor(3.1696, grad_fn=<NllLossBackward0>)
tensor(3.2960, grad_fn=<NllLossBackward0>)
tensor(2.8949, grad_fn=<NllLossBackward0>)
tensor(3.52

tensor(2.7187, grad_fn=<NllLossBackward0>)
tensor(3.8607, grad_fn=<NllLossBackward0>)
tensor(2.5361, grad_fn=<NllLossBackward0>)
tensor(3.6596, grad_fn=<NllLossBackward0>)
tensor(2.8613, grad_fn=<NllLossBackward0>)
tensor(1.7119, grad_fn=<NllLossBackward0>)
tensor(2.7169, grad_fn=<NllLossBackward0>)
tensor(3.1410, grad_fn=<NllLossBackward0>)
tensor(4.4999, grad_fn=<NllLossBackward0>)
tensor(3.1418, grad_fn=<NllLossBackward0>)
tensor(2.9587, grad_fn=<NllLossBackward0>)
tensor(1.7081, grad_fn=<NllLossBackward0>)
tensor(2.3279, grad_fn=<NllLossBackward0>)
tensor(2.2696, grad_fn=<NllLossBackward0>)
tensor(2.2626, grad_fn=<NllLossBackward0>)
tensor(2.8393, grad_fn=<NllLossBackward0>)
tensor(2.8152, grad_fn=<NllLossBackward0>)
tensor(2.4950, grad_fn=<NllLossBackward0>)
tensor(2.2613, grad_fn=<NllLossBackward0>)
tensor(2.7261, grad_fn=<NllLossBackward0>)
tensor(3.1858, grad_fn=<NllLossBackward0>)
tensor(2.3739, grad_fn=<NllLossBackward0>)
tensor(2.5123, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(3.2780, grad_fn=<NllLossBackward0>)
tensor(2.4849, grad_fn=<NllLossBackward0>)
tensor(3.1336, grad_fn=<NllLossBackward0>)
tensor(2.6163, grad_fn=<NllLossBackward0>)
tensor(3.2831, grad_fn=<NllLossBackward0>)
tensor(2.0193, grad_fn=<NllLossBackward0>)
tensor(2.0528, grad_fn=<NllLossBackward0>)
tensor(4.1506, grad_fn=<NllLossBackward0>)
tensor(2.1887, grad_fn=<NllLossBackward0>)
tensor(4.4605, grad_fn=<NllLossBackward0>)
tensor(2.9515, grad_fn=<NllLossBackward0>)
tensor(2.3763, grad_fn=<NllLossBackward0>)
tensor(2.0511, grad_fn=<NllLossBackward0>)
tensor(2.8431, grad_fn=<NllLossBackward0>)
tensor(2.3919, grad_fn=<NllLossBackward0>)
tensor(3.5855, grad_fn=<NllLossBackward0>)
tensor(2.8753, grad_fn=<NllLossBackward0>)
tensor(2.3959, grad_fn=<NllLossBackward0>)
tensor(3.6350, grad_fn=<NllLossBackward0>)
tensor(2.6802, grad_fn=<NllLossBackward0>)
tensor(2.3683, grad_fn=<NllLossBackward0>)
tensor(2.0160, grad_fn=<NllLossBackward0>)
tensor(4.0087, grad_fn=<NllLossBackward0>)
tensor(2.68

tensor(3.1913, grad_fn=<NllLossBackward0>)
tensor(2.7452, grad_fn=<NllLossBackward0>)
tensor(2.7384, grad_fn=<NllLossBackward0>)
tensor(4.0960, grad_fn=<NllLossBackward0>)
tensor(3.2133, grad_fn=<NllLossBackward0>)
tensor(2.6105, grad_fn=<NllLossBackward0>)
tensor(3.3088, grad_fn=<NllLossBackward0>)
tensor(2.4926, grad_fn=<NllLossBackward0>)
tensor(2.8345, grad_fn=<NllLossBackward0>)
tensor(2.7316, grad_fn=<NllLossBackward0>)
tensor(3.2164, grad_fn=<NllLossBackward0>)
tensor(2.3968, grad_fn=<NllLossBackward0>)
tensor(3.3241, grad_fn=<NllLossBackward0>)
tensor(1.7088, grad_fn=<NllLossBackward0>)
tensor(3.0163, grad_fn=<NllLossBackward0>)
tensor(3.1182, grad_fn=<NllLossBackward0>)
tensor(2.3785, grad_fn=<NllLossBackward0>)
tensor(3.2070, grad_fn=<NllLossBackward0>)
tensor(2.3451, grad_fn=<NllLossBackward0>)
tensor(3.4366, grad_fn=<NllLossBackward0>)
tensor(4.0622, grad_fn=<NllLossBackward0>)
tensor(2.3392, grad_fn=<NllLossBackward0>)
tensor(3.4443, grad_fn=<NllLossBackward0>)
tensor(3.69

tensor(2.0467, grad_fn=<NllLossBackward0>)
tensor(3.3203, grad_fn=<NllLossBackward0>)
tensor(2.0173, grad_fn=<NllLossBackward0>)
tensor(3.5536, grad_fn=<NllLossBackward0>)
tensor(2.5254, grad_fn=<NllLossBackward0>)
tensor(3.4631, grad_fn=<NllLossBackward0>)
tensor(2.8255, grad_fn=<NllLossBackward0>)
tensor(3.6407, grad_fn=<NllLossBackward0>)
tensor(2.3910, grad_fn=<NllLossBackward0>)
tensor(3.3481, grad_fn=<NllLossBackward0>)
tensor(3.1417, grad_fn=<NllLossBackward0>)
tensor(3.1197, grad_fn=<NllLossBackward0>)
tensor(3.1703, grad_fn=<NllLossBackward0>)
tensor(3.9799, grad_fn=<NllLossBackward0>)
tensor(1.7027, grad_fn=<NllLossBackward0>)
tensor(2.0433, grad_fn=<NllLossBackward0>)
tensor(3.2249, grad_fn=<NllLossBackward0>)
tensor(3.3953, grad_fn=<NllLossBackward0>)
tensor(3.6406, grad_fn=<NllLossBackward0>)
tensor(2.4805, grad_fn=<NllLossBackward0>)
tensor(2.4993, grad_fn=<NllLossBackward0>)
tensor(3.5320, grad_fn=<NllLossBackward0>)
tensor(4.0316, grad_fn=<NllLossBackward0>)
tensor(2.47

tensor(2.8653, grad_fn=<NllLossBackward0>)
tensor(3.9105, grad_fn=<NllLossBackward0>)
tensor(4.3631, grad_fn=<NllLossBackward0>)
tensor(3.6456, grad_fn=<NllLossBackward0>)
tensor(3.1110, grad_fn=<NllLossBackward0>)
tensor(3.5051, grad_fn=<NllLossBackward0>)
tensor(1.7128, grad_fn=<NllLossBackward0>)
tensor(4.1055, grad_fn=<NllLossBackward0>)
tensor(1.7101, grad_fn=<NllLossBackward0>)
tensor(2.7277, grad_fn=<NllLossBackward0>)
tensor(2.3885, grad_fn=<NllLossBackward0>)
tensor(2.9495, grad_fn=<NllLossBackward0>)
tensor(2.9959, grad_fn=<NllLossBackward0>)
tensor(3.2011, grad_fn=<NllLossBackward0>)
tensor(2.4831, grad_fn=<NllLossBackward0>)
tensor(2.5257, grad_fn=<NllLossBackward0>)
tensor(2.3706, grad_fn=<NllLossBackward0>)
tensor(1.7043, grad_fn=<NllLossBackward0>)
tensor(2.5007, grad_fn=<NllLossBackward0>)
tensor(2.3397, grad_fn=<NllLossBackward0>)
tensor(1.7026, grad_fn=<NllLossBackward0>)
tensor(2.5768, grad_fn=<NllLossBackward0>)
tensor(3.6492, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(3.5473, grad_fn=<NllLossBackward0>)
tensor(2.7358, grad_fn=<NllLossBackward0>)
tensor(2.3935, grad_fn=<NllLossBackward0>)
tensor(3.5121, grad_fn=<NllLossBackward0>)
tensor(2.3858, grad_fn=<NllLossBackward0>)
tensor(2.3922, grad_fn=<NllLossBackward0>)
tensor(2.6531, grad_fn=<NllLossBackward0>)
tensor(1.7033, grad_fn=<NllLossBackward0>)
tensor(2.8077, grad_fn=<NllLossBackward0>)
tensor(3.9267, grad_fn=<NllLossBackward0>)
tensor(2.8682, grad_fn=<NllLossBackward0>)
tensor(3.2119, grad_fn=<NllLossBackward0>)
tensor(2.8256, grad_fn=<NllLossBackward0>)
tensor(3.9936, grad_fn=<NllLossBackward0>)
tensor(3.0655, grad_fn=<NllLossBackward0>)
tensor(2.5215, grad_fn=<NllLossBackward0>)
tensor(2.3791, grad_fn=<NllLossBackward0>)
tensor(4.9601, grad_fn=<NllLossBackward0>)
tensor(2.7414, grad_fn=<NllLossBackward0>)
tensor(3.9245, grad_fn=<NllLossBackward0>)
tensor(3.5469, grad_fn=<NllLossBackward0>)
tensor(3.0152, grad_fn=<NllLossBackward0>)
tensor(2.7224, grad_fn=<NllLossBackward0>)
tensor(2.34

tensor(2.5741, grad_fn=<NllLossBackward0>)
tensor(3.1779, grad_fn=<NllLossBackward0>)
tensor(3.2215, grad_fn=<NllLossBackward0>)
tensor(3.0237, grad_fn=<NllLossBackward0>)
tensor(2.8683, grad_fn=<NllLossBackward0>)
tensor(3.6595, grad_fn=<NllLossBackward0>)
tensor(3.4910, grad_fn=<NllLossBackward0>)
tensor(2.3727, grad_fn=<NllLossBackward0>)
tensor(4.5084, grad_fn=<NllLossBackward0>)
tensor(3.5277, grad_fn=<NllLossBackward0>)
tensor(3.4930, grad_fn=<NllLossBackward0>)
tensor(2.8749, grad_fn=<NllLossBackward0>)
tensor(2.9720, grad_fn=<NllLossBackward0>)
tensor(2.0165, grad_fn=<NllLossBackward0>)
tensor(4.1664, grad_fn=<NllLossBackward0>)
tensor(3.4428, grad_fn=<NllLossBackward0>)
tensor(3.1328, grad_fn=<NllLossBackward0>)
tensor(2.4653, grad_fn=<NllLossBackward0>)
tensor(3.0532, grad_fn=<NllLossBackward0>)
tensor(2.9692, grad_fn=<NllLossBackward0>)
tensor(2.3719, grad_fn=<NllLossBackward0>)
tensor(3.8613, grad_fn=<NllLossBackward0>)
tensor(2.3296, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(3.1642, grad_fn=<NllLossBackward0>)
tensor(2.6492, grad_fn=<NllLossBackward0>)
tensor(2.0560, grad_fn=<NllLossBackward0>)
tensor(2.1994, grad_fn=<NllLossBackward0>)
tensor(3.0076, grad_fn=<NllLossBackward0>)
tensor(2.3355, grad_fn=<NllLossBackward0>)
tensor(2.8479, grad_fn=<NllLossBackward0>)
tensor(3.0442, grad_fn=<NllLossBackward0>)
tensor(2.2481, grad_fn=<NllLossBackward0>)
tensor(3.5323, grad_fn=<NllLossBackward0>)
tensor(3.0252, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(2.8192, grad_fn=<NllLossBackward0>)
tensor(2.8321, grad_fn=<NllLossBackward0>)
tensor(3.3464, grad_fn=<NllLossBackward0>)
tensor(3.2089, grad_fn=<NllLossBackward0>)
tensor(2.0202, grad_fn=<NllLossBackward0>)
tensor(2.3364, grad_fn=<NllLossBackward0>)
tensor(3.5827, grad_fn=<NllLossBackward0>)
tensor(2.7045, grad_fn=<NllLossBackward0>)
tensor(3.2026, grad_fn=<NllLossBackward0>)
tensor(3.3002, grad_fn=<NllLossBackward0>)
tensor(4.2458, grad_fn=<NllLossBackward0>)
tensor(2.25

tensor(3.1425, grad_fn=<NllLossBackward0>)
tensor(2.2606, grad_fn=<NllLossBackward0>)
tensor(2.3748, grad_fn=<NllLossBackward0>)
tensor(3.1431, grad_fn=<NllLossBackward0>)
tensor(3.0195, grad_fn=<NllLossBackward0>)
tensor(2.9664, grad_fn=<NllLossBackward0>)
tensor(3.7223, grad_fn=<NllLossBackward0>)
tensor(2.3978, grad_fn=<NllLossBackward0>)
tensor(3.7970, grad_fn=<NllLossBackward0>)
tensor(2.8673, grad_fn=<NllLossBackward0>)
tensor(2.3632, grad_fn=<NllLossBackward0>)
tensor(2.3314, grad_fn=<NllLossBackward0>)
tensor(3.5228, grad_fn=<NllLossBackward0>)
tensor(2.3574, grad_fn=<NllLossBackward0>)
tensor(3.7991, grad_fn=<NllLossBackward0>)
tensor(2.0522, grad_fn=<NllLossBackward0>)
tensor(3.7112, grad_fn=<NllLossBackward0>)
tensor(3.1468, grad_fn=<NllLossBackward0>)
tensor(3.1568, grad_fn=<NllLossBackward0>)
tensor(2.5390, grad_fn=<NllLossBackward0>)
tensor(2.1994, grad_fn=<NllLossBackward0>)
tensor(2.7887, grad_fn=<NllLossBackward0>)
tensor(3.4718, grad_fn=<NllLossBackward0>)
tensor(2.04

tensor(3.1071, grad_fn=<NllLossBackward0>)
tensor(2.0377, grad_fn=<NllLossBackward0>)
tensor(2.3576, grad_fn=<NllLossBackward0>)
tensor(3.3079, grad_fn=<NllLossBackward0>)
tensor(2.7855, grad_fn=<NllLossBackward0>)
tensor(4.3346, grad_fn=<NllLossBackward0>)
tensor(3.2415, grad_fn=<NllLossBackward0>)
tensor(2.7850, grad_fn=<NllLossBackward0>)
tensor(2.6598, grad_fn=<NllLossBackward0>)
tensor(1.7171, grad_fn=<NllLossBackward0>)
tensor(2.9815, grad_fn=<NllLossBackward0>)
tensor(2.7858, grad_fn=<NllLossBackward0>)
tensor(2.0370, grad_fn=<NllLossBackward0>)
tensor(4.4774, grad_fn=<NllLossBackward0>)
tensor(3.5330, grad_fn=<NllLossBackward0>)
tensor(2.5213, grad_fn=<NllLossBackward0>)
tensor(2.7788, grad_fn=<NllLossBackward0>)
tensor(2.3528, grad_fn=<NllLossBackward0>)
tensor(2.0316, grad_fn=<NllLossBackward0>)
tensor(2.7785, grad_fn=<NllLossBackward0>)
tensor(2.3966, grad_fn=<NllLossBackward0>)
tensor(3.5029, grad_fn=<NllLossBackward0>)
tensor(2.0251, grad_fn=<NllLossBackward0>)
tensor(3.22

tensor(3.4611, grad_fn=<NllLossBackward0>)
tensor(2.1788, grad_fn=<NllLossBackward0>)
tensor(2.8689, grad_fn=<NllLossBackward0>)
tensor(1.6843, grad_fn=<NllLossBackward0>)
tensor(3.1768, grad_fn=<NllLossBackward0>)
tensor(3.7406, grad_fn=<NllLossBackward0>)
tensor(3.3472, grad_fn=<NllLossBackward0>)
tensor(3.7734, grad_fn=<NllLossBackward0>)
tensor(3.0797, grad_fn=<NllLossBackward0>)
tensor(3.6335, grad_fn=<NllLossBackward0>)
tensor(2.0338, grad_fn=<NllLossBackward0>)
tensor(2.8563, grad_fn=<NllLossBackward0>)
tensor(3.1461, grad_fn=<NllLossBackward0>)
tensor(3.2023, grad_fn=<NllLossBackward0>)
tensor(3.3545, grad_fn=<NllLossBackward0>)
tensor(2.8805, grad_fn=<NllLossBackward0>)
tensor(1.6859, grad_fn=<NllLossBackward0>)
tensor(2.1782, grad_fn=<NllLossBackward0>)
tensor(2.0275, grad_fn=<NllLossBackward0>)
tensor(2.2588, grad_fn=<NllLossBackward0>)
tensor(2.3704, grad_fn=<NllLossBackward0>)
tensor(3.3205, grad_fn=<NllLossBackward0>)
tensor(2.3697, grad_fn=<NllLossBackward0>)
tensor(3.72

tensor(2.6008, grad_fn=<NllLossBackward0>)
tensor(3.1881, grad_fn=<NllLossBackward0>)
tensor(2.6173, grad_fn=<NllLossBackward0>)
tensor(1.6822, grad_fn=<NllLossBackward0>)
tensor(2.2621, grad_fn=<NllLossBackward0>)
tensor(2.5307, grad_fn=<NllLossBackward0>)
tensor(2.3598, grad_fn=<NllLossBackward0>)
tensor(2.5266, grad_fn=<NllLossBackward0>)
tensor(2.8075, grad_fn=<NllLossBackward0>)
tensor(2.3137, grad_fn=<NllLossBackward0>)
tensor(4.1785, grad_fn=<NllLossBackward0>)
tensor(3.0226, grad_fn=<NllLossBackward0>)
tensor(2.5235, grad_fn=<NllLossBackward0>)
tensor(2.7180, grad_fn=<NllLossBackward0>)
tensor(2.3193, grad_fn=<NllLossBackward0>)
tensor(3.1130, grad_fn=<NllLossBackward0>)
tensor(3.7089, grad_fn=<NllLossBackward0>)
tensor(2.6154, grad_fn=<NllLossBackward0>)
tensor(2.7192, grad_fn=<NllLossBackward0>)
tensor(3.9278, grad_fn=<NllLossBackward0>)
tensor(2.5303, grad_fn=<NllLossBackward0>)
tensor(3.8965, grad_fn=<NllLossBackward0>)
tensor(3.7094, grad_fn=<NllLossBackward0>)
tensor(2.71

tensor(2.6009, grad_fn=<NllLossBackward0>)
tensor(2.3721, grad_fn=<NllLossBackward0>)
tensor(1.6818, grad_fn=<NllLossBackward0>)
tensor(2.0204, grad_fn=<NllLossBackward0>)
tensor(2.0175, grad_fn=<NllLossBackward0>)
tensor(3.6620, grad_fn=<NllLossBackward0>)
tensor(2.9451, grad_fn=<NllLossBackward0>)
tensor(3.0193, grad_fn=<NllLossBackward0>)
tensor(3.0028, grad_fn=<NllLossBackward0>)
tensor(2.4588, grad_fn=<NllLossBackward0>)
tensor(3.1325, grad_fn=<NllLossBackward0>)
tensor(3.3590, grad_fn=<NllLossBackward0>)
tensor(3.4996, grad_fn=<NllLossBackward0>)
tensor(2.3705, grad_fn=<NllLossBackward0>)
tensor(1.6741, grad_fn=<NllLossBackward0>)
tensor(2.8799, grad_fn=<NllLossBackward0>)
tensor(3.7642, grad_fn=<NllLossBackward0>)
tensor(3.3044, grad_fn=<NllLossBackward0>)
tensor(3.2396, grad_fn=<NllLossBackward0>)
tensor(2.8710, grad_fn=<NllLossBackward0>)
tensor(2.2506, grad_fn=<NllLossBackward0>)
tensor(2.0152, grad_fn=<NllLossBackward0>)
tensor(3.6326, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(3.5298, grad_fn=<NllLossBackward0>)
tensor(2.6754, grad_fn=<NllLossBackward0>)
tensor(3.3416, grad_fn=<NllLossBackward0>)
tensor(2.3705, grad_fn=<NllLossBackward0>)
tensor(2.0188, grad_fn=<NllLossBackward0>)
tensor(2.2548, grad_fn=<NllLossBackward0>)
tensor(1.6775, grad_fn=<NllLossBackward0>)
tensor(3.2461, grad_fn=<NllLossBackward0>)
tensor(3.0628, grad_fn=<NllLossBackward0>)
tensor(4.0189, grad_fn=<NllLossBackward0>)
tensor(4.0632, grad_fn=<NllLossBackward0>)
tensor(3.2269, grad_fn=<NllLossBackward0>)
tensor(2.3141, grad_fn=<NllLossBackward0>)
tensor(2.7128, grad_fn=<NllLossBackward0>)
tensor(2.7446, grad_fn=<NllLossBackward0>)
tensor(3.4658, grad_fn=<NllLossBackward0>)
tensor(3.7957, grad_fn=<NllLossBackward0>)
tensor(3.0992, grad_fn=<NllLossBackward0>)
tensor(2.3727, grad_fn=<NllLossBackward0>)
tensor(4.4748, grad_fn=<NllLossBackward0>)
tensor(3.3844, grad_fn=<NllLossBackward0>)
tensor(2.1757, grad_fn=<NllLossBackward0>)
tensor(1.6793, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(3.6503, grad_fn=<NllLossBackward0>)
tensor(2.7421, grad_fn=<NllLossBackward0>)
tensor(2.7148, grad_fn=<NllLossBackward0>)
tensor(2.3822, grad_fn=<NllLossBackward0>)
tensor(2.9531, grad_fn=<NllLossBackward0>)
tensor(3.5888, grad_fn=<NllLossBackward0>)
tensor(2.0212, grad_fn=<NllLossBackward0>)
tensor(2.3219, grad_fn=<NllLossBackward0>)
tensor(3.4318, grad_fn=<NllLossBackward0>)
tensor(2.7925, grad_fn=<NllLossBackward0>)
tensor(2.6547, grad_fn=<NllLossBackward0>)
tensor(1.6898, grad_fn=<NllLossBackward0>)
tensor(2.0180, grad_fn=<NllLossBackward0>)
tensor(2.7104, grad_fn=<NllLossBackward0>)
tensor(2.3716, grad_fn=<NllLossBackward0>)
tensor(2.8241, grad_fn=<NllLossBackward0>)
tensor(2.8476, grad_fn=<NllLossBackward0>)
tensor(3.0979, grad_fn=<NllLossBackward0>)
tensor(2.3643, grad_fn=<NllLossBackward0>)
tensor(3.0122, grad_fn=<NllLossBackward0>)
tensor(3.2834, grad_fn=<NllLossBackward0>)
tensor(2.6946, grad_fn=<NllLossBackward0>)
tensor(2.0147, grad_fn=<NllLossBackward0>)
tensor(2.05

tensor(4.0156, grad_fn=<NllLossBackward0>)
tensor(2.8928, grad_fn=<NllLossBackward0>)
tensor(2.8319, grad_fn=<NllLossBackward0>)
tensor(2.6954, grad_fn=<NllLossBackward0>)
tensor(3.0651, grad_fn=<NllLossBackward0>)
tensor(3.1425, grad_fn=<NllLossBackward0>)
tensor(3.2758, grad_fn=<NllLossBackward0>)
tensor(2.1766, grad_fn=<NllLossBackward0>)
tensor(2.5357, grad_fn=<NllLossBackward0>)
tensor(1.7068, grad_fn=<NllLossBackward0>)
tensor(3.4845, grad_fn=<NllLossBackward0>)
tensor(3.4938, grad_fn=<NllLossBackward0>)
tensor(2.4679, grad_fn=<NllLossBackward0>)
tensor(3.5301, grad_fn=<NllLossBackward0>)
tensor(3.9696, grad_fn=<NllLossBackward0>)
tensor(2.7982, grad_fn=<NllLossBackward0>)
tensor(2.5828, grad_fn=<NllLossBackward0>)
tensor(2.0256, grad_fn=<NllLossBackward0>)
tensor(2.4633, grad_fn=<NllLossBackward0>)
tensor(2.4648, grad_fn=<NllLossBackward0>)
tensor(3.2136, grad_fn=<NllLossBackward0>)
tensor(2.8241, grad_fn=<NllLossBackward0>)
tensor(3.4514, grad_fn=<NllLossBackward0>)
tensor(3.68

tensor(3.6024, grad_fn=<NllLossBackward0>)
tensor(2.8193, grad_fn=<NllLossBackward0>)
tensor(2.0239, grad_fn=<NllLossBackward0>)
tensor(4.0128, grad_fn=<NllLossBackward0>)
tensor(2.4760, grad_fn=<NllLossBackward0>)
tensor(2.0221, grad_fn=<NllLossBackward0>)
tensor(2.0180, grad_fn=<NllLossBackward0>)
tensor(4.0873, grad_fn=<NllLossBackward0>)
tensor(2.1884, grad_fn=<NllLossBackward0>)
tensor(3.2536, grad_fn=<NllLossBackward0>)
tensor(4.3373, grad_fn=<NllLossBackward0>)
tensor(2.6777, grad_fn=<NllLossBackward0>)
tensor(3.4250, grad_fn=<NllLossBackward0>)
tensor(2.0186, grad_fn=<NllLossBackward0>)
tensor(2.4741, grad_fn=<NllLossBackward0>)
tensor(2.5278, grad_fn=<NllLossBackward0>)
tensor(2.3925, grad_fn=<NllLossBackward0>)
tensor(2.8859, grad_fn=<NllLossBackward0>)
tensor(4.3954, grad_fn=<NllLossBackward0>)
tensor(3.2267, grad_fn=<NllLossBackward0>)
tensor(2.3732, grad_fn=<NllLossBackward0>)
tensor(1.7146, grad_fn=<NllLossBackward0>)
tensor(3.1428, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(3.6252, grad_fn=<NllLossBackward0>)
tensor(2.0353, grad_fn=<NllLossBackward0>)
tensor(2.7420, grad_fn=<NllLossBackward0>)
tensor(2.8684, grad_fn=<NllLossBackward0>)
tensor(2.4659, grad_fn=<NllLossBackward0>)
tensor(2.7361, grad_fn=<NllLossBackward0>)
tensor(3.7442, grad_fn=<NllLossBackward0>)
tensor(2.3441, grad_fn=<NllLossBackward0>)
tensor(2.4671, grad_fn=<NllLossBackward0>)
tensor(2.6504, grad_fn=<NllLossBackward0>)
tensor(2.6955, grad_fn=<NllLossBackward0>)
tensor(2.8537, grad_fn=<NllLossBackward0>)
tensor(3.8804, grad_fn=<NllLossBackward0>)
tensor(2.8393, grad_fn=<NllLossBackward0>)
tensor(2.4951, grad_fn=<NllLossBackward0>)
tensor(2.0325, grad_fn=<NllLossBackward0>)
tensor(2.2647, grad_fn=<NllLossBackward0>)
tensor(2.6979, grad_fn=<NllLossBackward0>)
tensor(2.3801, grad_fn=<NllLossBackward0>)
tensor(2.3926, grad_fn=<NllLossBackward0>)
tensor(2.1865, grad_fn=<NllLossBackward0>)
tensor(3.3665, grad_fn=<NllLossBackward0>)
tensor(2.2685, grad_fn=<NllLossBackward0>)
tensor(2.82

tensor(2.2764, grad_fn=<NllLossBackward0>)
tensor(2.6061, grad_fn=<NllLossBackward0>)
tensor(3.0018, grad_fn=<NllLossBackward0>)
tensor(3.0492, grad_fn=<NllLossBackward0>)
tensor(2.8766, grad_fn=<NllLossBackward0>)
tensor(2.3475, grad_fn=<NllLossBackward0>)
tensor(2.8427, grad_fn=<NllLossBackward0>)
tensor(4.4112, grad_fn=<NllLossBackward0>)
tensor(3.4272, grad_fn=<NllLossBackward0>)
tensor(3.4692, grad_fn=<NllLossBackward0>)
tensor(3.1135, grad_fn=<NllLossBackward0>)
tensor(3.5363, grad_fn=<NllLossBackward0>)
tensor(1.7525, grad_fn=<NllLossBackward0>)
tensor(1.7490, grad_fn=<NllLossBackward0>)
tensor(2.3466, grad_fn=<NllLossBackward0>)
tensor(3.5387, grad_fn=<NllLossBackward0>)
tensor(2.7690, grad_fn=<NllLossBackward0>)
tensor(4.5799, grad_fn=<NllLossBackward0>)
tensor(2.3747, grad_fn=<NllLossBackward0>)
tensor(3.0843, grad_fn=<NllLossBackward0>)
tensor(2.7303, grad_fn=<NllLossBackward0>)
tensor(2.0787, grad_fn=<NllLossBackward0>)
tensor(2.6753, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(2.0352, grad_fn=<NllLossBackward0>)
tensor(2.9131, grad_fn=<NllLossBackward0>)
tensor(2.2775, grad_fn=<NllLossBackward0>)
tensor(3.4540, grad_fn=<NllLossBackward0>)
tensor(3.1891, grad_fn=<NllLossBackward0>)
tensor(2.8670, grad_fn=<NllLossBackward0>)
tensor(3.0535, grad_fn=<NllLossBackward0>)
tensor(3.1309, grad_fn=<NllLossBackward0>)
tensor(2.8280, grad_fn=<NllLossBackward0>)
tensor(3.5156, grad_fn=<NllLossBackward0>)
tensor(3.1538, grad_fn=<NllLossBackward0>)
tensor(2.3499, grad_fn=<NllLossBackward0>)
tensor(2.5361, grad_fn=<NllLossBackward0>)
tensor(3.0797, grad_fn=<NllLossBackward0>)
tensor(2.4760, grad_fn=<NllLossBackward0>)
tensor(3.0496, grad_fn=<NllLossBackward0>)
tensor(2.3711, grad_fn=<NllLossBackward0>)
tensor(3.6760, grad_fn=<NllLossBackward0>)
tensor(2.8051, grad_fn=<NllLossBackward0>)
tensor(2.2079, grad_fn=<NllLossBackward0>)
tensor(2.2912, grad_fn=<NllLossBackward0>)
tensor(2.8033, grad_fn=<NllLossBackward0>)
tensor(2.9868, grad_fn=<NllLossBackward0>)
tensor(2.62

tensor(3.7840, grad_fn=<NllLossBackward0>)
tensor(2.3972, grad_fn=<NllLossBackward0>)
tensor(2.3382, grad_fn=<NllLossBackward0>)
tensor(3.5521, grad_fn=<NllLossBackward0>)
tensor(3.3038, grad_fn=<NllLossBackward0>)
tensor(2.0909, grad_fn=<NllLossBackward0>)
tensor(3.2000, grad_fn=<NllLossBackward0>)
tensor(3.3338, grad_fn=<NllLossBackward0>)
tensor(3.9166, grad_fn=<NllLossBackward0>)
tensor(3.6593, grad_fn=<NllLossBackward0>)
tensor(2.3986, grad_fn=<NllLossBackward0>)
tensor(2.9142, grad_fn=<NllLossBackward0>)
tensor(2.3386, grad_fn=<NllLossBackward0>)
tensor(2.3499, grad_fn=<NllLossBackward0>)
tensor(3.4586, grad_fn=<NllLossBackward0>)
tensor(3.0149, grad_fn=<NllLossBackward0>)
tensor(2.3375, grad_fn=<NllLossBackward0>)
tensor(3.5129, grad_fn=<NllLossBackward0>)
tensor(3.6639, grad_fn=<NllLossBackward0>)
tensor(2.3437, grad_fn=<NllLossBackward0>)
tensor(2.7459, grad_fn=<NllLossBackward0>)
tensor(3.8127, grad_fn=<NllLossBackward0>)
tensor(2.7252, grad_fn=<NllLossBackward0>)
tensor(4.08

tensor(2.6157, grad_fn=<NllLossBackward0>)
tensor(2.9901, grad_fn=<NllLossBackward0>)
tensor(3.6063, grad_fn=<NllLossBackward0>)
tensor(2.0872, grad_fn=<NllLossBackward0>)
tensor(3.8835, grad_fn=<NllLossBackward0>)
tensor(2.7707, grad_fn=<NllLossBackward0>)
tensor(2.8379, grad_fn=<NllLossBackward0>)
tensor(1.7669, grad_fn=<NllLossBackward0>)
tensor(3.0289, grad_fn=<NllLossBackward0>)
tensor(3.4196, grad_fn=<NllLossBackward0>)
tensor(2.0554, grad_fn=<NllLossBackward0>)
tensor(2.0888, grad_fn=<NllLossBackward0>)
tensor(2.8219, grad_fn=<NllLossBackward0>)
tensor(3.0803, grad_fn=<NllLossBackward0>)
tensor(1.7612, grad_fn=<NllLossBackward0>)
tensor(2.4927, grad_fn=<NllLossBackward0>)
tensor(2.6812, grad_fn=<NllLossBackward0>)
tensor(2.9834, grad_fn=<NllLossBackward0>)
tensor(3.7252, grad_fn=<NllLossBackward0>)
tensor(1.7607, grad_fn=<NllLossBackward0>)
tensor(3.2426, grad_fn=<NllLossBackward0>)
tensor(2.3425, grad_fn=<NllLossBackward0>)
tensor(2.8088, grad_fn=<NllLossBackward0>)
tensor(2.57

tensor(3.6424, grad_fn=<NllLossBackward0>)
tensor(2.3883, grad_fn=<NllLossBackward0>)
tensor(3.5177, grad_fn=<NllLossBackward0>)
tensor(3.5473, grad_fn=<NllLossBackward0>)
tensor(3.0280, grad_fn=<NllLossBackward0>)
tensor(2.3838, grad_fn=<NllLossBackward0>)
tensor(4.7801, grad_fn=<NllLossBackward0>)
tensor(3.5509, grad_fn=<NllLossBackward0>)
tensor(2.3784, grad_fn=<NllLossBackward0>)
tensor(2.8365, grad_fn=<NllLossBackward0>)
tensor(2.7098, grad_fn=<NllLossBackward0>)
tensor(2.4826, grad_fn=<NllLossBackward0>)
tensor(3.9540, grad_fn=<NllLossBackward0>)
tensor(3.4708, grad_fn=<NllLossBackward0>)
tensor(2.0792, grad_fn=<NllLossBackward0>)
tensor(2.9758, grad_fn=<NllLossBackward0>)
tensor(3.7098, grad_fn=<NllLossBackward0>)
tensor(2.3748, grad_fn=<NllLossBackward0>)
tensor(3.7042, grad_fn=<NllLossBackward0>)
tensor(4.0894, grad_fn=<NllLossBackward0>)
tensor(2.3438, grad_fn=<NllLossBackward0>)
tensor(1.7663, grad_fn=<NllLossBackward0>)
tensor(3.5189, grad_fn=<NllLossBackward0>)
tensor(2.06

tensor(2.3929, grad_fn=<NllLossBackward0>)
tensor(4.9479, grad_fn=<NllLossBackward0>)
tensor(3.5486, grad_fn=<NllLossBackward0>)
tensor(2.4633, grad_fn=<NllLossBackward0>)
tensor(2.6507, grad_fn=<NllLossBackward0>)
tensor(2.5950, grad_fn=<NllLossBackward0>)
tensor(3.0141, grad_fn=<NllLossBackward0>)
tensor(3.1115, grad_fn=<NllLossBackward0>)
tensor(2.5928, grad_fn=<NllLossBackward0>)
tensor(2.8635, grad_fn=<NllLossBackward0>)
tensor(3.3051, grad_fn=<NllLossBackward0>)
tensor(3.0293, grad_fn=<NllLossBackward0>)
tensor(4.8146, grad_fn=<NllLossBackward0>)
tensor(2.0661, grad_fn=<NllLossBackward0>)
tensor(3.1261, grad_fn=<NllLossBackward0>)
tensor(4.3183, grad_fn=<NllLossBackward0>)
tensor(2.7908, grad_fn=<NllLossBackward0>)
tensor(3.6320, grad_fn=<NllLossBackward0>)
tensor(3.1202, grad_fn=<NllLossBackward0>)
tensor(3.9981, grad_fn=<NllLossBackward0>)
tensor(2.3915, grad_fn=<NllLossBackward0>)
tensor(1.7522, grad_fn=<NllLossBackward0>)
tensor(3.4756, grad_fn=<NllLossBackward0>)
tensor(3.34

tensor(2.3402, grad_fn=<NllLossBackward0>)
tensor(2.0696, grad_fn=<NllLossBackward0>)
tensor(2.6953, grad_fn=<NllLossBackward0>)
tensor(2.7532, grad_fn=<NllLossBackward0>)
tensor(2.0667, grad_fn=<NllLossBackward0>)
tensor(2.5236, grad_fn=<NllLossBackward0>)
tensor(3.2622, grad_fn=<NllLossBackward0>)
tensor(3.6369, grad_fn=<NllLossBackward0>)
tensor(2.8545, grad_fn=<NllLossBackward0>)
tensor(2.8869, grad_fn=<NllLossBackward0>)
tensor(2.0503, grad_fn=<NllLossBackward0>)
tensor(3.0005, grad_fn=<NllLossBackward0>)
tensor(2.4774, grad_fn=<NllLossBackward0>)
tensor(3.0865, grad_fn=<NllLossBackward0>)
tensor(2.3734, grad_fn=<NllLossBackward0>)
tensor(2.9316, grad_fn=<NllLossBackward0>)
tensor(2.8384, grad_fn=<NllLossBackward0>)
tensor(2.5297, grad_fn=<NllLossBackward0>)
tensor(2.8424, grad_fn=<NllLossBackward0>)
tensor(4.9517, grad_fn=<NllLossBackward0>)
tensor(2.0492, grad_fn=<NllLossBackward0>)
tensor(3.1664, grad_fn=<NllLossBackward0>)
tensor(2.8547, grad_fn=<NllLossBackward0>)
tensor(1.74

tensor(3.8568, grad_fn=<NllLossBackward0>)
tensor(3.2866, grad_fn=<NllLossBackward0>)
tensor(2.3393, grad_fn=<NllLossBackward0>)
tensor(2.0573, grad_fn=<NllLossBackward0>)
tensor(3.4553, grad_fn=<NllLossBackward0>)
tensor(2.5304, grad_fn=<NllLossBackward0>)
tensor(3.6402, grad_fn=<NllLossBackward0>)
tensor(2.2769, grad_fn=<NllLossBackward0>)
tensor(2.7245, grad_fn=<NllLossBackward0>)
tensor(3.1992, grad_fn=<NllLossBackward0>)
tensor(3.2998, grad_fn=<NllLossBackward0>)
tensor(2.4668, grad_fn=<NllLossBackward0>)
tensor(3.5201, grad_fn=<NllLossBackward0>)
tensor(2.9473, grad_fn=<NllLossBackward0>)
tensor(2.8030, grad_fn=<NllLossBackward0>)
tensor(1.7267, grad_fn=<NllLossBackward0>)
tensor(3.0027, grad_fn=<NllLossBackward0>)
tensor(4.0716, grad_fn=<NllLossBackward0>)
tensor(3.0146, grad_fn=<NllLossBackward0>)
tensor(4.6366, grad_fn=<NllLossBackward0>)
tensor(3.2917, grad_fn=<NllLossBackward0>)
tensor(2.9942, grad_fn=<NllLossBackward0>)
tensor(3.6456, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(3.2902, grad_fn=<NllLossBackward0>)
tensor(2.7022, grad_fn=<NllLossBackward0>)
tensor(3.4881, grad_fn=<NllLossBackward0>)
tensor(3.5594, grad_fn=<NllLossBackward0>)
tensor(4.2551, grad_fn=<NllLossBackward0>)
tensor(2.3683, grad_fn=<NllLossBackward0>)
tensor(3.6513, grad_fn=<NllLossBackward0>)
tensor(3.2200, grad_fn=<NllLossBackward0>)
tensor(2.3349, grad_fn=<NllLossBackward0>)
tensor(3.2711, grad_fn=<NllLossBackward0>)
tensor(2.3377, grad_fn=<NllLossBackward0>)
tensor(3.5931, grad_fn=<NllLossBackward0>)
tensor(2.3687, grad_fn=<NllLossBackward0>)
tensor(2.5265, grad_fn=<NllLossBackward0>)
tensor(2.3870, grad_fn=<NllLossBackward0>)
tensor(2.4779, grad_fn=<NllLossBackward0>)
tensor(2.8202, grad_fn=<NllLossBackward0>)
tensor(3.4129, grad_fn=<NllLossBackward0>)
tensor(2.7856, grad_fn=<NllLossBackward0>)
tensor(2.8576, grad_fn=<NllLossBackward0>)
tensor(2.0499, grad_fn=<NllLossBackward0>)
tensor(2.0522, grad_fn=<NllLossBackward0>)
tensor(2.3656, grad_fn=<NllLossBackward0>)
tensor(1.72

tensor(4.0991, grad_fn=<NllLossBackward0>)
tensor(2.3474, grad_fn=<NllLossBackward0>)
tensor(2.3815, grad_fn=<NllLossBackward0>)
tensor(2.0594, grad_fn=<NllLossBackward0>)
tensor(2.9555, grad_fn=<NllLossBackward0>)
tensor(2.3583, grad_fn=<NllLossBackward0>)
tensor(3.5739, grad_fn=<NllLossBackward0>)
tensor(3.4094, grad_fn=<NllLossBackward0>)
tensor(3.9917, grad_fn=<NllLossBackward0>)
tensor(4.0428, grad_fn=<NllLossBackward0>)
tensor(2.5764, grad_fn=<NllLossBackward0>)
tensor(3.2498, grad_fn=<NllLossBackward0>)
tensor(3.0008, grad_fn=<NllLossBackward0>)
tensor(2.5078, grad_fn=<NllLossBackward0>)
tensor(2.4005, grad_fn=<NllLossBackward0>)
tensor(2.7501, grad_fn=<NllLossBackward0>)
tensor(2.7534, grad_fn=<NllLossBackward0>)
tensor(4.1111, grad_fn=<NllLossBackward0>)
tensor(1.7495, grad_fn=<NllLossBackward0>)
tensor(4.4226, grad_fn=<NllLossBackward0>)
tensor(3.9575, grad_fn=<NllLossBackward0>)
tensor(2.9770, grad_fn=<NllLossBackward0>)
tensor(3.2013, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(3.6472, grad_fn=<NllLossBackward0>)
tensor(3.1960, grad_fn=<NllLossBackward0>)
tensor(2.3502, grad_fn=<NllLossBackward0>)
tensor(2.8588, grad_fn=<NllLossBackward0>)
tensor(2.5525, grad_fn=<NllLossBackward0>)
tensor(4.0259, grad_fn=<NllLossBackward0>)
tensor(2.9813, grad_fn=<NllLossBackward0>)
tensor(2.8356, grad_fn=<NllLossBackward0>)
tensor(2.4080, grad_fn=<NllLossBackward0>)
tensor(4.1128, grad_fn=<NllLossBackward0>)
tensor(2.9253, grad_fn=<NllLossBackward0>)
tensor(4.1087, grad_fn=<NllLossBackward0>)
tensor(2.0518, grad_fn=<NllLossBackward0>)
tensor(4.0909, grad_fn=<NllLossBackward0>)
tensor(2.8276, grad_fn=<NllLossBackward0>)
tensor(3.3328, grad_fn=<NllLossBackward0>)
tensor(2.5164, grad_fn=<NllLossBackward0>)
tensor(4.4341, grad_fn=<NllLossBackward0>)
tensor(3.5505, grad_fn=<NllLossBackward0>)
tensor(3.4569, grad_fn=<NllLossBackward0>)
tensor(2.4691, grad_fn=<NllLossBackward0>)
tensor(2.2209, grad_fn=<NllLossBackward0>)
tensor(3.8921, grad_fn=<NllLossBackward0>)
tensor(2.46

tensor(3.8875, grad_fn=<NllLossBackward0>)
tensor(2.9759, grad_fn=<NllLossBackward0>)
tensor(3.0328, grad_fn=<NllLossBackward0>)
tensor(2.3630, grad_fn=<NllLossBackward0>)
tensor(3.5000, grad_fn=<NllLossBackward0>)
tensor(3.0331, grad_fn=<NllLossBackward0>)
tensor(3.1390, grad_fn=<NllLossBackward0>)
tensor(1.8101, grad_fn=<NllLossBackward0>)
tensor(2.8652, grad_fn=<NllLossBackward0>)
tensor(2.3166, grad_fn=<NllLossBackward0>)
tensor(2.6708, grad_fn=<NllLossBackward0>)
tensor(2.4819, grad_fn=<NllLossBackward0>)
tensor(2.9754, grad_fn=<NllLossBackward0>)
tensor(3.5008, grad_fn=<NllLossBackward0>)
tensor(3.6365, grad_fn=<NllLossBackward0>)
tensor(2.7297, grad_fn=<NllLossBackward0>)
tensor(3.3853, grad_fn=<NllLossBackward0>)
tensor(3.8747, grad_fn=<NllLossBackward0>)
tensor(2.6978, grad_fn=<NllLossBackward0>)
tensor(2.8582, grad_fn=<NllLossBackward0>)
tensor(3.0244, grad_fn=<NllLossBackward0>)
tensor(4.3749, grad_fn=<NllLossBackward0>)
tensor(2.7029, grad_fn=<NllLossBackward0>)
tensor(3.14

tensor(2.9417, grad_fn=<NllLossBackward0>)
tensor(2.8673, grad_fn=<NllLossBackward0>)
tensor(3.2097, grad_fn=<NllLossBackward0>)
tensor(2.0923, grad_fn=<NllLossBackward0>)
tensor(2.3100, grad_fn=<NllLossBackward0>)
tensor(3.6772, grad_fn=<NllLossBackward0>)
tensor(3.3852, grad_fn=<NllLossBackward0>)
tensor(2.9424, grad_fn=<NllLossBackward0>)
tensor(3.1544, grad_fn=<NllLossBackward0>)
tensor(2.3644, grad_fn=<NllLossBackward0>)
tensor(3.3884, grad_fn=<NllLossBackward0>)
tensor(4.1664, grad_fn=<NllLossBackward0>)
tensor(2.8450, grad_fn=<NllLossBackward0>)
tensor(3.3845, grad_fn=<NllLossBackward0>)
tensor(2.5681, grad_fn=<NllLossBackward0>)
tensor(3.8640, grad_fn=<NllLossBackward0>)
tensor(2.8151, grad_fn=<NllLossBackward0>)
tensor(3.1008, grad_fn=<NllLossBackward0>)
tensor(2.6134, grad_fn=<NllLossBackward0>)
tensor(2.4312, grad_fn=<NllLossBackward0>)
tensor(2.3001, grad_fn=<NllLossBackward0>)
tensor(2.4178, grad_fn=<NllLossBackward0>)
tensor(2.0874, grad_fn=<NllLossBackward0>)
tensor(2.70

tensor(2.2561, grad_fn=<NllLossBackward0>)
tensor(2.0829, grad_fn=<NllLossBackward0>)
tensor(2.6730, grad_fn=<NllLossBackward0>)
tensor(2.2604, grad_fn=<NllLossBackward0>)
tensor(3.7880, grad_fn=<NllLossBackward0>)
tensor(1.8208, grad_fn=<NllLossBackward0>)
tensor(3.3294, grad_fn=<NllLossBackward0>)
tensor(2.0827, grad_fn=<NllLossBackward0>)
tensor(2.5234, grad_fn=<NllLossBackward0>)
tensor(2.2841, grad_fn=<NllLossBackward0>)
tensor(2.3115, grad_fn=<NllLossBackward0>)
tensor(3.1561, grad_fn=<NllLossBackward0>)
tensor(2.6878, grad_fn=<NllLossBackward0>)
tensor(2.4217, grad_fn=<NllLossBackward0>)
tensor(3.4631, grad_fn=<NllLossBackward0>)
tensor(3.1298, grad_fn=<NllLossBackward0>)
tensor(2.7266, grad_fn=<NllLossBackward0>)
tensor(3.1356, grad_fn=<NllLossBackward0>)
tensor(2.9669, grad_fn=<NllLossBackward0>)
tensor(3.9723, grad_fn=<NllLossBackward0>)
tensor(2.8195, grad_fn=<NllLossBackward0>)
tensor(2.9146, grad_fn=<NllLossBackward0>)
tensor(3.3399, grad_fn=<NllLossBackward0>)
tensor(3.33

tensor(2.0660, grad_fn=<NllLossBackward0>)
tensor(2.6851, grad_fn=<NllLossBackward0>)
tensor(3.9369, grad_fn=<NllLossBackward0>)
tensor(3.4593, grad_fn=<NllLossBackward0>)
tensor(2.5078, grad_fn=<NllLossBackward0>)
tensor(3.6295, grad_fn=<NllLossBackward0>)
tensor(3.7731, grad_fn=<NllLossBackward0>)
tensor(2.3750, grad_fn=<NllLossBackward0>)
tensor(2.2841, grad_fn=<NllLossBackward0>)
tensor(4.0178, grad_fn=<NllLossBackward0>)
tensor(2.3671, grad_fn=<NllLossBackward0>)
tensor(2.3711, grad_fn=<NllLossBackward0>)
tensor(2.6772, grad_fn=<NllLossBackward0>)
tensor(2.8700, grad_fn=<NllLossBackward0>)
tensor(2.2466, grad_fn=<NllLossBackward0>)
tensor(3.0922, grad_fn=<NllLossBackward0>)
tensor(2.0725, grad_fn=<NllLossBackward0>)
tensor(3.6052, grad_fn=<NllLossBackward0>)
tensor(3.3008, grad_fn=<NllLossBackward0>)
tensor(3.4563, grad_fn=<NllLossBackward0>)
tensor(2.2461, grad_fn=<NllLossBackward0>)
tensor(2.6776, grad_fn=<NllLossBackward0>)
tensor(2.4152, grad_fn=<NllLossBackward0>)
tensor(2.28

tensor(2.4146, grad_fn=<NllLossBackward0>)
tensor(2.5690, grad_fn=<NllLossBackward0>)
tensor(3.1586, grad_fn=<NllLossBackward0>)
tensor(1.8096, grad_fn=<NllLossBackward0>)
tensor(2.8069, grad_fn=<NllLossBackward0>)
tensor(3.6931, grad_fn=<NllLossBackward0>)
tensor(2.8970, grad_fn=<NllLossBackward0>)
tensor(2.0808, grad_fn=<NllLossBackward0>)
tensor(2.6971, grad_fn=<NllLossBackward0>)
tensor(3.2431, grad_fn=<NllLossBackward0>)
tensor(3.0070, grad_fn=<NllLossBackward0>)
tensor(2.7495, grad_fn=<NllLossBackward0>)
tensor(1.8050, grad_fn=<NllLossBackward0>)
tensor(2.8888, grad_fn=<NllLossBackward0>)
tensor(3.0446, grad_fn=<NllLossBackward0>)
tensor(2.4203, grad_fn=<NllLossBackward0>)
tensor(2.8321, grad_fn=<NllLossBackward0>)
tensor(2.2945, grad_fn=<NllLossBackward0>)
tensor(2.6346, grad_fn=<NllLossBackward0>)
tensor(3.1770, grad_fn=<NllLossBackward0>)
tensor(2.3182, grad_fn=<NllLossBackward0>)
tensor(2.9359, grad_fn=<NllLossBackward0>)
tensor(3.5175, grad_fn=<NllLossBackward0>)
tensor(3.29

tensor(3.3748, grad_fn=<NllLossBackward0>)
tensor(2.8930, grad_fn=<NllLossBackward0>)
tensor(2.5475, grad_fn=<NllLossBackward0>)
tensor(2.5071, grad_fn=<NllLossBackward0>)
tensor(2.0553, grad_fn=<NllLossBackward0>)
tensor(2.4042, grad_fn=<NllLossBackward0>)
tensor(2.9493, grad_fn=<NllLossBackward0>)
tensor(2.5103, grad_fn=<NllLossBackward0>)
tensor(1.7958, grad_fn=<NllLossBackward0>)
tensor(3.1044, grad_fn=<NllLossBackward0>)
tensor(3.6178, grad_fn=<NllLossBackward0>)
tensor(3.5057, grad_fn=<NllLossBackward0>)
tensor(2.4158, grad_fn=<NllLossBackward0>)
tensor(3.3111, grad_fn=<NllLossBackward0>)
tensor(3.3414, grad_fn=<NllLossBackward0>)
tensor(2.4990, grad_fn=<NllLossBackward0>)
tensor(3.7721, grad_fn=<NllLossBackward0>)
tensor(3.1901, grad_fn=<NllLossBackward0>)
tensor(3.9573, grad_fn=<NllLossBackward0>)
tensor(3.9293, grad_fn=<NllLossBackward0>)
tensor(2.7536, grad_fn=<NllLossBackward0>)
tensor(2.0720, grad_fn=<NllLossBackward0>)
tensor(2.7685, grad_fn=<NllLossBackward0>)
tensor(3.57

tensor(2.5692, grad_fn=<NllLossBackward0>)
tensor(2.9078, grad_fn=<NllLossBackward0>)
tensor(3.7838, grad_fn=<NllLossBackward0>)
tensor(2.9910, grad_fn=<NllLossBackward0>)
tensor(2.3538, grad_fn=<NllLossBackward0>)
tensor(3.6469, grad_fn=<NllLossBackward0>)
tensor(2.0491, grad_fn=<NllLossBackward0>)
tensor(3.1867, grad_fn=<NllLossBackward0>)
tensor(2.4751, grad_fn=<NllLossBackward0>)
tensor(2.4066, grad_fn=<NllLossBackward0>)
tensor(2.2904, grad_fn=<NllLossBackward0>)
tensor(3.4661, grad_fn=<NllLossBackward0>)
tensor(3.1286, grad_fn=<NllLossBackward0>)
tensor(3.4427, grad_fn=<NllLossBackward0>)
tensor(2.5237, grad_fn=<NllLossBackward0>)
tensor(3.1270, grad_fn=<NllLossBackward0>)
tensor(2.4697, grad_fn=<NllLossBackward0>)
tensor(2.9179, grad_fn=<NllLossBackward0>)
tensor(2.8310, grad_fn=<NllLossBackward0>)
tensor(2.0461, grad_fn=<NllLossBackward0>)
tensor(2.5007, grad_fn=<NllLossBackward0>)
tensor(2.0465, grad_fn=<NllLossBackward0>)
tensor(2.5315, grad_fn=<NllLossBackward0>)
tensor(2.49

tensor(2.0350, grad_fn=<NllLossBackward0>)
tensor(3.9254, grad_fn=<NllLossBackward0>)
tensor(2.3087, grad_fn=<NllLossBackward0>)
tensor(1.7678, grad_fn=<NllLossBackward0>)
tensor(3.6153, grad_fn=<NllLossBackward0>)
tensor(2.3559, grad_fn=<NllLossBackward0>)
tensor(2.8640, grad_fn=<NllLossBackward0>)
tensor(2.3555, grad_fn=<NllLossBackward0>)
tensor(2.3300, grad_fn=<NllLossBackward0>)
tensor(2.9235, grad_fn=<NllLossBackward0>)
tensor(2.9012, grad_fn=<NllLossBackward0>)
tensor(2.6628, grad_fn=<NllLossBackward0>)
tensor(3.8842, grad_fn=<NllLossBackward0>)
tensor(2.7582, grad_fn=<NllLossBackward0>)
tensor(2.0567, grad_fn=<NllLossBackward0>)
tensor(2.6576, grad_fn=<NllLossBackward0>)
tensor(2.3243, grad_fn=<NllLossBackward0>)
tensor(4.2432, grad_fn=<NllLossBackward0>)
tensor(3.6942, grad_fn=<NllLossBackward0>)
tensor(2.6768, grad_fn=<NllLossBackward0>)
tensor(3.6225, grad_fn=<NllLossBackward0>)
tensor(2.3216, grad_fn=<NllLossBackward0>)
tensor(2.2991, grad_fn=<NllLossBackward0>)
tensor(3.19

tensor(3.1912, grad_fn=<NllLossBackward0>)
tensor(3.0491, grad_fn=<NllLossBackward0>)
tensor(2.5197, grad_fn=<NllLossBackward0>)
tensor(2.6912, grad_fn=<NllLossBackward0>)
tensor(3.5744, grad_fn=<NllLossBackward0>)
tensor(3.6885, grad_fn=<NllLossBackward0>)
tensor(2.0306, grad_fn=<NllLossBackward0>)
tensor(3.7939, grad_fn=<NllLossBackward0>)
tensor(2.2363, grad_fn=<NllLossBackward0>)
tensor(3.8418, grad_fn=<NllLossBackward0>)
tensor(3.9279, grad_fn=<NllLossBackward0>)
tensor(2.5392, grad_fn=<NllLossBackward0>)
tensor(2.9742, grad_fn=<NllLossBackward0>)
tensor(2.3404, grad_fn=<NllLossBackward0>)
tensor(3.5020, grad_fn=<NllLossBackward0>)
tensor(3.1656, grad_fn=<NllLossBackward0>)
tensor(3.7271, grad_fn=<NllLossBackward0>)
tensor(4.4024, grad_fn=<NllLossBackward0>)
tensor(3.6090, grad_fn=<NllLossBackward0>)
tensor(3.0887, grad_fn=<NllLossBackward0>)
tensor(2.6299, grad_fn=<NllLossBackward0>)
tensor(4.2553, grad_fn=<NllLossBackward0>)
tensor(3.2110, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(2.9101, grad_fn=<NllLossBackward0>)
tensor(2.7052, grad_fn=<NllLossBackward0>)
tensor(3.9692, grad_fn=<NllLossBackward0>)
tensor(2.7354, grad_fn=<NllLossBackward0>)
tensor(2.4825, grad_fn=<NllLossBackward0>)
tensor(1.7567, grad_fn=<NllLossBackward0>)
tensor(3.0326, grad_fn=<NllLossBackward0>)
tensor(2.7357, grad_fn=<NllLossBackward0>)
tensor(2.9876, grad_fn=<NllLossBackward0>)
tensor(2.5142, grad_fn=<NllLossBackward0>)
tensor(2.0339, grad_fn=<NllLossBackward0>)
tensor(3.1318, grad_fn=<NllLossBackward0>)
tensor(3.5497, grad_fn=<NllLossBackward0>)
tensor(3.1976, grad_fn=<NllLossBackward0>)
tensor(2.3486, grad_fn=<NllLossBackward0>)
tensor(3.3227, grad_fn=<NllLossBackward0>)
tensor(2.4608, grad_fn=<NllLossBackward0>)
tensor(3.1292, grad_fn=<NllLossBackward0>)
tensor(3.5896, grad_fn=<NllLossBackward0>)
tensor(3.3488, grad_fn=<NllLossBackward0>)
tensor(2.0700, grad_fn=<NllLossBackward0>)
tensor(2.0682, grad_fn=<NllLossBackward0>)
tensor(2.5611, grad_fn=<NllLossBackward0>)
tensor(3.12

tensor(3.3851, grad_fn=<NllLossBackward0>)
tensor(2.3780, grad_fn=<NllLossBackward0>)
tensor(2.3902, grad_fn=<NllLossBackward0>)
tensor(3.4496, grad_fn=<NllLossBackward0>)
tensor(3.2772, grad_fn=<NllLossBackward0>)
tensor(4.1450, grad_fn=<NllLossBackward0>)
tensor(2.7544, grad_fn=<NllLossBackward0>)
tensor(2.8288, grad_fn=<NllLossBackward0>)
tensor(2.8631, grad_fn=<NllLossBackward0>)
tensor(2.9212, grad_fn=<NllLossBackward0>)
tensor(2.0256, grad_fn=<NllLossBackward0>)
tensor(2.0252, grad_fn=<NllLossBackward0>)
tensor(4.0230, grad_fn=<NllLossBackward0>)
tensor(1.7505, grad_fn=<NllLossBackward0>)
tensor(3.5106, grad_fn=<NllLossBackward0>)
tensor(3.6216, grad_fn=<NllLossBackward0>)
tensor(2.0643, grad_fn=<NllLossBackward0>)
tensor(2.7357, grad_fn=<NllLossBackward0>)
tensor(2.9751, grad_fn=<NllLossBackward0>)
tensor(2.2731, grad_fn=<NllLossBackward0>)
tensor(2.0247, grad_fn=<NllLossBackward0>)
tensor(2.5099, grad_fn=<NllLossBackward0>)
tensor(3.3549, grad_fn=<NllLossBackward0>)
tensor(1.74

tensor(3.8673, grad_fn=<NllLossBackward0>)
tensor(2.6775, grad_fn=<NllLossBackward0>)
tensor(2.3530, grad_fn=<NllLossBackward0>)
tensor(2.2660, grad_fn=<NllLossBackward0>)
tensor(2.6707, grad_fn=<NllLossBackward0>)
tensor(2.8326, grad_fn=<NllLossBackward0>)
tensor(3.1655, grad_fn=<NllLossBackward0>)
tensor(3.4007, grad_fn=<NllLossBackward0>)
tensor(2.2560, grad_fn=<NllLossBackward0>)
tensor(3.5806, grad_fn=<NllLossBackward0>)
tensor(2.3742, grad_fn=<NllLossBackward0>)
tensor(2.2552, grad_fn=<NllLossBackward0>)
tensor(2.3599, grad_fn=<NllLossBackward0>)
tensor(3.1451, grad_fn=<NllLossBackward0>)
tensor(2.3158, grad_fn=<NllLossBackward0>)
tensor(2.6618, grad_fn=<NllLossBackward0>)
tensor(3.3394, grad_fn=<NllLossBackward0>)
tensor(3.2605, grad_fn=<NllLossBackward0>)
tensor(2.7749, grad_fn=<NllLossBackward0>)
tensor(3.5716, grad_fn=<NllLossBackward0>)
tensor(2.3110, grad_fn=<NllLossBackward0>)
tensor(3.0286, grad_fn=<NllLossBackward0>)
tensor(2.3237, grad_fn=<NllLossBackward0>)
tensor(3.91

tensor(2.6670, grad_fn=<NllLossBackward0>)
tensor(2.6558, grad_fn=<NllLossBackward0>)
tensor(3.4878, grad_fn=<NllLossBackward0>)
tensor(2.5123, grad_fn=<NllLossBackward0>)
tensor(2.5448, grad_fn=<NllLossBackward0>)
tensor(2.7004, grad_fn=<NllLossBackward0>)
tensor(2.6577, grad_fn=<NllLossBackward0>)
tensor(1.7341, grad_fn=<NllLossBackward0>)
tensor(3.2217, grad_fn=<NllLossBackward0>)
tensor(3.3489, grad_fn=<NllLossBackward0>)
tensor(2.6895, grad_fn=<NllLossBackward0>)
tensor(2.8857, grad_fn=<NllLossBackward0>)
tensor(2.4521, grad_fn=<NllLossBackward0>)
tensor(3.2123, grad_fn=<NllLossBackward0>)
tensor(3.9270, grad_fn=<NllLossBackward0>)
tensor(3.7942, grad_fn=<NllLossBackward0>)
tensor(2.6992, grad_fn=<NllLossBackward0>)
tensor(2.9046, grad_fn=<NllLossBackward0>)
tensor(2.5469, grad_fn=<NllLossBackward0>)
tensor(2.7737, grad_fn=<NllLossBackward0>)
tensor(2.5187, grad_fn=<NllLossBackward0>)
tensor(2.6499, grad_fn=<NllLossBackward0>)
tensor(3.6054, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(2.7326, grad_fn=<NllLossBackward0>)
tensor(3.2870, grad_fn=<NllLossBackward0>)
tensor(4.0192, grad_fn=<NllLossBackward0>)
tensor(2.6039, grad_fn=<NllLossBackward0>)
tensor(3.5059, grad_fn=<NllLossBackward0>)
tensor(2.4767, grad_fn=<NllLossBackward0>)
tensor(4.1210, grad_fn=<NllLossBackward0>)
tensor(3.0566, grad_fn=<NllLossBackward0>)
tensor(1.7320, grad_fn=<NllLossBackward0>)
tensor(3.3601, grad_fn=<NllLossBackward0>)
tensor(3.6628, grad_fn=<NllLossBackward0>)
tensor(2.6339, grad_fn=<NllLossBackward0>)
tensor(3.1763, grad_fn=<NllLossBackward0>)
tensor(2.4720, grad_fn=<NllLossBackward0>)
tensor(2.2263, grad_fn=<NllLossBackward0>)
tensor(2.7362, grad_fn=<NllLossBackward0>)
tensor(1.7270, grad_fn=<NllLossBackward0>)
tensor(2.0049, grad_fn=<NllLossBackward0>)
tensor(3.0867, grad_fn=<NllLossBackward0>)
tensor(2.8294, grad_fn=<NllLossBackward0>)
tensor(2.0045, grad_fn=<NllLossBackward0>)
tensor(2.6670, grad_fn=<NllLossBackward0>)
tensor(3.8112, grad_fn=<NllLossBackward0>)
tensor(2.06

tensor(2.6064, grad_fn=<NllLossBackward0>)
tensor(3.0768, grad_fn=<NllLossBackward0>)
tensor(2.2139, grad_fn=<NllLossBackward0>)
tensor(2.6832, grad_fn=<NllLossBackward0>)
tensor(3.3101, grad_fn=<NllLossBackward0>)
tensor(2.2609, grad_fn=<NllLossBackward0>)
tensor(2.9985, grad_fn=<NllLossBackward0>)
tensor(3.1609, grad_fn=<NllLossBackward0>)
tensor(2.7235, grad_fn=<NllLossBackward0>)
tensor(2.8971, grad_fn=<NllLossBackward0>)
tensor(3.9172, grad_fn=<NllLossBackward0>)
tensor(3.6775, grad_fn=<NllLossBackward0>)
tensor(3.0592, grad_fn=<NllLossBackward0>)
tensor(2.4472, grad_fn=<NllLossBackward0>)
tensor(2.2918, grad_fn=<NllLossBackward0>)
tensor(2.2230, grad_fn=<NllLossBackward0>)
tensor(4.4489, grad_fn=<NllLossBackward0>)
tensor(2.7590, grad_fn=<NllLossBackward0>)
tensor(3.5639, grad_fn=<NllLossBackward0>)
tensor(2.3841, grad_fn=<NllLossBackward0>)
tensor(1.7154, grad_fn=<NllLossBackward0>)
tensor(3.3956, grad_fn=<NllLossBackward0>)
tensor(2.3443, grad_fn=<NllLossBackward0>)
tensor(2.88

tensor(2.4868, grad_fn=<NllLossBackward0>)
tensor(2.8122, grad_fn=<NllLossBackward0>)
tensor(2.6223, grad_fn=<NllLossBackward0>)
tensor(3.7826, grad_fn=<NllLossBackward0>)
tensor(3.1016, grad_fn=<NllLossBackward0>)
tensor(2.0000, grad_fn=<NllLossBackward0>)
tensor(3.3886, grad_fn=<NllLossBackward0>)
tensor(3.1843, grad_fn=<NllLossBackward0>)
tensor(2.8094, grad_fn=<NllLossBackward0>)
tensor(2.3865, grad_fn=<NllLossBackward0>)
tensor(4.1151, grad_fn=<NllLossBackward0>)
tensor(1.7296, grad_fn=<NllLossBackward0>)
tensor(3.2151, grad_fn=<NllLossBackward0>)
tensor(2.4436, grad_fn=<NllLossBackward0>)
tensor(3.3461, grad_fn=<NllLossBackward0>)
tensor(2.6731, grad_fn=<NllLossBackward0>)
tensor(3.7212, grad_fn=<NllLossBackward0>)
tensor(2.5487, grad_fn=<NllLossBackward0>)
tensor(4.0366, grad_fn=<NllLossBackward0>)
tensor(4.0263, grad_fn=<NllLossBackward0>)
tensor(2.0714, grad_fn=<NllLossBackward0>)
tensor(3.5986, grad_fn=<NllLossBackward0>)
tensor(2.9955, grad_fn=<NllLossBackward0>)
tensor(3.65

tensor(2.6504, grad_fn=<NllLossBackward0>)
tensor(2.8725, grad_fn=<NllLossBackward0>)
tensor(1.7243, grad_fn=<NllLossBackward0>)
tensor(2.4910, grad_fn=<NllLossBackward0>)
tensor(1.7214, grad_fn=<NllLossBackward0>)
tensor(3.0557, grad_fn=<NllLossBackward0>)
tensor(2.7110, grad_fn=<NllLossBackward0>)
tensor(2.6664, grad_fn=<NllLossBackward0>)
tensor(3.8168, grad_fn=<NllLossBackward0>)
tensor(2.7279, grad_fn=<NllLossBackward0>)
tensor(2.6654, grad_fn=<NllLossBackward0>)
tensor(2.8689, grad_fn=<NllLossBackward0>)
tensor(3.5433, grad_fn=<NllLossBackward0>)
tensor(4.6764, grad_fn=<NllLossBackward0>)
tensor(2.7376, grad_fn=<NllLossBackward0>)
tensor(2.3351, grad_fn=<NllLossBackward0>)
tensor(3.0311, grad_fn=<NllLossBackward0>)
tensor(3.3009, grad_fn=<NllLossBackward0>)
tensor(2.2221, grad_fn=<NllLossBackward0>)
tensor(2.9995, grad_fn=<NllLossBackward0>)
tensor(2.8636, grad_fn=<NllLossBackward0>)
tensor(2.9881, grad_fn=<NllLossBackward0>)
tensor(2.0578, grad_fn=<NllLossBackward0>)
tensor(1.72

tensor(3.4622, grad_fn=<NllLossBackward0>)
tensor(3.0402, grad_fn=<NllLossBackward0>)
tensor(3.5751, grad_fn=<NllLossBackward0>)
tensor(4.3968, grad_fn=<NllLossBackward0>)
tensor(2.9854, grad_fn=<NllLossBackward0>)
tensor(2.3451, grad_fn=<NllLossBackward0>)
tensor(2.0035, grad_fn=<NllLossBackward0>)
tensor(3.2261, grad_fn=<NllLossBackward0>)
tensor(3.1507, grad_fn=<NllLossBackward0>)
tensor(3.7020, grad_fn=<NllLossBackward0>)
tensor(3.1758, grad_fn=<NllLossBackward0>)
tensor(3.5792, grad_fn=<NllLossBackward0>)
tensor(1.7500, grad_fn=<NllLossBackward0>)
tensor(3.0362, grad_fn=<NllLossBackward0>)
tensor(2.9820, grad_fn=<NllLossBackward0>)
tensor(3.8668, grad_fn=<NllLossBackward0>)
tensor(3.1173, grad_fn=<NllLossBackward0>)
tensor(3.2068, grad_fn=<NllLossBackward0>)
tensor(3.6561, grad_fn=<NllLossBackward0>)
tensor(2.3278, grad_fn=<NllLossBackward0>)
tensor(3.6182, grad_fn=<NllLossBackward0>)
tensor(3.3821, grad_fn=<NllLossBackward0>)
tensor(3.3016, grad_fn=<NllLossBackward0>)
tensor(2.89

tensor(3.1576, grad_fn=<NllLossBackward0>)
tensor(3.7675, grad_fn=<NllLossBackward0>)
tensor(1.7646, grad_fn=<NllLossBackward0>)
tensor(2.8196, grad_fn=<NllLossBackward0>)
tensor(2.9645, grad_fn=<NllLossBackward0>)
tensor(2.6190, grad_fn=<NllLossBackward0>)
tensor(3.5350, grad_fn=<NllLossBackward0>)
tensor(2.6629, grad_fn=<NllLossBackward0>)
tensor(2.3464, grad_fn=<NllLossBackward0>)
tensor(2.6951, grad_fn=<NllLossBackward0>)
tensor(3.5580, grad_fn=<NllLossBackward0>)
tensor(3.4748, grad_fn=<NllLossBackward0>)
tensor(2.8833, grad_fn=<NllLossBackward0>)
tensor(2.3530, grad_fn=<NllLossBackward0>)
tensor(2.2294, grad_fn=<NllLossBackward0>)
tensor(2.3915, grad_fn=<NllLossBackward0>)
tensor(3.1085, grad_fn=<NllLossBackward0>)
tensor(3.8102, grad_fn=<NllLossBackward0>)
tensor(2.3479, grad_fn=<NllLossBackward0>)
tensor(3.6093, grad_fn=<NllLossBackward0>)
tensor(2.5483, grad_fn=<NllLossBackward0>)
tensor(3.5352, grad_fn=<NllLossBackward0>)
tensor(2.3439, grad_fn=<NllLossBackward0>)
tensor(2.73

KeyboardInterrupt: 