# Dataloading 01

In this notebook, we'll figure out how to use PyTorch's DataLoader class to load our massive files without reading the entirety of them into memory

In [1]:
import comet_ml
import dask.dataframe as dd
import pandas as pd 
import torch
import linecache 
import csv
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import torch.nn.functional as F
import sys, os
from pathlib import Path
import plotly.express as px 
from sklearn.utils.class_weight import compute_class_weight
import torch

sys.path.append('../src')
sys.path.append('../src/models/lib')
here = Path().cwd()

We'll first design a custom dataset to use with PyTorch's `DataLoader` class

In [2]:
from models.lib.neural import *

In [3]:
primary = GeneExpressionData(
    filename='../data/processed/primary.csv',
    labelname='../data/processed/meta_primary_labels.csv',
    class_label='Subtype'
)

In [4]:
class Model(nn.Module):
    def __init__(self, inputs, outputs):
        super(Model, self).__init__()

        layers = [
            nn.Linear(1024, 1024),
            nn.ReLU(),
            # nn.Dropout(0.5),
            nn.BatchNorm1d(1024, 1024),
        ]

        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(inputs, 1024),
            *layers,
            nn.Linear(1024, outputs),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [5]:
model = Model(
    inputs = primary.num_features(),
    outputs = primary.num_labels(),
)

In [6]:
train_size = int(0.80 * len(primary))
test_size = len(primary) - train_size

train, test = torch.utils.data.random_split(primary, [train_size, test_size])

trainloader = DataLoader(
    train, 
    batch_size=2,
)

validloader = DataLoader(
    test, 
    batch_size=2, 
)

In [7]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)


In [9]:
epochs = 5
min_valid_loss = np.inf

for e in range(epochs):
    train_loss = 0.0
    model.train()     # Optional when not using Model Specific layer
    for data, labels in trainloader:
        print(loss)
        optimizer.zero_grad()
        target = model(data)
        loss = criterion(target,labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    valid_loss = 0.0
    model.eval() # Optional when not using Model Specific layer
    for data, labels in validloader:
        target = model(data)
        loss = criterion(target,labels)
        valid_loss = loss.item() * data.size(0)

    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainloader)} \t\t Validation Loss: {valid_loss / len(validloader)}')
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model.pth')

tensor(1.7818, grad_fn=<NllLossBackward0>)
tensor(3.2780, grad_fn=<NllLossBackward0>)
tensor(2.8724, grad_fn=<NllLossBackward0>)
tensor(2.6151, grad_fn=<NllLossBackward0>)
tensor(3.5795, grad_fn=<NllLossBackward0>)
tensor(3.8546, grad_fn=<NllLossBackward0>)
tensor(2.8463, grad_fn=<NllLossBackward0>)
tensor(2.2711, grad_fn=<NllLossBackward0>)
tensor(3.1268, grad_fn=<NllLossBackward0>)
tensor(4.5536, grad_fn=<NllLossBackward0>)
tensor(1.7892, grad_fn=<NllLossBackward0>)
tensor(2.7816, grad_fn=<NllLossBackward0>)
tensor(2.9241, grad_fn=<NllLossBackward0>)
tensor(2.2694, grad_fn=<NllLossBackward0>)
tensor(3.4514, grad_fn=<NllLossBackward0>)
tensor(3.4947, grad_fn=<NllLossBackward0>)
tensor(3.8730, grad_fn=<NllLossBackward0>)
tensor(3.3989, grad_fn=<NllLossBackward0>)
tensor(3.6261, grad_fn=<NllLossBackward0>)
tensor(2.6731, grad_fn=<NllLossBackward0>)
tensor(2.3506, grad_fn=<NllLossBackward0>)
tensor(3.0805, grad_fn=<NllLossBackward0>)
tensor(3.3627, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(4.1568, grad_fn=<NllLossBackward0>)
tensor(2.9650, grad_fn=<NllLossBackward0>)
tensor(4.0574, grad_fn=<NllLossBackward0>)
tensor(2.3564, grad_fn=<NllLossBackward0>)
tensor(2.7719, grad_fn=<NllLossBackward0>)
tensor(2.4207, grad_fn=<NllLossBackward0>)
tensor(1.7640, grad_fn=<NllLossBackward0>)
tensor(2.6288, grad_fn=<NllLossBackward0>)
tensor(3.1793, grad_fn=<NllLossBackward0>)
tensor(2.9968, grad_fn=<NllLossBackward0>)
tensor(1.7625, grad_fn=<NllLossBackward0>)
tensor(2.4609, grad_fn=<NllLossBackward0>)
tensor(3.4735, grad_fn=<NllLossBackward0>)
tensor(3.3488, grad_fn=<NllLossBackward0>)
tensor(2.5613, grad_fn=<NllLossBackward0>)
tensor(3.1302, grad_fn=<NllLossBackward0>)
tensor(3.2291, grad_fn=<NllLossBackward0>)
tensor(3.2759, grad_fn=<NllLossBackward0>)
tensor(2.4619, grad_fn=<NllLossBackward0>)
tensor(2.3560, grad_fn=<NllLossBackward0>)
tensor(2.2400, grad_fn=<NllLossBackward0>)
tensor(2.8507, grad_fn=<NllLossBackward0>)
tensor(3.0562, grad_fn=<NllLossBackward0>)
tensor(3.20

tensor(2.4076, grad_fn=<NllLossBackward0>)
tensor(2.5231, grad_fn=<NllLossBackward0>)
tensor(3.4504, grad_fn=<NllLossBackward0>)
tensor(2.3478, grad_fn=<NllLossBackward0>)
tensor(2.6931, grad_fn=<NllLossBackward0>)
tensor(3.5171, grad_fn=<NllLossBackward0>)
tensor(2.0212, grad_fn=<NllLossBackward0>)
tensor(4.3053, grad_fn=<NllLossBackward0>)
tensor(2.9315, grad_fn=<NllLossBackward0>)
tensor(2.2394, grad_fn=<NllLossBackward0>)
tensor(2.8463, grad_fn=<NllLossBackward0>)
tensor(3.4043, grad_fn=<NllLossBackward0>)
tensor(3.3470, grad_fn=<NllLossBackward0>)
tensor(2.2409, grad_fn=<NllLossBackward0>)
tensor(3.7083, grad_fn=<NllLossBackward0>)
tensor(2.9386, grad_fn=<NllLossBackward0>)
tensor(2.3469, grad_fn=<NllLossBackward0>)
tensor(2.2976, grad_fn=<NllLossBackward0>)
tensor(3.6301, grad_fn=<NllLossBackward0>)
tensor(3.9385, grad_fn=<NllLossBackward0>)
tensor(3.7339, grad_fn=<NllLossBackward0>)
tensor(2.2920, grad_fn=<NllLossBackward0>)
tensor(2.7764, grad_fn=<NllLossBackward0>)
tensor(2.83

tensor(2.6081, grad_fn=<NllLossBackward0>)
tensor(3.1047, grad_fn=<NllLossBackward0>)
tensor(2.9596, grad_fn=<NllLossBackward0>)
tensor(2.9273, grad_fn=<NllLossBackward0>)
tensor(4.4333, grad_fn=<NllLossBackward0>)
tensor(3.3991, grad_fn=<NllLossBackward0>)
tensor(3.2989, grad_fn=<NllLossBackward0>)
tensor(2.9322, grad_fn=<NllLossBackward0>)
tensor(3.2424, grad_fn=<NllLossBackward0>)
tensor(3.3871, grad_fn=<NllLossBackward0>)
tensor(2.7177, grad_fn=<NllLossBackward0>)
tensor(1.7274, grad_fn=<NllLossBackward0>)
tensor(3.0285, grad_fn=<NllLossBackward0>)
tensor(1.7242, grad_fn=<NllLossBackward0>)
tensor(3.5246, grad_fn=<NllLossBackward0>)
tensor(1.7212, grad_fn=<NllLossBackward0>)
tensor(3.3237, grad_fn=<NllLossBackward0>)
tensor(3.2294, grad_fn=<NllLossBackward0>)
tensor(3.0116, grad_fn=<NllLossBackward0>)
tensor(4.2135, grad_fn=<NllLossBackward0>)
tensor(3.5521, grad_fn=<NllLossBackward0>)
tensor(2.3476, grad_fn=<NllLossBackward0>)
tensor(3.0555, grad_fn=<NllLossBackward0>)
tensor(2.07

tensor(3.3489, grad_fn=<NllLossBackward0>)
tensor(2.0811, grad_fn=<NllLossBackward0>)
tensor(2.4481, grad_fn=<NllLossBackward0>)
tensor(3.5642, grad_fn=<NllLossBackward0>)
tensor(3.8397, grad_fn=<NllLossBackward0>)
tensor(3.2600, grad_fn=<NllLossBackward0>)
tensor(2.8991, grad_fn=<NllLossBackward0>)
tensor(3.2657, grad_fn=<NllLossBackward0>)
tensor(3.0920, grad_fn=<NllLossBackward0>)
tensor(2.0208, grad_fn=<NllLossBackward0>)
tensor(3.1198, grad_fn=<NllLossBackward0>)
tensor(2.3380, grad_fn=<NllLossBackward0>)
tensor(4.0167, grad_fn=<NllLossBackward0>)
tensor(2.4462, grad_fn=<NllLossBackward0>)
tensor(4.5967, grad_fn=<NllLossBackward0>)
tensor(3.4705, grad_fn=<NllLossBackward0>)
tensor(3.5595, grad_fn=<NllLossBackward0>)
tensor(3.0387, grad_fn=<NllLossBackward0>)
tensor(3.2631, grad_fn=<NllLossBackward0>)
tensor(3.1216, grad_fn=<NllLossBackward0>)
tensor(2.3834, grad_fn=<NllLossBackward0>)
tensor(2.4234, grad_fn=<NllLossBackward0>)
tensor(2.3916, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(3.0187, grad_fn=<NllLossBackward0>)
tensor(1.7363, grad_fn=<NllLossBackward0>)
tensor(3.5314, grad_fn=<NllLossBackward0>)
tensor(4.0798, grad_fn=<NllLossBackward0>)
tensor(2.3612, grad_fn=<NllLossBackward0>)
tensor(2.6275, grad_fn=<NllLossBackward0>)
tensor(2.6065, grad_fn=<NllLossBackward0>)
tensor(2.6520, grad_fn=<NllLossBackward0>)
tensor(2.8337, grad_fn=<NllLossBackward0>)
tensor(3.4341, grad_fn=<NllLossBackward0>)
tensor(3.5482, grad_fn=<NllLossBackward0>)
tensor(2.8352, grad_fn=<NllLossBackward0>)
tensor(2.4838, grad_fn=<NllLossBackward0>)
tensor(2.5128, grad_fn=<NllLossBackward0>)
tensor(2.4447, grad_fn=<NllLossBackward0>)
tensor(2.4155, grad_fn=<NllLossBackward0>)
tensor(2.4482, grad_fn=<NllLossBackward0>)
tensor(3.4311, grad_fn=<NllLossBackward0>)
tensor(3.6192, grad_fn=<NllLossBackward0>)
tensor(2.5692, grad_fn=<NllLossBackward0>)
tensor(2.7240, grad_fn=<NllLossBackward0>)
tensor(2.5110, grad_fn=<NllLossBackward0>)
tensor(2.0175, grad_fn=<NllLossBackward0>)
tensor(3.01

tensor(2.3421, grad_fn=<NllLossBackward0>)
tensor(2.7822, grad_fn=<NllLossBackward0>)
tensor(3.0005, grad_fn=<NllLossBackward0>)
tensor(3.4111, grad_fn=<NllLossBackward0>)
tensor(3.6717, grad_fn=<NllLossBackward0>)
tensor(2.5844, grad_fn=<NllLossBackward0>)
tensor(3.0616, grad_fn=<NllLossBackward0>)
tensor(2.6867, grad_fn=<NllLossBackward0>)
tensor(2.6548, grad_fn=<NllLossBackward0>)
tensor(2.4976, grad_fn=<NllLossBackward0>)
tensor(2.9097, grad_fn=<NllLossBackward0>)
tensor(3.0105, grad_fn=<NllLossBackward0>)
tensor(2.4111, grad_fn=<NllLossBackward0>)
tensor(2.5725, grad_fn=<NllLossBackward0>)
tensor(3.9928, grad_fn=<NllLossBackward0>)
tensor(3.8777, grad_fn=<NllLossBackward0>)
tensor(3.4768, grad_fn=<NllLossBackward0>)
tensor(2.0054, grad_fn=<NllLossBackward0>)
tensor(3.1256, grad_fn=<NllLossBackward0>)
tensor(2.8445, grad_fn=<NllLossBackward0>)
tensor(2.7888, grad_fn=<NllLossBackward0>)
tensor(2.9469, grad_fn=<NllLossBackward0>)
tensor(3.1944, grad_fn=<NllLossBackward0>)
tensor(2.24

tensor(2.9404, grad_fn=<NllLossBackward0>)
tensor(2.3038, grad_fn=<NllLossBackward0>)
tensor(3.2287, grad_fn=<NllLossBackward0>)
tensor(2.8780, grad_fn=<NllLossBackward0>)
tensor(2.9049, grad_fn=<NllLossBackward0>)
tensor(2.3139, grad_fn=<NllLossBackward0>)
tensor(4.0600, grad_fn=<NllLossBackward0>)
tensor(4.7998, grad_fn=<NllLossBackward0>)
tensor(3.2236, grad_fn=<NllLossBackward0>)
tensor(2.4281, grad_fn=<NllLossBackward0>)
tensor(2.0863, grad_fn=<NllLossBackward0>)
tensor(2.0886, grad_fn=<NllLossBackward0>)
tensor(2.7788, grad_fn=<NllLossBackward0>)
tensor(3.1134, grad_fn=<NllLossBackward0>)
tensor(3.8943, grad_fn=<NllLossBackward0>)
tensor(3.5615, grad_fn=<NllLossBackward0>)
tensor(2.6306, grad_fn=<NllLossBackward0>)
tensor(3.5793, grad_fn=<NllLossBackward0>)
tensor(2.0209, grad_fn=<NllLossBackward0>)
tensor(2.5891, grad_fn=<NllLossBackward0>)
tensor(2.9386, grad_fn=<NllLossBackward0>)
tensor(2.9606, grad_fn=<NllLossBackward0>)
tensor(2.3005, grad_fn=<NllLossBackward0>)
tensor(3.34

tensor(3.3798, grad_fn=<NllLossBackward0>)
tensor(2.7597, grad_fn=<NllLossBackward0>)
tensor(2.4098, grad_fn=<NllLossBackward0>)
tensor(4.1378, grad_fn=<NllLossBackward0>)
tensor(2.2891, grad_fn=<NllLossBackward0>)
tensor(2.4807, grad_fn=<NllLossBackward0>)
tensor(3.1773, grad_fn=<NllLossBackward0>)
tensor(3.4748, grad_fn=<NllLossBackward0>)
tensor(3.5122, grad_fn=<NllLossBackward0>)
tensor(4.2953, grad_fn=<NllLossBackward0>)
tensor(1.7455, grad_fn=<NllLossBackward0>)
tensor(2.3677, grad_fn=<NllLossBackward0>)
tensor(3.6356, grad_fn=<NllLossBackward0>)
tensor(4.9019, grad_fn=<NllLossBackward0>)
tensor(2.3075, grad_fn=<NllLossBackward0>)
tensor(3.5348, grad_fn=<NllLossBackward0>)
tensor(2.3535, grad_fn=<NllLossBackward0>)
tensor(2.0886, grad_fn=<NllLossBackward0>)
tensor(3.7726, grad_fn=<NllLossBackward0>)
tensor(2.2856, grad_fn=<NllLossBackward0>)
tensor(2.8794, grad_fn=<NllLossBackward0>)
tensor(3.5986, grad_fn=<NllLossBackward0>)
tensor(2.5175, grad_fn=<NllLossBackward0>)
tensor(3.55

tensor(3.3742, grad_fn=<NllLossBackward0>)
tensor(2.5010, grad_fn=<NllLossBackward0>)
tensor(3.8832, grad_fn=<NllLossBackward0>)
tensor(2.6251, grad_fn=<NllLossBackward0>)
tensor(3.4325, grad_fn=<NllLossBackward0>)
tensor(3.1856, grad_fn=<NllLossBackward0>)
tensor(3.2242, grad_fn=<NllLossBackward0>)
tensor(3.8822, grad_fn=<NllLossBackward0>)
tensor(2.3525, grad_fn=<NllLossBackward0>)
tensor(2.0021, grad_fn=<NllLossBackward0>)
tensor(2.7130, grad_fn=<NllLossBackward0>)
tensor(2.9333, grad_fn=<NllLossBackward0>)
tensor(2.2987, grad_fn=<NllLossBackward0>)
tensor(3.4665, grad_fn=<NllLossBackward0>)
tensor(2.7069, grad_fn=<NllLossBackward0>)
tensor(3.1175, grad_fn=<NllLossBackward0>)
tensor(3.4599, grad_fn=<NllLossBackward0>)
tensor(2.6909, grad_fn=<NllLossBackward0>)
tensor(2.4341, grad_fn=<NllLossBackward0>)
tensor(2.4375, grad_fn=<NllLossBackward0>)
tensor(2.7972, grad_fn=<NllLossBackward0>)
tensor(2.7239, grad_fn=<NllLossBackward0>)
tensor(2.4300, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(2.6756, grad_fn=<NllLossBackward0>)
tensor(3.0146, grad_fn=<NllLossBackward0>)
tensor(3.2240, grad_fn=<NllLossBackward0>)
tensor(2.7029, grad_fn=<NllLossBackward0>)
tensor(2.4425, grad_fn=<NllLossBackward0>)
tensor(2.2141, grad_fn=<NllLossBackward0>)
tensor(3.5457, grad_fn=<NllLossBackward0>)
tensor(3.0844, grad_fn=<NllLossBackward0>)
tensor(2.6490, grad_fn=<NllLossBackward0>)
tensor(3.4370, grad_fn=<NllLossBackward0>)
tensor(3.0604, grad_fn=<NllLossBackward0>)
tensor(2.5343, grad_fn=<NllLossBackward0>)
tensor(3.1612, grad_fn=<NllLossBackward0>)
tensor(1.7493, grad_fn=<NllLossBackward0>)
tensor(1.7452, grad_fn=<NllLossBackward0>)
tensor(3.5444, grad_fn=<NllLossBackward0>)
tensor(3.2174, grad_fn=<NllLossBackward0>)
tensor(1.7432, grad_fn=<NllLossBackward0>)
tensor(3.5403, grad_fn=<NllLossBackward0>)
tensor(3.4354, grad_fn=<NllLossBackward0>)
tensor(2.6424, grad_fn=<NllLossBackward0>)
tensor(3.9319, grad_fn=<NllLossBackward0>)
tensor(2.9910, grad_fn=<NllLossBackward0>)
tensor(2.83

tensor(2.7297, grad_fn=<NllLossBackward0>)
tensor(3.4511, grad_fn=<NllLossBackward0>)
tensor(3.5509, grad_fn=<NllLossBackward0>)
tensor(2.9732, grad_fn=<NllLossBackward0>)
tensor(2.9413, grad_fn=<NllLossBackward0>)
tensor(2.7966, grad_fn=<NllLossBackward0>)
tensor(3.6742, grad_fn=<NllLossBackward0>)
tensor(4.4012, grad_fn=<NllLossBackward0>)
tensor(3.2061, grad_fn=<NllLossBackward0>)
tensor(2.3057, grad_fn=<NllLossBackward0>)
tensor(3.0867, grad_fn=<NllLossBackward0>)
tensor(2.2335, grad_fn=<NllLossBackward0>)
tensor(2.4130, grad_fn=<NllLossBackward0>)
tensor(2.3715, grad_fn=<NllLossBackward0>)
tensor(2.2794, grad_fn=<NllLossBackward0>)
tensor(2.0149, grad_fn=<NllLossBackward0>)
tensor(3.4650, grad_fn=<NllLossBackward0>)
tensor(2.9435, grad_fn=<NllLossBackward0>)
tensor(2.9710, grad_fn=<NllLossBackward0>)
tensor(2.3122, grad_fn=<NllLossBackward0>)
tensor(2.2980, grad_fn=<NllLossBackward0>)
tensor(2.9560, grad_fn=<NllLossBackward0>)
tensor(3.5186, grad_fn=<NllLossBackward0>)
tensor(3.13

tensor(2.4521, grad_fn=<NllLossBackward0>)
tensor(2.4663, grad_fn=<NllLossBackward0>)
tensor(3.6780, grad_fn=<NllLossBackward0>)
tensor(2.7216, grad_fn=<NllLossBackward0>)
tensor(2.8012, grad_fn=<NllLossBackward0>)
tensor(3.1754, grad_fn=<NllLossBackward0>)
tensor(2.4346, grad_fn=<NllLossBackward0>)
tensor(3.1337, grad_fn=<NllLossBackward0>)
tensor(3.0119, grad_fn=<NllLossBackward0>)
tensor(2.9474, grad_fn=<NllLossBackward0>)
tensor(2.7083, grad_fn=<NllLossBackward0>)
tensor(3.6611, grad_fn=<NllLossBackward0>)
tensor(2.0934, grad_fn=<NllLossBackward0>)
tensor(4.3123, grad_fn=<NllLossBackward0>)
tensor(3.4099, grad_fn=<NllLossBackward0>)
tensor(2.7429, grad_fn=<NllLossBackward0>)
tensor(2.4362, grad_fn=<NllLossBackward0>)
tensor(2.0129, grad_fn=<NllLossBackward0>)
tensor(2.8416, grad_fn=<NllLossBackward0>)
tensor(2.7779, grad_fn=<NllLossBackward0>)
tensor(2.4531, grad_fn=<NllLossBackward0>)
tensor(2.6761, grad_fn=<NllLossBackward0>)
tensor(2.4570, grad_fn=<NllLossBackward0>)
tensor(2.23

tensor(2.9048, grad_fn=<NllLossBackward0>)
tensor(2.0944, grad_fn=<NllLossBackward0>)
tensor(3.5787, grad_fn=<NllLossBackward0>)
tensor(3.4127, grad_fn=<NllLossBackward0>)
tensor(2.2884, grad_fn=<NllLossBackward0>)
tensor(3.4778, grad_fn=<NllLossBackward0>)
tensor(3.2171, grad_fn=<NllLossBackward0>)
tensor(2.9144, grad_fn=<NllLossBackward0>)
tensor(2.3846, grad_fn=<NllLossBackward0>)
tensor(2.6246, grad_fn=<NllLossBackward0>)
tensor(4.3804, grad_fn=<NllLossBackward0>)
tensor(3.5262, grad_fn=<NllLossBackward0>)
tensor(2.9153, grad_fn=<NllLossBackward0>)
tensor(2.3532, grad_fn=<NllLossBackward0>)
tensor(2.6925, grad_fn=<NllLossBackward0>)
tensor(3.7033, grad_fn=<NllLossBackward0>)
tensor(2.6282, grad_fn=<NllLossBackward0>)
tensor(2.8443, grad_fn=<NllLossBackward0>)
tensor(3.0234, grad_fn=<NllLossBackward0>)
tensor(2.2275, grad_fn=<NllLossBackward0>)
tensor(3.2254, grad_fn=<NllLossBackward0>)
tensor(2.4589, grad_fn=<NllLossBackward0>)
tensor(3.0017, grad_fn=<NllLossBackward0>)
tensor(2.23

tensor(2.6815, grad_fn=<NllLossBackward0>)
tensor(3.1769, grad_fn=<NllLossBackward0>)
tensor(2.5730, grad_fn=<NllLossBackward0>)
tensor(3.0168, grad_fn=<NllLossBackward0>)
tensor(2.6131, grad_fn=<NllLossBackward0>)
tensor(3.6657, grad_fn=<NllLossBackward0>)
tensor(2.4027, grad_fn=<NllLossBackward0>)
tensor(2.5527, grad_fn=<NllLossBackward0>)
tensor(3.0112, grad_fn=<NllLossBackward0>)
tensor(2.5462, grad_fn=<NllLossBackward0>)
tensor(2.2313, grad_fn=<NllLossBackward0>)
tensor(2.4766, grad_fn=<NllLossBackward0>)
tensor(3.1206, grad_fn=<NllLossBackward0>)
tensor(2.5303, grad_fn=<NllLossBackward0>)
tensor(2.7289, grad_fn=<NllLossBackward0>)
tensor(2.3609, grad_fn=<NllLossBackward0>)
tensor(2.5691, grad_fn=<NllLossBackward0>)
tensor(3.6126, grad_fn=<NllLossBackward0>)
tensor(2.7493, grad_fn=<NllLossBackward0>)
tensor(2.7466, grad_fn=<NllLossBackward0>)
tensor(2.4810, grad_fn=<NllLossBackward0>)
tensor(3.1561, grad_fn=<NllLossBackward0>)
tensor(3.0022, grad_fn=<NllLossBackward0>)
tensor(2.80

tensor(2.5291, grad_fn=<NllLossBackward0>)
tensor(2.2886, grad_fn=<NllLossBackward0>)
tensor(2.3675, grad_fn=<NllLossBackward0>)
tensor(2.6776, grad_fn=<NllLossBackward0>)
tensor(3.0827, grad_fn=<NllLossBackward0>)
tensor(3.1228, grad_fn=<NllLossBackward0>)
tensor(2.4866, grad_fn=<NllLossBackward0>)
tensor(2.7041, grad_fn=<NllLossBackward0>)
tensor(4.4677, grad_fn=<NllLossBackward0>)
tensor(2.0730, grad_fn=<NllLossBackward0>)
tensor(2.6788, grad_fn=<NllLossBackward0>)
tensor(3.3764, grad_fn=<NllLossBackward0>)
tensor(3.6023, grad_fn=<NllLossBackward0>)
tensor(2.5512, grad_fn=<NllLossBackward0>)
tensor(1.7233, grad_fn=<NllLossBackward0>)
tensor(3.8681, grad_fn=<NllLossBackward0>)
tensor(3.0698, grad_fn=<NllLossBackward0>)
tensor(1.7210, grad_fn=<NllLossBackward0>)
tensor(3.3463, grad_fn=<NllLossBackward0>)
tensor(2.3499, grad_fn=<NllLossBackward0>)
tensor(2.5576, grad_fn=<NllLossBackward0>)
tensor(2.3568, grad_fn=<NllLossBackward0>)
tensor(2.8190, grad_fn=<NllLossBackward0>)
tensor(2.77

tensor(3.6163, grad_fn=<NllLossBackward0>)
tensor(2.6718, grad_fn=<NllLossBackward0>)
tensor(2.7027, grad_fn=<NllLossBackward0>)
tensor(2.9406, grad_fn=<NllLossBackward0>)
tensor(2.4504, grad_fn=<NllLossBackward0>)
tensor(3.0074, grad_fn=<NllLossBackward0>)
tensor(2.4301, grad_fn=<NllLossBackward0>)
tensor(3.0688, grad_fn=<NllLossBackward0>)
tensor(2.5647, grad_fn=<NllLossBackward0>)
tensor(3.4979, grad_fn=<NllLossBackward0>)
tensor(2.6917, grad_fn=<NllLossBackward0>)
tensor(2.5483, grad_fn=<NllLossBackward0>)
tensor(2.0732, grad_fn=<NllLossBackward0>)
tensor(3.0629, grad_fn=<NllLossBackward0>)
tensor(2.8510, grad_fn=<NllLossBackward0>)
tensor(1.7306, grad_fn=<NllLossBackward0>)
tensor(2.2313, grad_fn=<NllLossBackward0>)
tensor(3.4985, grad_fn=<NllLossBackward0>)
tensor(1.7258, grad_fn=<NllLossBackward0>)
tensor(3.6806, grad_fn=<NllLossBackward0>)
tensor(2.8195, grad_fn=<NllLossBackward0>)
tensor(2.4204, grad_fn=<NllLossBackward0>)
tensor(3.1498, grad_fn=<NllLossBackward0>)
tensor(3.30

tensor(2.3618, grad_fn=<NllLossBackward0>)
tensor(3.7549, grad_fn=<NllLossBackward0>)
tensor(4.2376, grad_fn=<NllLossBackward0>)
tensor(3.1521, grad_fn=<NllLossBackward0>)
tensor(3.2028, grad_fn=<NllLossBackward0>)
tensor(2.7381, grad_fn=<NllLossBackward0>)
tensor(1.9841, grad_fn=<NllLossBackward0>)
tensor(3.8699, grad_fn=<NllLossBackward0>)
tensor(2.7553, grad_fn=<NllLossBackward0>)
tensor(4.1419, grad_fn=<NllLossBackward0>)
tensor(2.9446, grad_fn=<NllLossBackward0>)
tensor(2.2772, grad_fn=<NllLossBackward0>)
tensor(2.8603, grad_fn=<NllLossBackward0>)
tensor(3.6339, grad_fn=<NllLossBackward0>)
tensor(3.9647, grad_fn=<NllLossBackward0>)
tensor(2.7727, grad_fn=<NllLossBackward0>)
tensor(3.0076, grad_fn=<NllLossBackward0>)
tensor(2.6307, grad_fn=<NllLossBackward0>)
tensor(2.6164, grad_fn=<NllLossBackward0>)
tensor(2.4131, grad_fn=<NllLossBackward0>)
tensor(3.0792, grad_fn=<NllLossBackward0>)
tensor(3.0895, grad_fn=<NllLossBackward0>)
tensor(2.0686, grad_fn=<NllLossBackward0>)
tensor(3.51

tensor(2.8270, grad_fn=<NllLossBackward0>)
tensor(2.9727, grad_fn=<NllLossBackward0>)
tensor(2.4110, grad_fn=<NllLossBackward0>)
tensor(2.4704, grad_fn=<NllLossBackward0>)
tensor(3.3510, grad_fn=<NllLossBackward0>)
tensor(2.7302, grad_fn=<NllLossBackward0>)
tensor(3.5783, grad_fn=<NllLossBackward0>)
tensor(3.1826, grad_fn=<NllLossBackward0>)
tensor(2.3371, grad_fn=<NllLossBackward0>)
tensor(2.6844, grad_fn=<NllLossBackward0>)
tensor(2.5949, grad_fn=<NllLossBackward0>)
tensor(3.8794, grad_fn=<NllLossBackward0>)
tensor(4.0703, grad_fn=<NllLossBackward0>)
tensor(2.3303, grad_fn=<NllLossBackward0>)
tensor(2.8409, grad_fn=<NllLossBackward0>)
tensor(1.9893, grad_fn=<NllLossBackward0>)
tensor(3.1616, grad_fn=<NllLossBackward0>)
tensor(3.4851, grad_fn=<NllLossBackward0>)
tensor(4.4185, grad_fn=<NllLossBackward0>)
tensor(2.3319, grad_fn=<NllLossBackward0>)
tensor(1.7217, grad_fn=<NllLossBackward0>)
tensor(2.3768, grad_fn=<NllLossBackward0>)
tensor(2.7658, grad_fn=<NllLossBackward0>)
tensor(2.66

tensor(3.2892, grad_fn=<NllLossBackward0>)
tensor(2.5225, grad_fn=<NllLossBackward0>)
tensor(3.3390, grad_fn=<NllLossBackward0>)
tensor(2.4329, grad_fn=<NllLossBackward0>)
tensor(2.4220, grad_fn=<NllLossBackward0>)
tensor(3.1768, grad_fn=<NllLossBackward0>)
tensor(2.2193, grad_fn=<NllLossBackward0>)
tensor(2.6422, grad_fn=<NllLossBackward0>)
tensor(2.8478, grad_fn=<NllLossBackward0>)
tensor(3.2127, grad_fn=<NllLossBackward0>)
tensor(3.5154, grad_fn=<NllLossBackward0>)
tensor(1.9981, grad_fn=<NllLossBackward0>)
tensor(3.4235, grad_fn=<NllLossBackward0>)
tensor(2.2359, grad_fn=<NllLossBackward0>)
tensor(3.5193, grad_fn=<NllLossBackward0>)
tensor(2.7192, grad_fn=<NllLossBackward0>)
tensor(2.3105, grad_fn=<NllLossBackward0>)
tensor(3.2086, grad_fn=<NllLossBackward0>)
tensor(2.8428, grad_fn=<NllLossBackward0>)
tensor(3.0077, grad_fn=<NllLossBackward0>)
tensor(2.6822, grad_fn=<NllLossBackward0>)
tensor(3.6326, grad_fn=<NllLossBackward0>)
tensor(4.7838, grad_fn=<NllLossBackward0>)
tensor(3.42

tensor(3.3184, grad_fn=<NllLossBackward0>)
tensor(3.0714, grad_fn=<NllLossBackward0>)
tensor(4.1090, grad_fn=<NllLossBackward0>)
tensor(3.4211, grad_fn=<NllLossBackward0>)
tensor(2.3851, grad_fn=<NllLossBackward0>)
tensor(2.0010, grad_fn=<NllLossBackward0>)
tensor(2.4856, grad_fn=<NllLossBackward0>)
tensor(2.7270, grad_fn=<NllLossBackward0>)
tensor(2.9201, grad_fn=<NllLossBackward0>)
tensor(2.9157, grad_fn=<NllLossBackward0>)
tensor(3.8808, grad_fn=<NllLossBackward0>)
tensor(2.9668, grad_fn=<NllLossBackward0>)
tensor(2.9171, grad_fn=<NllLossBackward0>)
tensor(3.8972, grad_fn=<NllLossBackward0>)
tensor(3.1922, grad_fn=<NllLossBackward0>)
tensor(2.9167, grad_fn=<NllLossBackward0>)
tensor(2.4630, grad_fn=<NllLossBackward0>)
tensor(2.3799, grad_fn=<NllLossBackward0>)
tensor(1.7589, grad_fn=<NllLossBackward0>)
tensor(3.0970, grad_fn=<NllLossBackward0>)
tensor(2.4538, grad_fn=<NllLossBackward0>)
tensor(3.2161, grad_fn=<NllLossBackward0>)
tensor(3.0729, grad_fn=<NllLossBackward0>)
tensor(2.51

tensor(2.9372, grad_fn=<NllLossBackward0>)
tensor(3.5078, grad_fn=<NllLossBackward0>)
tensor(2.6673, grad_fn=<NllLossBackward0>)
tensor(3.9923, grad_fn=<NllLossBackward0>)
tensor(2.0038, grad_fn=<NllLossBackward0>)
tensor(3.3265, grad_fn=<NllLossBackward0>)
tensor(2.8320, grad_fn=<NllLossBackward0>)
tensor(1.7491, grad_fn=<NllLossBackward0>)
tensor(3.0180, grad_fn=<NllLossBackward0>)
tensor(2.7916, grad_fn=<NllLossBackward0>)
tensor(2.4791, grad_fn=<NllLossBackward0>)
tensor(2.6919, grad_fn=<NllLossBackward0>)
tensor(3.0575, grad_fn=<NllLossBackward0>)
tensor(2.2266, grad_fn=<NllLossBackward0>)
tensor(3.1082, grad_fn=<NllLossBackward0>)
tensor(2.3287, grad_fn=<NllLossBackward0>)
tensor(4.0238, grad_fn=<NllLossBackward0>)
tensor(2.5729, grad_fn=<NllLossBackward0>)
tensor(3.0026, grad_fn=<NllLossBackward0>)
tensor(2.4074, grad_fn=<NllLossBackward0>)
tensor(2.8208, grad_fn=<NllLossBackward0>)
tensor(2.6024, grad_fn=<NllLossBackward0>)
tensor(3.6610, grad_fn=<NllLossBackward0>)
tensor(2.70

tensor(2.6261, grad_fn=<NllLossBackward0>)
tensor(2.8701, grad_fn=<NllLossBackward0>)
tensor(3.5084, grad_fn=<NllLossBackward0>)
tensor(3.7027, grad_fn=<NllLossBackward0>)
tensor(2.4059, grad_fn=<NllLossBackward0>)
tensor(3.4146, grad_fn=<NllLossBackward0>)
tensor(3.0517, grad_fn=<NllLossBackward0>)
tensor(3.3156, grad_fn=<NllLossBackward0>)
tensor(3.5204, grad_fn=<NllLossBackward0>)
tensor(2.9213, grad_fn=<NllLossBackward0>)
tensor(2.2300, grad_fn=<NllLossBackward0>)
tensor(2.3233, grad_fn=<NllLossBackward0>)
tensor(3.0477, grad_fn=<NllLossBackward0>)
tensor(2.9528, grad_fn=<NllLossBackward0>)
tensor(3.3946, grad_fn=<NllLossBackward0>)
tensor(2.9489, grad_fn=<NllLossBackward0>)
tensor(2.5375, grad_fn=<NllLossBackward0>)
tensor(3.4221, grad_fn=<NllLossBackward0>)
tensor(2.3433, grad_fn=<NllLossBackward0>)
tensor(3.5757, grad_fn=<NllLossBackward0>)
tensor(2.9851, grad_fn=<NllLossBackward0>)
tensor(3.1353, grad_fn=<NllLossBackward0>)
tensor(2.0145, grad_fn=<NllLossBackward0>)
tensor(2.08

tensor(2.8088, grad_fn=<NllLossBackward0>)
tensor(3.9555, grad_fn=<NllLossBackward0>)
tensor(3.8552, grad_fn=<NllLossBackward0>)
tensor(2.3278, grad_fn=<NllLossBackward0>)
tensor(4.3553, grad_fn=<NllLossBackward0>)
tensor(1.7474, grad_fn=<NllLossBackward0>)
tensor(3.3877, grad_fn=<NllLossBackward0>)
tensor(2.7944, grad_fn=<NllLossBackward0>)
tensor(3.2487, grad_fn=<NllLossBackward0>)
tensor(3.6813, grad_fn=<NllLossBackward0>)
tensor(3.0343, grad_fn=<NllLossBackward0>)
tensor(2.7765, grad_fn=<NllLossBackward0>)
tensor(3.6275, grad_fn=<NllLossBackward0>)
tensor(3.9514, grad_fn=<NllLossBackward0>)
tensor(2.9560, grad_fn=<NllLossBackward0>)
tensor(2.6353, grad_fn=<NllLossBackward0>)
tensor(3.0622, grad_fn=<NllLossBackward0>)
tensor(2.8154, grad_fn=<NllLossBackward0>)
tensor(3.0882, grad_fn=<NllLossBackward0>)
tensor(3.3002, grad_fn=<NllLossBackward0>)
tensor(3.1162, grad_fn=<NllLossBackward0>)
tensor(2.6898, grad_fn=<NllLossBackward0>)
tensor(3.4827, grad_fn=<NllLossBackward0>)
tensor(2.28

tensor(3.0404, grad_fn=<NllLossBackward0>)
tensor(2.8992, grad_fn=<NllLossBackward0>)
tensor(2.4186, grad_fn=<NllLossBackward0>)
tensor(2.7172, grad_fn=<NllLossBackward0>)
tensor(3.1176, grad_fn=<NllLossBackward0>)
tensor(2.3181, grad_fn=<NllLossBackward0>)
tensor(2.0607, grad_fn=<NllLossBackward0>)
tensor(2.9803, grad_fn=<NllLossBackward0>)
tensor(2.4284, grad_fn=<NllLossBackward0>)
tensor(2.9050, grad_fn=<NllLossBackward0>)
tensor(2.8826, grad_fn=<NllLossBackward0>)
tensor(3.1047, grad_fn=<NllLossBackward0>)
tensor(2.8606, grad_fn=<NllLossBackward0>)
tensor(3.2563, grad_fn=<NllLossBackward0>)
tensor(4.2924, grad_fn=<NllLossBackward0>)
tensor(3.5964, grad_fn=<NllLossBackward0>)
tensor(2.3529, grad_fn=<NllLossBackward0>)
tensor(2.5962, grad_fn=<NllLossBackward0>)
tensor(2.6700, grad_fn=<NllLossBackward0>)
tensor(2.8774, grad_fn=<NllLossBackward0>)
tensor(2.2807, grad_fn=<NllLossBackward0>)
tensor(2.4286, grad_fn=<NllLossBackward0>)
tensor(2.6926, grad_fn=<NllLossBackward0>)
tensor(2.03

tensor(3.8535, grad_fn=<NllLossBackward0>)
tensor(3.0336, grad_fn=<NllLossBackward0>)
tensor(2.4394, grad_fn=<NllLossBackward0>)
tensor(2.4152, grad_fn=<NllLossBackward0>)
tensor(2.3093, grad_fn=<NllLossBackward0>)
tensor(2.9525, grad_fn=<NllLossBackward0>)
tensor(3.3092, grad_fn=<NllLossBackward0>)
tensor(2.4863, grad_fn=<NllLossBackward0>)
tensor(3.9417, grad_fn=<NllLossBackward0>)
tensor(2.6014, grad_fn=<NllLossBackward0>)
tensor(2.8278, grad_fn=<NllLossBackward0>)
tensor(4.7655, grad_fn=<NllLossBackward0>)
tensor(4.5529, grad_fn=<NllLossBackward0>)
tensor(2.2667, grad_fn=<NllLossBackward0>)
tensor(3.7742, grad_fn=<NllLossBackward0>)
tensor(2.2381, grad_fn=<NllLossBackward0>)
tensor(2.3293, grad_fn=<NllLossBackward0>)
tensor(2.0873, grad_fn=<NllLossBackward0>)
tensor(2.8739, grad_fn=<NllLossBackward0>)
tensor(2.4067, grad_fn=<NllLossBackward0>)
tensor(2.5145, grad_fn=<NllLossBackward0>)
tensor(2.9366, grad_fn=<NllLossBackward0>)
tensor(2.8340, grad_fn=<NllLossBackward0>)
tensor(2.96

tensor(2.0275, grad_fn=<NllLossBackward0>)
tensor(2.3100, grad_fn=<NllLossBackward0>)
tensor(3.5230, grad_fn=<NllLossBackward0>)
tensor(2.0304, grad_fn=<NllLossBackward0>)
tensor(3.0368, grad_fn=<NllLossBackward0>)
tensor(4.2912, grad_fn=<NllLossBackward0>)
tensor(3.2259, grad_fn=<NllLossBackward0>)
tensor(2.2158, grad_fn=<NllLossBackward0>)
tensor(2.9857, grad_fn=<NllLossBackward0>)
tensor(2.9094, grad_fn=<NllLossBackward0>)
tensor(3.2155, grad_fn=<NllLossBackward0>)
tensor(2.6612, grad_fn=<NllLossBackward0>)
tensor(3.6103, grad_fn=<NllLossBackward0>)
tensor(2.4711, grad_fn=<NllLossBackward0>)
tensor(3.1443, grad_fn=<NllLossBackward0>)
tensor(2.8910, grad_fn=<NllLossBackward0>)
tensor(3.1020, grad_fn=<NllLossBackward0>)
tensor(3.5436, grad_fn=<NllLossBackward0>)
tensor(4.4150, grad_fn=<NllLossBackward0>)
tensor(3.8896, grad_fn=<NllLossBackward0>)
tensor(2.0576, grad_fn=<NllLossBackward0>)
tensor(2.6834, grad_fn=<NllLossBackward0>)
tensor(3.8821, grad_fn=<NllLossBackward0>)
tensor(3.42

tensor(2.0429, grad_fn=<NllLossBackward0>)
tensor(2.9427, grad_fn=<NllLossBackward0>)
tensor(2.7492, grad_fn=<NllLossBackward0>)
tensor(3.1070, grad_fn=<NllLossBackward0>)
tensor(3.0601, grad_fn=<NllLossBackward0>)
tensor(3.2087, grad_fn=<NllLossBackward0>)
tensor(2.3085, grad_fn=<NllLossBackward0>)
tensor(2.7043, grad_fn=<NllLossBackward0>)
tensor(2.2775, grad_fn=<NllLossBackward0>)
tensor(1.7533, grad_fn=<NllLossBackward0>)
tensor(3.0919, grad_fn=<NllLossBackward0>)
tensor(2.8908, grad_fn=<NllLossBackward0>)
tensor(3.5640, grad_fn=<NllLossBackward0>)
tensor(4.1259, grad_fn=<NllLossBackward0>)
tensor(3.1734, grad_fn=<NllLossBackward0>)
tensor(2.8803, grad_fn=<NllLossBackward0>)
tensor(3.6235, grad_fn=<NllLossBackward0>)
tensor(2.5165, grad_fn=<NllLossBackward0>)
tensor(2.6314, grad_fn=<NllLossBackward0>)
tensor(2.3407, grad_fn=<NllLossBackward0>)
tensor(3.6748, grad_fn=<NllLossBackward0>)
tensor(2.8392, grad_fn=<NllLossBackward0>)
tensor(2.6807, grad_fn=<NllLossBackward0>)
tensor(3.10

tensor(3.4307, grad_fn=<NllLossBackward0>)
tensor(1.7576, grad_fn=<NllLossBackward0>)
tensor(4.3799, grad_fn=<NllLossBackward0>)
tensor(2.4534, grad_fn=<NllLossBackward0>)
tensor(3.2396, grad_fn=<NllLossBackward0>)
tensor(2.6550, grad_fn=<NllLossBackward0>)
tensor(2.9928, grad_fn=<NllLossBackward0>)
tensor(2.0402, grad_fn=<NllLossBackward0>)
tensor(2.9871, grad_fn=<NllLossBackward0>)
tensor(1.7545, grad_fn=<NllLossBackward0>)
tensor(2.9530, grad_fn=<NllLossBackward0>)
tensor(3.4280, grad_fn=<NllLossBackward0>)
tensor(2.7411, grad_fn=<NllLossBackward0>)
tensor(2.2957, grad_fn=<NllLossBackward0>)
tensor(2.6756, grad_fn=<NllLossBackward0>)
tensor(2.0183, grad_fn=<NllLossBackward0>)
tensor(3.0733, grad_fn=<NllLossBackward0>)
tensor(2.8749, grad_fn=<NllLossBackward0>)
tensor(2.2876, grad_fn=<NllLossBackward0>)
tensor(3.0592, grad_fn=<NllLossBackward0>)
tensor(3.0527, grad_fn=<NllLossBackward0>)
tensor(3.1819, grad_fn=<NllLossBackward0>)
tensor(3.2339, grad_fn=<NllLossBackward0>)
tensor(2.23

tensor(3.9080, grad_fn=<NllLossBackward0>)
tensor(3.0570, grad_fn=<NllLossBackward0>)
tensor(3.0286, grad_fn=<NllLossBackward0>)
tensor(3.0130, grad_fn=<NllLossBackward0>)
tensor(2.9832, grad_fn=<NllLossBackward0>)
tensor(2.6750, grad_fn=<NllLossBackward0>)
tensor(2.4836, grad_fn=<NllLossBackward0>)
tensor(3.6547, grad_fn=<NllLossBackward0>)
tensor(2.2797, grad_fn=<NllLossBackward0>)
tensor(2.8940, grad_fn=<NllLossBackward0>)
tensor(3.0896, grad_fn=<NllLossBackward0>)
tensor(3.1876, grad_fn=<NllLossBackward0>)
tensor(2.6580, grad_fn=<NllLossBackward0>)
tensor(2.7188, grad_fn=<NllLossBackward0>)
tensor(3.4648, grad_fn=<NllLossBackward0>)
tensor(2.6829, grad_fn=<NllLossBackward0>)
tensor(2.3061, grad_fn=<NllLossBackward0>)
tensor(2.0468, grad_fn=<NllLossBackward0>)
tensor(2.9455, grad_fn=<NllLossBackward0>)
tensor(2.4645, grad_fn=<NllLossBackward0>)
tensor(2.8562, grad_fn=<NllLossBackward0>)
tensor(2.7238, grad_fn=<NllLossBackward0>)
tensor(2.8385, grad_fn=<NllLossBackward0>)
tensor(2.96

tensor(3.1407, grad_fn=<NllLossBackward0>)
tensor(2.4543, grad_fn=<NllLossBackward0>)
tensor(4.4337, grad_fn=<NllLossBackward0>)
tensor(3.5664, grad_fn=<NllLossBackward0>)
tensor(2.2739, grad_fn=<NllLossBackward0>)
tensor(2.4514, grad_fn=<NllLossBackward0>)
tensor(2.4104, grad_fn=<NllLossBackward0>)
tensor(3.0958, grad_fn=<NllLossBackward0>)
tensor(3.3397, grad_fn=<NllLossBackward0>)
tensor(2.8752, grad_fn=<NllLossBackward0>)
tensor(3.0991, grad_fn=<NllLossBackward0>)
tensor(2.4997, grad_fn=<NllLossBackward0>)
tensor(2.7231, grad_fn=<NllLossBackward0>)
tensor(2.5671, grad_fn=<NllLossBackward0>)
tensor(3.0358, grad_fn=<NllLossBackward0>)
tensor(3.1336, grad_fn=<NllLossBackward0>)
tensor(3.4040, grad_fn=<NllLossBackward0>)
tensor(2.7121, grad_fn=<NllLossBackward0>)
tensor(2.0683, grad_fn=<NllLossBackward0>)
tensor(2.5871, grad_fn=<NllLossBackward0>)
tensor(2.0293, grad_fn=<NllLossBackward0>)
tensor(5.1683, grad_fn=<NllLossBackward0>)
tensor(3.8133, grad_fn=<NllLossBackward0>)
tensor(2.03

tensor(3.6155, grad_fn=<NllLossBackward0>)
tensor(2.7956, grad_fn=<NllLossBackward0>)
tensor(2.5201, grad_fn=<NllLossBackward0>)
tensor(3.2095, grad_fn=<NllLossBackward0>)
tensor(2.9908, grad_fn=<NllLossBackward0>)
tensor(2.4075, grad_fn=<NllLossBackward0>)
tensor(3.1349, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(4.6624, grad_fn=<NllLossBackward0>)
tensor(3.6063, grad_fn=<NllLossBackward0>)
tensor(2.9707, grad_fn=<NllLossBackward0>)
tensor(2.5521, grad_fn=<NllLossBackward0>)
tensor(4.1215, grad_fn=<NllLossBackward0>)
tensor(2.6463, grad_fn=<NllLossBackward0>)
tensor(3.4133, grad_fn=<NllLossBackward0>)
tensor(2.4213, grad_fn=<NllLossBackward0>)
tensor(3.0155, grad_fn=<NllLossBackward0>)
tensor(3.7959, grad_fn=<NllLossBackward0>)
tensor(3.1180, grad_fn=<NllLossBackward0>)
tensor(2.9932, grad_fn=<NllLossBackward0>)
tensor(3.5493, grad_fn=<NllLossBackward0>)
tensor(2.7782, grad_fn=<NllLossBackward0>)
tensor(2.8446, grad_fn=<NllLossBackward0>)
tensor(2.25

tensor(2.8639, grad_fn=<NllLossBackward0>)
tensor(3.0832, grad_fn=<NllLossBackward0>)
tensor(2.6801, grad_fn=<NllLossBackward0>)
tensor(2.2377, grad_fn=<NllLossBackward0>)
tensor(2.2672, grad_fn=<NllLossBackward0>)
tensor(3.0310, grad_fn=<NllLossBackward0>)
tensor(2.6887, grad_fn=<NllLossBackward0>)
tensor(2.5173, grad_fn=<NllLossBackward0>)
tensor(1.8141, grad_fn=<NllLossBackward0>)
tensor(2.8693, grad_fn=<NllLossBackward0>)
tensor(2.2659, grad_fn=<NllLossBackward0>)
tensor(1.8090, grad_fn=<NllLossBackward0>)
tensor(2.7002, grad_fn=<NllLossBackward0>)
tensor(2.6364, grad_fn=<NllLossBackward0>)
tensor(3.2016, grad_fn=<NllLossBackward0>)
tensor(3.0831, grad_fn=<NllLossBackward0>)
tensor(2.6054, grad_fn=<NllLossBackward0>)
tensor(3.9007, grad_fn=<NllLossBackward0>)
tensor(2.8831, grad_fn=<NllLossBackward0>)
tensor(3.9532, grad_fn=<NllLossBackward0>)
tensor(4.3915, grad_fn=<NllLossBackward0>)
tensor(2.2717, grad_fn=<NllLossBackward0>)
tensor(3.8320, grad_fn=<NllLossBackward0>)
tensor(3.10

tensor(2.5724, grad_fn=<NllLossBackward0>)
tensor(2.8736, grad_fn=<NllLossBackward0>)
tensor(3.7227, grad_fn=<NllLossBackward0>)
tensor(1.8231, grad_fn=<NllLossBackward0>)
tensor(2.4539, grad_fn=<NllLossBackward0>)
tensor(3.2324, grad_fn=<NllLossBackward0>)
tensor(3.1145, grad_fn=<NllLossBackward0>)
tensor(2.6513, grad_fn=<NllLossBackward0>)
tensor(1.8197, grad_fn=<NllLossBackward0>)
tensor(3.2310, grad_fn=<NllLossBackward0>)
tensor(2.8889, grad_fn=<NllLossBackward0>)
tensor(3.9576, grad_fn=<NllLossBackward0>)
tensor(2.9364, grad_fn=<NllLossBackward0>)
tensor(2.0183, grad_fn=<NllLossBackward0>)
tensor(1.8175, grad_fn=<NllLossBackward0>)
tensor(3.1668, grad_fn=<NllLossBackward0>)
tensor(4.0174, grad_fn=<NllLossBackward0>)
tensor(2.4744, grad_fn=<NllLossBackward0>)
tensor(4.0054, grad_fn=<NllLossBackward0>)
tensor(2.5237, grad_fn=<NllLossBackward0>)
tensor(3.0877, grad_fn=<NllLossBackward0>)
tensor(2.7345, grad_fn=<NllLossBackward0>)
tensor(3.0023, grad_fn=<NllLossBackward0>)
tensor(2.02

tensor(3.9471, grad_fn=<NllLossBackward0>)
tensor(2.4513, grad_fn=<NllLossBackward0>)
tensor(3.1250, grad_fn=<NllLossBackward0>)
tensor(4.4396, grad_fn=<NllLossBackward0>)
tensor(2.2644, grad_fn=<NllLossBackward0>)
tensor(3.5474, grad_fn=<NllLossBackward0>)
tensor(2.4152, grad_fn=<NllLossBackward0>)
tensor(2.5630, grad_fn=<NllLossBackward0>)
tensor(2.9783, grad_fn=<NllLossBackward0>)
tensor(1.8146, grad_fn=<NllLossBackward0>)
tensor(4.0240, grad_fn=<NllLossBackward0>)
tensor(3.1687, grad_fn=<NllLossBackward0>)
tensor(2.9769, grad_fn=<NllLossBackward0>)
tensor(2.8780, grad_fn=<NllLossBackward0>)
tensor(2.6334, grad_fn=<NllLossBackward0>)
tensor(3.0367, grad_fn=<NllLossBackward0>)
tensor(3.2750, grad_fn=<NllLossBackward0>)
tensor(2.7302, grad_fn=<NllLossBackward0>)
tensor(3.4577, grad_fn=<NllLossBackward0>)
tensor(3.1540, grad_fn=<NllLossBackward0>)
tensor(4.1353, grad_fn=<NllLossBackward0>)
tensor(3.1160, grad_fn=<NllLossBackward0>)
tensor(2.8492, grad_fn=<NllLossBackward0>)
tensor(1.82

tensor(3.9063, grad_fn=<NllLossBackward0>)
tensor(2.6921, grad_fn=<NllLossBackward0>)
tensor(2.2524, grad_fn=<NllLossBackward0>)
tensor(2.5747, grad_fn=<NllLossBackward0>)
tensor(2.6093, grad_fn=<NllLossBackward0>)
tensor(3.5307, grad_fn=<NllLossBackward0>)
tensor(2.0258, grad_fn=<NllLossBackward0>)
tensor(3.8814, grad_fn=<NllLossBackward0>)
tensor(3.0621, grad_fn=<NllLossBackward0>)
tensor(3.9517, grad_fn=<NllLossBackward0>)
tensor(3.0206, grad_fn=<NllLossBackward0>)
tensor(2.0181, grad_fn=<NllLossBackward0>)
tensor(2.2893, grad_fn=<NllLossBackward0>)
tensor(3.9149, grad_fn=<NllLossBackward0>)
tensor(3.0994, grad_fn=<NllLossBackward0>)
tensor(2.6166, grad_fn=<NllLossBackward0>)
tensor(2.3524, grad_fn=<NllLossBackward0>)
tensor(2.9583, grad_fn=<NllLossBackward0>)
tensor(2.8747, grad_fn=<NllLossBackward0>)
tensor(2.8794, grad_fn=<NllLossBackward0>)
tensor(4.0371, grad_fn=<NllLossBackward0>)
tensor(4.1989, grad_fn=<NllLossBackward0>)
tensor(3.6393, grad_fn=<NllLossBackward0>)
tensor(2.40

tensor(2.4600, grad_fn=<NllLossBackward0>)
tensor(2.0156, grad_fn=<NllLossBackward0>)
tensor(2.4371, grad_fn=<NllLossBackward0>)
tensor(2.9544, grad_fn=<NllLossBackward0>)
tensor(2.4025, grad_fn=<NllLossBackward0>)
tensor(2.2979, grad_fn=<NllLossBackward0>)
tensor(3.8915, grad_fn=<NllLossBackward0>)
tensor(2.4931, grad_fn=<NllLossBackward0>)
tensor(2.2548, grad_fn=<NllLossBackward0>)
tensor(2.2247, grad_fn=<NllLossBackward0>)
tensor(3.0579, grad_fn=<NllLossBackward0>)
tensor(2.4850, grad_fn=<NllLossBackward0>)
tensor(3.7287, grad_fn=<NllLossBackward0>)
tensor(3.0646, grad_fn=<NllLossBackward0>)
tensor(2.4112, grad_fn=<NllLossBackward0>)
tensor(3.0207, grad_fn=<NllLossBackward0>)
tensor(2.2930, grad_fn=<NllLossBackward0>)
tensor(2.9971, grad_fn=<NllLossBackward0>)
tensor(3.0257, grad_fn=<NllLossBackward0>)
tensor(2.0805, grad_fn=<NllLossBackward0>)
tensor(2.9946, grad_fn=<NllLossBackward0>)
tensor(2.8559, grad_fn=<NllLossBackward0>)
tensor(1.8129, grad_fn=<NllLossBackward0>)
tensor(3.51

tensor(4.0621, grad_fn=<NllLossBackward0>)
tensor(4.6233, grad_fn=<NllLossBackward0>)
tensor(3.4568, grad_fn=<NllLossBackward0>)
tensor(3.1290, grad_fn=<NllLossBackward0>)
tensor(2.3582, grad_fn=<NllLossBackward0>)
tensor(3.0782, grad_fn=<NllLossBackward0>)
tensor(3.5121, grad_fn=<NllLossBackward0>)
tensor(2.6652, grad_fn=<NllLossBackward0>)
tensor(2.7485, grad_fn=<NllLossBackward0>)
tensor(2.0333, grad_fn=<NllLossBackward0>)
tensor(2.2598, grad_fn=<NllLossBackward0>)
tensor(2.4782, grad_fn=<NllLossBackward0>)
tensor(2.9003, grad_fn=<NllLossBackward0>)
tensor(3.9614, grad_fn=<NllLossBackward0>)
tensor(3.3091, grad_fn=<NllLossBackward0>)
tensor(3.0382, grad_fn=<NllLossBackward0>)
tensor(2.3862, grad_fn=<NllLossBackward0>)
tensor(3.5436, grad_fn=<NllLossBackward0>)
tensor(2.3624, grad_fn=<NllLossBackward0>)
tensor(2.3249, grad_fn=<NllLossBackward0>)
tensor(2.3165, grad_fn=<NllLossBackward0>)
tensor(2.3659, grad_fn=<NllLossBackward0>)
tensor(3.1834, grad_fn=<NllLossBackward0>)
tensor(2.10

tensor(2.0448, grad_fn=<NllLossBackward0>)
tensor(2.0520, grad_fn=<NllLossBackward0>)
tensor(2.6327, grad_fn=<NllLossBackward0>)
tensor(2.2467, grad_fn=<NllLossBackward0>)
tensor(2.9872, grad_fn=<NllLossBackward0>)
tensor(3.1875, grad_fn=<NllLossBackward0>)
tensor(2.5304, grad_fn=<NllLossBackward0>)
tensor(2.9649, grad_fn=<NllLossBackward0>)
tensor(3.5867, grad_fn=<NllLossBackward0>)
tensor(3.6342, grad_fn=<NllLossBackward0>)
tensor(1.8299, grad_fn=<NllLossBackward0>)
tensor(3.5306, grad_fn=<NllLossBackward0>)
tensor(3.7529, grad_fn=<NllLossBackward0>)
tensor(2.9671, grad_fn=<NllLossBackward0>)
tensor(2.9319, grad_fn=<NllLossBackward0>)
tensor(2.3947, grad_fn=<NllLossBackward0>)
tensor(2.4434, grad_fn=<NllLossBackward0>)
tensor(1.8309, grad_fn=<NllLossBackward0>)
tensor(2.5402, grad_fn=<NllLossBackward0>)
tensor(2.6215, grad_fn=<NllLossBackward0>)
tensor(2.5064, grad_fn=<NllLossBackward0>)
tensor(2.9862, grad_fn=<NllLossBackward0>)
tensor(2.1060, grad_fn=<NllLossBackward0>)
tensor(2.48

tensor(3.3881, grad_fn=<NllLossBackward0>)
tensor(2.9699, grad_fn=<NllLossBackward0>)
tensor(3.1912, grad_fn=<NllLossBackward0>)
tensor(2.6857, grad_fn=<NllLossBackward0>)
tensor(2.4759, grad_fn=<NllLossBackward0>)
tensor(2.8705, grad_fn=<NllLossBackward0>)
tensor(3.4808, grad_fn=<NllLossBackward0>)
tensor(2.9677, grad_fn=<NllLossBackward0>)
tensor(2.0587, grad_fn=<NllLossBackward0>)
tensor(2.4056, grad_fn=<NllLossBackward0>)
tensor(3.0656, grad_fn=<NllLossBackward0>)
tensor(2.9989, grad_fn=<NllLossBackward0>)
tensor(2.3332, grad_fn=<NllLossBackward0>)
tensor(2.1134, grad_fn=<NllLossBackward0>)
tensor(2.5791, grad_fn=<NllLossBackward0>)
tensor(2.4315, grad_fn=<NllLossBackward0>)
tensor(3.8723, grad_fn=<NllLossBackward0>)
tensor(2.7143, grad_fn=<NllLossBackward0>)
tensor(3.4692, grad_fn=<NllLossBackward0>)
tensor(3.0391, grad_fn=<NllLossBackward0>)
tensor(2.8833, grad_fn=<NllLossBackward0>)
tensor(2.0518, grad_fn=<NllLossBackward0>)
tensor(2.9376, grad_fn=<NllLossBackward0>)
tensor(2.99

tensor(2.9501, grad_fn=<NllLossBackward0>)
tensor(3.2420, grad_fn=<NllLossBackward0>)
tensor(2.4500, grad_fn=<NllLossBackward0>)
tensor(2.0492, grad_fn=<NllLossBackward0>)
tensor(2.7244, grad_fn=<NllLossBackward0>)
tensor(2.0507, grad_fn=<NllLossBackward0>)
tensor(2.3004, grad_fn=<NllLossBackward0>)
tensor(3.6038, grad_fn=<NllLossBackward0>)
tensor(2.3944, grad_fn=<NllLossBackward0>)
tensor(3.1519, grad_fn=<NllLossBackward0>)
tensor(2.4374, grad_fn=<NllLossBackward0>)
tensor(3.3196, grad_fn=<NllLossBackward0>)
tensor(3.0857, grad_fn=<NllLossBackward0>)
tensor(2.9873, grad_fn=<NllLossBackward0>)
tensor(3.3915, grad_fn=<NllLossBackward0>)
tensor(3.5196, grad_fn=<NllLossBackward0>)
tensor(2.4392, grad_fn=<NllLossBackward0>)
tensor(3.0042, grad_fn=<NllLossBackward0>)
tensor(2.7165, grad_fn=<NllLossBackward0>)
tensor(2.6766, grad_fn=<NllLossBackward0>)
tensor(3.9648, grad_fn=<NllLossBackward0>)
tensor(2.0477, grad_fn=<NllLossBackward0>)
tensor(2.1106, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(2.9560, grad_fn=<NllLossBackward0>)
tensor(3.4655, grad_fn=<NllLossBackward0>)
tensor(4.0010, grad_fn=<NllLossBackward0>)
tensor(2.9081, grad_fn=<NllLossBackward0>)
tensor(3.9644, grad_fn=<NllLossBackward0>)
tensor(2.7238, grad_fn=<NllLossBackward0>)
tensor(2.4803, grad_fn=<NllLossBackward0>)
tensor(3.5510, grad_fn=<NllLossBackward0>)
tensor(3.2322, grad_fn=<NllLossBackward0>)
tensor(3.0599, grad_fn=<NllLossBackward0>)
tensor(3.0801, grad_fn=<NllLossBackward0>)
tensor(1.8055, grad_fn=<NllLossBackward0>)
tensor(3.5490, grad_fn=<NllLossBackward0>)
tensor(2.7030, grad_fn=<NllLossBackward0>)
tensor(2.5389, grad_fn=<NllLossBackward0>)
tensor(3.0934, grad_fn=<NllLossBackward0>)
tensor(3.0828, grad_fn=<NllLossBackward0>)
tensor(2.4038, grad_fn=<NllLossBackward0>)
tensor(2.9042, grad_fn=<NllLossBackward0>)
tensor(2.4464, grad_fn=<NllLossBackward0>)
tensor(3.0224, grad_fn=<NllLossBackward0>)
tensor(3.0295, grad_fn=<NllLossBackward0>)
tensor(3.0086, grad_fn=<NllLossBackward0>)
tensor(2.42

tensor(2.4054, grad_fn=<NllLossBackward0>)
tensor(2.8701, grad_fn=<NllLossBackward0>)
tensor(2.3997, grad_fn=<NllLossBackward0>)
tensor(2.8522, grad_fn=<NllLossBackward0>)
tensor(3.2102, grad_fn=<NllLossBackward0>)
tensor(3.3209, grad_fn=<NllLossBackward0>)
tensor(3.5365, grad_fn=<NllLossBackward0>)
tensor(3.3415, grad_fn=<NllLossBackward0>)
tensor(3.3366, grad_fn=<NllLossBackward0>)
tensor(2.3793, grad_fn=<NllLossBackward0>)
tensor(2.9311, grad_fn=<NllLossBackward0>)
tensor(3.0115, grad_fn=<NllLossBackward0>)
tensor(2.4082, grad_fn=<NllLossBackward0>)
tensor(2.4010, grad_fn=<NllLossBackward0>)
tensor(3.0584, grad_fn=<NllLossBackward0>)
tensor(2.8524, grad_fn=<NllLossBackward0>)
tensor(2.0385, grad_fn=<NllLossBackward0>)
tensor(2.9748, grad_fn=<NllLossBackward0>)
tensor(3.5375, grad_fn=<NllLossBackward0>)
tensor(2.1085, grad_fn=<NllLossBackward0>)
tensor(2.4036, grad_fn=<NllLossBackward0>)
tensor(3.0798, grad_fn=<NllLossBackward0>)
tensor(2.5340, grad_fn=<NllLossBackward0>)
tensor(4.05

tensor(2.0842, grad_fn=<NllLossBackward0>)
tensor(2.0853, grad_fn=<NllLossBackward0>)
tensor(2.7114, grad_fn=<NllLossBackward0>)
tensor(2.4446, grad_fn=<NllLossBackward0>)
tensor(3.8453, grad_fn=<NllLossBackward0>)
tensor(2.9494, grad_fn=<NllLossBackward0>)
tensor(2.5755, grad_fn=<NllLossBackward0>)
tensor(3.5223, grad_fn=<NllLossBackward0>)
tensor(3.4474, grad_fn=<NllLossBackward0>)
tensor(3.0077, grad_fn=<NllLossBackward0>)
tensor(3.3083, grad_fn=<NllLossBackward0>)
tensor(2.4728, grad_fn=<NllLossBackward0>)
tensor(3.0691, grad_fn=<NllLossBackward0>)
tensor(3.0811, grad_fn=<NllLossBackward0>)
tensor(2.7019, grad_fn=<NllLossBackward0>)
tensor(3.7302, grad_fn=<NllLossBackward0>)
tensor(2.7031, grad_fn=<NllLossBackward0>)
tensor(3.6578, grad_fn=<NllLossBackward0>)
tensor(3.2326, grad_fn=<NllLossBackward0>)
tensor(3.8819, grad_fn=<NllLossBackward0>)
tensor(3.5710, grad_fn=<NllLossBackward0>)
tensor(2.8805, grad_fn=<NllLossBackward0>)
tensor(2.3467, grad_fn=<NllLossBackward0>)
tensor(3.28

tensor(3.0299, grad_fn=<NllLossBackward0>)
tensor(3.0825, grad_fn=<NllLossBackward0>)
tensor(2.3584, grad_fn=<NllLossBackward0>)
tensor(3.0079, grad_fn=<NllLossBackward0>)
tensor(2.9539, grad_fn=<NllLossBackward0>)
tensor(2.7437, grad_fn=<NllLossBackward0>)
tensor(2.5467, grad_fn=<NllLossBackward0>)
tensor(2.5541, grad_fn=<NllLossBackward0>)
tensor(1.7631, grad_fn=<NllLossBackward0>)
tensor(2.3664, grad_fn=<NllLossBackward0>)
tensor(3.4130, grad_fn=<NllLossBackward0>)
tensor(2.6689, grad_fn=<NllLossBackward0>)
tensor(3.9469, grad_fn=<NllLossBackward0>)
tensor(3.4403, grad_fn=<NllLossBackward0>)
tensor(2.9563, grad_fn=<NllLossBackward0>)
tensor(2.2253, grad_fn=<NllLossBackward0>)
tensor(2.0205, grad_fn=<NllLossBackward0>)
tensor(3.3249, grad_fn=<NllLossBackward0>)
tensor(3.1754, grad_fn=<NllLossBackward0>)
tensor(3.0459, grad_fn=<NllLossBackward0>)
tensor(2.7334, grad_fn=<NllLossBackward0>)
tensor(3.1427, grad_fn=<NllLossBackward0>)
tensor(2.6005, grad_fn=<NllLossBackward0>)
tensor(4.09

tensor(2.0203, grad_fn=<NllLossBackward0>)
tensor(3.2768, grad_fn=<NllLossBackward0>)
tensor(3.6320, grad_fn=<NllLossBackward0>)
tensor(2.2182, grad_fn=<NllLossBackward0>)
tensor(2.9794, grad_fn=<NllLossBackward0>)
tensor(1.7539, grad_fn=<NllLossBackward0>)
tensor(3.4405, grad_fn=<NllLossBackward0>)
tensor(2.9122, grad_fn=<NllLossBackward0>)
tensor(3.1480, grad_fn=<NllLossBackward0>)
tensor(2.5568, grad_fn=<NllLossBackward0>)
tensor(2.5636, grad_fn=<NllLossBackward0>)
tensor(3.4273, grad_fn=<NllLossBackward0>)
tensor(3.7938, grad_fn=<NllLossBackward0>)
tensor(3.3198, grad_fn=<NllLossBackward0>)
tensor(2.9651, grad_fn=<NllLossBackward0>)
tensor(2.0186, grad_fn=<NllLossBackward0>)
tensor(2.7525, grad_fn=<NllLossBackward0>)
tensor(2.6824, grad_fn=<NllLossBackward0>)
tensor(2.6247, grad_fn=<NllLossBackward0>)
tensor(2.9545, grad_fn=<NllLossBackward0>)
tensor(3.6057, grad_fn=<NllLossBackward0>)
tensor(2.2746, grad_fn=<NllLossBackward0>)
tensor(2.6239, grad_fn=<NllLossBackward0>)
tensor(3.84

tensor(2.0673, grad_fn=<NllLossBackward0>)
tensor(2.1972, grad_fn=<NllLossBackward0>)
tensor(3.7368, grad_fn=<NllLossBackward0>)
tensor(3.4966, grad_fn=<NllLossBackward0>)
tensor(3.5915, grad_fn=<NllLossBackward0>)
tensor(2.7130, grad_fn=<NllLossBackward0>)
tensor(2.9752, grad_fn=<NllLossBackward0>)
tensor(2.3880, grad_fn=<NllLossBackward0>)
tensor(3.5207, grad_fn=<NllLossBackward0>)
tensor(3.2710, grad_fn=<NllLossBackward0>)
tensor(2.3683, grad_fn=<NllLossBackward0>)
tensor(2.8071, grad_fn=<NllLossBackward0>)
tensor(2.6774, grad_fn=<NllLossBackward0>)
tensor(2.3402, grad_fn=<NllLossBackward0>)
tensor(3.1112, grad_fn=<NllLossBackward0>)
tensor(2.4531, grad_fn=<NllLossBackward0>)
tensor(2.7095, grad_fn=<NllLossBackward0>)
tensor(3.1529, grad_fn=<NllLossBackward0>)
tensor(2.8766, grad_fn=<NllLossBackward0>)
tensor(3.0126, grad_fn=<NllLossBackward0>)
tensor(2.3299, grad_fn=<NllLossBackward0>)
tensor(2.5681, grad_fn=<NllLossBackward0>)
tensor(2.9465, grad_fn=<NllLossBackward0>)
tensor(2.55

tensor(2.4308, grad_fn=<NllLossBackward0>)
tensor(2.7542, grad_fn=<NllLossBackward0>)
tensor(2.3562, grad_fn=<NllLossBackward0>)
tensor(2.9869, grad_fn=<NllLossBackward0>)
tensor(4.0827, grad_fn=<NllLossBackward0>)
tensor(2.9283, grad_fn=<NllLossBackward0>)
tensor(2.3579, grad_fn=<NllLossBackward0>)
tensor(3.1435, grad_fn=<NllLossBackward0>)
tensor(2.8770, grad_fn=<NllLossBackward0>)
tensor(3.1071, grad_fn=<NllLossBackward0>)
tensor(2.3672, grad_fn=<NllLossBackward0>)
tensor(2.7258, grad_fn=<NllLossBackward0>)
tensor(2.6957, grad_fn=<NllLossBackward0>)
tensor(2.3571, grad_fn=<NllLossBackward0>)
tensor(3.1210, grad_fn=<NllLossBackward0>)
tensor(3.7788, grad_fn=<NllLossBackward0>)
tensor(2.9626, grad_fn=<NllLossBackward0>)
tensor(2.8493, grad_fn=<NllLossBackward0>)
tensor(2.5063, grad_fn=<NllLossBackward0>)
tensor(3.1928, grad_fn=<NllLossBackward0>)
tensor(2.3614, grad_fn=<NllLossBackward0>)
tensor(3.2288, grad_fn=<NllLossBackward0>)
tensor(3.0085, grad_fn=<NllLossBackward0>)
tensor(3.69

tensor(2.7201, grad_fn=<NllLossBackward0>)
tensor(2.6499, grad_fn=<NllLossBackward0>)
tensor(3.7460, grad_fn=<NllLossBackward0>)
tensor(3.0688, grad_fn=<NllLossBackward0>)
tensor(3.0983, grad_fn=<NllLossBackward0>)
tensor(2.3015, grad_fn=<NllLossBackward0>)
tensor(2.6323, grad_fn=<NllLossBackward0>)
tensor(3.6789, grad_fn=<NllLossBackward0>)
tensor(3.3043, grad_fn=<NllLossBackward0>)
tensor(2.9605, grad_fn=<NllLossBackward0>)
tensor(3.4271, grad_fn=<NllLossBackward0>)
tensor(2.3905, grad_fn=<NllLossBackward0>)
tensor(2.0187, grad_fn=<NllLossBackward0>)
tensor(3.4271, grad_fn=<NllLossBackward0>)
tensor(2.6453, grad_fn=<NllLossBackward0>)
tensor(2.8854, grad_fn=<NllLossBackward0>)
tensor(3.6460, grad_fn=<NllLossBackward0>)
tensor(2.3960, grad_fn=<NllLossBackward0>)
tensor(3.1788, grad_fn=<NllLossBackward0>)
tensor(2.2905, grad_fn=<NllLossBackward0>)
tensor(2.9384, grad_fn=<NllLossBackward0>)
tensor(3.5675, grad_fn=<NllLossBackward0>)
tensor(2.9866, grad_fn=<NllLossBackward0>)
tensor(2.01

tensor(3.4068, grad_fn=<NllLossBackward0>)
tensor(2.7593, grad_fn=<NllLossBackward0>)
tensor(2.0892, grad_fn=<NllLossBackward0>)
tensor(3.0915, grad_fn=<NllLossBackward0>)
tensor(2.8565, grad_fn=<NllLossBackward0>)
tensor(2.3680, grad_fn=<NllLossBackward0>)
tensor(2.4954, grad_fn=<NllLossBackward0>)
tensor(3.4557, grad_fn=<NllLossBackward0>)
tensor(3.9202, grad_fn=<NllLossBackward0>)
tensor(2.2813, grad_fn=<NllLossBackward0>)
tensor(2.3404, grad_fn=<NllLossBackward0>)
tensor(2.2935, grad_fn=<NllLossBackward0>)
tensor(3.6855, grad_fn=<NllLossBackward0>)
tensor(2.6473, grad_fn=<NllLossBackward0>)
tensor(2.9555, grad_fn=<NllLossBackward0>)
tensor(3.3186, grad_fn=<NllLossBackward0>)
tensor(2.4596, grad_fn=<NllLossBackward0>)
tensor(2.9514, grad_fn=<NllLossBackward0>)
tensor(2.0850, grad_fn=<NllLossBackward0>)
tensor(2.7454, grad_fn=<NllLossBackward0>)
tensor(3.5083, grad_fn=<NllLossBackward0>)
tensor(3.7103, grad_fn=<NllLossBackward0>)
tensor(2.5265, grad_fn=<NllLossBackward0>)
tensor(2.93

tensor(3.4904, grad_fn=<NllLossBackward0>)
tensor(2.7973, grad_fn=<NllLossBackward0>)
tensor(2.9759, grad_fn=<NllLossBackward0>)
tensor(2.0187, grad_fn=<NllLossBackward0>)
tensor(2.0145, grad_fn=<NllLossBackward0>)
tensor(3.0387, grad_fn=<NllLossBackward0>)
tensor(3.5640, grad_fn=<NllLossBackward0>)
tensor(3.0264, grad_fn=<NllLossBackward0>)
tensor(2.3946, grad_fn=<NllLossBackward0>)
tensor(2.7028, grad_fn=<NllLossBackward0>)
tensor(3.0346, grad_fn=<NllLossBackward0>)
tensor(2.0200, grad_fn=<NllLossBackward0>)
tensor(1.7609, grad_fn=<NllLossBackward0>)
tensor(2.0796, grad_fn=<NllLossBackward0>)
tensor(5.1718, grad_fn=<NllLossBackward0>)
tensor(2.8260, grad_fn=<NllLossBackward0>)
tensor(3.5371, grad_fn=<NllLossBackward0>)
tensor(3.9175, grad_fn=<NllLossBackward0>)
tensor(2.8756, grad_fn=<NllLossBackward0>)
tensor(3.5405, grad_fn=<NllLossBackward0>)
tensor(3.4081, grad_fn=<NllLossBackward0>)
tensor(2.5452, grad_fn=<NllLossBackward0>)
tensor(3.0417, grad_fn=<NllLossBackward0>)
tensor(2.08

tensor(3.0751, grad_fn=<NllLossBackward0>)
tensor(4.1458, grad_fn=<NllLossBackward0>)
tensor(3.6679, grad_fn=<NllLossBackward0>)
tensor(1.7682, grad_fn=<NllLossBackward0>)
tensor(2.0827, grad_fn=<NllLossBackward0>)
tensor(2.3989, grad_fn=<NllLossBackward0>)
tensor(3.1037, grad_fn=<NllLossBackward0>)
tensor(2.0252, grad_fn=<NllLossBackward0>)
tensor(3.0222, grad_fn=<NllLossBackward0>)
tensor(2.6807, grad_fn=<NllLossBackward0>)
tensor(3.9355, grad_fn=<NllLossBackward0>)
tensor(2.9500, grad_fn=<NllLossBackward0>)
tensor(2.2129, grad_fn=<NllLossBackward0>)
tensor(3.5967, grad_fn=<NllLossBackward0>)
tensor(3.4367, grad_fn=<NllLossBackward0>)
tensor(2.6941, grad_fn=<NllLossBackward0>)
tensor(2.3999, grad_fn=<NllLossBackward0>)
tensor(3.4722, grad_fn=<NllLossBackward0>)
tensor(3.0109, grad_fn=<NllLossBackward0>)
tensor(3.6363, grad_fn=<NllLossBackward0>)
tensor(4.1170, grad_fn=<NllLossBackward0>)
tensor(2.3590, grad_fn=<NllLossBackward0>)
tensor(2.0858, grad_fn=<NllLossBackward0>)
tensor(2.46

tensor(3.4043, grad_fn=<NllLossBackward0>)
tensor(3.4980, grad_fn=<NllLossBackward0>)
tensor(2.4018, grad_fn=<NllLossBackward0>)
tensor(2.6295, grad_fn=<NllLossBackward0>)
tensor(3.0057, grad_fn=<NllLossBackward0>)
tensor(2.7659, grad_fn=<NllLossBackward0>)
tensor(4.0676, grad_fn=<NllLossBackward0>)
tensor(2.0715, grad_fn=<NllLossBackward0>)
tensor(2.8289, grad_fn=<NllLossBackward0>)
tensor(2.9010, grad_fn=<NllLossBackward0>)
tensor(3.3473, grad_fn=<NllLossBackward0>)
tensor(3.4717, grad_fn=<NllLossBackward0>)
tensor(2.6507, grad_fn=<NllLossBackward0>)
tensor(2.3623, grad_fn=<NllLossBackward0>)
tensor(2.9504, grad_fn=<NllLossBackward0>)
tensor(4.1042, grad_fn=<NllLossBackward0>)
tensor(2.3914, grad_fn=<NllLossBackward0>)
tensor(3.4591, grad_fn=<NllLossBackward0>)
tensor(3.1011, grad_fn=<NllLossBackward0>)
tensor(3.0055, grad_fn=<NllLossBackward0>)
tensor(3.0225, grad_fn=<NllLossBackward0>)
tensor(2.2019, grad_fn=<NllLossBackward0>)
tensor(2.0236, grad_fn=<NllLossBackward0>)
tensor(2.62

tensor(2.8543, grad_fn=<NllLossBackward0>)
tensor(4.0850, grad_fn=<NllLossBackward0>)
tensor(3.4612, grad_fn=<NllLossBackward0>)
tensor(2.3778, grad_fn=<NllLossBackward0>)
tensor(2.4608, grad_fn=<NllLossBackward0>)
tensor(2.7651, grad_fn=<NllLossBackward0>)
tensor(4.2519, grad_fn=<NllLossBackward0>)
tensor(3.4246, grad_fn=<NllLossBackward0>)
tensor(2.4561, grad_fn=<NllLossBackward0>)
tensor(4.1635, grad_fn=<NllLossBackward0>)
tensor(3.0582, grad_fn=<NllLossBackward0>)
tensor(1.7605, grad_fn=<NllLossBackward0>)
tensor(2.9403, grad_fn=<NllLossBackward0>)
tensor(3.9688, grad_fn=<NllLossBackward0>)
tensor(3.2732, grad_fn=<NllLossBackward0>)
tensor(2.2765, grad_fn=<NllLossBackward0>)
tensor(2.9761, grad_fn=<NllLossBackward0>)
tensor(2.5598, grad_fn=<NllLossBackward0>)
tensor(1.7591, grad_fn=<NllLossBackward0>)
tensor(3.1688, grad_fn=<NllLossBackward0>)
tensor(3.4706, grad_fn=<NllLossBackward0>)
tensor(2.3986, grad_fn=<NllLossBackward0>)
tensor(2.8710, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(3.4155, grad_fn=<NllLossBackward0>)
tensor(2.7567, grad_fn=<NllLossBackward0>)
tensor(1.7778, grad_fn=<NllLossBackward0>)
tensor(2.5938, grad_fn=<NllLossBackward0>)
tensor(3.0538, grad_fn=<NllLossBackward0>)
tensor(2.9088, grad_fn=<NllLossBackward0>)
tensor(3.1856, grad_fn=<NllLossBackward0>)
tensor(2.5945, grad_fn=<NllLossBackward0>)
tensor(2.0859, grad_fn=<NllLossBackward0>)
tensor(4.4011, grad_fn=<NllLossBackward0>)
tensor(2.9217, grad_fn=<NllLossBackward0>)
tensor(2.3654, grad_fn=<NllLossBackward0>)
tensor(2.4636, grad_fn=<NllLossBackward0>)
tensor(3.4386, grad_fn=<NllLossBackward0>)
tensor(1.7701, grad_fn=<NllLossBackward0>)
tensor(2.8316, grad_fn=<NllLossBackward0>)
tensor(3.4219, grad_fn=<NllLossBackward0>)
tensor(2.6073, grad_fn=<NllLossBackward0>)
tensor(2.6066, grad_fn=<NllLossBackward0>)
tensor(2.7686, grad_fn=<NllLossBackward0>)
tensor(2.7033, grad_fn=<NllLossBackward0>)
tensor(2.9851, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(2.91

tensor(3.0881, grad_fn=<NllLossBackward0>)
tensor(2.8236, grad_fn=<NllLossBackward0>)
tensor(2.8792, grad_fn=<NllLossBackward0>)
tensor(2.3208, grad_fn=<NllLossBackward0>)
tensor(2.2648, grad_fn=<NllLossBackward0>)
tensor(2.4368, grad_fn=<NllLossBackward0>)
tensor(2.3197, grad_fn=<NllLossBackward0>)
tensor(3.9638, grad_fn=<NllLossBackward0>)
tensor(2.0114, grad_fn=<NllLossBackward0>)
tensor(3.9153, grad_fn=<NllLossBackward0>)
tensor(3.5198, grad_fn=<NllLossBackward0>)
tensor(2.8214, grad_fn=<NllLossBackward0>)
tensor(3.4588, grad_fn=<NllLossBackward0>)
tensor(3.4063, grad_fn=<NllLossBackward0>)
tensor(2.4462, grad_fn=<NllLossBackward0>)
tensor(2.9928, grad_fn=<NllLossBackward0>)
tensor(2.8211, grad_fn=<NllLossBackward0>)
tensor(3.0838, grad_fn=<NllLossBackward0>)
tensor(2.6702, grad_fn=<NllLossBackward0>)
tensor(2.6525, grad_fn=<NllLossBackward0>)
tensor(1.7703, grad_fn=<NllLossBackward0>)
tensor(4.1997, grad_fn=<NllLossBackward0>)
tensor(2.7463, grad_fn=<NllLossBackward0>)
tensor(3.04

tensor(3.7203, grad_fn=<NllLossBackward0>)
tensor(2.3727, grad_fn=<NllLossBackward0>)
tensor(2.4017, grad_fn=<NllLossBackward0>)
tensor(3.0653, grad_fn=<NllLossBackward0>)
tensor(2.3019, grad_fn=<NllLossBackward0>)
tensor(2.9873, grad_fn=<NllLossBackward0>)
tensor(2.7584, grad_fn=<NllLossBackward0>)
tensor(1.9877, grad_fn=<NllLossBackward0>)
tensor(1.9875, grad_fn=<NllLossBackward0>)
tensor(3.1351, grad_fn=<NllLossBackward0>)
tensor(3.4568, grad_fn=<NllLossBackward0>)
tensor(3.7688, grad_fn=<NllLossBackward0>)
tensor(2.5231, grad_fn=<NllLossBackward0>)
tensor(2.9510, grad_fn=<NllLossBackward0>)
tensor(2.3936, grad_fn=<NllLossBackward0>)
tensor(2.6773, grad_fn=<NllLossBackward0>)
tensor(2.0787, grad_fn=<NllLossBackward0>)
tensor(3.0147, grad_fn=<NllLossBackward0>)
tensor(2.4521, grad_fn=<NllLossBackward0>)
tensor(4.0660, grad_fn=<NllLossBackward0>)
tensor(1.7673, grad_fn=<NllLossBackward0>)
tensor(2.2965, grad_fn=<NllLossBackward0>)
tensor(3.2648, grad_fn=<NllLossBackward0>)
tensor(2.68

tensor(4.6612, grad_fn=<NllLossBackward0>)
tensor(3.0324, grad_fn=<NllLossBackward0>)
tensor(2.7038, grad_fn=<NllLossBackward0>)
tensor(3.1007, grad_fn=<NllLossBackward0>)
tensor(2.8744, grad_fn=<NllLossBackward0>)
tensor(2.4304, grad_fn=<NllLossBackward0>)
tensor(2.9783, grad_fn=<NllLossBackward0>)
tensor(2.2431, grad_fn=<NllLossBackward0>)
tensor(2.2265, grad_fn=<NllLossBackward0>)
tensor(2.8593, grad_fn=<NllLossBackward0>)
tensor(4.0733, grad_fn=<NllLossBackward0>)
tensor(3.1788, grad_fn=<NllLossBackward0>)
tensor(2.7786, grad_fn=<NllLossBackward0>)
tensor(3.4812, grad_fn=<NllLossBackward0>)
tensor(1.7482, grad_fn=<NllLossBackward0>)
tensor(3.6008, grad_fn=<NllLossBackward0>)
tensor(3.2558, grad_fn=<NllLossBackward0>)
tensor(2.0605, grad_fn=<NllLossBackward0>)
tensor(3.4042, grad_fn=<NllLossBackward0>)
tensor(3.1385, grad_fn=<NllLossBackward0>)
tensor(2.6066, grad_fn=<NllLossBackward0>)
tensor(2.8378, grad_fn=<NllLossBackward0>)
tensor(2.3117, grad_fn=<NllLossBackward0>)
tensor(2.30

tensor(2.7372, grad_fn=<NllLossBackward0>)
tensor(3.0336, grad_fn=<NllLossBackward0>)
tensor(3.2840, grad_fn=<NllLossBackward0>)
tensor(1.9922, grad_fn=<NllLossBackward0>)
tensor(2.6907, grad_fn=<NllLossBackward0>)
tensor(2.9097, grad_fn=<NllLossBackward0>)
tensor(2.3479, grad_fn=<NllLossBackward0>)
tensor(2.6004, grad_fn=<NllLossBackward0>)
tensor(2.0433, grad_fn=<NllLossBackward0>)
tensor(2.6846, grad_fn=<NllLossBackward0>)
tensor(3.4877, grad_fn=<NllLossBackward0>)
tensor(2.9777, grad_fn=<NllLossBackward0>)
tensor(2.3088, grad_fn=<NllLossBackward0>)
tensor(2.8293, grad_fn=<NllLossBackward0>)
tensor(2.7455, grad_fn=<NllLossBackward0>)
tensor(1.9898, grad_fn=<NllLossBackward0>)
tensor(3.1060, grad_fn=<NllLossBackward0>)
tensor(2.6544, grad_fn=<NllLossBackward0>)
tensor(2.0519, grad_fn=<NllLossBackward0>)
tensor(2.3725, grad_fn=<NllLossBackward0>)
tensor(4.0303, grad_fn=<NllLossBackward0>)
tensor(3.5021, grad_fn=<NllLossBackward0>)
tensor(3.1483, grad_fn=<NllLossBackward0>)
tensor(3.02

tensor(1.7439, grad_fn=<NllLossBackward0>)
tensor(2.2056, grad_fn=<NllLossBackward0>)
tensor(2.3205, grad_fn=<NllLossBackward0>)
tensor(1.9900, grad_fn=<NllLossBackward0>)
tensor(2.6776, grad_fn=<NllLossBackward0>)
tensor(2.0474, grad_fn=<NllLossBackward0>)
tensor(3.9618, grad_fn=<NllLossBackward0>)
tensor(2.8198, grad_fn=<NllLossBackward0>)
tensor(2.7071, grad_fn=<NllLossBackward0>)
tensor(2.4521, grad_fn=<NllLossBackward0>)
tensor(2.6628, grad_fn=<NllLossBackward0>)
tensor(2.9155, grad_fn=<NllLossBackward0>)
tensor(2.3818, grad_fn=<NllLossBackward0>)
tensor(4.0225, grad_fn=<NllLossBackward0>)
tensor(1.9950, grad_fn=<NllLossBackward0>)
tensor(4.3037, grad_fn=<NllLossBackward0>)
tensor(2.9216, grad_fn=<NllLossBackward0>)
tensor(2.9428, grad_fn=<NllLossBackward0>)
tensor(2.6555, grad_fn=<NllLossBackward0>)
tensor(2.0529, grad_fn=<NllLossBackward0>)
tensor(2.9412, grad_fn=<NllLossBackward0>)
tensor(4.1522, grad_fn=<NllLossBackward0>)
tensor(2.9031, grad_fn=<NllLossBackward0>)
tensor(3.62

tensor(2.0607, grad_fn=<NllLossBackward0>)
tensor(2.0491, grad_fn=<NllLossBackward0>)
tensor(3.4060, grad_fn=<NllLossBackward0>)
tensor(2.4733, grad_fn=<NllLossBackward0>)
tensor(3.3428, grad_fn=<NllLossBackward0>)
tensor(3.5685, grad_fn=<NllLossBackward0>)
tensor(2.6303, grad_fn=<NllLossBackward0>)
tensor(2.8223, grad_fn=<NllLossBackward0>)
tensor(2.2682, grad_fn=<NllLossBackward0>)
tensor(2.9927, grad_fn=<NllLossBackward0>)
tensor(3.5565, grad_fn=<NllLossBackward0>)
tensor(2.3921, grad_fn=<NllLossBackward0>)
tensor(2.6367, grad_fn=<NllLossBackward0>)
tensor(2.1738, grad_fn=<NllLossBackward0>)
tensor(3.1350, grad_fn=<NllLossBackward0>)
tensor(3.1110, grad_fn=<NllLossBackward0>)
tensor(2.3148, grad_fn=<NllLossBackward0>)
tensor(2.4826, grad_fn=<NllLossBackward0>)
tensor(3.0484, grad_fn=<NllLossBackward0>)
tensor(2.6387, grad_fn=<NllLossBackward0>)
tensor(3.9411, grad_fn=<NllLossBackward0>)
tensor(2.3746, grad_fn=<NllLossBackward0>)
tensor(2.4257, grad_fn=<NllLossBackward0>)
tensor(2.95

tensor(2.6333, grad_fn=<NllLossBackward0>)
tensor(1.9998, grad_fn=<NllLossBackward0>)
tensor(3.4240, grad_fn=<NllLossBackward0>)
tensor(1.9921, grad_fn=<NllLossBackward0>)
tensor(2.9401, grad_fn=<NllLossBackward0>)
tensor(2.5517, grad_fn=<NllLossBackward0>)
tensor(3.1097, grad_fn=<NllLossBackward0>)
tensor(2.8494, grad_fn=<NllLossBackward0>)
tensor(2.6284, grad_fn=<NllLossBackward0>)
tensor(3.5164, grad_fn=<NllLossBackward0>)
tensor(2.3354, grad_fn=<NllLossBackward0>)
tensor(4.0502, grad_fn=<NllLossBackward0>)
tensor(2.9794, grad_fn=<NllLossBackward0>)
tensor(2.6386, grad_fn=<NllLossBackward0>)
tensor(2.9734, grad_fn=<NllLossBackward0>)
tensor(3.4525, grad_fn=<NllLossBackward0>)
tensor(2.2586, grad_fn=<NllLossBackward0>)
tensor(4.6293, grad_fn=<NllLossBackward0>)
tensor(2.2675, grad_fn=<NllLossBackward0>)
tensor(3.5161, grad_fn=<NllLossBackward0>)
tensor(3.6006, grad_fn=<NllLossBackward0>)
tensor(3.2801, grad_fn=<NllLossBackward0>)
tensor(3.7492, grad_fn=<NllLossBackward0>)
tensor(2.60

tensor(2.8363, grad_fn=<NllLossBackward0>)
tensor(3.5094, grad_fn=<NllLossBackward0>)
tensor(2.4341, grad_fn=<NllLossBackward0>)
tensor(3.4889, grad_fn=<NllLossBackward0>)
tensor(2.3283, grad_fn=<NllLossBackward0>)
tensor(3.4321, grad_fn=<NllLossBackward0>)
tensor(2.1680, grad_fn=<NllLossBackward0>)
tensor(2.4846, grad_fn=<NllLossBackward0>)
tensor(2.2695, grad_fn=<NllLossBackward0>)
tensor(3.4141, grad_fn=<NllLossBackward0>)
tensor(2.7790, grad_fn=<NllLossBackward0>)
tensor(3.2285, grad_fn=<NllLossBackward0>)
tensor(2.3252, grad_fn=<NllLossBackward0>)
tensor(2.3275, grad_fn=<NllLossBackward0>)
tensor(2.2635, grad_fn=<NllLossBackward0>)
tensor(2.3252, grad_fn=<NllLossBackward0>)
tensor(3.4382, grad_fn=<NllLossBackward0>)
tensor(3.6881, grad_fn=<NllLossBackward0>)
tensor(3.5749, grad_fn=<NllLossBackward0>)
tensor(2.3699, grad_fn=<NllLossBackward0>)
tensor(3.1419, grad_fn=<NllLossBackward0>)
tensor(2.2691, grad_fn=<NllLossBackward0>)
tensor(3.0750, grad_fn=<NllLossBackward0>)
tensor(4.51

tensor(2.2544, grad_fn=<NllLossBackward0>)
tensor(2.9277, grad_fn=<NllLossBackward0>)
tensor(4.3571, grad_fn=<NllLossBackward0>)
tensor(2.5915, grad_fn=<NllLossBackward0>)
tensor(2.8444, grad_fn=<NllLossBackward0>)
tensor(1.7399, grad_fn=<NllLossBackward0>)
tensor(3.5437, grad_fn=<NllLossBackward0>)
tensor(3.2726, grad_fn=<NllLossBackward0>)
tensor(3.9843, grad_fn=<NllLossBackward0>)
tensor(2.8252, grad_fn=<NllLossBackward0>)
tensor(2.2639, grad_fn=<NllLossBackward0>)
tensor(3.1104, grad_fn=<NllLossBackward0>)
tensor(3.2763, grad_fn=<NllLossBackward0>)
tensor(2.4291, grad_fn=<NllLossBackward0>)
tensor(4.1180, grad_fn=<NllLossBackward0>)
tensor(2.4810, grad_fn=<NllLossBackward0>)
tensor(2.0636, grad_fn=<NllLossBackward0>)
tensor(2.9364, grad_fn=<NllLossBackward0>)
tensor(2.8043, grad_fn=<NllLossBackward0>)
tensor(2.7291, grad_fn=<NllLossBackward0>)
tensor(2.9470, grad_fn=<NllLossBackward0>)
tensor(3.3016, grad_fn=<NllLossBackward0>)
tensor(4.2660, grad_fn=<NllLossBackward0>)
tensor(3.66

tensor(3.1380, grad_fn=<NllLossBackward0>)
tensor(2.9669, grad_fn=<NllLossBackward0>)
tensor(2.4385, grad_fn=<NllLossBackward0>)
tensor(2.3397, grad_fn=<NllLossBackward0>)
tensor(3.3317, grad_fn=<NllLossBackward0>)
tensor(1.7259, grad_fn=<NllLossBackward0>)
tensor(3.6592, grad_fn=<NllLossBackward0>)
tensor(3.0136, grad_fn=<NllLossBackward0>)
tensor(2.1518, grad_fn=<NllLossBackward0>)
tensor(2.7899, grad_fn=<NllLossBackward0>)
tensor(3.0891, grad_fn=<NllLossBackward0>)
tensor(3.4416, grad_fn=<NllLossBackward0>)
tensor(2.1526, grad_fn=<NllLossBackward0>)
tensor(2.9547, grad_fn=<NllLossBackward0>)
tensor(2.7198, grad_fn=<NllLossBackward0>)
tensor(3.5336, grad_fn=<NllLossBackward0>)
tensor(2.6857, grad_fn=<NllLossBackward0>)
tensor(2.7650, grad_fn=<NllLossBackward0>)
tensor(2.3273, grad_fn=<NllLossBackward0>)
tensor(2.7473, grad_fn=<NllLossBackward0>)
tensor(1.9983, grad_fn=<NllLossBackward0>)
tensor(2.7352, grad_fn=<NllLossBackward0>)
tensor(2.4920, grad_fn=<NllLossBackward0>)
tensor(3.57

tensor(2.4752, grad_fn=<NllLossBackward0>)
tensor(2.4501, grad_fn=<NllLossBackward0>)
tensor(2.9125, grad_fn=<NllLossBackward0>)
tensor(2.4594, grad_fn=<NllLossBackward0>)
tensor(2.1476, grad_fn=<NllLossBackward0>)
tensor(3.1252, grad_fn=<NllLossBackward0>)
tensor(2.8346, grad_fn=<NllLossBackward0>)
tensor(3.3541, grad_fn=<NllLossBackward0>)
tensor(2.4329, grad_fn=<NllLossBackward0>)
tensor(2.3404, grad_fn=<NllLossBackward0>)
tensor(2.7486, grad_fn=<NllLossBackward0>)
tensor(2.5156, grad_fn=<NllLossBackward0>)
tensor(3.9496, grad_fn=<NllLossBackward0>)
tensor(3.3969, grad_fn=<NllLossBackward0>)
tensor(2.3694, grad_fn=<NllLossBackward0>)
tensor(2.7046, grad_fn=<NllLossBackward0>)
tensor(3.5428, grad_fn=<NllLossBackward0>)
tensor(2.9951, grad_fn=<NllLossBackward0>)
tensor(2.3664, grad_fn=<NllLossBackward0>)
tensor(2.4952, grad_fn=<NllLossBackward0>)
tensor(2.6533, grad_fn=<NllLossBackward0>)
tensor(2.6206, grad_fn=<NllLossBackward0>)
tensor(1.7238, grad_fn=<NllLossBackward0>)
tensor(2.04

tensor(3.7254, grad_fn=<NllLossBackward0>)
tensor(3.1436, grad_fn=<NllLossBackward0>)
tensor(2.6349, grad_fn=<NllLossBackward0>)
tensor(3.2346, grad_fn=<NllLossBackward0>)
tensor(1.9987, grad_fn=<NllLossBackward0>)
tensor(2.3508, grad_fn=<NllLossBackward0>)
tensor(3.7251, grad_fn=<NllLossBackward0>)
tensor(3.1389, grad_fn=<NllLossBackward0>)
tensor(2.8331, grad_fn=<NllLossBackward0>)
tensor(2.6390, grad_fn=<NllLossBackward0>)
tensor(2.4992, grad_fn=<NllLossBackward0>)
tensor(4.0275, grad_fn=<NllLossBackward0>)
tensor(3.3891, grad_fn=<NllLossBackward0>)
tensor(2.3666, grad_fn=<NllLossBackward0>)
tensor(1.7092, grad_fn=<NllLossBackward0>)
tensor(1.7051, grad_fn=<NllLossBackward0>)
tensor(2.0352, grad_fn=<NllLossBackward0>)
tensor(2.3235, grad_fn=<NllLossBackward0>)
tensor(3.2316, grad_fn=<NllLossBackward0>)
tensor(2.6799, grad_fn=<NllLossBackward0>)
tensor(1.9926, grad_fn=<NllLossBackward0>)
tensor(4.9558, grad_fn=<NllLossBackward0>)
tensor(3.1782, grad_fn=<NllLossBackward0>)
tensor(4.13

tensor(1.7112, grad_fn=<NllLossBackward0>)
tensor(5.3421, grad_fn=<NllLossBackward0>)
tensor(1.7080, grad_fn=<NllLossBackward0>)
tensor(3.3888, grad_fn=<NllLossBackward0>)
tensor(2.1484, grad_fn=<NllLossBackward0>)
tensor(3.8262, grad_fn=<NllLossBackward0>)
tensor(3.7486, grad_fn=<NllLossBackward0>)
tensor(2.7939, grad_fn=<NllLossBackward0>)
tensor(2.9726, grad_fn=<NllLossBackward0>)
tensor(2.9558, grad_fn=<NllLossBackward0>)
tensor(3.1150, grad_fn=<NllLossBackward0>)
tensor(3.4874, grad_fn=<NllLossBackward0>)
tensor(3.5285, grad_fn=<NllLossBackward0>)
tensor(3.4499, grad_fn=<NllLossBackward0>)
tensor(3.9692, grad_fn=<NllLossBackward0>)
tensor(2.8258, grad_fn=<NllLossBackward0>)
tensor(2.9153, grad_fn=<NllLossBackward0>)
tensor(1.9949, grad_fn=<NllLossBackward0>)
tensor(4.9320, grad_fn=<NllLossBackward0>)
tensor(2.8185, grad_fn=<NllLossBackward0>)
tensor(1.9914, grad_fn=<NllLossBackward0>)
tensor(2.5912, grad_fn=<NllLossBackward0>)
tensor(2.7861, grad_fn=<NllLossBackward0>)
tensor(3.78

tensor(3.7397, grad_fn=<NllLossBackward0>)
tensor(1.9789, grad_fn=<NllLossBackward0>)
tensor(2.2900, grad_fn=<NllLossBackward0>)
tensor(2.3724, grad_fn=<NllLossBackward0>)
tensor(3.2078, grad_fn=<NllLossBackward0>)
tensor(3.0097, grad_fn=<NllLossBackward0>)
tensor(2.3047, grad_fn=<NllLossBackward0>)
tensor(2.3292, grad_fn=<NllLossBackward0>)
tensor(2.3452, grad_fn=<NllLossBackward0>)
tensor(2.6629, grad_fn=<NllLossBackward0>)
tensor(3.0185, grad_fn=<NllLossBackward0>)
tensor(3.1183, grad_fn=<NllLossBackward0>)
tensor(2.3038, grad_fn=<NllLossBackward0>)
tensor(2.0284, grad_fn=<NllLossBackward0>)
tensor(2.2059, grad_fn=<NllLossBackward0>)
tensor(3.4779, grad_fn=<NllLossBackward0>)
tensor(2.4558, grad_fn=<NllLossBackward0>)
tensor(3.0663, grad_fn=<NllLossBackward0>)
tensor(3.2138, grad_fn=<NllLossBackward0>)
tensor(2.7361, grad_fn=<NllLossBackward0>)
tensor(3.7243, grad_fn=<NllLossBackward0>)
tensor(3.6763, grad_fn=<NllLossBackward0>)
tensor(2.4454, grad_fn=<NllLossBackward0>)
tensor(2.42

tensor(2.2079, grad_fn=<NllLossBackward0>)
tensor(3.4779, grad_fn=<NllLossBackward0>)
tensor(3.2270, grad_fn=<NllLossBackward0>)
tensor(3.0316, grad_fn=<NllLossBackward0>)
tensor(2.8152, grad_fn=<NllLossBackward0>)
tensor(2.4550, grad_fn=<NllLossBackward0>)
tensor(3.5785, grad_fn=<NllLossBackward0>)
tensor(2.8709, grad_fn=<NllLossBackward0>)
tensor(3.0721, grad_fn=<NllLossBackward0>)
tensor(2.7095, grad_fn=<NllLossBackward0>)
tensor(2.4691, grad_fn=<NllLossBackward0>)
tensor(4.2014, grad_fn=<NllLossBackward0>)
tensor(4.3190, grad_fn=<NllLossBackward0>)
tensor(2.0262, grad_fn=<NllLossBackward0>)
tensor(1.9693, grad_fn=<NllLossBackward0>)
tensor(2.1582, grad_fn=<NllLossBackward0>)
tensor(2.8161, grad_fn=<NllLossBackward0>)
tensor(2.7868, grad_fn=<NllLossBackward0>)
tensor(2.6590, grad_fn=<NllLossBackward0>)
tensor(2.9830, grad_fn=<NllLossBackward0>)
tensor(2.2889, grad_fn=<NllLossBackward0>)
tensor(1.9602, grad_fn=<NllLossBackward0>)
tensor(4.6012, grad_fn=<NllLossBackward0>)
tensor(2.27

tensor(1.6719, grad_fn=<NllLossBackward0>)
tensor(3.0009, grad_fn=<NllLossBackward0>)
tensor(2.3180, grad_fn=<NllLossBackward0>)
tensor(2.6859, grad_fn=<NllLossBackward0>)
tensor(3.2731, grad_fn=<NllLossBackward0>)
tensor(4.0970, grad_fn=<NllLossBackward0>)
tensor(2.9530, grad_fn=<NllLossBackward0>)
tensor(2.1418, grad_fn=<NllLossBackward0>)
tensor(1.9588, grad_fn=<NllLossBackward0>)
tensor(3.2153, grad_fn=<NllLossBackward0>)
tensor(2.2973, grad_fn=<NllLossBackward0>)
tensor(3.9663, grad_fn=<NllLossBackward0>)
tensor(3.1194, grad_fn=<NllLossBackward0>)
tensor(3.5242, grad_fn=<NllLossBackward0>)
tensor(1.6673, grad_fn=<NllLossBackward0>)
tensor(2.5112, grad_fn=<NllLossBackward0>)
tensor(2.8296, grad_fn=<NllLossBackward0>)
tensor(3.5908, grad_fn=<NllLossBackward0>)
tensor(2.6827, grad_fn=<NllLossBackward0>)
tensor(3.6162, grad_fn=<NllLossBackward0>)
tensor(3.0034, grad_fn=<NllLossBackward0>)
tensor(2.5302, grad_fn=<NllLossBackward0>)
tensor(2.4252, grad_fn=<NllLossBackward0>)
tensor(2.52

tensor(3.9839, grad_fn=<NllLossBackward0>)
tensor(4.2251, grad_fn=<NllLossBackward0>)
tensor(2.4159, grad_fn=<NllLossBackward0>)
tensor(3.0123, grad_fn=<NllLossBackward0>)
tensor(4.0364, grad_fn=<NllLossBackward0>)
tensor(2.6412, grad_fn=<NllLossBackward0>)
tensor(3.3502, grad_fn=<NllLossBackward0>)
tensor(2.5214, grad_fn=<NllLossBackward0>)
tensor(2.0420, grad_fn=<NllLossBackward0>)
tensor(3.9781, grad_fn=<NllLossBackward0>)
tensor(2.4706, grad_fn=<NllLossBackward0>)
tensor(1.9734, grad_fn=<NllLossBackward0>)
tensor(3.2841, grad_fn=<NllLossBackward0>)
tensor(1.9622, grad_fn=<NllLossBackward0>)
tensor(2.8559, grad_fn=<NllLossBackward0>)
tensor(3.3940, grad_fn=<NllLossBackward0>)
tensor(2.6324, grad_fn=<NllLossBackward0>)
tensor(3.5411, grad_fn=<NllLossBackward0>)
tensor(2.4444, grad_fn=<NllLossBackward0>)
tensor(2.5289, grad_fn=<NllLossBackward0>)
tensor(3.8615, grad_fn=<NllLossBackward0>)
tensor(2.6439, grad_fn=<NllLossBackward0>)
tensor(2.4425, grad_fn=<NllLossBackward0>)
tensor(3.06

tensor(2.6806, grad_fn=<NllLossBackward0>)
tensor(3.9906, grad_fn=<NllLossBackward0>)
tensor(2.8514, grad_fn=<NllLossBackward0>)
tensor(3.5976, grad_fn=<NllLossBackward0>)
tensor(3.6428, grad_fn=<NllLossBackward0>)
tensor(2.8235, grad_fn=<NllLossBackward0>)
tensor(2.6283, grad_fn=<NllLossBackward0>)
tensor(2.9790, grad_fn=<NllLossBackward0>)
tensor(3.2524, grad_fn=<NllLossBackward0>)
tensor(2.9842, grad_fn=<NllLossBackward0>)
tensor(4.4747, grad_fn=<NllLossBackward0>)
tensor(2.2156, grad_fn=<NllLossBackward0>)
tensor(4.2665, grad_fn=<NllLossBackward0>)
tensor(2.1835, grad_fn=<NllLossBackward0>)
tensor(2.9209, grad_fn=<NllLossBackward0>)
tensor(2.2319, grad_fn=<NllLossBackward0>)
tensor(3.6739, grad_fn=<NllLossBackward0>)
tensor(4.0311, grad_fn=<NllLossBackward0>)
tensor(1.7028, grad_fn=<NllLossBackward0>)
tensor(3.6964, grad_fn=<NllLossBackward0>)
tensor(2.4209, grad_fn=<NllLossBackward0>)
tensor(3.9944, grad_fn=<NllLossBackward0>)
tensor(3.5971, grad_fn=<NllLossBackward0>)
tensor(3.68

tensor(3.3983, grad_fn=<NllLossBackward0>)
tensor(2.3606, grad_fn=<NllLossBackward0>)
tensor(4.0258, grad_fn=<NllLossBackward0>)
tensor(3.0508, grad_fn=<NllLossBackward0>)
tensor(1.6768, grad_fn=<NllLossBackward0>)
tensor(3.1773, grad_fn=<NllLossBackward0>)
tensor(3.2082, grad_fn=<NllLossBackward0>)
tensor(1.9468, grad_fn=<NllLossBackward0>)
tensor(1.9463, grad_fn=<NllLossBackward0>)
tensor(2.2072, grad_fn=<NllLossBackward0>)
tensor(4.3664, grad_fn=<NllLossBackward0>)
tensor(2.6122, grad_fn=<NllLossBackward0>)
tensor(3.4684, grad_fn=<NllLossBackward0>)
tensor(1.9476, grad_fn=<NllLossBackward0>)
tensor(2.4255, grad_fn=<NllLossBackward0>)
tensor(2.8766, grad_fn=<NllLossBackward0>)
tensor(2.7095, grad_fn=<NllLossBackward0>)
tensor(2.6923, grad_fn=<NllLossBackward0>)
tensor(4.4710, grad_fn=<NllLossBackward0>)
tensor(3.0137, grad_fn=<NllLossBackward0>)
tensor(2.4911, grad_fn=<NllLossBackward0>)
tensor(1.9539, grad_fn=<NllLossBackward0>)
tensor(4.1162, grad_fn=<NllLossBackward0>)
tensor(3.09

tensor(2.3252, grad_fn=<NllLossBackward0>)
tensor(3.4552, grad_fn=<NllLossBackward0>)
tensor(2.3540, grad_fn=<NllLossBackward0>)
tensor(2.4926, grad_fn=<NllLossBackward0>)
tensor(2.7325, grad_fn=<NllLossBackward0>)
tensor(2.3746, grad_fn=<NllLossBackward0>)
tensor(2.9210, grad_fn=<NllLossBackward0>)
tensor(3.5785, grad_fn=<NllLossBackward0>)
tensor(3.6231, grad_fn=<NllLossBackward0>)
tensor(3.7501, grad_fn=<NllLossBackward0>)
tensor(2.5042, grad_fn=<NllLossBackward0>)
tensor(4.2919, grad_fn=<NllLossBackward0>)
tensor(3.1385, grad_fn=<NllLossBackward0>)
tensor(2.7130, grad_fn=<NllLossBackward0>)
tensor(3.9577, grad_fn=<NllLossBackward0>)
tensor(2.4313, grad_fn=<NllLossBackward0>)
tensor(4.3068, grad_fn=<NllLossBackward0>)
tensor(3.4600, grad_fn=<NllLossBackward0>)
tensor(1.6947, grad_fn=<NllLossBackward0>)
tensor(1.9634, grad_fn=<NllLossBackward0>)
tensor(3.0536, grad_fn=<NllLossBackward0>)
tensor(1.9758, grad_fn=<NllLossBackward0>)
tensor(3.4745, grad_fn=<NllLossBackward0>)
tensor(3.58

tensor(2.3662, grad_fn=<NllLossBackward0>)
tensor(2.6988, grad_fn=<NllLossBackward0>)
tensor(3.4125, grad_fn=<NllLossBackward0>)
tensor(2.3749, grad_fn=<NllLossBackward0>)
tensor(2.8036, grad_fn=<NllLossBackward0>)
tensor(3.0894, grad_fn=<NllLossBackward0>)
tensor(2.4932, grad_fn=<NllLossBackward0>)
tensor(3.3318, grad_fn=<NllLossBackward0>)
tensor(2.5905, grad_fn=<NllLossBackward0>)
tensor(4.3348, grad_fn=<NllLossBackward0>)
tensor(2.9763, grad_fn=<NllLossBackward0>)
tensor(2.6189, grad_fn=<NllLossBackward0>)
tensor(4.3058, grad_fn=<NllLossBackward0>)
tensor(2.4917, grad_fn=<NllLossBackward0>)
tensor(2.2109, grad_fn=<NllLossBackward0>)
tensor(2.0638, grad_fn=<NllLossBackward0>)
tensor(2.8024, grad_fn=<NllLossBackward0>)
tensor(2.5932, grad_fn=<NllLossBackward0>)
tensor(2.8572, grad_fn=<NllLossBackward0>)
tensor(3.1313, grad_fn=<NllLossBackward0>)
tensor(2.8242, grad_fn=<NllLossBackward0>)
tensor(3.4446, grad_fn=<NllLossBackward0>)
tensor(2.4362, grad_fn=<NllLossBackward0>)
tensor(4.38

tensor(2.8825, grad_fn=<NllLossBackward0>)
tensor(4.0999, grad_fn=<NllLossBackward0>)
tensor(2.5725, grad_fn=<NllLossBackward0>)
tensor(2.9467, grad_fn=<NllLossBackward0>)
tensor(2.5616, grad_fn=<NllLossBackward0>)
tensor(2.8962, grad_fn=<NllLossBackward0>)
tensor(2.9719, grad_fn=<NllLossBackward0>)
tensor(3.7229, grad_fn=<NllLossBackward0>)
tensor(1.9958, grad_fn=<NllLossBackward0>)
tensor(2.4794, grad_fn=<NllLossBackward0>)
tensor(3.9705, grad_fn=<NllLossBackward0>)
tensor(2.8995, grad_fn=<NllLossBackward0>)
tensor(1.7504, grad_fn=<NllLossBackward0>)
tensor(2.8967, grad_fn=<NllLossBackward0>)
tensor(2.3874, grad_fn=<NllLossBackward0>)
tensor(2.4413, grad_fn=<NllLossBackward0>)
tensor(2.3218, grad_fn=<NllLossBackward0>)
tensor(3.5386, grad_fn=<NllLossBackward0>)
tensor(3.1677, grad_fn=<NllLossBackward0>)
tensor(2.7130, grad_fn=<NllLossBackward0>)
tensor(2.5064, grad_fn=<NllLossBackward0>)
tensor(3.0869, grad_fn=<NllLossBackward0>)
tensor(3.0251, grad_fn=<NllLossBackward0>)
tensor(2.99

tensor(2.2638, grad_fn=<NllLossBackward0>)
tensor(3.1908, grad_fn=<NllLossBackward0>)
tensor(3.0335, grad_fn=<NllLossBackward0>)
tensor(2.5433, grad_fn=<NllLossBackward0>)
tensor(2.4757, grad_fn=<NllLossBackward0>)
tensor(3.4190, grad_fn=<NllLossBackward0>)
tensor(2.4473, grad_fn=<NllLossBackward0>)
tensor(4.6318, grad_fn=<NllLossBackward0>)
tensor(2.8492, grad_fn=<NllLossBackward0>)
tensor(3.3942, grad_fn=<NllLossBackward0>)
tensor(2.4563, grad_fn=<NllLossBackward0>)
tensor(3.2314, grad_fn=<NllLossBackward0>)
tensor(2.2681, grad_fn=<NllLossBackward0>)
tensor(2.0500, grad_fn=<NllLossBackward0>)
tensor(2.8703, grad_fn=<NllLossBackward0>)
tensor(3.1299, grad_fn=<NllLossBackward0>)
tensor(3.0832, grad_fn=<NllLossBackward0>)
tensor(2.8105, grad_fn=<NllLossBackward0>)
tensor(3.1107, grad_fn=<NllLossBackward0>)
tensor(2.6310, grad_fn=<NllLossBackward0>)
tensor(2.6380, grad_fn=<NllLossBackward0>)
tensor(1.7583, grad_fn=<NllLossBackward0>)
tensor(4.3958, grad_fn=<NllLossBackward0>)
tensor(3.20

tensor(2.0123, grad_fn=<NllLossBackward0>)
tensor(3.8651, grad_fn=<NllLossBackward0>)
tensor(3.1187, grad_fn=<NllLossBackward0>)
tensor(2.6946, grad_fn=<NllLossBackward0>)
tensor(2.6338, grad_fn=<NllLossBackward0>)
tensor(3.1690, grad_fn=<NllLossBackward0>)
tensor(3.1577, grad_fn=<NllLossBackward0>)
tensor(3.1755, grad_fn=<NllLossBackward0>)
tensor(2.8992, grad_fn=<NllLossBackward0>)
tensor(2.6194, grad_fn=<NllLossBackward0>)
tensor(3.2445, grad_fn=<NllLossBackward0>)
tensor(3.3503, grad_fn=<NllLossBackward0>)
tensor(3.1878, grad_fn=<NllLossBackward0>)
tensor(3.0284, grad_fn=<NllLossBackward0>)
tensor(2.5901, grad_fn=<NllLossBackward0>)
tensor(2.7255, grad_fn=<NllLossBackward0>)
tensor(4.1459, grad_fn=<NllLossBackward0>)
tensor(2.4552, grad_fn=<NllLossBackward0>)
tensor(2.2669, grad_fn=<NllLossBackward0>)
tensor(3.4408, grad_fn=<NllLossBackward0>)
tensor(2.0490, grad_fn=<NllLossBackward0>)
tensor(2.4816, grad_fn=<NllLossBackward0>)
tensor(3.9872, grad_fn=<NllLossBackward0>)
tensor(2.68

tensor(4.0165, grad_fn=<NllLossBackward0>)
tensor(2.4614, grad_fn=<NllLossBackward0>)
tensor(3.2000, grad_fn=<NllLossBackward0>)
tensor(2.9263, grad_fn=<NllLossBackward0>)
tensor(4.5040, grad_fn=<NllLossBackward0>)
tensor(2.3793, grad_fn=<NllLossBackward0>)
tensor(2.6811, grad_fn=<NllLossBackward0>)
tensor(2.6235, grad_fn=<NllLossBackward0>)
tensor(3.4351, grad_fn=<NllLossBackward0>)
tensor(2.7931, grad_fn=<NllLossBackward0>)
tensor(2.2233, grad_fn=<NllLossBackward0>)
tensor(1.7357, grad_fn=<NllLossBackward0>)
tensor(3.6425, grad_fn=<NllLossBackward0>)
tensor(3.8288, grad_fn=<NllLossBackward0>)
tensor(2.3783, grad_fn=<NllLossBackward0>)
tensor(3.6979, grad_fn=<NllLossBackward0>)
tensor(3.6380, grad_fn=<NllLossBackward0>)
tensor(2.9203, grad_fn=<NllLossBackward0>)
tensor(2.3899, grad_fn=<NllLossBackward0>)
tensor(2.4998, grad_fn=<NllLossBackward0>)
tensor(4.0416, grad_fn=<NllLossBackward0>)
tensor(2.3483, grad_fn=<NllLossBackward0>)
tensor(1.9878, grad_fn=<NllLossBackward0>)
tensor(3.57

tensor(2.3825, grad_fn=<NllLossBackward0>)
tensor(2.4299, grad_fn=<NllLossBackward0>)
tensor(3.0730, grad_fn=<NllLossBackward0>)
tensor(3.4648, grad_fn=<NllLossBackward0>)
tensor(3.8483, grad_fn=<NllLossBackward0>)
tensor(2.8797, grad_fn=<NllLossBackward0>)
tensor(3.0025, grad_fn=<NllLossBackward0>)
tensor(3.0205, grad_fn=<NllLossBackward0>)
tensor(3.3388, grad_fn=<NllLossBackward0>)
tensor(2.9128, grad_fn=<NllLossBackward0>)
tensor(2.6843, grad_fn=<NllLossBackward0>)
tensor(3.6583, grad_fn=<NllLossBackward0>)
tensor(2.8593, grad_fn=<NllLossBackward0>)
tensor(3.7877, grad_fn=<NllLossBackward0>)
tensor(2.7564, grad_fn=<NllLossBackward0>)
tensor(3.5033, grad_fn=<NllLossBackward0>)
tensor(3.0963, grad_fn=<NllLossBackward0>)
tensor(1.7242, grad_fn=<NllLossBackward0>)
tensor(1.7203, grad_fn=<NllLossBackward0>)
tensor(2.6679, grad_fn=<NllLossBackward0>)
tensor(3.4754, grad_fn=<NllLossBackward0>)
tensor(2.2991, grad_fn=<NllLossBackward0>)
tensor(4.0042, grad_fn=<NllLossBackward0>)
tensor(2.50

tensor(2.8968, grad_fn=<NllLossBackward0>)
tensor(2.3430, grad_fn=<NllLossBackward0>)
tensor(2.6497, grad_fn=<NllLossBackward0>)
tensor(2.2253, grad_fn=<NllLossBackward0>)
tensor(2.1950, grad_fn=<NllLossBackward0>)
tensor(3.6056, grad_fn=<NllLossBackward0>)
tensor(3.7597, grad_fn=<NllLossBackward0>)
tensor(3.9929, grad_fn=<NllLossBackward0>)
tensor(3.1671, grad_fn=<NllLossBackward0>)
tensor(3.7483, grad_fn=<NllLossBackward0>)
tensor(2.8220, grad_fn=<NllLossBackward0>)
tensor(2.0244, grad_fn=<NllLossBackward0>)
tensor(3.4698, grad_fn=<NllLossBackward0>)
tensor(4.0306, grad_fn=<NllLossBackward0>)
tensor(2.4957, grad_fn=<NllLossBackward0>)
tensor(3.8431, grad_fn=<NllLossBackward0>)
tensor(2.3667, grad_fn=<NllLossBackward0>)
tensor(2.7647, grad_fn=<NllLossBackward0>)
tensor(3.9895, grad_fn=<NllLossBackward0>)
tensor(1.9810, grad_fn=<NllLossBackward0>)
tensor(2.9635, grad_fn=<NllLossBackward0>)
tensor(2.7773, grad_fn=<NllLossBackward0>)
tensor(2.2073, grad_fn=<NllLossBackward0>)
tensor(3.37

tensor(2.3893, grad_fn=<NllLossBackward0>)
tensor(2.6597, grad_fn=<NllLossBackward0>)
tensor(4.4986, grad_fn=<NllLossBackward0>)
tensor(2.3396, grad_fn=<NllLossBackward0>)
tensor(2.7584, grad_fn=<NllLossBackward0>)
tensor(3.1102, grad_fn=<NllLossBackward0>)
tensor(3.3134, grad_fn=<NllLossBackward0>)
tensor(3.1187, grad_fn=<NllLossBackward0>)
tensor(2.0148, grad_fn=<NllLossBackward0>)
tensor(1.9749, grad_fn=<NllLossBackward0>)
tensor(2.9793, grad_fn=<NllLossBackward0>)
tensor(3.1956, grad_fn=<NllLossBackward0>)
tensor(3.4985, grad_fn=<NllLossBackward0>)
tensor(3.1907, grad_fn=<NllLossBackward0>)
tensor(2.4952, grad_fn=<NllLossBackward0>)
tensor(4.1016, grad_fn=<NllLossBackward0>)
tensor(2.7343, grad_fn=<NllLossBackward0>)
tensor(2.7497, grad_fn=<NllLossBackward0>)
tensor(3.1512, grad_fn=<NllLossBackward0>)
tensor(2.7269, grad_fn=<NllLossBackward0>)
tensor(3.2586, grad_fn=<NllLossBackward0>)
tensor(2.1764, grad_fn=<NllLossBackward0>)
tensor(1.6854, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(3.5386, grad_fn=<NllLossBackward0>)
tensor(3.6469, grad_fn=<NllLossBackward0>)
tensor(2.0098, grad_fn=<NllLossBackward0>)
tensor(2.0077, grad_fn=<NllLossBackward0>)
tensor(4.2004, grad_fn=<NllLossBackward0>)
tensor(3.5163, grad_fn=<NllLossBackward0>)
tensor(2.3238, grad_fn=<NllLossBackward0>)
tensor(3.2156, grad_fn=<NllLossBackward0>)
tensor(2.9949, grad_fn=<NllLossBackward0>)
tensor(2.6306, grad_fn=<NllLossBackward0>)
tensor(3.0281, grad_fn=<NllLossBackward0>)
tensor(3.4398, grad_fn=<NllLossBackward0>)
tensor(2.8874, grad_fn=<NllLossBackward0>)
tensor(1.7074, grad_fn=<NllLossBackward0>)
tensor(2.6475, grad_fn=<NllLossBackward0>)
tensor(3.1770, grad_fn=<NllLossBackward0>)
tensor(2.0159, grad_fn=<NllLossBackward0>)
tensor(2.6330, grad_fn=<NllLossBackward0>)
tensor(1.7044, grad_fn=<NllLossBackward0>)
tensor(2.9832, grad_fn=<NllLossBackward0>)
tensor(1.7011, grad_fn=<NllLossBackward0>)
tensor(3.7337, grad_fn=<NllLossBackward0>)
tensor(3.2733, grad_fn=<NllLossBackward0>)
tensor(3.25

tensor(2.7226, grad_fn=<NllLossBackward0>)
tensor(2.9158, grad_fn=<NllLossBackward0>)
tensor(3.0695, grad_fn=<NllLossBackward0>)
tensor(2.1815, grad_fn=<NllLossBackward0>)
tensor(3.0760, grad_fn=<NllLossBackward0>)
tensor(2.3339, grad_fn=<NllLossBackward0>)
tensor(2.4232, grad_fn=<NllLossBackward0>)
tensor(3.4749, grad_fn=<NllLossBackward0>)
tensor(2.6212, grad_fn=<NllLossBackward0>)
tensor(2.9006, grad_fn=<NllLossBackward0>)
tensor(3.3286, grad_fn=<NllLossBackward0>)
tensor(1.9903, grad_fn=<NllLossBackward0>)
tensor(3.0892, grad_fn=<NllLossBackward0>)
tensor(2.6292, grad_fn=<NllLossBackward0>)
tensor(3.0123, grad_fn=<NllLossBackward0>)
tensor(1.9833, grad_fn=<NllLossBackward0>)
tensor(3.1707, grad_fn=<NllLossBackward0>)
tensor(3.6254, grad_fn=<NllLossBackward0>)
tensor(2.6403, grad_fn=<NllLossBackward0>)
tensor(2.6276, grad_fn=<NllLossBackward0>)
tensor(2.4360, grad_fn=<NllLossBackward0>)
tensor(2.8269, grad_fn=<NllLossBackward0>)
tensor(2.1763, grad_fn=<NllLossBackward0>)
tensor(1.99

tensor(2.2302, grad_fn=<NllLossBackward0>)
tensor(3.0157, grad_fn=<NllLossBackward0>)
tensor(3.4603, grad_fn=<NllLossBackward0>)
tensor(3.1350, grad_fn=<NllLossBackward0>)
tensor(2.9640, grad_fn=<NllLossBackward0>)
tensor(3.5954, grad_fn=<NllLossBackward0>)
tensor(3.6589, grad_fn=<NllLossBackward0>)
tensor(1.6828, grad_fn=<NllLossBackward0>)
tensor(3.9641, grad_fn=<NllLossBackward0>)
tensor(2.5341, grad_fn=<NllLossBackward0>)
tensor(3.0775, grad_fn=<NllLossBackward0>)
tensor(2.7195, grad_fn=<NllLossBackward0>)
tensor(2.3544, grad_fn=<NllLossBackward0>)
tensor(2.4530, grad_fn=<NllLossBackward0>)
tensor(3.0801, grad_fn=<NllLossBackward0>)
tensor(1.9741, grad_fn=<NllLossBackward0>)
tensor(2.1883, grad_fn=<NllLossBackward0>)
tensor(3.0362, grad_fn=<NllLossBackward0>)
tensor(3.0263, grad_fn=<NllLossBackward0>)
tensor(2.6731, grad_fn=<NllLossBackward0>)
tensor(2.3186, grad_fn=<NllLossBackward0>)
tensor(2.4079, grad_fn=<NllLossBackward0>)
tensor(1.6795, grad_fn=<NllLossBackward0>)
tensor(3.53

tensor(2.4377, grad_fn=<NllLossBackward0>)
tensor(3.7079, grad_fn=<NllLossBackward0>)
tensor(2.3360, grad_fn=<NllLossBackward0>)
tensor(5.3377, grad_fn=<NllLossBackward0>)
tensor(3.4138, grad_fn=<NllLossBackward0>)
tensor(1.9802, grad_fn=<NllLossBackward0>)
tensor(2.4832, grad_fn=<NllLossBackward0>)
tensor(2.3280, grad_fn=<NllLossBackward0>)
tensor(3.4966, grad_fn=<NllLossBackward0>)
tensor(2.6391, grad_fn=<NllLossBackward0>)
tensor(3.0632, grad_fn=<NllLossBackward0>)
tensor(3.6710, grad_fn=<NllLossBackward0>)
tensor(3.2015, grad_fn=<NllLossBackward0>)
tensor(3.7730, grad_fn=<NllLossBackward0>)
tensor(3.5360, grad_fn=<NllLossBackward0>)
tensor(4.0140, grad_fn=<NllLossBackward0>)
tensor(2.3233, grad_fn=<NllLossBackward0>)
tensor(1.7241, grad_fn=<NllLossBackward0>)
tensor(2.9593, grad_fn=<NllLossBackward0>)
tensor(2.5331, grad_fn=<NllLossBackward0>)
tensor(2.2802, grad_fn=<NllLossBackward0>)
tensor(3.0795, grad_fn=<NllLossBackward0>)
tensor(2.8496, grad_fn=<NllLossBackward0>)
tensor(2.90

tensor(3.1496, grad_fn=<NllLossBackward0>)
tensor(3.0944, grad_fn=<NllLossBackward0>)
tensor(2.4764, grad_fn=<NllLossBackward0>)
tensor(2.2519, grad_fn=<NllLossBackward0>)
tensor(3.0958, grad_fn=<NllLossBackward0>)
tensor(2.6126, grad_fn=<NllLossBackward0>)
tensor(2.4889, grad_fn=<NllLossBackward0>)
tensor(2.9280, grad_fn=<NllLossBackward0>)
tensor(2.8918, grad_fn=<NllLossBackward0>)
tensor(3.0128, grad_fn=<NllLossBackward0>)
tensor(3.4737, grad_fn=<NllLossBackward0>)
tensor(2.0226, grad_fn=<NllLossBackward0>)
tensor(2.3700, grad_fn=<NllLossBackward0>)
tensor(2.2491, grad_fn=<NllLossBackward0>)
tensor(2.9262, grad_fn=<NllLossBackward0>)
tensor(2.3513, grad_fn=<NllLossBackward0>)
tensor(2.9335, grad_fn=<NllLossBackward0>)
tensor(2.8273, grad_fn=<NllLossBackward0>)
tensor(2.7166, grad_fn=<NllLossBackward0>)
tensor(3.9402, grad_fn=<NllLossBackward0>)
tensor(2.4611, grad_fn=<NllLossBackward0>)
tensor(2.9201, grad_fn=<NllLossBackward0>)
tensor(2.4663, grad_fn=<NllLossBackward0>)
tensor(2.18

tensor(2.7781, grad_fn=<NllLossBackward0>)
tensor(3.6691, grad_fn=<NllLossBackward0>)
tensor(2.5332, grad_fn=<NllLossBackward0>)
tensor(2.1840, grad_fn=<NllLossBackward0>)
tensor(3.3227, grad_fn=<NllLossBackward0>)
tensor(2.1821, grad_fn=<NllLossBackward0>)
tensor(2.7317, grad_fn=<NllLossBackward0>)
tensor(3.4547, grad_fn=<NllLossBackward0>)
tensor(2.6775, grad_fn=<NllLossBackward0>)
tensor(2.3154, grad_fn=<NllLossBackward0>)
tensor(2.6288, grad_fn=<NllLossBackward0>)
tensor(2.4717, grad_fn=<NllLossBackward0>)
tensor(3.1432, grad_fn=<NllLossBackward0>)
tensor(3.2992, grad_fn=<NllLossBackward0>)
tensor(2.3498, grad_fn=<NllLossBackward0>)
tensor(1.7453, grad_fn=<NllLossBackward0>)
tensor(3.0428, grad_fn=<NllLossBackward0>)
tensor(2.0061, grad_fn=<NllLossBackward0>)
tensor(2.9155, grad_fn=<NllLossBackward0>)
tensor(2.2853, grad_fn=<NllLossBackward0>)
tensor(2.6295, grad_fn=<NllLossBackward0>)
tensor(2.0199, grad_fn=<NllLossBackward0>)
tensor(2.5949, grad_fn=<NllLossBackward0>)
tensor(2.95

tensor(3.1788, grad_fn=<NllLossBackward0>)
tensor(3.8804, grad_fn=<NllLossBackward0>)
tensor(3.1749, grad_fn=<NllLossBackward0>)
tensor(3.3794, grad_fn=<NllLossBackward0>)
tensor(3.4490, grad_fn=<NllLossBackward0>)
tensor(2.3726, grad_fn=<NllLossBackward0>)
tensor(2.6297, grad_fn=<NllLossBackward0>)
tensor(2.7126, grad_fn=<NllLossBackward0>)
tensor(3.1349, grad_fn=<NllLossBackward0>)
tensor(2.7782, grad_fn=<NllLossBackward0>)
tensor(4.1317, grad_fn=<NllLossBackward0>)
tensor(2.2505, grad_fn=<NllLossBackward0>)
tensor(2.3993, grad_fn=<NllLossBackward0>)
tensor(2.4191, grad_fn=<NllLossBackward0>)
tensor(1.7212, grad_fn=<NllLossBackward0>)
tensor(3.0614, grad_fn=<NllLossBackward0>)
tensor(2.5747, grad_fn=<NllLossBackward0>)
tensor(2.1956, grad_fn=<NllLossBackward0>)
tensor(2.4253, grad_fn=<NllLossBackward0>)
tensor(2.8775, grad_fn=<NllLossBackward0>)
tensor(2.8076, grad_fn=<NllLossBackward0>)
tensor(2.6639, grad_fn=<NllLossBackward0>)
tensor(2.6437, grad_fn=<NllLossBackward0>)
tensor(3.66

tensor(2.6015, grad_fn=<NllLossBackward0>)
tensor(2.4397, grad_fn=<NllLossBackward0>)
tensor(2.8490, grad_fn=<NllLossBackward0>)
tensor(2.3979, grad_fn=<NllLossBackward0>)
tensor(3.5220, grad_fn=<NllLossBackward0>)
tensor(2.9286, grad_fn=<NllLossBackward0>)
tensor(4.0472, grad_fn=<NllLossBackward0>)
tensor(2.8074, grad_fn=<NllLossBackward0>)
tensor(2.2076, grad_fn=<NllLossBackward0>)
tensor(2.4556, grad_fn=<NllLossBackward0>)
tensor(1.9964, grad_fn=<NllLossBackward0>)
tensor(2.4681, grad_fn=<NllLossBackward0>)
tensor(2.9819, grad_fn=<NllLossBackward0>)
tensor(2.9170, grad_fn=<NllLossBackward0>)
tensor(2.2794, grad_fn=<NllLossBackward0>)
tensor(2.9069, grad_fn=<NllLossBackward0>)
tensor(3.8573, grad_fn=<NllLossBackward0>)
tensor(2.3569, grad_fn=<NllLossBackward0>)
tensor(2.1967, grad_fn=<NllLossBackward0>)
tensor(2.4510, grad_fn=<NllLossBackward0>)
tensor(2.2103, grad_fn=<NllLossBackward0>)
tensor(1.9884, grad_fn=<NllLossBackward0>)
tensor(3.4382, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(2.3908, grad_fn=<NllLossBackward0>)
tensor(3.8628, grad_fn=<NllLossBackward0>)
tensor(2.9246, grad_fn=<NllLossBackward0>)
tensor(2.0390, grad_fn=<NllLossBackward0>)
tensor(3.6278, grad_fn=<NllLossBackward0>)
tensor(2.6852, grad_fn=<NllLossBackward0>)
tensor(2.0414, grad_fn=<NllLossBackward0>)
tensor(3.7350, grad_fn=<NllLossBackward0>)
tensor(2.7573, grad_fn=<NllLossBackward0>)
tensor(2.5978, grad_fn=<NllLossBackward0>)
tensor(3.2421, grad_fn=<NllLossBackward0>)
tensor(2.1596, grad_fn=<NllLossBackward0>)
tensor(2.6353, grad_fn=<NllLossBackward0>)
tensor(1.9966, grad_fn=<NllLossBackward0>)
tensor(2.6375, grad_fn=<NllLossBackward0>)
tensor(2.4020, grad_fn=<NllLossBackward0>)
tensor(2.0017, grad_fn=<NllLossBackward0>)
tensor(2.3155, grad_fn=<NllLossBackward0>)
tensor(2.3761, grad_fn=<NllLossBackward0>)
tensor(3.1555, grad_fn=<NllLossBackward0>)
tensor(3.7327, grad_fn=<NllLossBackward0>)
tensor(1.9838, grad_fn=<NllLossBackward0>)
tensor(2.4896, grad_fn=<NllLossBackward0>)
tensor(3.07

tensor(2.2904, grad_fn=<NllLossBackward0>)
tensor(2.4876, grad_fn=<NllLossBackward0>)
tensor(1.9675, grad_fn=<NllLossBackward0>)
tensor(2.6806, grad_fn=<NllLossBackward0>)
tensor(3.0556, grad_fn=<NllLossBackward0>)
tensor(3.6649, grad_fn=<NllLossBackward0>)
tensor(3.2129, grad_fn=<NllLossBackward0>)
tensor(4.1318, grad_fn=<NllLossBackward0>)
tensor(2.5657, grad_fn=<NllLossBackward0>)
tensor(2.7354, grad_fn=<NllLossBackward0>)
tensor(2.0122, grad_fn=<NllLossBackward0>)
tensor(2.2436, grad_fn=<NllLossBackward0>)
tensor(2.6802, grad_fn=<NllLossBackward0>)
tensor(2.6800, grad_fn=<NllLossBackward0>)
tensor(2.9128, grad_fn=<NllLossBackward0>)
tensor(2.2169, grad_fn=<NllLossBackward0>)
tensor(2.7464, grad_fn=<NllLossBackward0>)
tensor(3.2339, grad_fn=<NllLossBackward0>)
tensor(3.0076, grad_fn=<NllLossBackward0>)
tensor(3.3031, grad_fn=<NllLossBackward0>)
tensor(2.7117, grad_fn=<NllLossBackward0>)
tensor(2.3934, grad_fn=<NllLossBackward0>)
tensor(2.4504, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(3.2012, grad_fn=<NllLossBackward0>)
tensor(2.9181, grad_fn=<NllLossBackward0>)
tensor(2.7617, grad_fn=<NllLossBackward0>)
tensor(3.3596, grad_fn=<NllLossBackward0>)
tensor(1.9680, grad_fn=<NllLossBackward0>)
tensor(4.2304, grad_fn=<NllLossBackward0>)
tensor(2.6666, grad_fn=<NllLossBackward0>)
tensor(2.7368, grad_fn=<NllLossBackward0>)
tensor(3.8944, grad_fn=<NllLossBackward0>)
tensor(2.9545, grad_fn=<NllLossBackward0>)
tensor(2.1743, grad_fn=<NllLossBackward0>)
tensor(2.4719, grad_fn=<NllLossBackward0>)
tensor(3.6748, grad_fn=<NllLossBackward0>)
tensor(2.1800, grad_fn=<NllLossBackward0>)
tensor(2.2939, grad_fn=<NllLossBackward0>)
tensor(2.8222, grad_fn=<NllLossBackward0>)
tensor(4.1650, grad_fn=<NllLossBackward0>)
tensor(2.2351, grad_fn=<NllLossBackward0>)
tensor(2.6803, grad_fn=<NllLossBackward0>)
tensor(2.7004, grad_fn=<NllLossBackward0>)
tensor(3.4792, grad_fn=<NllLossBackward0>)
tensor(2.2658, grad_fn=<NllLossBackward0>)
tensor(3.6329, grad_fn=<NllLossBackward0>)
tensor(3.29

tensor(3.6238, grad_fn=<NllLossBackward0>)
tensor(2.3398, grad_fn=<NllLossBackward0>)
tensor(3.2226, grad_fn=<NllLossBackward0>)
tensor(3.0691, grad_fn=<NllLossBackward0>)
tensor(2.0297, grad_fn=<NllLossBackward0>)
tensor(4.2987, grad_fn=<NllLossBackward0>)
tensor(2.6223, grad_fn=<NllLossBackward0>)
tensor(3.4117, grad_fn=<NllLossBackward0>)
tensor(3.2803, grad_fn=<NllLossBackward0>)
tensor(2.9887, grad_fn=<NllLossBackward0>)
tensor(3.8145, grad_fn=<NllLossBackward0>)
tensor(2.3132, grad_fn=<NllLossBackward0>)
tensor(2.3019, grad_fn=<NllLossBackward0>)
tensor(2.1504, grad_fn=<NllLossBackward0>)
tensor(2.7352, grad_fn=<NllLossBackward0>)
tensor(2.4241, grad_fn=<NllLossBackward0>)
tensor(3.5795, grad_fn=<NllLossBackward0>)
tensor(3.4649, grad_fn=<NllLossBackward0>)
tensor(3.5498, grad_fn=<NllLossBackward0>)
tensor(3.3769, grad_fn=<NllLossBackward0>)
tensor(3.4204, grad_fn=<NllLossBackward0>)
tensor(2.2433, grad_fn=<NllLossBackward0>)
tensor(2.4961, grad_fn=<NllLossBackward0>)
tensor(4.07

tensor(2.6060, grad_fn=<NllLossBackward0>)
tensor(4.0979, grad_fn=<NllLossBackward0>)
tensor(1.7018, grad_fn=<NllLossBackward0>)
tensor(2.4846, grad_fn=<NllLossBackward0>)
tensor(3.5478, grad_fn=<NllLossBackward0>)
tensor(2.8495, grad_fn=<NllLossBackward0>)
tensor(4.6808, grad_fn=<NllLossBackward0>)
tensor(2.3385, grad_fn=<NllLossBackward0>)
tensor(2.6880, grad_fn=<NllLossBackward0>)
tensor(3.5381, grad_fn=<NllLossBackward0>)
tensor(2.6871, grad_fn=<NllLossBackward0>)
tensor(1.9623, grad_fn=<NllLossBackward0>)
tensor(2.3703, grad_fn=<NllLossBackward0>)
tensor(2.3282, grad_fn=<NllLossBackward0>)
tensor(1.9568, grad_fn=<NllLossBackward0>)
tensor(2.9190, grad_fn=<NllLossBackward0>)
tensor(2.9170, grad_fn=<NllLossBackward0>)
tensor(2.8243, grad_fn=<NllLossBackward0>)
tensor(3.4971, grad_fn=<NllLossBackward0>)
tensor(1.9686, grad_fn=<NllLossBackward0>)
tensor(1.6938, grad_fn=<NllLossBackward0>)
tensor(3.2113, grad_fn=<NllLossBackward0>)
tensor(1.6901, grad_fn=<NllLossBackward0>)
tensor(2.82

tensor(3.1939, grad_fn=<NllLossBackward0>)
tensor(3.0511, grad_fn=<NllLossBackward0>)
tensor(2.5034, grad_fn=<NllLossBackward0>)
tensor(2.6351, grad_fn=<NllLossBackward0>)
tensor(1.7143, grad_fn=<NllLossBackward0>)
tensor(3.3931, grad_fn=<NllLossBackward0>)
tensor(2.3288, grad_fn=<NllLossBackward0>)
tensor(2.6611, grad_fn=<NllLossBackward0>)
tensor(2.4017, grad_fn=<NllLossBackward0>)
tensor(2.8183, grad_fn=<NllLossBackward0>)
tensor(2.6722, grad_fn=<NllLossBackward0>)
tensor(3.5423, grad_fn=<NllLossBackward0>)
tensor(3.5963, grad_fn=<NllLossBackward0>)
tensor(3.1086, grad_fn=<NllLossBackward0>)
tensor(2.8319, grad_fn=<NllLossBackward0>)
tensor(3.3684, grad_fn=<NllLossBackward0>)
tensor(2.6598, grad_fn=<NllLossBackward0>)
tensor(3.1544, grad_fn=<NllLossBackward0>)
tensor(3.4719, grad_fn=<NllLossBackward0>)
tensor(2.8480, grad_fn=<NllLossBackward0>)
tensor(2.1826, grad_fn=<NllLossBackward0>)
tensor(2.3965, grad_fn=<NllLossBackward0>)
tensor(3.9225, grad_fn=<NllLossBackward0>)
tensor(2.75

tensor(3.1592, grad_fn=<NllLossBackward0>)
tensor(2.3762, grad_fn=<NllLossBackward0>)
tensor(2.4141, grad_fn=<NllLossBackward0>)
tensor(1.7089, grad_fn=<NllLossBackward0>)
tensor(2.5790, grad_fn=<NllLossBackward0>)
tensor(3.1224, grad_fn=<NllLossBackward0>)
tensor(3.5763, grad_fn=<NllLossBackward0>)
tensor(1.9585, grad_fn=<NllLossBackward0>)
tensor(2.3957, grad_fn=<NllLossBackward0>)
tensor(3.5555, grad_fn=<NllLossBackward0>)
tensor(3.0357, grad_fn=<NllLossBackward0>)
tensor(3.0069, grad_fn=<NllLossBackward0>)
tensor(3.0731, grad_fn=<NllLossBackward0>)
tensor(3.6729, grad_fn=<NllLossBackward0>)
tensor(3.0934, grad_fn=<NllLossBackward0>)
tensor(3.5924, grad_fn=<NllLossBackward0>)
tensor(2.8947, grad_fn=<NllLossBackward0>)
tensor(2.3937, grad_fn=<NllLossBackward0>)
tensor(2.9829, grad_fn=<NllLossBackward0>)
tensor(3.7518, grad_fn=<NllLossBackward0>)
tensor(2.2262, grad_fn=<NllLossBackward0>)
tensor(2.2056, grad_fn=<NllLossBackward0>)
tensor(2.3964, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(2.7547, grad_fn=<NllLossBackward0>)
tensor(2.3037, grad_fn=<NllLossBackward0>)
tensor(2.6676, grad_fn=<NllLossBackward0>)
tensor(3.5495, grad_fn=<NllLossBackward0>)
tensor(3.7858, grad_fn=<NllLossBackward0>)
tensor(2.4732, grad_fn=<NllLossBackward0>)
tensor(3.6108, grad_fn=<NllLossBackward0>)
tensor(3.2304, grad_fn=<NllLossBackward0>)
tensor(2.9066, grad_fn=<NllLossBackward0>)
tensor(2.5162, grad_fn=<NllLossBackward0>)
tensor(2.6002, grad_fn=<NllLossBackward0>)
tensor(2.4374, grad_fn=<NllLossBackward0>)
tensor(2.9545, grad_fn=<NllLossBackward0>)
tensor(2.5795, grad_fn=<NllLossBackward0>)
tensor(3.1123, grad_fn=<NllLossBackward0>)
tensor(3.4217, grad_fn=<NllLossBackward0>)
tensor(2.2403, grad_fn=<NllLossBackward0>)
tensor(3.6020, grad_fn=<NllLossBackward0>)
tensor(3.1057, grad_fn=<NllLossBackward0>)
tensor(2.6013, grad_fn=<NllLossBackward0>)
tensor(3.1265, grad_fn=<NllLossBackward0>)
tensor(2.7243, grad_fn=<NllLossBackward0>)
tensor(3.5371, grad_fn=<NllLossBackward0>)
tensor(4.00

tensor(3.4482, grad_fn=<NllLossBackward0>)
tensor(3.5100, grad_fn=<NllLossBackward0>)
tensor(3.4200, grad_fn=<NllLossBackward0>)
tensor(2.4311, grad_fn=<NllLossBackward0>)
tensor(2.2075, grad_fn=<NllLossBackward0>)
tensor(3.2164, grad_fn=<NllLossBackward0>)
tensor(3.0418, grad_fn=<NllLossBackward0>)
tensor(2.4369, grad_fn=<NllLossBackward0>)
tensor(2.3304, grad_fn=<NllLossBackward0>)
tensor(3.0816, grad_fn=<NllLossBackward0>)
tensor(3.3563, grad_fn=<NllLossBackward0>)
tensor(3.5938, grad_fn=<NllLossBackward0>)
tensor(3.1481, grad_fn=<NllLossBackward0>)
tensor(3.1549, grad_fn=<NllLossBackward0>)
tensor(3.0031, grad_fn=<NllLossBackward0>)
tensor(2.9001, grad_fn=<NllLossBackward0>)
tensor(4.0600, grad_fn=<NllLossBackward0>)
tensor(2.8858, grad_fn=<NllLossBackward0>)
tensor(3.4041, grad_fn=<NllLossBackward0>)
tensor(1.9650, grad_fn=<NllLossBackward0>)
tensor(4.0626, grad_fn=<NllLossBackward0>)
tensor(3.9209, grad_fn=<NllLossBackward0>)
tensor(3.0942, grad_fn=<NllLossBackward0>)
tensor(3.40

tensor(2.5275, grad_fn=<NllLossBackward0>)
tensor(3.1236, grad_fn=<NllLossBackward0>)
tensor(2.3306, grad_fn=<NllLossBackward0>)
tensor(2.6024, grad_fn=<NllLossBackward0>)
tensor(4.0850, grad_fn=<NllLossBackward0>)
tensor(2.7891, grad_fn=<NllLossBackward0>)
tensor(2.6503, grad_fn=<NllLossBackward0>)
tensor(3.3899, grad_fn=<NllLossBackward0>)
tensor(2.3367, grad_fn=<NllLossBackward0>)
tensor(2.6820, grad_fn=<NllLossBackward0>)
tensor(2.4306, grad_fn=<NllLossBackward0>)
tensor(2.0840, grad_fn=<NllLossBackward0>)
tensor(2.3080, grad_fn=<NllLossBackward0>)
tensor(2.5661, grad_fn=<NllLossBackward0>)
tensor(3.3622, grad_fn=<NllLossBackward0>)
tensor(2.3410, grad_fn=<NllLossBackward0>)
tensor(2.5111, grad_fn=<NllLossBackward0>)
tensor(4.0053, grad_fn=<NllLossBackward0>)
tensor(3.1833, grad_fn=<NllLossBackward0>)
tensor(2.7509, grad_fn=<NllLossBackward0>)
tensor(2.6570, grad_fn=<NllLossBackward0>)
tensor(3.0450, grad_fn=<NllLossBackward0>)
tensor(2.7742, grad_fn=<NllLossBackward0>)
tensor(2.32

tensor(3.7065, grad_fn=<NllLossBackward0>)
tensor(2.4608, grad_fn=<NllLossBackward0>)
tensor(3.1367, grad_fn=<NllLossBackward0>)
tensor(3.0905, grad_fn=<NllLossBackward0>)
tensor(2.1925, grad_fn=<NllLossBackward0>)
tensor(2.8331, grad_fn=<NllLossBackward0>)
tensor(2.2202, grad_fn=<NllLossBackward0>)
tensor(2.4637, grad_fn=<NllLossBackward0>)
tensor(2.4230, grad_fn=<NllLossBackward0>)
tensor(2.3289, grad_fn=<NllLossBackward0>)
tensor(2.4865, grad_fn=<NllLossBackward0>)
tensor(3.0869, grad_fn=<NllLossBackward0>)
tensor(2.6740, grad_fn=<NllLossBackward0>)
tensor(2.9520, grad_fn=<NllLossBackward0>)
tensor(3.1471, grad_fn=<NllLossBackward0>)
tensor(2.4295, grad_fn=<NllLossBackward0>)
tensor(2.4903, grad_fn=<NllLossBackward0>)
tensor(2.6960, grad_fn=<NllLossBackward0>)
tensor(2.5041, grad_fn=<NllLossBackward0>)
tensor(4.6347, grad_fn=<NllLossBackward0>)
tensor(2.3524, grad_fn=<NllLossBackward0>)
tensor(3.8929, grad_fn=<NllLossBackward0>)
tensor(3.1461, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(2.9716, grad_fn=<NllLossBackward0>)
tensor(2.9928, grad_fn=<NllLossBackward0>)
tensor(2.0673, grad_fn=<NllLossBackward0>)
tensor(3.3714, grad_fn=<NllLossBackward0>)
tensor(2.6909, grad_fn=<NllLossBackward0>)
tensor(2.4190, grad_fn=<NllLossBackward0>)
tensor(2.3897, grad_fn=<NllLossBackward0>)
tensor(1.9852, grad_fn=<NllLossBackward0>)
tensor(2.6984, grad_fn=<NllLossBackward0>)
tensor(5.1329, grad_fn=<NllLossBackward0>)
tensor(2.4329, grad_fn=<NllLossBackward0>)
tensor(2.4008, grad_fn=<NllLossBackward0>)
tensor(1.9850, grad_fn=<NllLossBackward0>)
tensor(2.9301, grad_fn=<NllLossBackward0>)
tensor(2.3817, grad_fn=<NllLossBackward0>)
tensor(2.8576, grad_fn=<NllLossBackward0>)
tensor(2.9453, grad_fn=<NllLossBackward0>)
tensor(2.4354, grad_fn=<NllLossBackward0>)
tensor(2.2519, grad_fn=<NllLossBackward0>)
tensor(3.1313, grad_fn=<NllLossBackward0>)
tensor(2.1627, grad_fn=<NllLossBackward0>)
tensor(2.4902, grad_fn=<NllLossBackward0>)
tensor(3.6212, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(3.0943, grad_fn=<NllLossBackward0>)
tensor(3.1217, grad_fn=<NllLossBackward0>)
tensor(2.1836, grad_fn=<NllLossBackward0>)
tensor(3.0419, grad_fn=<NllLossBackward0>)
tensor(2.4588, grad_fn=<NllLossBackward0>)
tensor(3.5985, grad_fn=<NllLossBackward0>)
tensor(2.5766, grad_fn=<NllLossBackward0>)
tensor(3.0433, grad_fn=<NllLossBackward0>)
tensor(2.8522, grad_fn=<NllLossBackward0>)
tensor(2.7657, grad_fn=<NllLossBackward0>)
tensor(2.3845, grad_fn=<NllLossBackward0>)
tensor(2.6677, grad_fn=<NllLossBackward0>)
tensor(2.7543, grad_fn=<NllLossBackward0>)
tensor(2.5595, grad_fn=<NllLossBackward0>)
tensor(3.6591, grad_fn=<NllLossBackward0>)
tensor(2.2828, grad_fn=<NllLossBackward0>)
tensor(3.6474, grad_fn=<NllLossBackward0>)
tensor(3.0022, grad_fn=<NllLossBackward0>)
tensor(3.9388, grad_fn=<NllLossBackward0>)
tensor(2.7314, grad_fn=<NllLossBackward0>)
tensor(2.7530, grad_fn=<NllLossBackward0>)
tensor(3.1052, grad_fn=<NllLossBackward0>)
tensor(4.1924, grad_fn=<NllLossBackward0>)
tensor(3.87

tensor(2.9287, grad_fn=<NllLossBackward0>)
tensor(3.2522, grad_fn=<NllLossBackward0>)
tensor(3.3910, grad_fn=<NllLossBackward0>)
tensor(3.3378, grad_fn=<NllLossBackward0>)
tensor(2.7512, grad_fn=<NllLossBackward0>)
tensor(3.6664, grad_fn=<NllLossBackward0>)
tensor(2.2345, grad_fn=<NllLossBackward0>)
tensor(2.3122, grad_fn=<NllLossBackward0>)
tensor(2.6629, grad_fn=<NllLossBackward0>)
tensor(3.3061, grad_fn=<NllLossBackward0>)
tensor(1.9761, grad_fn=<NllLossBackward0>)
tensor(3.4921, grad_fn=<NllLossBackward0>)
tensor(2.9241, grad_fn=<NllLossBackward0>)
tensor(2.8488, grad_fn=<NllLossBackward0>)
tensor(3.2207, grad_fn=<NllLossBackward0>)
tensor(2.0810, grad_fn=<NllLossBackward0>)
tensor(2.8916, grad_fn=<NllLossBackward0>)
tensor(2.4264, grad_fn=<NllLossBackward0>)
tensor(2.1687, grad_fn=<NllLossBackward0>)
tensor(2.9281, grad_fn=<NllLossBackward0>)
tensor(2.4863, grad_fn=<NllLossBackward0>)
tensor(3.4873, grad_fn=<NllLossBackward0>)
tensor(3.2911, grad_fn=<NllLossBackward0>)
tensor(3.71

tensor(3.6064, grad_fn=<NllLossBackward0>)
tensor(2.7431, grad_fn=<NllLossBackward0>)
tensor(2.9433, grad_fn=<NllLossBackward0>)
tensor(3.2737, grad_fn=<NllLossBackward0>)
tensor(2.8633, grad_fn=<NllLossBackward0>)
tensor(2.5017, grad_fn=<NllLossBackward0>)
tensor(3.8127, grad_fn=<NllLossBackward0>)
tensor(3.0081, grad_fn=<NllLossBackward0>)
tensor(3.3124, grad_fn=<NllLossBackward0>)
tensor(3.7619, grad_fn=<NllLossBackward0>)
tensor(1.9721, grad_fn=<NllLossBackward0>)
tensor(3.0118, grad_fn=<NllLossBackward0>)
tensor(2.0698, grad_fn=<NllLossBackward0>)
tensor(2.8163, grad_fn=<NllLossBackward0>)
tensor(4.1246, grad_fn=<NllLossBackward0>)
tensor(3.4479, grad_fn=<NllLossBackward0>)
tensor(2.8626, grad_fn=<NllLossBackward0>)
tensor(4.0192, grad_fn=<NllLossBackward0>)
tensor(2.8154, grad_fn=<NllLossBackward0>)
tensor(2.5108, grad_fn=<NllLossBackward0>)
tensor(2.2577, grad_fn=<NllLossBackward0>)
tensor(2.2854, grad_fn=<NllLossBackward0>)
tensor(2.2802, grad_fn=<NllLossBackward0>)
tensor(3.18

tensor(2.4183, grad_fn=<NllLossBackward0>)
tensor(4.0952, grad_fn=<NllLossBackward0>)
tensor(2.3613, grad_fn=<NllLossBackward0>)
tensor(2.2850, grad_fn=<NllLossBackward0>)
tensor(3.0332, grad_fn=<NllLossBackward0>)
tensor(2.7380, grad_fn=<NllLossBackward0>)
tensor(2.3599, grad_fn=<NllLossBackward0>)
tensor(2.2053, grad_fn=<NllLossBackward0>)
tensor(3.0820, grad_fn=<NllLossBackward0>)
tensor(2.6560, grad_fn=<NllLossBackward0>)
tensor(3.1856, grad_fn=<NllLossBackward0>)
tensor(2.5199, grad_fn=<NllLossBackward0>)
tensor(3.6575, grad_fn=<NllLossBackward0>)
tensor(3.3732, grad_fn=<NllLossBackward0>)
tensor(3.0285, grad_fn=<NllLossBackward0>)
tensor(2.0610, grad_fn=<NllLossBackward0>)
tensor(2.3389, grad_fn=<NllLossBackward0>)
tensor(2.3230, grad_fn=<NllLossBackward0>)
tensor(2.1869, grad_fn=<NllLossBackward0>)
tensor(1.9822, grad_fn=<NllLossBackward0>)
tensor(2.9928, grad_fn=<NllLossBackward0>)
tensor(4.6180, grad_fn=<NllLossBackward0>)
tensor(2.3267, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(3.5029, grad_fn=<NllLossBackward0>)
tensor(3.7629, grad_fn=<NllLossBackward0>)
tensor(2.8545, grad_fn=<NllLossBackward0>)
tensor(3.5373, grad_fn=<NllLossBackward0>)
tensor(3.1443, grad_fn=<NllLossBackward0>)
tensor(3.4276, grad_fn=<NllLossBackward0>)
tensor(2.1725, grad_fn=<NllLossBackward0>)
tensor(2.4032, grad_fn=<NllLossBackward0>)
tensor(3.6732, grad_fn=<NllLossBackward0>)
tensor(4.1420, grad_fn=<NllLossBackward0>)
tensor(2.1949, grad_fn=<NllLossBackward0>)
tensor(2.7802, grad_fn=<NllLossBackward0>)
tensor(2.4958, grad_fn=<NllLossBackward0>)
tensor(2.6274, grad_fn=<NllLossBackward0>)
tensor(3.5261, grad_fn=<NllLossBackward0>)
tensor(2.0872, grad_fn=<NllLossBackward0>)
tensor(2.9245, grad_fn=<NllLossBackward0>)
tensor(3.1348, grad_fn=<NllLossBackward0>)
tensor(2.7263, grad_fn=<NllLossBackward0>)
tensor(2.9879, grad_fn=<NllLossBackward0>)
tensor(3.9819, grad_fn=<NllLossBackward0>)
tensor(2.9992, grad_fn=<NllLossBackward0>)
tensor(2.4171, grad_fn=<NllLossBackward0>)
tensor(3.13

tensor(3.6658, grad_fn=<NllLossBackward0>)
tensor(2.3900, grad_fn=<NllLossBackward0>)
tensor(1.9804, grad_fn=<NllLossBackward0>)
tensor(3.2761, grad_fn=<NllLossBackward0>)
tensor(3.0090, grad_fn=<NllLossBackward0>)
tensor(2.8770, grad_fn=<NllLossBackward0>)
tensor(3.2745, grad_fn=<NllLossBackward0>)
tensor(1.7545, grad_fn=<NllLossBackward0>)
tensor(3.0176, grad_fn=<NllLossBackward0>)
tensor(2.7565, grad_fn=<NllLossBackward0>)
tensor(3.7397, grad_fn=<NllLossBackward0>)
tensor(3.7652, grad_fn=<NllLossBackward0>)
tensor(3.1341, grad_fn=<NllLossBackward0>)
tensor(2.8717, grad_fn=<NllLossBackward0>)
tensor(3.0272, grad_fn=<NllLossBackward0>)
tensor(3.4545, grad_fn=<NllLossBackward0>)
tensor(3.0206, grad_fn=<NllLossBackward0>)
tensor(2.6098, grad_fn=<NllLossBackward0>)
tensor(3.6387, grad_fn=<NllLossBackward0>)
tensor(2.4158, grad_fn=<NllLossBackward0>)
tensor(2.0541, grad_fn=<NllLossBackward0>)
tensor(2.8955, grad_fn=<NllLossBackward0>)
tensor(3.5080, grad_fn=<NllLossBackward0>)
tensor(1.75

tensor(2.6423, grad_fn=<NllLossBackward0>)
tensor(3.1384, grad_fn=<NllLossBackward0>)
tensor(3.1063, grad_fn=<NllLossBackward0>)
tensor(3.1995, grad_fn=<NllLossBackward0>)
tensor(3.5187, grad_fn=<NllLossBackward0>)
tensor(2.3031, grad_fn=<NllLossBackward0>)
tensor(2.9307, grad_fn=<NllLossBackward0>)
tensor(2.1989, grad_fn=<NllLossBackward0>)
tensor(3.6856, grad_fn=<NllLossBackward0>)
tensor(2.4202, grad_fn=<NllLossBackward0>)
tensor(2.3477, grad_fn=<NllLossBackward0>)
tensor(2.7356, grad_fn=<NllLossBackward0>)
tensor(2.9269, grad_fn=<NllLossBackward0>)
tensor(2.0909, grad_fn=<NllLossBackward0>)
tensor(3.8798, grad_fn=<NllLossBackward0>)
tensor(3.5163, grad_fn=<NllLossBackward0>)
tensor(2.0727, grad_fn=<NllLossBackward0>)
tensor(3.1368, grad_fn=<NllLossBackward0>)
tensor(3.0707, grad_fn=<NllLossBackward0>)
tensor(3.1549, grad_fn=<NllLossBackward0>)
tensor(2.1581, grad_fn=<NllLossBackward0>)
tensor(2.5571, grad_fn=<NllLossBackward0>)
tensor(3.1314, grad_fn=<NllLossBackward0>)
tensor(3.14

tensor(2.9277, grad_fn=<NllLossBackward0>)
tensor(3.0218, grad_fn=<NllLossBackward0>)
tensor(1.9441, grad_fn=<NllLossBackward0>)
tensor(3.0113, grad_fn=<NllLossBackward0>)
tensor(2.5796, grad_fn=<NllLossBackward0>)
tensor(2.6391, grad_fn=<NllLossBackward0>)
tensor(1.7180, grad_fn=<NllLossBackward0>)
tensor(2.3215, grad_fn=<NllLossBackward0>)
tensor(2.0687, grad_fn=<NllLossBackward0>)
tensor(2.9845, grad_fn=<NllLossBackward0>)
tensor(2.7722, grad_fn=<NllLossBackward0>)
tensor(2.6641, grad_fn=<NllLossBackward0>)
tensor(2.3031, grad_fn=<NllLossBackward0>)
tensor(3.7948, grad_fn=<NllLossBackward0>)
tensor(2.0626, grad_fn=<NllLossBackward0>)
tensor(2.8239, grad_fn=<NllLossBackward0>)
tensor(3.2197, grad_fn=<NllLossBackward0>)
tensor(2.2608, grad_fn=<NllLossBackward0>)
tensor(2.5577, grad_fn=<NllLossBackward0>)
tensor(2.3058, grad_fn=<NllLossBackward0>)
tensor(3.4993, grad_fn=<NllLossBackward0>)
tensor(4.5748, grad_fn=<NllLossBackward0>)
tensor(2.3600, grad_fn=<NllLossBackward0>)
tensor(2.68

tensor(2.9149, grad_fn=<NllLossBackward0>)
tensor(3.6521, grad_fn=<NllLossBackward0>)
tensor(1.9570, grad_fn=<NllLossBackward0>)
tensor(2.9076, grad_fn=<NllLossBackward0>)
tensor(2.4012, grad_fn=<NllLossBackward0>)
tensor(3.0769, grad_fn=<NllLossBackward0>)
tensor(3.0380, grad_fn=<NllLossBackward0>)
tensor(3.5121, grad_fn=<NllLossBackward0>)
tensor(3.0245, grad_fn=<NllLossBackward0>)
tensor(2.5717, grad_fn=<NllLossBackward0>)
tensor(2.9260, grad_fn=<NllLossBackward0>)
tensor(2.2078, grad_fn=<NllLossBackward0>)
tensor(3.1261, grad_fn=<NllLossBackward0>)
tensor(2.3481, grad_fn=<NllLossBackward0>)
tensor(3.5002, grad_fn=<NllLossBackward0>)
tensor(2.9466, grad_fn=<NllLossBackward0>)
tensor(2.3768, grad_fn=<NllLossBackward0>)
tensor(2.1784, grad_fn=<NllLossBackward0>)
tensor(2.0574, grad_fn=<NllLossBackward0>)
tensor(3.2914, grad_fn=<NllLossBackward0>)
tensor(1.7228, grad_fn=<NllLossBackward0>)
tensor(3.1010, grad_fn=<NllLossBackward0>)
tensor(4.2279, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(2.1940, grad_fn=<NllLossBackward0>)
tensor(3.6398, grad_fn=<NllLossBackward0>)
tensor(2.2332, grad_fn=<NllLossBackward0>)
tensor(3.0061, grad_fn=<NllLossBackward0>)
tensor(2.8117, grad_fn=<NllLossBackward0>)
tensor(3.3680, grad_fn=<NllLossBackward0>)
tensor(2.9238, grad_fn=<NllLossBackward0>)
tensor(3.7485, grad_fn=<NllLossBackward0>)
tensor(2.1673, grad_fn=<NllLossBackward0>)
tensor(2.0605, grad_fn=<NllLossBackward0>)
tensor(2.7236, grad_fn=<NllLossBackward0>)
tensor(2.9332, grad_fn=<NllLossBackward0>)
tensor(2.8138, grad_fn=<NllLossBackward0>)
tensor(2.4088, grad_fn=<NllLossBackward0>)
tensor(2.7227, grad_fn=<NllLossBackward0>)
tensor(2.4265, grad_fn=<NllLossBackward0>)
tensor(2.8424, grad_fn=<NllLossBackward0>)
tensor(2.0540, grad_fn=<NllLossBackward0>)
tensor(3.9699, grad_fn=<NllLossBackward0>)
tensor(3.5001, grad_fn=<NllLossBackward0>)
tensor(2.7758, grad_fn=<NllLossBackward0>)
tensor(1.7298, grad_fn=<NllLossBackward0>)
tensor(2.4734, grad_fn=<NllLossBackward0>)
tensor(2.18

tensor(3.2284, grad_fn=<NllLossBackward0>)
tensor(3.2159, grad_fn=<NllLossBackward0>)
tensor(2.9525, grad_fn=<NllLossBackward0>)
tensor(1.9744, grad_fn=<NllLossBackward0>)
tensor(2.1389, grad_fn=<NllLossBackward0>)
tensor(2.3944, grad_fn=<NllLossBackward0>)
tensor(2.6967, grad_fn=<NllLossBackward0>)
tensor(2.6563, grad_fn=<NllLossBackward0>)
tensor(3.3785, grad_fn=<NllLossBackward0>)
tensor(2.9312, grad_fn=<NllLossBackward0>)
tensor(2.2579, grad_fn=<NllLossBackward0>)
tensor(3.4661, grad_fn=<NllLossBackward0>)
tensor(1.9698, grad_fn=<NllLossBackward0>)
tensor(2.9894, grad_fn=<NllLossBackward0>)
tensor(2.5741, grad_fn=<NllLossBackward0>)
tensor(3.0766, grad_fn=<NllLossBackward0>)
tensor(3.5938, grad_fn=<NllLossBackward0>)
tensor(3.5578, grad_fn=<NllLossBackward0>)
tensor(3.6515, grad_fn=<NllLossBackward0>)
tensor(2.4031, grad_fn=<NllLossBackward0>)
tensor(2.3099, grad_fn=<NllLossBackward0>)
tensor(2.1947, grad_fn=<NllLossBackward0>)
tensor(2.6843, grad_fn=<NllLossBackward0>)
tensor(4.09

tensor(1.9973, grad_fn=<NllLossBackward0>)
tensor(2.4200, grad_fn=<NllLossBackward0>)
tensor(2.9221, grad_fn=<NllLossBackward0>)
tensor(2.7734, grad_fn=<NllLossBackward0>)
tensor(2.4143, grad_fn=<NllLossBackward0>)
tensor(2.4148, grad_fn=<NllLossBackward0>)
tensor(4.0228, grad_fn=<NllLossBackward0>)
tensor(2.6808, grad_fn=<NllLossBackward0>)
tensor(2.6282, grad_fn=<NllLossBackward0>)
tensor(2.5162, grad_fn=<NllLossBackward0>)
tensor(3.2194, grad_fn=<NllLossBackward0>)
tensor(3.0053, grad_fn=<NllLossBackward0>)
tensor(2.0040, grad_fn=<NllLossBackward0>)
tensor(1.9923, grad_fn=<NllLossBackward0>)
tensor(2.3645, grad_fn=<NllLossBackward0>)
tensor(3.7321, grad_fn=<NllLossBackward0>)
tensor(2.3749, grad_fn=<NllLossBackward0>)
tensor(3.9564, grad_fn=<NllLossBackward0>)
tensor(3.5517, grad_fn=<NllLossBackward0>)
tensor(2.4338, grad_fn=<NllLossBackward0>)
tensor(2.0428, grad_fn=<NllLossBackward0>)
tensor(3.2795, grad_fn=<NllLossBackward0>)
tensor(2.3681, grad_fn=<NllLossBackward0>)
tensor(3.21

tensor(3.0175, grad_fn=<NllLossBackward0>)
tensor(2.9116, grad_fn=<NllLossBackward0>)
tensor(3.5018, grad_fn=<NllLossBackward0>)
tensor(3.2411, grad_fn=<NllLossBackward0>)
tensor(3.0837, grad_fn=<NllLossBackward0>)
tensor(3.6363, grad_fn=<NllLossBackward0>)
tensor(2.8526, grad_fn=<NllLossBackward0>)
tensor(2.5517, grad_fn=<NllLossBackward0>)
tensor(4.5804, grad_fn=<NllLossBackward0>)
tensor(2.5757, grad_fn=<NllLossBackward0>)
tensor(3.3316, grad_fn=<NllLossBackward0>)
tensor(4.1072, grad_fn=<NllLossBackward0>)
tensor(3.4920, grad_fn=<NllLossBackward0>)
tensor(2.3904, grad_fn=<NllLossBackward0>)
tensor(2.6859, grad_fn=<NllLossBackward0>)
tensor(3.3525, grad_fn=<NllLossBackward0>)
tensor(2.7866, grad_fn=<NllLossBackward0>)
tensor(2.8442, grad_fn=<NllLossBackward0>)
tensor(2.6646, grad_fn=<NllLossBackward0>)
tensor(3.6773, grad_fn=<NllLossBackward0>)
tensor(2.3979, grad_fn=<NllLossBackward0>)
tensor(2.9274, grad_fn=<NllLossBackward0>)
tensor(2.1480, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(3.0904, grad_fn=<NllLossBackward0>)
tensor(2.8747, grad_fn=<NllLossBackward0>)
tensor(2.3811, grad_fn=<NllLossBackward0>)
tensor(2.6264, grad_fn=<NllLossBackward0>)
tensor(3.2023, grad_fn=<NllLossBackward0>)
tensor(4.8987, grad_fn=<NllLossBackward0>)
tensor(3.0909, grad_fn=<NllLossBackward0>)
tensor(2.6976, grad_fn=<NllLossBackward0>)
tensor(3.7535, grad_fn=<NllLossBackward0>)
tensor(2.0346, grad_fn=<NllLossBackward0>)
tensor(3.1471, grad_fn=<NllLossBackward0>)
tensor(2.1387, grad_fn=<NllLossBackward0>)
tensor(2.1460, grad_fn=<NllLossBackward0>)
tensor(2.7674, grad_fn=<NllLossBackward0>)
tensor(1.7251, grad_fn=<NllLossBackward0>)
tensor(2.1505, grad_fn=<NllLossBackward0>)
tensor(3.2380, grad_fn=<NllLossBackward0>)
tensor(3.1026, grad_fn=<NllLossBackward0>)
tensor(3.5663, grad_fn=<NllLossBackward0>)
tensor(2.3355, grad_fn=<NllLossBackward0>)
tensor(3.1948, grad_fn=<NllLossBackward0>)
tensor(2.3882, grad_fn=<NllLossBackward0>)
tensor(2.3279, grad_fn=<NllLossBackward0>)
tensor(2.45

tensor(2.8512, grad_fn=<NllLossBackward0>)
tensor(3.4323, grad_fn=<NllLossBackward0>)
tensor(2.5356, grad_fn=<NllLossBackward0>)
tensor(2.8692, grad_fn=<NllLossBackward0>)
tensor(3.2917, grad_fn=<NllLossBackward0>)
tensor(3.5390, grad_fn=<NllLossBackward0>)
tensor(2.6486, grad_fn=<NllLossBackward0>)
tensor(1.9865, grad_fn=<NllLossBackward0>)
tensor(2.4635, grad_fn=<NllLossBackward0>)
tensor(2.2524, grad_fn=<NllLossBackward0>)
tensor(4.5250, grad_fn=<NllLossBackward0>)
tensor(3.0118, grad_fn=<NllLossBackward0>)
tensor(2.3174, grad_fn=<NllLossBackward0>)
tensor(2.4637, grad_fn=<NllLossBackward0>)
tensor(2.9046, grad_fn=<NllLossBackward0>)
tensor(2.3181, grad_fn=<NllLossBackward0>)
tensor(2.7082, grad_fn=<NllLossBackward0>)
tensor(3.6064, grad_fn=<NllLossBackward0>)
tensor(2.7780, grad_fn=<NllLossBackward0>)
tensor(2.0365, grad_fn=<NllLossBackward0>)
tensor(2.3668, grad_fn=<NllLossBackward0>)
tensor(2.3271, grad_fn=<NllLossBackward0>)
tensor(2.3714, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(2.7143, grad_fn=<NllLossBackward0>)
tensor(2.8802, grad_fn=<NllLossBackward0>)
tensor(3.4811, grad_fn=<NllLossBackward0>)
tensor(2.7129, grad_fn=<NllLossBackward0>)
tensor(3.2349, grad_fn=<NllLossBackward0>)
tensor(2.8617, grad_fn=<NllLossBackward0>)
tensor(4.1106, grad_fn=<NllLossBackward0>)
tensor(3.4831, grad_fn=<NllLossBackward0>)
tensor(3.3316, grad_fn=<NllLossBackward0>)
tensor(1.9780, grad_fn=<NllLossBackward0>)
tensor(2.4969, grad_fn=<NllLossBackward0>)
tensor(2.5597, grad_fn=<NllLossBackward0>)
tensor(3.1588, grad_fn=<NllLossBackward0>)
tensor(3.0751, grad_fn=<NllLossBackward0>)
tensor(3.4568, grad_fn=<NllLossBackward0>)
tensor(2.0384, grad_fn=<NllLossBackward0>)
tensor(3.6249, grad_fn=<NllLossBackward0>)
tensor(2.5754, grad_fn=<NllLossBackward0>)
tensor(3.0991, grad_fn=<NllLossBackward0>)
tensor(3.0423, grad_fn=<NllLossBackward0>)
tensor(2.7598, grad_fn=<NllLossBackward0>)
tensor(3.0966, grad_fn=<NllLossBackward0>)
tensor(2.0496, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(3.0955, grad_fn=<NllLossBackward0>)
tensor(2.3600, grad_fn=<NllLossBackward0>)
tensor(2.3153, grad_fn=<NllLossBackward0>)
tensor(2.9401, grad_fn=<NllLossBackward0>)
tensor(2.4312, grad_fn=<NllLossBackward0>)
tensor(3.7628, grad_fn=<NllLossBackward0>)
tensor(3.2161, grad_fn=<NllLossBackward0>)
tensor(2.4569, grad_fn=<NllLossBackward0>)
tensor(3.6186, grad_fn=<NllLossBackward0>)
tensor(3.2930, grad_fn=<NllLossBackward0>)
tensor(2.9142, grad_fn=<NllLossBackward0>)
tensor(2.2438, grad_fn=<NllLossBackward0>)
tensor(2.5312, grad_fn=<NllLossBackward0>)
tensor(2.1994, grad_fn=<NllLossBackward0>)
tensor(2.7201, grad_fn=<NllLossBackward0>)
tensor(1.9750, grad_fn=<NllLossBackward0>)
tensor(4.0547, grad_fn=<NllLossBackward0>)
tensor(2.4320, grad_fn=<NllLossBackward0>)
tensor(2.4050, grad_fn=<NllLossBackward0>)
tensor(3.4131, grad_fn=<NllLossBackward0>)
tensor(2.8754, grad_fn=<NllLossBackward0>)
tensor(2.7325, grad_fn=<NllLossBackward0>)
tensor(2.0805, grad_fn=<NllLossBackward0>)
tensor(2.40

tensor(2.3177, grad_fn=<NllLossBackward0>)
tensor(2.3455, grad_fn=<NllLossBackward0>)
tensor(3.3300, grad_fn=<NllLossBackward0>)
tensor(2.7453, grad_fn=<NllLossBackward0>)
tensor(2.4583, grad_fn=<NllLossBackward0>)
tensor(2.8169, grad_fn=<NllLossBackward0>)
tensor(3.2955, grad_fn=<NllLossBackward0>)
tensor(2.4877, grad_fn=<NllLossBackward0>)
tensor(2.7123, grad_fn=<NllLossBackward0>)
tensor(2.0589, grad_fn=<NllLossBackward0>)
tensor(2.6021, grad_fn=<NllLossBackward0>)
tensor(2.3065, grad_fn=<NllLossBackward0>)
tensor(2.7994, grad_fn=<NllLossBackward0>)
tensor(2.3929, grad_fn=<NllLossBackward0>)
tensor(3.0949, grad_fn=<NllLossBackward0>)
tensor(2.2649, grad_fn=<NllLossBackward0>)
tensor(3.5139, grad_fn=<NllLossBackward0>)
tensor(3.5482, grad_fn=<NllLossBackward0>)
tensor(4.1526, grad_fn=<NllLossBackward0>)
tensor(3.2822, grad_fn=<NllLossBackward0>)
tensor(2.5150, grad_fn=<NllLossBackward0>)
tensor(2.4564, grad_fn=<NllLossBackward0>)
tensor(2.3154, grad_fn=<NllLossBackward0>)
tensor(2.31

tensor(2.7830, grad_fn=<NllLossBackward0>)
tensor(3.1400, grad_fn=<NllLossBackward0>)
tensor(3.4687, grad_fn=<NllLossBackward0>)
tensor(3.5806, grad_fn=<NllLossBackward0>)
tensor(2.7794, grad_fn=<NllLossBackward0>)
tensor(3.1655, grad_fn=<NllLossBackward0>)
tensor(3.0587, grad_fn=<NllLossBackward0>)
tensor(2.3399, grad_fn=<NllLossBackward0>)
tensor(3.4037, grad_fn=<NllLossBackward0>)
tensor(2.7794, grad_fn=<NllLossBackward0>)
tensor(4.5305, grad_fn=<NllLossBackward0>)
tensor(3.4998, grad_fn=<NllLossBackward0>)
tensor(2.8023, grad_fn=<NllLossBackward0>)
tensor(2.3121, grad_fn=<NllLossBackward0>)
tensor(5.0393, grad_fn=<NllLossBackward0>)
tensor(2.4701, grad_fn=<NllLossBackward0>)
tensor(3.0953, grad_fn=<NllLossBackward0>)
tensor(2.7950, grad_fn=<NllLossBackward0>)
tensor(3.4403, grad_fn=<NllLossBackward0>)
tensor(3.6875, grad_fn=<NllLossBackward0>)
tensor(2.5696, grad_fn=<NllLossBackward0>)
tensor(1.7106, grad_fn=<NllLossBackward0>)
tensor(2.9924, grad_fn=<NllLossBackward0>)
tensor(3.15

tensor(3.0105, grad_fn=<NllLossBackward0>)
tensor(3.6208, grad_fn=<NllLossBackward0>)
tensor(3.1609, grad_fn=<NllLossBackward0>)
tensor(3.3602, grad_fn=<NllLossBackward0>)
tensor(2.4317, grad_fn=<NllLossBackward0>)
tensor(1.9708, grad_fn=<NllLossBackward0>)
tensor(3.6655, grad_fn=<NllLossBackward0>)
tensor(3.2282, grad_fn=<NllLossBackward0>)
tensor(3.0361, grad_fn=<NllLossBackward0>)
tensor(2.4400, grad_fn=<NllLossBackward0>)
tensor(2.1840, grad_fn=<NllLossBackward0>)
tensor(3.0056, grad_fn=<NllLossBackward0>)
tensor(2.4423, grad_fn=<NllLossBackward0>)
tensor(2.0583, grad_fn=<NllLossBackward0>)
tensor(3.5253, grad_fn=<NllLossBackward0>)
tensor(3.4117, grad_fn=<NllLossBackward0>)
tensor(2.4691, grad_fn=<NllLossBackward0>)
tensor(2.5934, grad_fn=<NllLossBackward0>)
tensor(3.1780, grad_fn=<NllLossBackward0>)
tensor(2.4707, grad_fn=<NllLossBackward0>)
tensor(3.1447, grad_fn=<NllLossBackward0>)
tensor(3.9346, grad_fn=<NllLossBackward0>)
tensor(2.7144, grad_fn=<NllLossBackward0>)
tensor(2.90

tensor(4.0565, grad_fn=<NllLossBackward0>)
tensor(1.9879, grad_fn=<NllLossBackward0>)
tensor(4.0240, grad_fn=<NllLossBackward0>)
tensor(2.7408, grad_fn=<NllLossBackward0>)
tensor(2.7005, grad_fn=<NllLossBackward0>)
tensor(4.3032, grad_fn=<NllLossBackward0>)
tensor(3.9235, grad_fn=<NllLossBackward0>)
tensor(3.0539, grad_fn=<NllLossBackward0>)
tensor(2.3274, grad_fn=<NllLossBackward0>)
tensor(2.8604, grad_fn=<NllLossBackward0>)
tensor(4.2533, grad_fn=<NllLossBackward0>)
tensor(3.3736, grad_fn=<NllLossBackward0>)
tensor(2.4857, grad_fn=<NllLossBackward0>)
tensor(2.4249, grad_fn=<NllLossBackward0>)
tensor(2.4877, grad_fn=<NllLossBackward0>)
tensor(2.7614, grad_fn=<NllLossBackward0>)
tensor(2.2454, grad_fn=<NllLossBackward0>)
tensor(2.4983, grad_fn=<NllLossBackward0>)
tensor(3.3734, grad_fn=<NllLossBackward0>)
tensor(1.7330, grad_fn=<NllLossBackward0>)
tensor(2.3626, grad_fn=<NllLossBackward0>)
tensor(1.9840, grad_fn=<NllLossBackward0>)
tensor(3.5247, grad_fn=<NllLossBackward0>)
tensor(3.15

tensor(3.0670, grad_fn=<NllLossBackward0>)
tensor(2.4256, grad_fn=<NllLossBackward0>)
tensor(3.0865, grad_fn=<NllLossBackward0>)
tensor(2.7818, grad_fn=<NllLossBackward0>)
tensor(2.9806, grad_fn=<NllLossBackward0>)
tensor(2.8605, grad_fn=<NllLossBackward0>)
tensor(4.1580, grad_fn=<NllLossBackward0>)
tensor(2.6040, grad_fn=<NllLossBackward0>)
tensor(2.8080, grad_fn=<NllLossBackward0>)
tensor(4.1565, grad_fn=<NllLossBackward0>)
tensor(2.3041, grad_fn=<NllLossBackward0>)
tensor(3.3008, grad_fn=<NllLossBackward0>)
tensor(2.5248, grad_fn=<NllLossBackward0>)
tensor(3.8488, grad_fn=<NllLossBackward0>)
tensor(2.4720, grad_fn=<NllLossBackward0>)
tensor(2.3783, grad_fn=<NllLossBackward0>)
tensor(3.1381, grad_fn=<NllLossBackward0>)
tensor(2.0922, grad_fn=<NllLossBackward0>)
tensor(2.5457, grad_fn=<NllLossBackward0>)
tensor(2.8018, grad_fn=<NllLossBackward0>)
tensor(2.4423, grad_fn=<NllLossBackward0>)
tensor(3.6113, grad_fn=<NllLossBackward0>)
tensor(2.9306, grad_fn=<NllLossBackward0>)
tensor(2.34

tensor(3.4005, grad_fn=<NllLossBackward0>)
tensor(2.4932, grad_fn=<NllLossBackward0>)
tensor(3.5177, grad_fn=<NllLossBackward0>)
tensor(3.0123, grad_fn=<NllLossBackward0>)
tensor(2.8733, grad_fn=<NllLossBackward0>)
tensor(3.5689, grad_fn=<NllLossBackward0>)
tensor(3.3846, grad_fn=<NllLossBackward0>)
tensor(1.9892, grad_fn=<NllLossBackward0>)
tensor(3.5051, grad_fn=<NllLossBackward0>)
tensor(2.5190, grad_fn=<NllLossBackward0>)
tensor(1.7203, grad_fn=<NllLossBackward0>)
tensor(3.0005, grad_fn=<NllLossBackward0>)
tensor(2.2213, grad_fn=<NllLossBackward0>)
tensor(2.1546, grad_fn=<NllLossBackward0>)
tensor(1.9891, grad_fn=<NllLossBackward0>)
tensor(3.5077, grad_fn=<NllLossBackward0>)
tensor(2.7713, grad_fn=<NllLossBackward0>)
tensor(2.7146, grad_fn=<NllLossBackward0>)
tensor(3.4098, grad_fn=<NllLossBackward0>)
tensor(2.4449, grad_fn=<NllLossBackward0>)
tensor(3.9779, grad_fn=<NllLossBackward0>)
tensor(3.4941, grad_fn=<NllLossBackward0>)
tensor(3.4159, grad_fn=<NllLossBackward0>)
tensor(2.41

tensor(3.9033, grad_fn=<NllLossBackward0>)
tensor(2.9464, grad_fn=<NllLossBackward0>)
tensor(3.5119, grad_fn=<NllLossBackward0>)
tensor(2.6756, grad_fn=<NllLossBackward0>)
tensor(3.4015, grad_fn=<NllLossBackward0>)
tensor(2.3486, grad_fn=<NllLossBackward0>)
tensor(1.9740, grad_fn=<NllLossBackward0>)
tensor(1.9736, grad_fn=<NllLossBackward0>)
tensor(2.2077, grad_fn=<NllLossBackward0>)
tensor(2.2601, grad_fn=<NllLossBackward0>)
tensor(3.3771, grad_fn=<NllLossBackward0>)
tensor(3.4897, grad_fn=<NllLossBackward0>)
tensor(2.0676, grad_fn=<NllLossBackward0>)
tensor(3.9793, grad_fn=<NllLossBackward0>)
tensor(3.3915, grad_fn=<NllLossBackward0>)
tensor(2.9397, grad_fn=<NllLossBackward0>)
tensor(2.2525, grad_fn=<NllLossBackward0>)
tensor(2.3396, grad_fn=<NllLossBackward0>)
tensor(2.5510, grad_fn=<NllLossBackward0>)
tensor(3.4888, grad_fn=<NllLossBackward0>)
tensor(3.0506, grad_fn=<NllLossBackward0>)
tensor(3.0518, grad_fn=<NllLossBackward0>)
tensor(2.0557, grad_fn=<NllLossBackward0>)
tensor(2.93

tensor(2.6554, grad_fn=<NllLossBackward0>)
tensor(2.6144, grad_fn=<NllLossBackward0>)
tensor(3.1552, grad_fn=<NllLossBackward0>)
tensor(2.8646, grad_fn=<NllLossBackward0>)
tensor(4.3731, grad_fn=<NllLossBackward0>)
tensor(2.4313, grad_fn=<NllLossBackward0>)
tensor(2.8913, grad_fn=<NllLossBackward0>)
tensor(2.3671, grad_fn=<NllLossBackward0>)
tensor(1.7080, grad_fn=<NllLossBackward0>)
tensor(3.0462, grad_fn=<NllLossBackward0>)
tensor(3.3976, grad_fn=<NllLossBackward0>)
tensor(3.4582, grad_fn=<NllLossBackward0>)
tensor(2.6747, grad_fn=<NllLossBackward0>)
tensor(2.9542, grad_fn=<NllLossBackward0>)
tensor(2.9841, grad_fn=<NllLossBackward0>)
tensor(2.6732, grad_fn=<NllLossBackward0>)
tensor(3.1312, grad_fn=<NllLossBackward0>)
tensor(2.8326, grad_fn=<NllLossBackward0>)
tensor(2.9868, grad_fn=<NllLossBackward0>)
tensor(2.8910, grad_fn=<NllLossBackward0>)
tensor(3.5423, grad_fn=<NllLossBackward0>)
tensor(3.6103, grad_fn=<NllLossBackward0>)
tensor(2.7515, grad_fn=<NllLossBackward0>)
tensor(2.45

tensor(3.0162, grad_fn=<NllLossBackward0>)
tensor(2.7797, grad_fn=<NllLossBackward0>)
tensor(2.2713, grad_fn=<NllLossBackward0>)
tensor(2.4883, grad_fn=<NllLossBackward0>)
tensor(2.6896, grad_fn=<NllLossBackward0>)
tensor(2.6503, grad_fn=<NllLossBackward0>)
tensor(3.1767, grad_fn=<NllLossBackward0>)
tensor(3.0897, grad_fn=<NllLossBackward0>)
tensor(2.7522, grad_fn=<NllLossBackward0>)
tensor(2.7350, grad_fn=<NllLossBackward0>)
tensor(2.3248, grad_fn=<NllLossBackward0>)
tensor(2.8016, grad_fn=<NllLossBackward0>)
tensor(3.0107, grad_fn=<NllLossBackward0>)
tensor(2.9244, grad_fn=<NllLossBackward0>)
tensor(2.4341, grad_fn=<NllLossBackward0>)
tensor(4.2358, grad_fn=<NllLossBackward0>)
tensor(3.3742, grad_fn=<NllLossBackward0>)
tensor(3.1894, grad_fn=<NllLossBackward0>)
tensor(3.3854, grad_fn=<NllLossBackward0>)
tensor(2.7205, grad_fn=<NllLossBackward0>)
tensor(2.5804, grad_fn=<NllLossBackward0>)
tensor(3.8840, grad_fn=<NllLossBackward0>)
tensor(2.6526, grad_fn=<NllLossBackward0>)
tensor(3.53

tensor(3.7158, grad_fn=<NllLossBackward0>)
tensor(2.3088, grad_fn=<NllLossBackward0>)
tensor(3.6164, grad_fn=<NllLossBackward0>)
tensor(2.4489, grad_fn=<NllLossBackward0>)
tensor(3.7585, grad_fn=<NllLossBackward0>)
tensor(2.5920, grad_fn=<NllLossBackward0>)
tensor(2.6114, grad_fn=<NllLossBackward0>)
tensor(1.7049, grad_fn=<NllLossBackward0>)
tensor(3.1381, grad_fn=<NllLossBackward0>)
tensor(2.9554, grad_fn=<NllLossBackward0>)
tensor(2.8645, grad_fn=<NllLossBackward0>)
tensor(1.9827, grad_fn=<NllLossBackward0>)
tensor(3.9670, grad_fn=<NllLossBackward0>)
tensor(2.8852, grad_fn=<NllLossBackward0>)
tensor(2.6787, grad_fn=<NllLossBackward0>)
tensor(2.6571, grad_fn=<NllLossBackward0>)
tensor(2.0558, grad_fn=<NllLossBackward0>)
tensor(2.5314, grad_fn=<NllLossBackward0>)
tensor(3.4578, grad_fn=<NllLossBackward0>)
tensor(3.2752, grad_fn=<NllLossBackward0>)
tensor(2.8661, grad_fn=<NllLossBackward0>)
tensor(2.4126, grad_fn=<NllLossBackward0>)
tensor(3.0835, grad_fn=<NllLossBackward0>)
tensor(2.39

tensor(2.7182, grad_fn=<NllLossBackward0>)
tensor(3.1750, grad_fn=<NllLossBackward0>)
tensor(2.2944, grad_fn=<NllLossBackward0>)
tensor(2.2293, grad_fn=<NllLossBackward0>)
tensor(2.5575, grad_fn=<NllLossBackward0>)
tensor(3.4671, grad_fn=<NllLossBackward0>)
tensor(2.4226, grad_fn=<NllLossBackward0>)
tensor(2.9159, grad_fn=<NllLossBackward0>)
tensor(4.7469, grad_fn=<NllLossBackward0>)
tensor(3.0601, grad_fn=<NllLossBackward0>)
tensor(1.9947, grad_fn=<NllLossBackward0>)
tensor(3.1850, grad_fn=<NllLossBackward0>)
tensor(2.7125, grad_fn=<NllLossBackward0>)
tensor(3.3278, grad_fn=<NllLossBackward0>)
tensor(2.6333, grad_fn=<NllLossBackward0>)
tensor(1.9921, grad_fn=<NllLossBackward0>)
tensor(3.8694, grad_fn=<NllLossBackward0>)
tensor(3.9313, grad_fn=<NllLossBackward0>)
tensor(2.6961, grad_fn=<NllLossBackward0>)
tensor(3.1090, grad_fn=<NllLossBackward0>)
tensor(2.9695, grad_fn=<NllLossBackward0>)
tensor(2.6693, grad_fn=<NllLossBackward0>)
tensor(3.5014, grad_fn=<NllLossBackward0>)
tensor(2.45

tensor(3.0382, grad_fn=<NllLossBackward0>)
tensor(2.7310, grad_fn=<NllLossBackward0>)
tensor(3.1113, grad_fn=<NllLossBackward0>)
tensor(2.8868, grad_fn=<NllLossBackward0>)
tensor(1.9967, grad_fn=<NllLossBackward0>)
tensor(3.1124, grad_fn=<NllLossBackward0>)
tensor(2.4525, grad_fn=<NllLossBackward0>)
tensor(4.0817, grad_fn=<NllLossBackward0>)
tensor(2.7728, grad_fn=<NllLossBackward0>)
tensor(2.3123, grad_fn=<NllLossBackward0>)
tensor(3.1894, grad_fn=<NllLossBackward0>)
tensor(2.8291, grad_fn=<NllLossBackward0>)
tensor(2.0675, grad_fn=<NllLossBackward0>)
tensor(2.4143, grad_fn=<NllLossBackward0>)
tensor(2.8812, grad_fn=<NllLossBackward0>)
tensor(2.4560, grad_fn=<NllLossBackward0>)
tensor(2.3399, grad_fn=<NllLossBackward0>)
tensor(2.6032, grad_fn=<NllLossBackward0>)
tensor(2.9371, grad_fn=<NllLossBackward0>)
tensor(2.4600, grad_fn=<NllLossBackward0>)
tensor(3.6073, grad_fn=<NllLossBackward0>)
tensor(1.9882, grad_fn=<NllLossBackward0>)
tensor(2.6020, grad_fn=<NllLossBackward0>)
tensor(2.33

tensor(4.9645, grad_fn=<NllLossBackward0>)
tensor(2.3002, grad_fn=<NllLossBackward0>)
tensor(2.9534, grad_fn=<NllLossBackward0>)
tensor(3.5721, grad_fn=<NllLossBackward0>)
tensor(2.4330, grad_fn=<NllLossBackward0>)
tensor(3.0214, grad_fn=<NllLossBackward0>)
tensor(3.4519, grad_fn=<NllLossBackward0>)
tensor(2.0448, grad_fn=<NllLossBackward0>)
tensor(2.9639, grad_fn=<NllLossBackward0>)
tensor(2.6486, grad_fn=<NllLossBackward0>)
tensor(3.3509, grad_fn=<NllLossBackward0>)
tensor(2.4525, grad_fn=<NllLossBackward0>)
tensor(3.5304, grad_fn=<NllLossBackward0>)
tensor(2.3731, grad_fn=<NllLossBackward0>)
tensor(4.6530, grad_fn=<NllLossBackward0>)
tensor(3.0738, grad_fn=<NllLossBackward0>)
tensor(2.9576, grad_fn=<NllLossBackward0>)
tensor(2.1959, grad_fn=<NllLossBackward0>)
tensor(1.7543, grad_fn=<NllLossBackward0>)
tensor(2.5612, grad_fn=<NllLossBackward0>)
tensor(1.7496, grad_fn=<NllLossBackward0>)
tensor(2.2568, grad_fn=<NllLossBackward0>)
tensor(2.7123, grad_fn=<NllLossBackward0>)
tensor(2.06

tensor(2.6814, grad_fn=<NllLossBackward0>)
tensor(2.7727, grad_fn=<NllLossBackward0>)
tensor(2.4490, grad_fn=<NllLossBackward0>)
tensor(2.9529, grad_fn=<NllLossBackward0>)
tensor(2.2590, grad_fn=<NllLossBackward0>)
tensor(3.3009, grad_fn=<NllLossBackward0>)
tensor(3.1849, grad_fn=<NllLossBackward0>)
tensor(2.3701, grad_fn=<NllLossBackward0>)
tensor(2.5212, grad_fn=<NllLossBackward0>)
tensor(2.8289, grad_fn=<NllLossBackward0>)
tensor(2.3307, grad_fn=<NllLossBackward0>)
tensor(2.2811, grad_fn=<NllLossBackward0>)
tensor(3.4611, grad_fn=<NllLossBackward0>)
tensor(1.7532, grad_fn=<NllLossBackward0>)
tensor(3.5356, grad_fn=<NllLossBackward0>)
tensor(3.6328, grad_fn=<NllLossBackward0>)
tensor(3.3607, grad_fn=<NllLossBackward0>)
tensor(1.9980, grad_fn=<NllLossBackward0>)
tensor(2.3857, grad_fn=<NllLossBackward0>)
tensor(1.7483, grad_fn=<NllLossBackward0>)
tensor(2.4699, grad_fn=<NllLossBackward0>)
tensor(2.3558, grad_fn=<NllLossBackward0>)
tensor(2.3724, grad_fn=<NllLossBackward0>)
tensor(4.21

tensor(2.0218, grad_fn=<NllLossBackward0>)
tensor(2.2350, grad_fn=<NllLossBackward0>)
tensor(2.7380, grad_fn=<NllLossBackward0>)
tensor(2.9330, grad_fn=<NllLossBackward0>)
tensor(2.6304, grad_fn=<NllLossBackward0>)
tensor(2.4886, grad_fn=<NllLossBackward0>)
tensor(2.3590, grad_fn=<NllLossBackward0>)
tensor(2.8483, grad_fn=<NllLossBackward0>)
tensor(2.2013, grad_fn=<NllLossBackward0>)
tensor(2.5719, grad_fn=<NllLossBackward0>)
tensor(3.6895, grad_fn=<NllLossBackward0>)
tensor(2.2324, grad_fn=<NllLossBackward0>)
tensor(2.2667, grad_fn=<NllLossBackward0>)
tensor(2.2968, grad_fn=<NllLossBackward0>)
tensor(3.9923, grad_fn=<NllLossBackward0>)
tensor(3.6035, grad_fn=<NllLossBackward0>)
tensor(3.1300, grad_fn=<NllLossBackward0>)
tensor(2.2067, grad_fn=<NllLossBackward0>)
tensor(2.2310, grad_fn=<NllLossBackward0>)
tensor(2.7078, grad_fn=<NllLossBackward0>)
tensor(2.3310, grad_fn=<NllLossBackward0>)
tensor(2.2188, grad_fn=<NllLossBackward0>)
tensor(2.2957, grad_fn=<NllLossBackward0>)
tensor(3.59

tensor(3.4655, grad_fn=<NllLossBackward0>)
tensor(3.3122, grad_fn=<NllLossBackward0>)
tensor(3.0400, grad_fn=<NllLossBackward0>)
tensor(3.1554, grad_fn=<NllLossBackward0>)
tensor(3.3747, grad_fn=<NllLossBackward0>)
tensor(2.9612, grad_fn=<NllLossBackward0>)
tensor(2.8414, grad_fn=<NllLossBackward0>)
tensor(1.9573, grad_fn=<NllLossBackward0>)
tensor(3.7168, grad_fn=<NllLossBackward0>)
tensor(2.0038, grad_fn=<NllLossBackward0>)
tensor(2.4372, grad_fn=<NllLossBackward0>)
tensor(3.8075, grad_fn=<NllLossBackward0>)
tensor(1.9860, grad_fn=<NllLossBackward0>)
tensor(2.4730, grad_fn=<NllLossBackward0>)
tensor(2.2300, grad_fn=<NllLossBackward0>)
tensor(2.3927, grad_fn=<NllLossBackward0>)
tensor(3.5927, grad_fn=<NllLossBackward0>)
tensor(3.1563, grad_fn=<NllLossBackward0>)
tensor(3.1732, grad_fn=<NllLossBackward0>)
tensor(2.6795, grad_fn=<NllLossBackward0>)
tensor(1.9755, grad_fn=<NllLossBackward0>)
tensor(3.0373, grad_fn=<NllLossBackward0>)
tensor(2.6686, grad_fn=<NllLossBackward0>)
tensor(2.41

tensor(2.3459, grad_fn=<NllLossBackward0>)
tensor(2.4237, grad_fn=<NllLossBackward0>)
tensor(3.0692, grad_fn=<NllLossBackward0>)
tensor(3.6435, grad_fn=<NllLossBackward0>)
tensor(3.1748, grad_fn=<NllLossBackward0>)
tensor(1.9667, grad_fn=<NllLossBackward0>)
tensor(2.7107, grad_fn=<NllLossBackward0>)
tensor(2.2197, grad_fn=<NllLossBackward0>)
tensor(3.5414, grad_fn=<NllLossBackward0>)
tensor(2.7079, grad_fn=<NllLossBackward0>)
tensor(3.3005, grad_fn=<NllLossBackward0>)
tensor(2.4811, grad_fn=<NllLossBackward0>)
tensor(3.0369, grad_fn=<NllLossBackward0>)
tensor(3.5267, grad_fn=<NllLossBackward0>)
tensor(2.6375, grad_fn=<NllLossBackward0>)
tensor(2.3912, grad_fn=<NllLossBackward0>)
tensor(2.2477, grad_fn=<NllLossBackward0>)
tensor(2.7372, grad_fn=<NllLossBackward0>)
tensor(4.0967, grad_fn=<NllLossBackward0>)
tensor(2.5829, grad_fn=<NllLossBackward0>)
tensor(3.8409, grad_fn=<NllLossBackward0>)
tensor(3.0782, grad_fn=<NllLossBackward0>)
tensor(3.1488, grad_fn=<NllLossBackward0>)
tensor(3.14

tensor(2.9520, grad_fn=<NllLossBackward0>)
tensor(2.6806, grad_fn=<NllLossBackward0>)
tensor(3.0307, grad_fn=<NllLossBackward0>)
tensor(2.8065, grad_fn=<NllLossBackward0>)
tensor(4.0745, grad_fn=<NllLossBackward0>)
tensor(2.7153, grad_fn=<NllLossBackward0>)
tensor(3.2001, grad_fn=<NllLossBackward0>)
tensor(2.8766, grad_fn=<NllLossBackward0>)
tensor(2.8074, grad_fn=<NllLossBackward0>)
tensor(2.4991, grad_fn=<NllLossBackward0>)
tensor(2.7469, grad_fn=<NllLossBackward0>)
tensor(2.4313, grad_fn=<NllLossBackward0>)
tensor(2.4932, grad_fn=<NllLossBackward0>)
tensor(2.3262, grad_fn=<NllLossBackward0>)
tensor(2.6196, grad_fn=<NllLossBackward0>)
tensor(2.4492, grad_fn=<NllLossBackward0>)
tensor(2.2498, grad_fn=<NllLossBackward0>)
tensor(3.8853, grad_fn=<NllLossBackward0>)
tensor(2.6472, grad_fn=<NllLossBackward0>)
tensor(2.6732, grad_fn=<NllLossBackward0>)
tensor(2.8185, grad_fn=<NllLossBackward0>)
tensor(1.9533, grad_fn=<NllLossBackward0>)
tensor(3.0709, grad_fn=<NllLossBackward0>)
tensor(2.67

tensor(3.3059, grad_fn=<NllLossBackward0>)
tensor(4.3023, grad_fn=<NllLossBackward0>)
tensor(1.9602, grad_fn=<NllLossBackward0>)
tensor(3.5879, grad_fn=<NllLossBackward0>)
tensor(2.2440, grad_fn=<NllLossBackward0>)
tensor(2.2671, grad_fn=<NllLossBackward0>)
tensor(2.3168, grad_fn=<NllLossBackward0>)
tensor(3.0515, grad_fn=<NllLossBackward0>)
tensor(2.6760, grad_fn=<NllLossBackward0>)
tensor(2.0021, grad_fn=<NllLossBackward0>)
tensor(2.3773, grad_fn=<NllLossBackward0>)
tensor(3.0616, grad_fn=<NllLossBackward0>)
tensor(2.7601, grad_fn=<NllLossBackward0>)
tensor(3.4871, grad_fn=<NllLossBackward0>)
tensor(2.6162, grad_fn=<NllLossBackward0>)
tensor(3.0199, grad_fn=<NllLossBackward0>)
tensor(3.3710, grad_fn=<NllLossBackward0>)
tensor(1.9462, grad_fn=<NllLossBackward0>)
tensor(3.0645, grad_fn=<NllLossBackward0>)
tensor(2.9676, grad_fn=<NllLossBackward0>)
tensor(2.9738, grad_fn=<NllLossBackward0>)
tensor(2.4860, grad_fn=<NllLossBackward0>)
tensor(2.7806, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(1.9443, grad_fn=<NllLossBackward0>)
tensor(2.9326, grad_fn=<NllLossBackward0>)
tensor(3.1006, grad_fn=<NllLossBackward0>)
tensor(3.3955, grad_fn=<NllLossBackward0>)
tensor(3.2670, grad_fn=<NllLossBackward0>)
tensor(4.8472, grad_fn=<NllLossBackward0>)
tensor(4.4910, grad_fn=<NllLossBackward0>)
tensor(2.3472, grad_fn=<NllLossBackward0>)
tensor(3.0088, grad_fn=<NllLossBackward0>)
tensor(2.3979, grad_fn=<NllLossBackward0>)
tensor(3.1802, grad_fn=<NllLossBackward0>)
tensor(2.9978, grad_fn=<NllLossBackward0>)
tensor(2.7279, grad_fn=<NllLossBackward0>)
tensor(3.0196, grad_fn=<NllLossBackward0>)
tensor(2.6741, grad_fn=<NllLossBackward0>)
tensor(3.9551, grad_fn=<NllLossBackward0>)
tensor(2.7125, grad_fn=<NllLossBackward0>)
tensor(2.1699, grad_fn=<NllLossBackward0>)
tensor(2.0735, grad_fn=<NllLossBackward0>)
tensor(3.1110, grad_fn=<NllLossBackward0>)
tensor(3.2722, grad_fn=<NllLossBackward0>)
tensor(2.4014, grad_fn=<NllLossBackward0>)
tensor(2.8363, grad_fn=<NllLossBackward0>)
tensor(3.54

tensor(2.3842, grad_fn=<NllLossBackward0>)
tensor(1.9432, grad_fn=<NllLossBackward0>)
tensor(2.6565, grad_fn=<NllLossBackward0>)
tensor(3.0330, grad_fn=<NllLossBackward0>)
tensor(3.1360, grad_fn=<NllLossBackward0>)
tensor(2.1992, grad_fn=<NllLossBackward0>)
tensor(2.7265, grad_fn=<NllLossBackward0>)
tensor(2.9485, grad_fn=<NllLossBackward0>)
tensor(2.9866, grad_fn=<NllLossBackward0>)
tensor(2.6798, grad_fn=<NllLossBackward0>)
tensor(2.3949, grad_fn=<NllLossBackward0>)
tensor(4.0732, grad_fn=<NllLossBackward0>)
tensor(2.3589, grad_fn=<NllLossBackward0>)
tensor(2.8861, grad_fn=<NllLossBackward0>)
tensor(2.2074, grad_fn=<NllLossBackward0>)
tensor(2.8177, grad_fn=<NllLossBackward0>)
tensor(3.5617, grad_fn=<NllLossBackward0>)
tensor(3.4125, grad_fn=<NllLossBackward0>)
tensor(2.2015, grad_fn=<NllLossBackward0>)
tensor(2.4974, grad_fn=<NllLossBackward0>)
tensor(3.3575, grad_fn=<NllLossBackward0>)
tensor(3.6509, grad_fn=<NllLossBackward0>)
tensor(3.2732, grad_fn=<NllLossBackward0>)
tensor(1.94

tensor(2.5980, grad_fn=<NllLossBackward0>)
tensor(1.9371, grad_fn=<NllLossBackward0>)
tensor(2.3178, grad_fn=<NllLossBackward0>)
tensor(2.0290, grad_fn=<NllLossBackward0>)
tensor(4.4308, grad_fn=<NllLossBackward0>)
tensor(2.5971, grad_fn=<NllLossBackward0>)
tensor(3.2199, grad_fn=<NllLossBackward0>)
tensor(2.4528, grad_fn=<NllLossBackward0>)
tensor(2.8685, grad_fn=<NllLossBackward0>)
tensor(1.9391, grad_fn=<NllLossBackward0>)
tensor(3.0572, grad_fn=<NllLossBackward0>)
tensor(2.5666, grad_fn=<NllLossBackward0>)
tensor(3.5316, grad_fn=<NllLossBackward0>)
tensor(3.4216, grad_fn=<NllLossBackward0>)
tensor(2.5431, grad_fn=<NllLossBackward0>)
tensor(3.1467, grad_fn=<NllLossBackward0>)
tensor(2.4700, grad_fn=<NllLossBackward0>)
tensor(3.8668, grad_fn=<NllLossBackward0>)
tensor(3.6151, grad_fn=<NllLossBackward0>)
tensor(1.9455, grad_fn=<NllLossBackward0>)
tensor(4.3746, grad_fn=<NllLossBackward0>)
tensor(2.4670, grad_fn=<NllLossBackward0>)
tensor(2.0302, grad_fn=<NllLossBackward0>)
tensor(3.41

tensor(2.0151, grad_fn=<NllLossBackward0>)
tensor(4.1661, grad_fn=<NllLossBackward0>)
tensor(2.4784, grad_fn=<NllLossBackward0>)
tensor(3.4608, grad_fn=<NllLossBackward0>)
tensor(2.7154, grad_fn=<NllLossBackward0>)
tensor(3.0806, grad_fn=<NllLossBackward0>)
tensor(2.2887, grad_fn=<NllLossBackward0>)
tensor(3.5520, grad_fn=<NllLossBackward0>)
tensor(2.9309, grad_fn=<NllLossBackward0>)
tensor(2.0326, grad_fn=<NllLossBackward0>)
tensor(2.4309, grad_fn=<NllLossBackward0>)
tensor(2.3256, grad_fn=<NllLossBackward0>)
tensor(3.2655, grad_fn=<NllLossBackward0>)
tensor(2.5399, grad_fn=<NllLossBackward0>)
tensor(3.1112, grad_fn=<NllLossBackward0>)
tensor(3.5715, grad_fn=<NllLossBackward0>)
tensor(4.4616, grad_fn=<NllLossBackward0>)
tensor(2.8930, grad_fn=<NllLossBackward0>)
tensor(2.4082, grad_fn=<NllLossBackward0>)
tensor(2.3483, grad_fn=<NllLossBackward0>)
tensor(2.5302, grad_fn=<NllLossBackward0>)
tensor(3.2310, grad_fn=<NllLossBackward0>)
tensor(2.8342, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(2.8962, grad_fn=<NllLossBackward0>)
tensor(2.8503, grad_fn=<NllLossBackward0>)
tensor(2.8597, grad_fn=<NllLossBackward0>)
tensor(1.9224, grad_fn=<NllLossBackward0>)
tensor(3.6306, grad_fn=<NllLossBackward0>)
tensor(2.4620, grad_fn=<NllLossBackward0>)
tensor(3.0551, grad_fn=<NllLossBackward0>)
tensor(2.3765, grad_fn=<NllLossBackward0>)
tensor(2.8618, grad_fn=<NllLossBackward0>)
tensor(2.5397, grad_fn=<NllLossBackward0>)
tensor(2.7152, grad_fn=<NllLossBackward0>)
tensor(2.0023, grad_fn=<NllLossBackward0>)
tensor(2.5380, grad_fn=<NllLossBackward0>)
tensor(2.6593, grad_fn=<NllLossBackward0>)
tensor(4.2903, grad_fn=<NllLossBackward0>)
tensor(3.5802, grad_fn=<NllLossBackward0>)
tensor(2.2393, grad_fn=<NllLossBackward0>)
tensor(2.4361, grad_fn=<NllLossBackward0>)
tensor(2.3226, grad_fn=<NllLossBackward0>)
tensor(2.3324, grad_fn=<NllLossBackward0>)
tensor(3.7035, grad_fn=<NllLossBackward0>)
tensor(3.4678, grad_fn=<NllLossBackward0>)
tensor(2.9675, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(2.1672, grad_fn=<NllLossBackward0>)
tensor(2.5901, grad_fn=<NllLossBackward0>)
tensor(2.3521, grad_fn=<NllLossBackward0>)
tensor(2.3035, grad_fn=<NllLossBackward0>)
tensor(2.7234, grad_fn=<NllLossBackward0>)
tensor(4.4381, grad_fn=<NllLossBackward0>)
tensor(2.4870, grad_fn=<NllLossBackward0>)
tensor(4.2827, grad_fn=<NllLossBackward0>)
tensor(3.3574, grad_fn=<NllLossBackward0>)
tensor(3.5247, grad_fn=<NllLossBackward0>)
tensor(3.0483, grad_fn=<NllLossBackward0>)
tensor(2.8205, grad_fn=<NllLossBackward0>)
tensor(2.6700, grad_fn=<NllLossBackward0>)
tensor(3.3725, grad_fn=<NllLossBackward0>)
tensor(2.2398, grad_fn=<NllLossBackward0>)
tensor(1.6714, grad_fn=<NllLossBackward0>)
tensor(2.4335, grad_fn=<NllLossBackward0>)
tensor(1.9249, grad_fn=<NllLossBackward0>)
tensor(1.6642, grad_fn=<NllLossBackward0>)
tensor(3.1859, grad_fn=<NllLossBackward0>)
tensor(1.9318, grad_fn=<NllLossBackward0>)
tensor(3.5053, grad_fn=<NllLossBackward0>)
tensor(2.7969, grad_fn=<NllLossBackward0>)
tensor(3.73

tensor(3.0805, grad_fn=<NllLossBackward0>)
tensor(2.8233, grad_fn=<NllLossBackward0>)
tensor(2.9043, grad_fn=<NllLossBackward0>)
tensor(3.6732, grad_fn=<NllLossBackward0>)
tensor(4.0353, grad_fn=<NllLossBackward0>)
tensor(2.8827, grad_fn=<NllLossBackward0>)
tensor(2.8673, grad_fn=<NllLossBackward0>)
tensor(2.3332, grad_fn=<NllLossBackward0>)
tensor(2.4704, grad_fn=<NllLossBackward0>)
tensor(1.6776, grad_fn=<NllLossBackward0>)
tensor(3.3063, grad_fn=<NllLossBackward0>)
tensor(2.3359, grad_fn=<NllLossBackward0>)
tensor(2.2992, grad_fn=<NllLossBackward0>)
tensor(3.3488, grad_fn=<NllLossBackward0>)
tensor(3.7422, grad_fn=<NllLossBackward0>)
tensor(2.5571, grad_fn=<NllLossBackward0>)
tensor(2.1992, grad_fn=<NllLossBackward0>)
tensor(3.0685, grad_fn=<NllLossBackward0>)
tensor(4.2938, grad_fn=<NllLossBackward0>)
tensor(2.8746, grad_fn=<NllLossBackward0>)
tensor(1.6755, grad_fn=<NllLossBackward0>)
tensor(2.8658, grad_fn=<NllLossBackward0>)
tensor(2.1256, grad_fn=<NllLossBackward0>)
tensor(2.91

tensor(2.2967, grad_fn=<NllLossBackward0>)
tensor(4.0087, grad_fn=<NllLossBackward0>)
tensor(2.8711, grad_fn=<NllLossBackward0>)
tensor(2.3868, grad_fn=<NllLossBackward0>)
tensor(3.1493, grad_fn=<NllLossBackward0>)
tensor(2.9976, grad_fn=<NllLossBackward0>)
tensor(2.3484, grad_fn=<NllLossBackward0>)
tensor(3.4294, grad_fn=<NllLossBackward0>)
tensor(2.5002, grad_fn=<NllLossBackward0>)
tensor(3.3530, grad_fn=<NllLossBackward0>)
tensor(2.8662, grad_fn=<NllLossBackward0>)
tensor(1.9623, grad_fn=<NllLossBackward0>)
tensor(3.2345, grad_fn=<NllLossBackward0>)
tensor(2.8353, grad_fn=<NllLossBackward0>)
tensor(2.6292, grad_fn=<NllLossBackward0>)
tensor(3.8460, grad_fn=<NllLossBackward0>)
tensor(3.0309, grad_fn=<NllLossBackward0>)
tensor(2.5985, grad_fn=<NllLossBackward0>)
tensor(3.0841, grad_fn=<NllLossBackward0>)
tensor(3.7028, grad_fn=<NllLossBackward0>)
tensor(3.5340, grad_fn=<NllLossBackward0>)
tensor(3.3967, grad_fn=<NllLossBackward0>)
tensor(2.4386, grad_fn=<NllLossBackward0>)
tensor(3.56

tensor(2.3185, grad_fn=<NllLossBackward0>)
tensor(3.6286, grad_fn=<NllLossBackward0>)
tensor(2.4103, grad_fn=<NllLossBackward0>)
tensor(3.0953, grad_fn=<NllLossBackward0>)
tensor(3.6475, grad_fn=<NllLossBackward0>)
tensor(2.3747, grad_fn=<NllLossBackward0>)
tensor(1.9538, grad_fn=<NllLossBackward0>)
tensor(2.8400, grad_fn=<NllLossBackward0>)
tensor(2.6722, grad_fn=<NllLossBackward0>)
tensor(2.0352, grad_fn=<NllLossBackward0>)
tensor(1.9636, grad_fn=<NllLossBackward0>)
tensor(3.2722, grad_fn=<NllLossBackward0>)
tensor(1.7109, grad_fn=<NllLossBackward0>)
tensor(2.7378, grad_fn=<NllLossBackward0>)
tensor(3.9119, grad_fn=<NllLossBackward0>)
tensor(3.5123, grad_fn=<NllLossBackward0>)
tensor(2.4548, grad_fn=<NllLossBackward0>)
tensor(3.2019, grad_fn=<NllLossBackward0>)
tensor(2.4560, grad_fn=<NllLossBackward0>)
tensor(2.7123, grad_fn=<NllLossBackward0>)
tensor(3.4196, grad_fn=<NllLossBackward0>)
tensor(2.4678, grad_fn=<NllLossBackward0>)
tensor(3.0347, grad_fn=<NllLossBackward0>)
tensor(3.43

tensor(2.3759, grad_fn=<NllLossBackward0>)
tensor(3.2604, grad_fn=<NllLossBackward0>)
tensor(3.8947, grad_fn=<NllLossBackward0>)
tensor(3.2797, grad_fn=<NllLossBackward0>)
tensor(3.1331, grad_fn=<NllLossBackward0>)
tensor(3.5365, grad_fn=<NllLossBackward0>)
tensor(3.0175, grad_fn=<NllLossBackward0>)
tensor(3.8399, grad_fn=<NllLossBackward0>)
tensor(3.3181, grad_fn=<NllLossBackward0>)
tensor(2.7742, grad_fn=<NllLossBackward0>)
tensor(3.5574, grad_fn=<NllLossBackward0>)
tensor(3.5556, grad_fn=<NllLossBackward0>)
tensor(3.4123, grad_fn=<NllLossBackward0>)
tensor(2.6187, grad_fn=<NllLossBackward0>)
tensor(3.1711, grad_fn=<NllLossBackward0>)
tensor(3.5850, grad_fn=<NllLossBackward0>)
tensor(3.2864, grad_fn=<NllLossBackward0>)
tensor(2.1469, grad_fn=<NllLossBackward0>)
tensor(3.6510, grad_fn=<NllLossBackward0>)
tensor(3.3741, grad_fn=<NllLossBackward0>)
tensor(2.8140, grad_fn=<NllLossBackward0>)
tensor(2.4078, grad_fn=<NllLossBackward0>)
tensor(3.4212, grad_fn=<NllLossBackward0>)
tensor(4.02

tensor(2.4231, grad_fn=<NllLossBackward0>)
tensor(1.9604, grad_fn=<NllLossBackward0>)
tensor(1.9479, grad_fn=<NllLossBackward0>)
tensor(3.5333, grad_fn=<NllLossBackward0>)
tensor(2.8881, grad_fn=<NllLossBackward0>)
tensor(2.0598, grad_fn=<NllLossBackward0>)
tensor(3.0687, grad_fn=<NllLossBackward0>)
tensor(3.9813, grad_fn=<NllLossBackward0>)
tensor(3.3344, grad_fn=<NllLossBackward0>)
tensor(2.4837, grad_fn=<NllLossBackward0>)
tensor(2.2217, grad_fn=<NllLossBackward0>)
tensor(2.1815, grad_fn=<NllLossBackward0>)
tensor(2.7236, grad_fn=<NllLossBackward0>)
tensor(2.6247, grad_fn=<NllLossBackward0>)
tensor(3.0654, grad_fn=<NllLossBackward0>)
tensor(3.4057, grad_fn=<NllLossBackward0>)
tensor(2.4111, grad_fn=<NllLossBackward0>)
tensor(2.6420, grad_fn=<NllLossBackward0>)
tensor(2.6858, grad_fn=<NllLossBackward0>)
tensor(3.6040, grad_fn=<NllLossBackward0>)
tensor(3.7628, grad_fn=<NllLossBackward0>)
tensor(3.5400, grad_fn=<NllLossBackward0>)
tensor(3.9086, grad_fn=<NllLossBackward0>)
tensor(2.45

tensor(2.0188, grad_fn=<NllLossBackward0>)
tensor(3.6363, grad_fn=<NllLossBackward0>)
tensor(2.7285, grad_fn=<NllLossBackward0>)
tensor(2.3763, grad_fn=<NllLossBackward0>)
tensor(1.9455, grad_fn=<NllLossBackward0>)
tensor(3.0476, grad_fn=<NllLossBackward0>)
tensor(3.7728, grad_fn=<NllLossBackward0>)
tensor(2.3253, grad_fn=<NllLossBackward0>)
tensor(2.6792, grad_fn=<NllLossBackward0>)
tensor(2.9378, grad_fn=<NllLossBackward0>)
tensor(3.0568, grad_fn=<NllLossBackward0>)
tensor(4.1688, grad_fn=<NllLossBackward0>)
tensor(3.6065, grad_fn=<NllLossBackward0>)
tensor(3.9209, grad_fn=<NllLossBackward0>)
tensor(2.3442, grad_fn=<NllLossBackward0>)
tensor(2.3401, grad_fn=<NllLossBackward0>)
tensor(2.4077, grad_fn=<NllLossBackward0>)
tensor(2.1660, grad_fn=<NllLossBackward0>)
tensor(2.8622, grad_fn=<NllLossBackward0>)
tensor(3.5235, grad_fn=<NllLossBackward0>)
tensor(3.0558, grad_fn=<NllLossBackward0>)
tensor(3.0747, grad_fn=<NllLossBackward0>)
tensor(2.3632, grad_fn=<NllLossBackward0>)
tensor(2.56

tensor(1.9709, grad_fn=<NllLossBackward0>)
tensor(3.1617, grad_fn=<NllLossBackward0>)
tensor(2.7746, grad_fn=<NllLossBackward0>)
tensor(2.3649, grad_fn=<NllLossBackward0>)
tensor(2.1820, grad_fn=<NllLossBackward0>)
tensor(3.1901, grad_fn=<NllLossBackward0>)
tensor(3.1167, grad_fn=<NllLossBackward0>)
tensor(2.5887, grad_fn=<NllLossBackward0>)
tensor(2.6854, grad_fn=<NllLossBackward0>)
tensor(3.6333, grad_fn=<NllLossBackward0>)
tensor(2.7537, grad_fn=<NllLossBackward0>)
tensor(3.8680, grad_fn=<NllLossBackward0>)
tensor(3.4238, grad_fn=<NllLossBackward0>)
tensor(3.6356, grad_fn=<NllLossBackward0>)
tensor(3.6544, grad_fn=<NllLossBackward0>)
tensor(2.9222, grad_fn=<NllLossBackward0>)
tensor(2.8702, grad_fn=<NllLossBackward0>)
tensor(1.6775, grad_fn=<NllLossBackward0>)
tensor(2.3561, grad_fn=<NllLossBackward0>)
tensor(3.1853, grad_fn=<NllLossBackward0>)
tensor(4.1901, grad_fn=<NllLossBackward0>)
tensor(2.7446, grad_fn=<NllLossBackward0>)
tensor(3.8321, grad_fn=<NllLossBackward0>)
tensor(3.10

tensor(2.2332, grad_fn=<NllLossBackward0>)
tensor(1.6875, grad_fn=<NllLossBackward0>)
tensor(2.9414, grad_fn=<NllLossBackward0>)
tensor(2.4523, grad_fn=<NllLossBackward0>)
tensor(2.7094, grad_fn=<NllLossBackward0>)
tensor(2.7933, grad_fn=<NllLossBackward0>)
tensor(2.7931, grad_fn=<NllLossBackward0>)
tensor(2.3273, grad_fn=<NllLossBackward0>)
tensor(2.9195, grad_fn=<NllLossBackward0>)
tensor(3.2041, grad_fn=<NllLossBackward0>)
tensor(2.8274, grad_fn=<NllLossBackward0>)
tensor(3.1541, grad_fn=<NllLossBackward0>)
tensor(1.9632, grad_fn=<NllLossBackward0>)
tensor(2.0469, grad_fn=<NllLossBackward0>)
tensor(2.8916, grad_fn=<NllLossBackward0>)
tensor(4.3006, grad_fn=<NllLossBackward0>)
tensor(3.1840, grad_fn=<NllLossBackward0>)
tensor(2.4296, grad_fn=<NllLossBackward0>)
tensor(2.6980, grad_fn=<NllLossBackward0>)
tensor(3.0595, grad_fn=<NllLossBackward0>)
tensor(3.1688, grad_fn=<NllLossBackward0>)
tensor(1.6885, grad_fn=<NllLossBackward0>)
tensor(3.0482, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(1.9674, grad_fn=<NllLossBackward0>)
tensor(3.3946, grad_fn=<NllLossBackward0>)
tensor(3.4998, grad_fn=<NllLossBackward0>)
tensor(2.4389, grad_fn=<NllLossBackward0>)
tensor(4.0662, grad_fn=<NllLossBackward0>)
tensor(4.1865, grad_fn=<NllLossBackward0>)
tensor(1.9801, grad_fn=<NllLossBackward0>)
tensor(2.5709, grad_fn=<NllLossBackward0>)
tensor(2.4448, grad_fn=<NllLossBackward0>)
tensor(2.6505, grad_fn=<NllLossBackward0>)
tensor(2.9326, grad_fn=<NllLossBackward0>)
tensor(2.8292, grad_fn=<NllLossBackward0>)
tensor(2.4019, grad_fn=<NllLossBackward0>)
tensor(2.4842, grad_fn=<NllLossBackward0>)
tensor(3.2644, grad_fn=<NllLossBackward0>)
tensor(3.3218, grad_fn=<NllLossBackward0>)
tensor(3.0194, grad_fn=<NllLossBackward0>)
tensor(2.6579, grad_fn=<NllLossBackward0>)
tensor(2.6502, grad_fn=<NllLossBackward0>)
tensor(2.4062, grad_fn=<NllLossBackward0>)
tensor(2.3172, grad_fn=<NllLossBackward0>)
tensor(2.8466, grad_fn=<NllLossBackward0>)
tensor(1.6925, grad_fn=<NllLossBackward0>)
tensor(3.67

tensor(2.5992, grad_fn=<NllLossBackward0>)
tensor(3.3553, grad_fn=<NllLossBackward0>)
tensor(3.2792, grad_fn=<NllLossBackward0>)
tensor(2.9062, grad_fn=<NllLossBackward0>)
tensor(3.4577, grad_fn=<NllLossBackward0>)
tensor(2.9601, grad_fn=<NllLossBackward0>)
tensor(2.9828, grad_fn=<NllLossBackward0>)
tensor(2.6730, grad_fn=<NllLossBackward0>)
tensor(3.4428, grad_fn=<NllLossBackward0>)
tensor(3.3092, grad_fn=<NllLossBackward0>)
tensor(3.3053, grad_fn=<NllLossBackward0>)
tensor(2.3353, grad_fn=<NllLossBackward0>)
tensor(2.6596, grad_fn=<NllLossBackward0>)
tensor(3.4309, grad_fn=<NllLossBackward0>)
tensor(2.1736, grad_fn=<NllLossBackward0>)
tensor(2.4254, grad_fn=<NllLossBackward0>)
tensor(2.7828, grad_fn=<NllLossBackward0>)
tensor(1.6916, grad_fn=<NllLossBackward0>)
tensor(2.4917, grad_fn=<NllLossBackward0>)
tensor(2.4388, grad_fn=<NllLossBackward0>)
tensor(2.3981, grad_fn=<NllLossBackward0>)
tensor(2.1868, grad_fn=<NllLossBackward0>)
tensor(3.5298, grad_fn=<NllLossBackward0>)
tensor(2.79

tensor(2.5481, grad_fn=<NllLossBackward0>)
tensor(1.9405, grad_fn=<NllLossBackward0>)
tensor(1.9527, grad_fn=<NllLossBackward0>)
tensor(2.9306, grad_fn=<NllLossBackward0>)
tensor(2.9947, grad_fn=<NllLossBackward0>)
tensor(2.6126, grad_fn=<NllLossBackward0>)
tensor(2.7895, grad_fn=<NllLossBackward0>)
tensor(2.3908, grad_fn=<NllLossBackward0>)
tensor(1.9443, grad_fn=<NllLossBackward0>)
tensor(2.9122, grad_fn=<NllLossBackward0>)
tensor(2.6323, grad_fn=<NllLossBackward0>)
tensor(1.9188, grad_fn=<NllLossBackward0>)
tensor(2.7367, grad_fn=<NllLossBackward0>)
tensor(3.9586, grad_fn=<NllLossBackward0>)
tensor(2.8436, grad_fn=<NllLossBackward0>)
tensor(4.0814, grad_fn=<NllLossBackward0>)
tensor(2.4426, grad_fn=<NllLossBackward0>)
tensor(2.4805, grad_fn=<NllLossBackward0>)
tensor(4.0890, grad_fn=<NllLossBackward0>)
tensor(2.3008, grad_fn=<NllLossBackward0>)
tensor(2.3487, grad_fn=<NllLossBackward0>)
tensor(2.2271, grad_fn=<NllLossBackward0>)
tensor(2.5645, grad_fn=<NllLossBackward0>)
tensor(3.51

tensor(2.4540, grad_fn=<NllLossBackward0>)
tensor(2.6956, grad_fn=<NllLossBackward0>)
tensor(2.8034, grad_fn=<NllLossBackward0>)
tensor(3.7947, grad_fn=<NllLossBackward0>)
tensor(1.9409, grad_fn=<NllLossBackward0>)
tensor(2.6578, grad_fn=<NllLossBackward0>)
tensor(2.6289, grad_fn=<NllLossBackward0>)
tensor(2.0359, grad_fn=<NllLossBackward0>)
tensor(2.3605, grad_fn=<NllLossBackward0>)
tensor(2.3051, grad_fn=<NllLossBackward0>)
tensor(2.8650, grad_fn=<NllLossBackward0>)
tensor(4.1067, grad_fn=<NllLossBackward0>)
tensor(3.5157, grad_fn=<NllLossBackward0>)
tensor(2.0049, grad_fn=<NllLossBackward0>)
tensor(2.6515, grad_fn=<NllLossBackward0>)
tensor(2.7646, grad_fn=<NllLossBackward0>)
tensor(2.0153, grad_fn=<NllLossBackward0>)
tensor(3.6292, grad_fn=<NllLossBackward0>)
tensor(4.3053, grad_fn=<NllLossBackward0>)
tensor(3.6917, grad_fn=<NllLossBackward0>)
tensor(2.6066, grad_fn=<NllLossBackward0>)
tensor(2.3625, grad_fn=<NllLossBackward0>)
tensor(2.3590, grad_fn=<NllLossBackward0>)
tensor(1.95

tensor(2.3716, grad_fn=<NllLossBackward0>)
tensor(2.3441, grad_fn=<NllLossBackward0>)
tensor(2.0150, grad_fn=<NllLossBackward0>)
tensor(3.7565, grad_fn=<NllLossBackward0>)
tensor(2.3736, grad_fn=<NllLossBackward0>)
tensor(3.1430, grad_fn=<NllLossBackward0>)
tensor(3.4843, grad_fn=<NllLossBackward0>)
tensor(4.2960, grad_fn=<NllLossBackward0>)
tensor(2.9888, grad_fn=<NllLossBackward0>)
tensor(3.8918, grad_fn=<NllLossBackward0>)
tensor(2.2843, grad_fn=<NllLossBackward0>)
tensor(3.2785, grad_fn=<NllLossBackward0>)
tensor(3.5006, grad_fn=<NllLossBackward0>)
tensor(3.0286, grad_fn=<NllLossBackward0>)
tensor(1.9640, grad_fn=<NllLossBackward0>)
tensor(1.9549, grad_fn=<NllLossBackward0>)
tensor(2.0447, grad_fn=<NllLossBackward0>)
tensor(3.0962, grad_fn=<NllLossBackward0>)
tensor(2.4632, grad_fn=<NllLossBackward0>)
tensor(3.6612, grad_fn=<NllLossBackward0>)
tensor(2.3447, grad_fn=<NllLossBackward0>)
tensor(2.4505, grad_fn=<NllLossBackward0>)
tensor(2.3888, grad_fn=<NllLossBackward0>)
tensor(2.31

tensor(2.9188, grad_fn=<NllLossBackward0>)
tensor(2.8903, grad_fn=<NllLossBackward0>)
tensor(3.4310, grad_fn=<NllLossBackward0>)
tensor(3.5669, grad_fn=<NllLossBackward0>)
tensor(2.8979, grad_fn=<NllLossBackward0>)
tensor(3.1615, grad_fn=<NllLossBackward0>)
tensor(3.1573, grad_fn=<NllLossBackward0>)
tensor(2.7540, grad_fn=<NllLossBackward0>)
tensor(2.3084, grad_fn=<NllLossBackward0>)
tensor(3.6372, grad_fn=<NllLossBackward0>)
tensor(2.4637, grad_fn=<NllLossBackward0>)
tensor(2.8571, grad_fn=<NllLossBackward0>)
tensor(2.6377, grad_fn=<NllLossBackward0>)
tensor(3.0378, grad_fn=<NllLossBackward0>)
tensor(3.5219, grad_fn=<NllLossBackward0>)
tensor(2.3668, grad_fn=<NllLossBackward0>)
tensor(3.1602, grad_fn=<NllLossBackward0>)
tensor(2.4022, grad_fn=<NllLossBackward0>)
tensor(3.5990, grad_fn=<NllLossBackward0>)
tensor(2.0306, grad_fn=<NllLossBackward0>)
tensor(2.6561, grad_fn=<NllLossBackward0>)
tensor(2.1569, grad_fn=<NllLossBackward0>)
tensor(2.7058, grad_fn=<NllLossBackward0>)
tensor(3.34

tensor(4.4523, grad_fn=<NllLossBackward0>)
tensor(3.5129, grad_fn=<NllLossBackward0>)
tensor(1.9521, grad_fn=<NllLossBackward0>)
tensor(3.0930, grad_fn=<NllLossBackward0>)
tensor(1.9479, grad_fn=<NllLossBackward0>)
tensor(2.9784, grad_fn=<NllLossBackward0>)
tensor(2.2184, grad_fn=<NllLossBackward0>)
tensor(2.9633, grad_fn=<NllLossBackward0>)
tensor(3.2134, grad_fn=<NllLossBackward0>)
tensor(2.5736, grad_fn=<NllLossBackward0>)
tensor(3.0292, grad_fn=<NllLossBackward0>)
tensor(2.9096, grad_fn=<NllLossBackward0>)
tensor(2.5168, grad_fn=<NllLossBackward0>)
tensor(2.4602, grad_fn=<NllLossBackward0>)
tensor(2.0134, grad_fn=<NllLossBackward0>)
tensor(1.9566, grad_fn=<NllLossBackward0>)
tensor(2.1096, grad_fn=<NllLossBackward0>)
tensor(2.3437, grad_fn=<NllLossBackward0>)
tensor(2.2891, grad_fn=<NllLossBackward0>)
tensor(2.6108, grad_fn=<NllLossBackward0>)
tensor(3.5283, grad_fn=<NllLossBackward0>)
tensor(2.6334, grad_fn=<NllLossBackward0>)
tensor(3.1740, grad_fn=<NllLossBackward0>)
tensor(2.37

tensor(2.9407, grad_fn=<NllLossBackward0>)
tensor(3.7324, grad_fn=<NllLossBackward0>)
tensor(3.5661, grad_fn=<NllLossBackward0>)
tensor(2.4277, grad_fn=<NllLossBackward0>)
tensor(2.5541, grad_fn=<NllLossBackward0>)
tensor(3.2776, grad_fn=<NllLossBackward0>)
tensor(3.1960, grad_fn=<NllLossBackward0>)
tensor(2.6383, grad_fn=<NllLossBackward0>)
tensor(2.6338, grad_fn=<NllLossBackward0>)
tensor(3.1432, grad_fn=<NllLossBackward0>)
tensor(2.7089, grad_fn=<NllLossBackward0>)
tensor(2.3060, grad_fn=<NllLossBackward0>)
tensor(3.7639, grad_fn=<NllLossBackward0>)
tensor(1.9693, grad_fn=<NllLossBackward0>)
tensor(2.1715, grad_fn=<NllLossBackward0>)
tensor(2.1096, grad_fn=<NllLossBackward0>)
tensor(2.6001, grad_fn=<NllLossBackward0>)
tensor(1.6971, grad_fn=<NllLossBackward0>)
tensor(2.5145, grad_fn=<NllLossBackward0>)
tensor(2.2090, grad_fn=<NllLossBackward0>)
tensor(4.0871, grad_fn=<NllLossBackward0>)
tensor(1.9647, grad_fn=<NllLossBackward0>)
tensor(2.6550, grad_fn=<NllLossBackward0>)
tensor(2.38

tensor(3.1960, grad_fn=<NllLossBackward0>)
tensor(2.4503, grad_fn=<NllLossBackward0>)
tensor(2.4435, grad_fn=<NllLossBackward0>)
tensor(2.6265, grad_fn=<NllLossBackward0>)
tensor(3.2014, grad_fn=<NllLossBackward0>)
tensor(3.7205, grad_fn=<NllLossBackward0>)
tensor(3.5495, grad_fn=<NllLossBackward0>)
tensor(3.5221, grad_fn=<NllLossBackward0>)
tensor(2.3806, grad_fn=<NllLossBackward0>)
tensor(2.2496, grad_fn=<NllLossBackward0>)
tensor(4.3004, grad_fn=<NllLossBackward0>)
tensor(3.6061, grad_fn=<NllLossBackward0>)
tensor(2.6735, grad_fn=<NllLossBackward0>)
tensor(3.7325, grad_fn=<NllLossBackward0>)
tensor(3.2275, grad_fn=<NllLossBackward0>)
tensor(2.9242, grad_fn=<NllLossBackward0>)
tensor(2.0241, grad_fn=<NllLossBackward0>)
tensor(3.0374, grad_fn=<NllLossBackward0>)
tensor(2.7453, grad_fn=<NllLossBackward0>)
tensor(2.3603, grad_fn=<NllLossBackward0>)
tensor(3.5579, grad_fn=<NllLossBackward0>)
tensor(2.3873, grad_fn=<NllLossBackward0>)
tensor(3.1408, grad_fn=<NllLossBackward0>)
tensor(2.11

tensor(1.9816, grad_fn=<NllLossBackward0>)
tensor(2.4672, grad_fn=<NllLossBackward0>)
tensor(2.8493, grad_fn=<NllLossBackward0>)
tensor(3.5558, grad_fn=<NllLossBackward0>)
tensor(2.9234, grad_fn=<NllLossBackward0>)
tensor(3.8788, grad_fn=<NllLossBackward0>)
tensor(2.4500, grad_fn=<NllLossBackward0>)
tensor(2.7455, grad_fn=<NllLossBackward0>)
tensor(2.8708, grad_fn=<NllLossBackward0>)
tensor(3.2466, grad_fn=<NllLossBackward0>)
tensor(2.5549, grad_fn=<NllLossBackward0>)
tensor(2.1677, grad_fn=<NllLossBackward0>)
tensor(2.4398, grad_fn=<NllLossBackward0>)
tensor(2.9463, grad_fn=<NllLossBackward0>)
tensor(2.3062, grad_fn=<NllLossBackward0>)
tensor(2.9679, grad_fn=<NllLossBackward0>)
tensor(3.1629, grad_fn=<NllLossBackward0>)
tensor(4.0595, grad_fn=<NllLossBackward0>)
tensor(3.5378, grad_fn=<NllLossBackward0>)
tensor(4.2427, grad_fn=<NllLossBackward0>)
tensor(2.6435, grad_fn=<NllLossBackward0>)
tensor(3.0655, grad_fn=<NllLossBackward0>)
tensor(1.9963, grad_fn=<NllLossBackward0>)
tensor(2.58

tensor(2.8794, grad_fn=<NllLossBackward0>)
tensor(2.1285, grad_fn=<NllLossBackward0>)
tensor(2.7005, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(2.6687, grad_fn=<NllLossBackward0>)
tensor(2.4330, grad_fn=<NllLossBackward0>)
tensor(2.8670, grad_fn=<NllLossBackward0>)
tensor(3.0259, grad_fn=<NllLossBackward0>)
tensor(2.1485, grad_fn=<NllLossBackward0>)
tensor(1.9796, grad_fn=<NllLossBackward0>)
tensor(1.7173, grad_fn=<NllLossBackward0>)
tensor(2.8678, grad_fn=<NllLossBackward0>)
tensor(3.3517, grad_fn=<NllLossBackward0>)
tensor(1.9780, grad_fn=<NllLossBackward0>)
tensor(3.1302, grad_fn=<NllLossBackward0>)
tensor(2.3496, grad_fn=<NllLossBackward0>)
tensor(2.9675, grad_fn=<NllLossBackward0>)
tensor(1.9693, grad_fn=<NllLossBackward0>)
tensor(3.1654, grad_fn=<NllLossBackward0>)
tensor(3.4219, grad_fn=<NllLossBackward0>)
tensor(3.2638, grad_fn=<NllLossBackward0>)
tensor(2.6245, grad_fn=<NllLossBackward0>)
tensor(2.8425, grad_fn=<NllLossBackward0>)
tensor(2.95

tensor(3.5298, grad_fn=<NllLossBackward0>)
tensor(2.4488, grad_fn=<NllLossBackward0>)
tensor(2.3021, grad_fn=<NllLossBackward0>)
tensor(3.3455, grad_fn=<NllLossBackward0>)
tensor(2.8699, grad_fn=<NllLossBackward0>)
tensor(3.5263, grad_fn=<NllLossBackward0>)
tensor(2.6860, grad_fn=<NllLossBackward0>)
tensor(2.3038, grad_fn=<NllLossBackward0>)
tensor(4.7822, grad_fn=<NllLossBackward0>)
tensor(2.2131, grad_fn=<NllLossBackward0>)
tensor(3.1600, grad_fn=<NllLossBackward0>)
tensor(2.9906, grad_fn=<NllLossBackward0>)
tensor(2.3643, grad_fn=<NllLossBackward0>)
tensor(2.7908, grad_fn=<NllLossBackward0>)
tensor(2.5597, grad_fn=<NllLossBackward0>)
tensor(2.9036, grad_fn=<NllLossBackward0>)
tensor(2.4381, grad_fn=<NllLossBackward0>)
tensor(2.9955, grad_fn=<NllLossBackward0>)
tensor(3.4836, grad_fn=<NllLossBackward0>)
tensor(4.2934, grad_fn=<NllLossBackward0>)
tensor(2.4327, grad_fn=<NllLossBackward0>)
tensor(2.6519, grad_fn=<NllLossBackward0>)
tensor(2.9007, grad_fn=<NllLossBackward0>)
tensor(2.44

tensor(2.8043, grad_fn=<NllLossBackward0>)
tensor(2.9534, grad_fn=<NllLossBackward0>)
tensor(2.8926, grad_fn=<NllLossBackward0>)
tensor(3.1069, grad_fn=<NllLossBackward0>)
tensor(3.1779, grad_fn=<NllLossBackward0>)
tensor(2.7683, grad_fn=<NllLossBackward0>)
tensor(2.3996, grad_fn=<NllLossBackward0>)
tensor(3.2094, grad_fn=<NllLossBackward0>)
tensor(2.8560, grad_fn=<NllLossBackward0>)
tensor(3.5173, grad_fn=<NllLossBackward0>)
tensor(2.4405, grad_fn=<NllLossBackward0>)
tensor(3.1484, grad_fn=<NllLossBackward0>)
tensor(2.3794, grad_fn=<NllLossBackward0>)
tensor(3.0188, grad_fn=<NllLossBackward0>)
tensor(3.4911, grad_fn=<NllLossBackward0>)
tensor(2.8613, grad_fn=<NllLossBackward0>)
tensor(2.0364, grad_fn=<NllLossBackward0>)
tensor(3.1233, grad_fn=<NllLossBackward0>)
tensor(3.1170, grad_fn=<NllLossBackward0>)
tensor(2.9520, grad_fn=<NllLossBackward0>)
tensor(2.2930, grad_fn=<NllLossBackward0>)
tensor(2.9999, grad_fn=<NllLossBackward0>)
tensor(2.4530, grad_fn=<NllLossBackward0>)
tensor(2.41

tensor(2.6175, grad_fn=<NllLossBackward0>)
tensor(2.9006, grad_fn=<NllLossBackward0>)
tensor(2.8270, grad_fn=<NllLossBackward0>)
tensor(3.4695, grad_fn=<NllLossBackward0>)
tensor(3.6355, grad_fn=<NllLossBackward0>)
tensor(1.7200, grad_fn=<NllLossBackward0>)
tensor(2.4938, grad_fn=<NllLossBackward0>)
tensor(2.3077, grad_fn=<NllLossBackward0>)
tensor(2.8456, grad_fn=<NllLossBackward0>)
tensor(1.9816, grad_fn=<NllLossBackward0>)
tensor(3.5328, grad_fn=<NllLossBackward0>)
tensor(2.2645, grad_fn=<NllLossBackward0>)
tensor(2.1954, grad_fn=<NllLossBackward0>)
tensor(2.6165, grad_fn=<NllLossBackward0>)
tensor(4.3037, grad_fn=<NllLossBackward0>)
tensor(2.4813, grad_fn=<NllLossBackward0>)
tensor(3.0524, grad_fn=<NllLossBackward0>)
tensor(3.1121, grad_fn=<NllLossBackward0>)
tensor(3.0107, grad_fn=<NllLossBackward0>)
tensor(2.3913, grad_fn=<NllLossBackward0>)
tensor(2.7621, grad_fn=<NllLossBackward0>)
tensor(3.3922, grad_fn=<NllLossBackward0>)
tensor(3.0106, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(2.2517, grad_fn=<NllLossBackward0>)
tensor(1.9849, grad_fn=<NllLossBackward0>)
tensor(2.4011, grad_fn=<NllLossBackward0>)
tensor(2.7248, grad_fn=<NllLossBackward0>)
tensor(2.6305, grad_fn=<NllLossBackward0>)
tensor(3.0292, grad_fn=<NllLossBackward0>)
tensor(2.9954, grad_fn=<NllLossBackward0>)
tensor(2.1291, grad_fn=<NllLossBackward0>)
tensor(3.5252, grad_fn=<NllLossBackward0>)
tensor(2.8403, grad_fn=<NllLossBackward0>)
tensor(2.2374, grad_fn=<NllLossBackward0>)
tensor(3.1192, grad_fn=<NllLossBackward0>)
tensor(2.3828, grad_fn=<NllLossBackward0>)
tensor(3.4620, grad_fn=<NllLossBackward0>)
tensor(2.1586, grad_fn=<NllLossBackward0>)
tensor(3.2429, grad_fn=<NllLossBackward0>)
tensor(2.2876, grad_fn=<NllLossBackward0>)
tensor(2.6154, grad_fn=<NllLossBackward0>)
tensor(1.7189, grad_fn=<NllLossBackward0>)
tensor(2.0136, grad_fn=<NllLossBackward0>)
tensor(3.5925, grad_fn=<NllLossBackward0>)
tensor(2.4514, grad_fn=<NllLossBackward0>)
tensor(2.6635, grad_fn=<NllLossBackward0>)
tensor(3.17

tensor(2.4365, grad_fn=<NllLossBackward0>)
tensor(2.9447, grad_fn=<NllLossBackward0>)
tensor(1.9924, grad_fn=<NllLossBackward0>)
tensor(2.3384, grad_fn=<NllLossBackward0>)
tensor(3.2434, grad_fn=<NllLossBackward0>)
tensor(3.9856, grad_fn=<NllLossBackward0>)
tensor(3.5148, grad_fn=<NllLossBackward0>)
tensor(3.3270, grad_fn=<NllLossBackward0>)
tensor(3.2535, grad_fn=<NllLossBackward0>)
tensor(2.0064, grad_fn=<NllLossBackward0>)
tensor(3.2436, grad_fn=<NllLossBackward0>)
tensor(3.1143, grad_fn=<NllLossBackward0>)
tensor(4.1592, grad_fn=<NllLossBackward0>)
tensor(4.5259, grad_fn=<NllLossBackward0>)
tensor(3.6175, grad_fn=<NllLossBackward0>)
tensor(2.9680, grad_fn=<NllLossBackward0>)
tensor(3.6299, grad_fn=<NllLossBackward0>)
tensor(2.1874, grad_fn=<NllLossBackward0>)
tensor(1.9899, grad_fn=<NllLossBackward0>)
tensor(3.4390, grad_fn=<NllLossBackward0>)
tensor(2.5113, grad_fn=<NllLossBackward0>)
tensor(4.0352, grad_fn=<NllLossBackward0>)
tensor(3.9191, grad_fn=<NllLossBackward0>)
tensor(2.59

tensor(2.3080, grad_fn=<NllLossBackward0>)
tensor(2.4144, grad_fn=<NllLossBackward0>)
tensor(2.5103, grad_fn=<NllLossBackward0>)
tensor(3.4141, grad_fn=<NllLossBackward0>)
tensor(3.6052, grad_fn=<NllLossBackward0>)
tensor(2.8650, grad_fn=<NllLossBackward0>)
tensor(3.9656, grad_fn=<NllLossBackward0>)
tensor(2.3767, grad_fn=<NllLossBackward0>)
tensor(2.4824, grad_fn=<NllLossBackward0>)
tensor(2.6514, grad_fn=<NllLossBackward0>)
tensor(2.7441, grad_fn=<NllLossBackward0>)
tensor(2.8438, grad_fn=<NllLossBackward0>)
tensor(3.4436, grad_fn=<NllLossBackward0>)
tensor(2.4408, grad_fn=<NllLossBackward0>)
tensor(3.7752, grad_fn=<NllLossBackward0>)
tensor(2.6894, grad_fn=<NllLossBackward0>)
tensor(2.1313, grad_fn=<NllLossBackward0>)
tensor(3.0369, grad_fn=<NllLossBackward0>)
tensor(2.9826, grad_fn=<NllLossBackward0>)
tensor(3.5725, grad_fn=<NllLossBackward0>)
tensor(2.0037, grad_fn=<NllLossBackward0>)
tensor(2.0373, grad_fn=<NllLossBackward0>)
tensor(2.3937, grad_fn=<NllLossBackward0>)
tensor(2.70

tensor(2.6389, grad_fn=<NllLossBackward0>)
tensor(3.8738, grad_fn=<NllLossBackward0>)
tensor(2.6389, grad_fn=<NllLossBackward0>)
tensor(2.9467, grad_fn=<NllLossBackward0>)
tensor(3.1070, grad_fn=<NllLossBackward0>)
tensor(2.6635, grad_fn=<NllLossBackward0>)
tensor(2.7274, grad_fn=<NllLossBackward0>)
tensor(2.4165, grad_fn=<NllLossBackward0>)
tensor(3.0726, grad_fn=<NllLossBackward0>)
tensor(2.6383, grad_fn=<NllLossBackward0>)
tensor(2.3565, grad_fn=<NllLossBackward0>)
tensor(3.1125, grad_fn=<NllLossBackward0>)
tensor(1.9969, grad_fn=<NllLossBackward0>)
tensor(3.5273, grad_fn=<NllLossBackward0>)
tensor(2.9238, grad_fn=<NllLossBackward0>)
tensor(2.6637, grad_fn=<NllLossBackward0>)
tensor(2.6818, grad_fn=<NllLossBackward0>)
tensor(2.3885, grad_fn=<NllLossBackward0>)
tensor(3.1231, grad_fn=<NllLossBackward0>)
tensor(2.2944, grad_fn=<NllLossBackward0>)
tensor(2.3805, grad_fn=<NllLossBackward0>)
tensor(3.9378, grad_fn=<NllLossBackward0>)
tensor(3.1964, grad_fn=<NllLossBackward0>)
tensor(3.19

tensor(1.9853, grad_fn=<NllLossBackward0>)
tensor(2.9503, grad_fn=<NllLossBackward0>)
tensor(2.2810, grad_fn=<NllLossBackward0>)
tensor(3.1086, grad_fn=<NllLossBackward0>)
tensor(2.8318, grad_fn=<NllLossBackward0>)
tensor(3.4754, grad_fn=<NllLossBackward0>)
tensor(2.7666, grad_fn=<NllLossBackward0>)
tensor(3.3775, grad_fn=<NllLossBackward0>)
tensor(2.4875, grad_fn=<NllLossBackward0>)
tensor(2.8388, grad_fn=<NllLossBackward0>)
tensor(2.6942, grad_fn=<NllLossBackward0>)
tensor(3.1139, grad_fn=<NllLossBackward0>)
tensor(2.0365, grad_fn=<NllLossBackward0>)
tensor(2.1611, grad_fn=<NllLossBackward0>)
tensor(2.9608, grad_fn=<NllLossBackward0>)
tensor(2.3487, grad_fn=<NllLossBackward0>)
tensor(2.3709, grad_fn=<NllLossBackward0>)
tensor(3.6279, grad_fn=<NllLossBackward0>)
tensor(2.3445, grad_fn=<NllLossBackward0>)
tensor(2.6783, grad_fn=<NllLossBackward0>)
tensor(3.3816, grad_fn=<NllLossBackward0>)
tensor(4.3049, grad_fn=<NllLossBackward0>)
tensor(1.9945, grad_fn=<NllLossBackward0>)
tensor(2.38

tensor(2.4133, grad_fn=<NllLossBackward0>)
tensor(2.3534, grad_fn=<NllLossBackward0>)
tensor(1.7109, grad_fn=<NllLossBackward0>)
tensor(2.3340, grad_fn=<NllLossBackward0>)
tensor(2.1420, grad_fn=<NllLossBackward0>)
tensor(2.2809, grad_fn=<NllLossBackward0>)
tensor(2.4015, grad_fn=<NllLossBackward0>)
tensor(2.3913, grad_fn=<NllLossBackward0>)
tensor(3.0700, grad_fn=<NllLossBackward0>)
tensor(2.0317, grad_fn=<NllLossBackward0>)
tensor(3.5731, grad_fn=<NllLossBackward0>)
tensor(2.3396, grad_fn=<NllLossBackward0>)
tensor(2.8581, grad_fn=<NllLossBackward0>)
tensor(2.4732, grad_fn=<NllLossBackward0>)
tensor(2.5535, grad_fn=<NllLossBackward0>)
tensor(2.6337, grad_fn=<NllLossBackward0>)
tensor(3.5430, grad_fn=<NllLossBackward0>)
tensor(2.6110, grad_fn=<NllLossBackward0>)
tensor(4.8721, grad_fn=<NllLossBackward0>)
tensor(2.9802, grad_fn=<NllLossBackward0>)
tensor(3.0329, grad_fn=<NllLossBackward0>)
tensor(2.3952, grad_fn=<NllLossBackward0>)
tensor(2.5276, grad_fn=<NllLossBackward0>)
tensor(2.33

tensor(3.3647, grad_fn=<NllLossBackward0>)
tensor(2.3052, grad_fn=<NllLossBackward0>)
tensor(3.3135, grad_fn=<NllLossBackward0>)
tensor(2.6843, grad_fn=<NllLossBackward0>)
tensor(3.3242, grad_fn=<NllLossBackward0>)
tensor(2.6598, grad_fn=<NllLossBackward0>)
tensor(2.4560, grad_fn=<NllLossBackward0>)
tensor(2.4601, grad_fn=<NllLossBackward0>)
tensor(4.0617, grad_fn=<NllLossBackward0>)
tensor(3.0290, grad_fn=<NllLossBackward0>)
tensor(2.3925, grad_fn=<NllLossBackward0>)
tensor(3.1026, grad_fn=<NllLossBackward0>)
tensor(2.4322, grad_fn=<NllLossBackward0>)
tensor(2.3635, grad_fn=<NllLossBackward0>)
tensor(2.7755, grad_fn=<NllLossBackward0>)
tensor(2.7164, grad_fn=<NllLossBackward0>)
tensor(2.4262, grad_fn=<NllLossBackward0>)
tensor(3.6645, grad_fn=<NllLossBackward0>)
tensor(3.1189, grad_fn=<NllLossBackward0>)
tensor(3.0153, grad_fn=<NllLossBackward0>)
tensor(2.0916, grad_fn=<NllLossBackward0>)
tensor(2.0016, grad_fn=<NllLossBackward0>)
tensor(1.9720, grad_fn=<NllLossBackward0>)
tensor(2.68

tensor(4.0601, grad_fn=<NllLossBackward0>)
tensor(2.3872, grad_fn=<NllLossBackward0>)
tensor(3.3684, grad_fn=<NllLossBackward0>)
tensor(1.9774, grad_fn=<NllLossBackward0>)
tensor(3.6213, grad_fn=<NllLossBackward0>)
tensor(2.9039, grad_fn=<NllLossBackward0>)
tensor(3.1585, grad_fn=<NllLossBackward0>)
tensor(3.4041, grad_fn=<NllLossBackward0>)
tensor(3.3974, grad_fn=<NllLossBackward0>)
tensor(2.6367, grad_fn=<NllLossBackward0>)
tensor(2.2119, grad_fn=<NllLossBackward0>)
tensor(4.4766, grad_fn=<NllLossBackward0>)
tensor(3.2148, grad_fn=<NllLossBackward0>)
tensor(3.0792, grad_fn=<NllLossBackward0>)
tensor(2.8185, grad_fn=<NllLossBackward0>)
tensor(3.1974, grad_fn=<NllLossBackward0>)
tensor(2.9038, grad_fn=<NllLossBackward0>)
tensor(3.7124, grad_fn=<NllLossBackward0>)
tensor(3.1343, grad_fn=<NllLossBackward0>)
tensor(1.9589, grad_fn=<NllLossBackward0>)
tensor(3.5298, grad_fn=<NllLossBackward0>)
tensor(3.0149, grad_fn=<NllLossBackward0>)
tensor(1.9869, grad_fn=<NllLossBackward0>)
tensor(2.66

tensor(2.1310, grad_fn=<NllLossBackward0>)
tensor(3.7465, grad_fn=<NllLossBackward0>)
tensor(1.9791, grad_fn=<NllLossBackward0>)
tensor(2.8206, grad_fn=<NllLossBackward0>)
tensor(2.1192, grad_fn=<NllLossBackward0>)
tensor(2.4692, grad_fn=<NllLossBackward0>)
tensor(2.8198, grad_fn=<NllLossBackward0>)
tensor(1.9700, grad_fn=<NllLossBackward0>)
tensor(3.2733, grad_fn=<NllLossBackward0>)
tensor(3.5630, grad_fn=<NllLossBackward0>)
tensor(2.1177, grad_fn=<NllLossBackward0>)
tensor(2.8845, grad_fn=<NllLossBackward0>)
tensor(3.8016, grad_fn=<NllLossBackward0>)
tensor(1.9631, grad_fn=<NllLossBackward0>)
tensor(1.9778, grad_fn=<NllLossBackward0>)
tensor(3.1447, grad_fn=<NllLossBackward0>)
tensor(3.2432, grad_fn=<NllLossBackward0>)
tensor(2.4502, grad_fn=<NllLossBackward0>)
tensor(3.5963, grad_fn=<NllLossBackward0>)
tensor(3.1408, grad_fn=<NllLossBackward0>)
tensor(2.1949, grad_fn=<NllLossBackward0>)
tensor(2.2751, grad_fn=<NllLossBackward0>)
tensor(2.8704, grad_fn=<NllLossBackward0>)
tensor(3.57

tensor(2.2346, grad_fn=<NllLossBackward0>)
tensor(3.6448, grad_fn=<NllLossBackward0>)
tensor(3.7602, grad_fn=<NllLossBackward0>)
tensor(2.7518, grad_fn=<NllLossBackward0>)
tensor(3.0379, grad_fn=<NllLossBackward0>)
tensor(2.9579, grad_fn=<NllLossBackward0>)
tensor(2.3816, grad_fn=<NllLossBackward0>)
tensor(2.3738, grad_fn=<NllLossBackward0>)
tensor(3.0047, grad_fn=<NllLossBackward0>)
tensor(3.6038, grad_fn=<NllLossBackward0>)
tensor(3.1718, grad_fn=<NllLossBackward0>)
tensor(2.7073, grad_fn=<NllLossBackward0>)
tensor(2.3728, grad_fn=<NllLossBackward0>)
tensor(2.3789, grad_fn=<NllLossBackward0>)
tensor(3.0573, grad_fn=<NllLossBackward0>)
tensor(3.4965, grad_fn=<NllLossBackward0>)
tensor(2.5142, grad_fn=<NllLossBackward0>)
tensor(2.1752, grad_fn=<NllLossBackward0>)
tensor(2.9076, grad_fn=<NllLossBackward0>)
tensor(2.1455, grad_fn=<NllLossBackward0>)
tensor(2.6083, grad_fn=<NllLossBackward0>)
tensor(1.9839, grad_fn=<NllLossBackward0>)
tensor(2.7914, grad_fn=<NllLossBackward0>)
tensor(2.69

tensor(3.1142, grad_fn=<NllLossBackward0>)
tensor(2.8376, grad_fn=<NllLossBackward0>)
tensor(2.3883, grad_fn=<NllLossBackward0>)
tensor(3.9130, grad_fn=<NllLossBackward0>)
tensor(2.4054, grad_fn=<NllLossBackward0>)
tensor(2.7413, grad_fn=<NllLossBackward0>)
tensor(2.4083, grad_fn=<NllLossBackward0>)
tensor(3.1950, grad_fn=<NllLossBackward0>)
tensor(3.3467, grad_fn=<NllLossBackward0>)
tensor(3.5701, grad_fn=<NllLossBackward0>)
tensor(3.7590, grad_fn=<NllLossBackward0>)
tensor(2.6002, grad_fn=<NllLossBackward0>)
tensor(3.9260, grad_fn=<NllLossBackward0>)
tensor(2.9506, grad_fn=<NllLossBackward0>)
tensor(3.2553, grad_fn=<NllLossBackward0>)
tensor(1.9790, grad_fn=<NllLossBackward0>)
tensor(2.4583, grad_fn=<NllLossBackward0>)
tensor(2.9402, grad_fn=<NllLossBackward0>)
tensor(1.6963, grad_fn=<NllLossBackward0>)
tensor(3.3672, grad_fn=<NllLossBackward0>)
tensor(2.8000, grad_fn=<NllLossBackward0>)
tensor(3.5475, grad_fn=<NllLossBackward0>)
tensor(3.2608, grad_fn=<NllLossBackward0>)
tensor(2.90

tensor(3.4334, grad_fn=<NllLossBackward0>)
tensor(2.6655, grad_fn=<NllLossBackward0>)
tensor(3.7242, grad_fn=<NllLossBackward0>)
tensor(3.0843, grad_fn=<NllLossBackward0>)
tensor(1.9879, grad_fn=<NllLossBackward0>)
tensor(2.9465, grad_fn=<NllLossBackward0>)
tensor(3.5604, grad_fn=<NllLossBackward0>)
tensor(3.4361, grad_fn=<NllLossBackward0>)
tensor(1.9754, grad_fn=<NllLossBackward0>)
tensor(2.6290, grad_fn=<NllLossBackward0>)
tensor(2.3425, grad_fn=<NllLossBackward0>)
tensor(3.5083, grad_fn=<NllLossBackward0>)
tensor(1.7156, grad_fn=<NllLossBackward0>)
tensor(2.8730, grad_fn=<NllLossBackward0>)
tensor(2.6030, grad_fn=<NllLossBackward0>)
tensor(2.3375, grad_fn=<NllLossBackward0>)
tensor(3.2752, grad_fn=<NllLossBackward0>)
tensor(3.1493, grad_fn=<NllLossBackward0>)
tensor(4.4626, grad_fn=<NllLossBackward0>)
tensor(3.1121, grad_fn=<NllLossBackward0>)
tensor(3.1621, grad_fn=<NllLossBackward0>)
tensor(2.4481, grad_fn=<NllLossBackward0>)
tensor(1.9818, grad_fn=<NllLossBackward0>)
tensor(4.64

tensor(3.4690, grad_fn=<NllLossBackward0>)
tensor(3.1700, grad_fn=<NllLossBackward0>)
tensor(3.8724, grad_fn=<NllLossBackward0>)
tensor(3.3576, grad_fn=<NllLossBackward0>)
tensor(3.8690, grad_fn=<NllLossBackward0>)
tensor(3.2475, grad_fn=<NllLossBackward0>)
tensor(3.9830, grad_fn=<NllLossBackward0>)
tensor(3.1148, grad_fn=<NllLossBackward0>)
tensor(2.4735, grad_fn=<NllLossBackward0>)
tensor(2.4412, grad_fn=<NllLossBackward0>)
tensor(2.9786, grad_fn=<NllLossBackward0>)
tensor(2.4317, grad_fn=<NllLossBackward0>)
tensor(2.6815, grad_fn=<NllLossBackward0>)
tensor(2.4964, grad_fn=<NllLossBackward0>)
tensor(3.5850, grad_fn=<NllLossBackward0>)
tensor(2.5115, grad_fn=<NllLossBackward0>)
tensor(3.3741, grad_fn=<NllLossBackward0>)
tensor(2.7931, grad_fn=<NllLossBackward0>)
tensor(4.0761, grad_fn=<NllLossBackward0>)
tensor(2.3526, grad_fn=<NllLossBackward0>)
tensor(2.6521, grad_fn=<NllLossBackward0>)
tensor(2.3706, grad_fn=<NllLossBackward0>)
tensor(3.4067, grad_fn=<NllLossBackward0>)
tensor(2.11

tensor(2.4116, grad_fn=<NllLossBackward0>)
tensor(2.7076, grad_fn=<NllLossBackward0>)
tensor(4.0961, grad_fn=<NllLossBackward0>)
tensor(3.1737, grad_fn=<NllLossBackward0>)
tensor(2.3661, grad_fn=<NllLossBackward0>)
tensor(1.9825, grad_fn=<NllLossBackward0>)
tensor(3.7074, grad_fn=<NllLossBackward0>)
tensor(2.4755, grad_fn=<NllLossBackward0>)
tensor(2.4020, grad_fn=<NllLossBackward0>)
tensor(2.4421, grad_fn=<NllLossBackward0>)
tensor(2.4568, grad_fn=<NllLossBackward0>)
tensor(2.7132, grad_fn=<NllLossBackward0>)
tensor(1.7124, grad_fn=<NllLossBackward0>)
tensor(2.2505, grad_fn=<NllLossBackward0>)
tensor(2.2743, grad_fn=<NllLossBackward0>)
tensor(2.9138, grad_fn=<NllLossBackward0>)
tensor(3.3647, grad_fn=<NllLossBackward0>)
tensor(4.2245, grad_fn=<NllLossBackward0>)
tensor(1.7228, grad_fn=<NllLossBackward0>)
tensor(1.7151, grad_fn=<NllLossBackward0>)
tensor(3.0870, grad_fn=<NllLossBackward0>)
tensor(1.7051, grad_fn=<NllLossBackward0>)
tensor(2.4109, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(3.0983, grad_fn=<NllLossBackward0>)
tensor(2.6623, grad_fn=<NllLossBackward0>)
tensor(1.9500, grad_fn=<NllLossBackward0>)
tensor(1.9341, grad_fn=<NllLossBackward0>)
tensor(2.7425, grad_fn=<NllLossBackward0>)
tensor(3.1120, grad_fn=<NllLossBackward0>)
tensor(2.5843, grad_fn=<NllLossBackward0>)
tensor(1.7020, grad_fn=<NllLossBackward0>)
tensor(2.8799, grad_fn=<NllLossBackward0>)
tensor(1.9522, grad_fn=<NllLossBackward0>)
tensor(1.9404, grad_fn=<NllLossBackward0>)
tensor(2.4034, grad_fn=<NllLossBackward0>)
tensor(2.5950, grad_fn=<NllLossBackward0>)
tensor(1.9594, grad_fn=<NllLossBackward0>)
tensor(2.4080, grad_fn=<NllLossBackward0>)
tensor(2.5168, grad_fn=<NllLossBackward0>)
tensor(3.1527, grad_fn=<NllLossBackward0>)
tensor(3.4142, grad_fn=<NllLossBackward0>)
tensor(2.5532, grad_fn=<NllLossBackward0>)
tensor(3.0842, grad_fn=<NllLossBackward0>)
tensor(2.8542, grad_fn=<NllLossBackward0>)
tensor(3.5444, grad_fn=<NllLossBackward0>)
tensor(2.5945, grad_fn=<NllLossBackward0>)
tensor(2.43

tensor(3.5876, grad_fn=<NllLossBackward0>)
tensor(3.0296, grad_fn=<NllLossBackward0>)
tensor(3.2066, grad_fn=<NllLossBackward0>)
tensor(2.6698, grad_fn=<NllLossBackward0>)
tensor(2.5966, grad_fn=<NllLossBackward0>)
tensor(2.9710, grad_fn=<NllLossBackward0>)
tensor(2.3614, grad_fn=<NllLossBackward0>)
tensor(4.2656, grad_fn=<NllLossBackward0>)
tensor(2.5756, grad_fn=<NllLossBackward0>)
tensor(3.2831, grad_fn=<NllLossBackward0>)
tensor(3.1203, grad_fn=<NllLossBackward0>)
tensor(2.3497, grad_fn=<NllLossBackward0>)
tensor(3.5764, grad_fn=<NllLossBackward0>)
tensor(2.2246, grad_fn=<NllLossBackward0>)
tensor(2.4677, grad_fn=<NllLossBackward0>)
tensor(2.4096, grad_fn=<NllLossBackward0>)
tensor(3.0178, grad_fn=<NllLossBackward0>)
tensor(3.1342, grad_fn=<NllLossBackward0>)
tensor(2.2392, grad_fn=<NllLossBackward0>)
tensor(2.1225, grad_fn=<NllLossBackward0>)
tensor(2.7178, grad_fn=<NllLossBackward0>)
tensor(2.3631, grad_fn=<NllLossBackward0>)
tensor(2.4735, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(3.2710, grad_fn=<NllLossBackward0>)
tensor(2.2140, grad_fn=<NllLossBackward0>)
tensor(1.9641, grad_fn=<NllLossBackward0>)
tensor(3.0181, grad_fn=<NllLossBackward0>)
tensor(1.9701, grad_fn=<NllLossBackward0>)
tensor(3.1286, grad_fn=<NllLossBackward0>)
tensor(2.3870, grad_fn=<NllLossBackward0>)
tensor(3.6987, grad_fn=<NllLossBackward0>)
tensor(3.5533, grad_fn=<NllLossBackward0>)
tensor(3.1984, grad_fn=<NllLossBackward0>)
tensor(3.7983, grad_fn=<NllLossBackward0>)
tensor(2.4020, grad_fn=<NllLossBackward0>)
tensor(2.9370, grad_fn=<NllLossBackward0>)
tensor(3.0029, grad_fn=<NllLossBackward0>)
tensor(3.1224, grad_fn=<NllLossBackward0>)
tensor(2.8327, grad_fn=<NllLossBackward0>)
tensor(3.2865, grad_fn=<NllLossBackward0>)
tensor(3.9256, grad_fn=<NllLossBackward0>)
tensor(2.6193, grad_fn=<NllLossBackward0>)
tensor(3.8395, grad_fn=<NllLossBackward0>)
tensor(3.6096, grad_fn=<NllLossBackward0>)
tensor(2.2054, grad_fn=<NllLossBackward0>)
tensor(2.6273, grad_fn=<NllLossBackward0>)
tensor(3.36

tensor(3.6299, grad_fn=<NllLossBackward0>)
tensor(1.9618, grad_fn=<NllLossBackward0>)
tensor(3.5432, grad_fn=<NllLossBackward0>)
tensor(2.8502, grad_fn=<NllLossBackward0>)
tensor(2.3760, grad_fn=<NllLossBackward0>)
tensor(2.2366, grad_fn=<NllLossBackward0>)
tensor(2.4368, grad_fn=<NllLossBackward0>)
tensor(2.8569, grad_fn=<NllLossBackward0>)
tensor(1.7532, grad_fn=<NllLossBackward0>)
tensor(2.3887, grad_fn=<NllLossBackward0>)
tensor(3.4745, grad_fn=<NllLossBackward0>)
tensor(1.9516, grad_fn=<NllLossBackward0>)
tensor(2.3709, grad_fn=<NllLossBackward0>)
tensor(3.1800, grad_fn=<NllLossBackward0>)
tensor(3.0359, grad_fn=<NllLossBackward0>)
tensor(1.9698, grad_fn=<NllLossBackward0>)
tensor(2.3955, grad_fn=<NllLossBackward0>)
tensor(2.9372, grad_fn=<NllLossBackward0>)
tensor(2.5417, grad_fn=<NllLossBackward0>)
tensor(2.5584, grad_fn=<NllLossBackward0>)
tensor(2.6133, grad_fn=<NllLossBackward0>)
tensor(2.2104, grad_fn=<NllLossBackward0>)
tensor(2.8915, grad_fn=<NllLossBackward0>)
tensor(2.70

tensor(2.5723, grad_fn=<NllLossBackward0>)
tensor(2.9246, grad_fn=<NllLossBackward0>)
tensor(2.4102, grad_fn=<NllLossBackward0>)
tensor(2.3004, grad_fn=<NllLossBackward0>)
tensor(3.0560, grad_fn=<NllLossBackward0>)
tensor(3.9075, grad_fn=<NllLossBackward0>)
tensor(2.5544, grad_fn=<NllLossBackward0>)
tensor(2.9626, grad_fn=<NllLossBackward0>)
tensor(2.5728, grad_fn=<NllLossBackward0>)
tensor(3.0433, grad_fn=<NllLossBackward0>)
tensor(2.1957, grad_fn=<NllLossBackward0>)
tensor(3.2218, grad_fn=<NllLossBackward0>)
tensor(2.7236, grad_fn=<NllLossBackward0>)
tensor(2.5256, grad_fn=<NllLossBackward0>)
tensor(3.9280, grad_fn=<NllLossBackward0>)
tensor(2.9968, grad_fn=<NllLossBackward0>)
tensor(2.6368, grad_fn=<NllLossBackward0>)
tensor(3.1049, grad_fn=<NllLossBackward0>)
tensor(3.0540, grad_fn=<NllLossBackward0>)
tensor(3.5240, grad_fn=<NllLossBackward0>)
tensor(3.6149, grad_fn=<NllLossBackward0>)
tensor(2.4960, grad_fn=<NllLossBackward0>)
tensor(2.3640, grad_fn=<NllLossBackward0>)
tensor(4.43

tensor(3.1439, grad_fn=<NllLossBackward0>)
tensor(3.3736, grad_fn=<NllLossBackward0>)
tensor(2.9225, grad_fn=<NllLossBackward0>)
tensor(2.4480, grad_fn=<NllLossBackward0>)
tensor(1.9828, grad_fn=<NllLossBackward0>)
tensor(3.2611, grad_fn=<NllLossBackward0>)
tensor(3.2678, grad_fn=<NllLossBackward0>)
tensor(2.5992, grad_fn=<NllLossBackward0>)
tensor(3.8914, grad_fn=<NllLossBackward0>)
tensor(2.5249, grad_fn=<NllLossBackward0>)
tensor(2.8708, grad_fn=<NllLossBackward0>)
tensor(1.9908, grad_fn=<NllLossBackward0>)
tensor(1.9590, grad_fn=<NllLossBackward0>)
tensor(1.9652, grad_fn=<NllLossBackward0>)
tensor(2.4399, grad_fn=<NllLossBackward0>)
tensor(2.4211, grad_fn=<NllLossBackward0>)
tensor(2.3056, grad_fn=<NllLossBackward0>)
tensor(2.9784, grad_fn=<NllLossBackward0>)
tensor(4.2664, grad_fn=<NllLossBackward0>)
tensor(1.9396, grad_fn=<NllLossBackward0>)
tensor(3.0956, grad_fn=<NllLossBackward0>)
tensor(2.0180, grad_fn=<NllLossBackward0>)
tensor(3.0981, grad_fn=<NllLossBackward0>)
tensor(2.58

tensor(2.4187, grad_fn=<NllLossBackward0>)
tensor(2.8554, grad_fn=<NllLossBackward0>)
tensor(3.1079, grad_fn=<NllLossBackward0>)
tensor(3.8122, grad_fn=<NllLossBackward0>)
tensor(2.4777, grad_fn=<NllLossBackward0>)
tensor(2.8348, grad_fn=<NllLossBackward0>)
tensor(2.3621, grad_fn=<NllLossBackward0>)
tensor(3.1589, grad_fn=<NllLossBackward0>)
tensor(2.4468, grad_fn=<NllLossBackward0>)
tensor(3.6522, grad_fn=<NllLossBackward0>)
tensor(2.6771, grad_fn=<NllLossBackward0>)
tensor(2.8651, grad_fn=<NllLossBackward0>)
tensor(2.3049, grad_fn=<NllLossBackward0>)
tensor(2.3206, grad_fn=<NllLossBackward0>)
tensor(3.0157, grad_fn=<NllLossBackward0>)
tensor(2.3176, grad_fn=<NllLossBackward0>)
tensor(3.9653, grad_fn=<NllLossBackward0>)
tensor(2.2735, grad_fn=<NllLossBackward0>)
tensor(2.6929, grad_fn=<NllLossBackward0>)
tensor(3.0043, grad_fn=<NllLossBackward0>)
tensor(3.9827, grad_fn=<NllLossBackward0>)
tensor(2.5667, grad_fn=<NllLossBackward0>)
tensor(2.8212, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(4.5072, grad_fn=<NllLossBackward0>)
tensor(3.1490, grad_fn=<NllLossBackward0>)
tensor(2.6046, grad_fn=<NllLossBackward0>)
tensor(5.2446, grad_fn=<NllLossBackward0>)
tensor(3.1532, grad_fn=<NllLossBackward0>)
tensor(2.8112, grad_fn=<NllLossBackward0>)
tensor(2.5098, grad_fn=<NllLossBackward0>)
tensor(3.2190, grad_fn=<NllLossBackward0>)
tensor(2.9148, grad_fn=<NllLossBackward0>)
tensor(2.9936, grad_fn=<NllLossBackward0>)
tensor(4.5058, grad_fn=<NllLossBackward0>)
tensor(3.0230, grad_fn=<NllLossBackward0>)
tensor(2.1667, grad_fn=<NllLossBackward0>)
tensor(2.3099, grad_fn=<NllLossBackward0>)
tensor(3.9566, grad_fn=<NllLossBackward0>)
tensor(2.9248, grad_fn=<NllLossBackward0>)
tensor(2.1793, grad_fn=<NllLossBackward0>)
tensor(3.5244, grad_fn=<NllLossBackward0>)
tensor(2.7534, grad_fn=<NllLossBackward0>)
tensor(2.7053, grad_fn=<NllLossBackward0>)
tensor(2.4088, grad_fn=<NllLossBackward0>)
tensor(3.2944, grad_fn=<NllLossBackward0>)
tensor(3.4127, grad_fn=<NllLossBackward0>)
tensor(3.75

tensor(2.3136, grad_fn=<NllLossBackward0>)
tensor(2.5610, grad_fn=<NllLossBackward0>)
tensor(1.9733, grad_fn=<NllLossBackward0>)
tensor(2.7087, grad_fn=<NllLossBackward0>)
tensor(3.6330, grad_fn=<NllLossBackward0>)
tensor(2.5695, grad_fn=<NllLossBackward0>)
tensor(3.7569, grad_fn=<NllLossBackward0>)
tensor(2.3274, grad_fn=<NllLossBackward0>)
tensor(2.6587, grad_fn=<NllLossBackward0>)
tensor(3.2219, grad_fn=<NllLossBackward0>)
tensor(3.2902, grad_fn=<NllLossBackward0>)
tensor(2.3224, grad_fn=<NllLossBackward0>)
tensor(3.3728, grad_fn=<NllLossBackward0>)
tensor(3.0751, grad_fn=<NllLossBackward0>)
tensor(2.6756, grad_fn=<NllLossBackward0>)
tensor(2.3443, grad_fn=<NllLossBackward0>)
tensor(3.5408, grad_fn=<NllLossBackward0>)
tensor(2.3476, grad_fn=<NllLossBackward0>)
tensor(4.4888, grad_fn=<NllLossBackward0>)
tensor(2.2899, grad_fn=<NllLossBackward0>)
tensor(2.8122, grad_fn=<NllLossBackward0>)
tensor(2.7044, grad_fn=<NllLossBackward0>)
tensor(2.6001, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(2.1051, grad_fn=<NllLossBackward0>)
tensor(2.7951, grad_fn=<NllLossBackward0>)
tensor(3.1357, grad_fn=<NllLossBackward0>)
tensor(2.8805, grad_fn=<NllLossBackward0>)
tensor(4.2988, grad_fn=<NllLossBackward0>)
tensor(1.9645, grad_fn=<NllLossBackward0>)
tensor(3.1212, grad_fn=<NllLossBackward0>)
tensor(3.2787, grad_fn=<NllLossBackward0>)
tensor(2.1543, grad_fn=<NllLossBackward0>)
tensor(2.8238, grad_fn=<NllLossBackward0>)
tensor(2.7184, grad_fn=<NllLossBackward0>)
tensor(4.0847, grad_fn=<NllLossBackward0>)
tensor(2.3438, grad_fn=<NllLossBackward0>)
tensor(3.6287, grad_fn=<NllLossBackward0>)
tensor(2.4215, grad_fn=<NllLossBackward0>)
tensor(4.1054, grad_fn=<NllLossBackward0>)
tensor(1.9772, grad_fn=<NllLossBackward0>)
tensor(2.6128, grad_fn=<NllLossBackward0>)
tensor(2.2468, grad_fn=<NllLossBackward0>)
tensor(3.1541, grad_fn=<NllLossBackward0>)
tensor(2.9865, grad_fn=<NllLossBackward0>)
tensor(2.6550, grad_fn=<NllLossBackward0>)
tensor(3.7976, grad_fn=<NllLossBackward0>)
tensor(2.41

tensor(2.8550, grad_fn=<NllLossBackward0>)
tensor(3.6750, grad_fn=<NllLossBackward0>)
tensor(2.3080, grad_fn=<NllLossBackward0>)
tensor(3.1179, grad_fn=<NllLossBackward0>)
tensor(2.7556, grad_fn=<NllLossBackward0>)
tensor(3.2048, grad_fn=<NllLossBackward0>)
tensor(3.0004, grad_fn=<NllLossBackward0>)
tensor(2.3832, grad_fn=<NllLossBackward0>)
tensor(4.2058, grad_fn=<NllLossBackward0>)
tensor(2.1864, grad_fn=<NllLossBackward0>)
tensor(3.7326, grad_fn=<NllLossBackward0>)
tensor(2.2738, grad_fn=<NllLossBackward0>)
tensor(2.1026, grad_fn=<NllLossBackward0>)
tensor(2.4660, grad_fn=<NllLossBackward0>)
tensor(4.2232, grad_fn=<NllLossBackward0>)
tensor(4.0550, grad_fn=<NllLossBackward0>)
tensor(3.4301, grad_fn=<NllLossBackward0>)
tensor(2.4051, grad_fn=<NllLossBackward0>)
tensor(2.9724, grad_fn=<NllLossBackward0>)
tensor(2.3404, grad_fn=<NllLossBackward0>)
tensor(1.7117, grad_fn=<NllLossBackward0>)
tensor(2.9501, grad_fn=<NllLossBackward0>)
tensor(2.6458, grad_fn=<NllLossBackward0>)
tensor(2.97

tensor(2.7839, grad_fn=<NllLossBackward0>)
tensor(2.6217, grad_fn=<NllLossBackward0>)
tensor(3.1240, grad_fn=<NllLossBackward0>)
tensor(3.0012, grad_fn=<NllLossBackward0>)
tensor(2.3107, grad_fn=<NllLossBackward0>)
tensor(2.9679, grad_fn=<NllLossBackward0>)
tensor(2.3928, grad_fn=<NllLossBackward0>)
tensor(1.7491, grad_fn=<NllLossBackward0>)
tensor(2.3964, grad_fn=<NllLossBackward0>)
tensor(3.0410, grad_fn=<NllLossBackward0>)
tensor(3.0838, grad_fn=<NllLossBackward0>)
tensor(1.9994, grad_fn=<NllLossBackward0>)
tensor(3.0441, grad_fn=<NllLossBackward0>)
tensor(3.8751, grad_fn=<NllLossBackward0>)
tensor(2.3609, grad_fn=<NllLossBackward0>)
tensor(2.8715, grad_fn=<NllLossBackward0>)
tensor(3.0522, grad_fn=<NllLossBackward0>)
tensor(2.8876, grad_fn=<NllLossBackward0>)
tensor(2.6862, grad_fn=<NllLossBackward0>)
tensor(3.0305, grad_fn=<NllLossBackward0>)
tensor(2.3948, grad_fn=<NllLossBackward0>)
tensor(3.4547, grad_fn=<NllLossBackward0>)
tensor(2.0197, grad_fn=<NllLossBackward0>)
tensor(3.66

tensor(2.6924, grad_fn=<NllLossBackward0>)
tensor(2.6618, grad_fn=<NllLossBackward0>)
tensor(2.8413, grad_fn=<NllLossBackward0>)
tensor(2.3037, grad_fn=<NllLossBackward0>)
tensor(3.0552, grad_fn=<NllLossBackward0>)
tensor(1.9917, grad_fn=<NllLossBackward0>)
tensor(3.3154, grad_fn=<NllLossBackward0>)
tensor(3.4407, grad_fn=<NllLossBackward0>)
tensor(2.5186, grad_fn=<NllLossBackward0>)
tensor(2.5923, grad_fn=<NllLossBackward0>)
tensor(2.6039, grad_fn=<NllLossBackward0>)
tensor(3.9008, grad_fn=<NllLossBackward0>)
tensor(2.0203, grad_fn=<NllLossBackward0>)
tensor(2.3695, grad_fn=<NllLossBackward0>)
tensor(2.6029, grad_fn=<NllLossBackward0>)
tensor(2.7241, grad_fn=<NllLossBackward0>)
tensor(2.9803, grad_fn=<NllLossBackward0>)
tensor(2.2774, grad_fn=<NllLossBackward0>)
tensor(2.5281, grad_fn=<NllLossBackward0>)
tensor(4.1115, grad_fn=<NllLossBackward0>)
tensor(3.5562, grad_fn=<NllLossBackward0>)
tensor(2.7171, grad_fn=<NllLossBackward0>)
tensor(3.0236, grad_fn=<NllLossBackward0>)
tensor(2.50

tensor(3.5670, grad_fn=<NllLossBackward0>)
tensor(2.4335, grad_fn=<NllLossBackward0>)
tensor(2.9659, grad_fn=<NllLossBackward0>)
tensor(3.1556, grad_fn=<NllLossBackward0>)
tensor(2.6097, grad_fn=<NllLossBackward0>)
tensor(4.4627, grad_fn=<NllLossBackward0>)
tensor(3.0357, grad_fn=<NllLossBackward0>)
tensor(2.6407, grad_fn=<NllLossBackward0>)
tensor(1.7734, grad_fn=<NllLossBackward0>)
tensor(2.9459, grad_fn=<NllLossBackward0>)
tensor(2.7115, grad_fn=<NllLossBackward0>)
tensor(2.4891, grad_fn=<NllLossBackward0>)
tensor(2.4803, grad_fn=<NllLossBackward0>)
tensor(1.9749, grad_fn=<NllLossBackward0>)
tensor(2.0095, grad_fn=<NllLossBackward0>)
tensor(3.4507, grad_fn=<NllLossBackward0>)
tensor(2.5876, grad_fn=<NllLossBackward0>)
tensor(2.4540, grad_fn=<NllLossBackward0>)
tensor(2.3870, grad_fn=<NllLossBackward0>)
tensor(3.5051, grad_fn=<NllLossBackward0>)
tensor(1.7693, grad_fn=<NllLossBackward0>)
tensor(3.6293, grad_fn=<NllLossBackward0>)
tensor(3.2388, grad_fn=<NllLossBackward0>)
tensor(3.06

tensor(3.2367, grad_fn=<NllLossBackward0>)
tensor(2.4102, grad_fn=<NllLossBackward0>)
tensor(2.3679, grad_fn=<NllLossBackward0>)
tensor(2.4596, grad_fn=<NllLossBackward0>)
tensor(2.0553, grad_fn=<NllLossBackward0>)
tensor(2.6347, grad_fn=<NllLossBackward0>)
tensor(2.9300, grad_fn=<NllLossBackward0>)
tensor(1.9618, grad_fn=<NllLossBackward0>)
tensor(2.7326, grad_fn=<NllLossBackward0>)
tensor(2.6533, grad_fn=<NllLossBackward0>)
tensor(2.3925, grad_fn=<NllLossBackward0>)
tensor(2.6007, grad_fn=<NllLossBackward0>)
tensor(1.9858, grad_fn=<NllLossBackward0>)
tensor(2.3669, grad_fn=<NllLossBackward0>)
tensor(2.3031, grad_fn=<NllLossBackward0>)
tensor(3.0579, grad_fn=<NllLossBackward0>)
tensor(3.0866, grad_fn=<NllLossBackward0>)
tensor(2.9978, grad_fn=<NllLossBackward0>)
tensor(4.3570, grad_fn=<NllLossBackward0>)
tensor(3.0051, grad_fn=<NllLossBackward0>)
tensor(2.5325, grad_fn=<NllLossBackward0>)
tensor(3.3358, grad_fn=<NllLossBackward0>)
tensor(3.0523, grad_fn=<NllLossBackward0>)
tensor(4.34

tensor(2.9729, grad_fn=<NllLossBackward0>)
tensor(3.9652, grad_fn=<NllLossBackward0>)
tensor(2.8491, grad_fn=<NllLossBackward0>)
tensor(2.7394, grad_fn=<NllLossBackward0>)
tensor(2.5966, grad_fn=<NllLossBackward0>)
tensor(2.5627, grad_fn=<NllLossBackward0>)
tensor(3.0802, grad_fn=<NllLossBackward0>)
tensor(4.4456, grad_fn=<NllLossBackward0>)
tensor(2.4321, grad_fn=<NllLossBackward0>)
tensor(3.5539, grad_fn=<NllLossBackward0>)
tensor(2.1986, grad_fn=<NllLossBackward0>)
tensor(2.4335, grad_fn=<NllLossBackward0>)
tensor(3.2957, grad_fn=<NllLossBackward0>)
tensor(3.7488, grad_fn=<NllLossBackward0>)
tensor(2.5468, grad_fn=<NllLossBackward0>)
tensor(3.5045, grad_fn=<NllLossBackward0>)
tensor(2.8939, grad_fn=<NllLossBackward0>)
tensor(3.6577, grad_fn=<NllLossBackward0>)
tensor(2.6461, grad_fn=<NllLossBackward0>)
tensor(3.1288, grad_fn=<NllLossBackward0>)
tensor(4.1078, grad_fn=<NllLossBackward0>)
tensor(3.1319, grad_fn=<NllLossBackward0>)
tensor(2.3756, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(2.4068, grad_fn=<NllLossBackward0>)
tensor(3.0561, grad_fn=<NllLossBackward0>)
tensor(4.4544, grad_fn=<NllLossBackward0>)
tensor(4.2773, grad_fn=<NllLossBackward0>)
tensor(3.4737, grad_fn=<NllLossBackward0>)
tensor(4.1824, grad_fn=<NllLossBackward0>)
tensor(2.8183, grad_fn=<NllLossBackward0>)
tensor(2.9601, grad_fn=<NllLossBackward0>)
tensor(3.0900, grad_fn=<NllLossBackward0>)
tensor(1.9769, grad_fn=<NllLossBackward0>)
tensor(1.7653, grad_fn=<NllLossBackward0>)
tensor(2.7198, grad_fn=<NllLossBackward0>)
tensor(2.6277, grad_fn=<NllLossBackward0>)
tensor(3.0912, grad_fn=<NllLossBackward0>)
tensor(4.2705, grad_fn=<NllLossBackward0>)
tensor(3.4742, grad_fn=<NllLossBackward0>)
tensor(1.7699, grad_fn=<NllLossBackward0>)
tensor(2.6133, grad_fn=<NllLossBackward0>)
tensor(3.5135, grad_fn=<NllLossBackward0>)
tensor(3.1072, grad_fn=<NllLossBackward0>)
tensor(2.3982, grad_fn=<NllLossBackward0>)
tensor(2.0052, grad_fn=<NllLossBackward0>)
tensor(2.6456, grad_fn=<NllLossBackward0>)
tensor(2.92

tensor(2.4193, grad_fn=<NllLossBackward0>)
tensor(2.0197, grad_fn=<NllLossBackward0>)
tensor(4.9948, grad_fn=<NllLossBackward0>)
tensor(3.0838, grad_fn=<NllLossBackward0>)
tensor(2.1369, grad_fn=<NllLossBackward0>)
tensor(3.6867, grad_fn=<NllLossBackward0>)
tensor(2.5959, grad_fn=<NllLossBackward0>)
tensor(2.7920, grad_fn=<NllLossBackward0>)
tensor(2.2483, grad_fn=<NllLossBackward0>)
tensor(2.3858, grad_fn=<NllLossBackward0>)
tensor(2.8079, grad_fn=<NllLossBackward0>)
tensor(2.4535, grad_fn=<NllLossBackward0>)
tensor(3.4394, grad_fn=<NllLossBackward0>)
tensor(2.4128, grad_fn=<NllLossBackward0>)
tensor(4.6709, grad_fn=<NllLossBackward0>)
tensor(4.4344, grad_fn=<NllLossBackward0>)
tensor(2.8274, grad_fn=<NllLossBackward0>)
tensor(3.7988, grad_fn=<NllLossBackward0>)
tensor(3.2605, grad_fn=<NllLossBackward0>)
tensor(2.2357, grad_fn=<NllLossBackward0>)
tensor(2.3909, grad_fn=<NllLossBackward0>)
tensor(2.4753, grad_fn=<NllLossBackward0>)
tensor(1.9800, grad_fn=<NllLossBackward0>)
tensor(2.87

tensor(2.4992, grad_fn=<NllLossBackward0>)
tensor(3.5550, grad_fn=<NllLossBackward0>)
tensor(2.9366, grad_fn=<NllLossBackward0>)
tensor(2.9652, grad_fn=<NllLossBackward0>)
tensor(3.4953, grad_fn=<NllLossBackward0>)
tensor(1.9904, grad_fn=<NllLossBackward0>)
tensor(2.8788, grad_fn=<NllLossBackward0>)
tensor(3.6054, grad_fn=<NllLossBackward0>)
tensor(2.5490, grad_fn=<NllLossBackward0>)
tensor(2.7169, grad_fn=<NllLossBackward0>)
tensor(2.6008, grad_fn=<NllLossBackward0>)
tensor(3.0137, grad_fn=<NllLossBackward0>)
tensor(2.3139, grad_fn=<NllLossBackward0>)
tensor(2.4044, grad_fn=<NllLossBackward0>)
tensor(3.1906, grad_fn=<NllLossBackward0>)
tensor(2.3412, grad_fn=<NllLossBackward0>)
tensor(3.6005, grad_fn=<NllLossBackward0>)
tensor(1.7389, grad_fn=<NllLossBackward0>)
tensor(2.8601, grad_fn=<NllLossBackward0>)
tensor(1.9810, grad_fn=<NllLossBackward0>)
tensor(3.2058, grad_fn=<NllLossBackward0>)
tensor(3.5740, grad_fn=<NllLossBackward0>)
tensor(2.3188, grad_fn=<NllLossBackward0>)
tensor(2.91

tensor(2.1068, grad_fn=<NllLossBackward0>)
tensor(2.3389, grad_fn=<NllLossBackward0>)
tensor(1.7011, grad_fn=<NllLossBackward0>)
tensor(2.2829, grad_fn=<NllLossBackward0>)
tensor(2.5794, grad_fn=<NllLossBackward0>)
tensor(3.4635, grad_fn=<NllLossBackward0>)
tensor(2.6182, grad_fn=<NllLossBackward0>)
tensor(1.6923, grad_fn=<NllLossBackward0>)
tensor(2.9695, grad_fn=<NllLossBackward0>)
tensor(2.5808, grad_fn=<NllLossBackward0>)
tensor(2.3395, grad_fn=<NllLossBackward0>)
tensor(2.6162, grad_fn=<NllLossBackward0>)
tensor(2.8235, grad_fn=<NllLossBackward0>)
tensor(3.4908, grad_fn=<NllLossBackward0>)
tensor(4.0876, grad_fn=<NllLossBackward0>)
tensor(2.1949, grad_fn=<NllLossBackward0>)
tensor(2.6432, grad_fn=<NllLossBackward0>)
tensor(2.6364, grad_fn=<NllLossBackward0>)
tensor(4.3422, grad_fn=<NllLossBackward0>)
tensor(3.7557, grad_fn=<NllLossBackward0>)
tensor(2.9445, grad_fn=<NllLossBackward0>)
tensor(4.2244, grad_fn=<NllLossBackward0>)
tensor(2.0228, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(3.0452, grad_fn=<NllLossBackward0>)
tensor(3.1654, grad_fn=<NllLossBackward0>)
tensor(4.0203, grad_fn=<NllLossBackward0>)
tensor(3.0621, grad_fn=<NllLossBackward0>)
tensor(3.1105, grad_fn=<NllLossBackward0>)
tensor(2.3013, grad_fn=<NllLossBackward0>)
tensor(3.1029, grad_fn=<NllLossBackward0>)
tensor(2.5524, grad_fn=<NllLossBackward0>)
tensor(2.9600, grad_fn=<NllLossBackward0>)
tensor(3.4369, grad_fn=<NllLossBackward0>)
tensor(2.3196, grad_fn=<NllLossBackward0>)
tensor(2.6197, grad_fn=<NllLossBackward0>)
tensor(3.3854, grad_fn=<NllLossBackward0>)
tensor(3.9745, grad_fn=<NllLossBackward0>)
tensor(3.6636, grad_fn=<NllLossBackward0>)
tensor(3.7845, grad_fn=<NllLossBackward0>)
tensor(2.4188, grad_fn=<NllLossBackward0>)
tensor(2.3002, grad_fn=<NllLossBackward0>)
tensor(3.1891, grad_fn=<NllLossBackward0>)
tensor(2.3962, grad_fn=<NllLossBackward0>)
tensor(2.9632, grad_fn=<NllLossBackward0>)
tensor(3.9713, grad_fn=<NllLossBackward0>)
tensor(3.0862, grad_fn=<NllLossBackward0>)
tensor(2.04

tensor(3.0715, grad_fn=<NllLossBackward0>)
tensor(3.7485, grad_fn=<NllLossBackward0>)
tensor(3.4185, grad_fn=<NllLossBackward0>)
tensor(3.0519, grad_fn=<NllLossBackward0>)
tensor(2.6571, grad_fn=<NllLossBackward0>)
tensor(1.7230, grad_fn=<NllLossBackward0>)
tensor(2.9833, grad_fn=<NllLossBackward0>)
tensor(2.7439, grad_fn=<NllLossBackward0>)
tensor(3.0483, grad_fn=<NllLossBackward0>)
tensor(1.9672, grad_fn=<NllLossBackward0>)
tensor(4.6248, grad_fn=<NllLossBackward0>)
tensor(3.1863, grad_fn=<NllLossBackward0>)
tensor(2.5677, grad_fn=<NllLossBackward0>)
tensor(2.6127, grad_fn=<NllLossBackward0>)
tensor(3.1141, grad_fn=<NllLossBackward0>)
tensor(3.5119, grad_fn=<NllLossBackward0>)
tensor(2.5274, grad_fn=<NllLossBackward0>)
tensor(3.2804, grad_fn=<NllLossBackward0>)
tensor(2.7117, grad_fn=<NllLossBackward0>)
tensor(2.3289, grad_fn=<NllLossBackward0>)
tensor(2.9071, grad_fn=<NllLossBackward0>)
tensor(2.8223, grad_fn=<NllLossBackward0>)
tensor(2.5393, grad_fn=<NllLossBackward0>)
tensor(1.98

tensor(2.3487, grad_fn=<NllLossBackward0>)
tensor(3.1256, grad_fn=<NllLossBackward0>)
tensor(3.0022, grad_fn=<NllLossBackward0>)
tensor(2.2384, grad_fn=<NllLossBackward0>)
tensor(2.0690, grad_fn=<NllLossBackward0>)
tensor(2.4214, grad_fn=<NllLossBackward0>)
tensor(3.5669, grad_fn=<NllLossBackward0>)
tensor(2.3476, grad_fn=<NllLossBackward0>)
tensor(3.1274, grad_fn=<NllLossBackward0>)
tensor(2.3443, grad_fn=<NllLossBackward0>)
tensor(3.4287, grad_fn=<NllLossBackward0>)
tensor(2.8608, grad_fn=<NllLossBackward0>)
tensor(2.4850, grad_fn=<NllLossBackward0>)
tensor(2.9361, grad_fn=<NllLossBackward0>)
tensor(2.7061, grad_fn=<NllLossBackward0>)
tensor(3.4321, grad_fn=<NllLossBackward0>)
tensor(3.7135, grad_fn=<NllLossBackward0>)
tensor(2.2974, grad_fn=<NllLossBackward0>)
tensor(2.3395, grad_fn=<NllLossBackward0>)
tensor(2.5534, grad_fn=<NllLossBackward0>)
tensor(2.0557, grad_fn=<NllLossBackward0>)
tensor(3.0418, grad_fn=<NllLossBackward0>)
tensor(3.9036, grad_fn=<NllLossBackward0>)
tensor(2.65

tensor(2.4309, grad_fn=<NllLossBackward0>)
tensor(2.5700, grad_fn=<NllLossBackward0>)
tensor(4.0927, grad_fn=<NllLossBackward0>)
tensor(2.4011, grad_fn=<NllLossBackward0>)
tensor(3.5496, grad_fn=<NllLossBackward0>)
tensor(1.9714, grad_fn=<NllLossBackward0>)
tensor(2.3070, grad_fn=<NllLossBackward0>)
tensor(3.7262, grad_fn=<NllLossBackward0>)
tensor(2.3546, grad_fn=<NllLossBackward0>)
tensor(1.9846, grad_fn=<NllLossBackward0>)
tensor(3.0360, grad_fn=<NllLossBackward0>)
tensor(1.9890, grad_fn=<NllLossBackward0>)
tensor(2.8280, grad_fn=<NllLossBackward0>)
tensor(2.5789, grad_fn=<NllLossBackward0>)
tensor(4.1646, grad_fn=<NllLossBackward0>)
tensor(1.9893, grad_fn=<NllLossBackward0>)
tensor(2.9453, grad_fn=<NllLossBackward0>)
tensor(3.4020, grad_fn=<NllLossBackward0>)
tensor(3.0887, grad_fn=<NllLossBackward0>)
tensor(3.0987, grad_fn=<NllLossBackward0>)
tensor(2.3999, grad_fn=<NllLossBackward0>)
tensor(2.4761, grad_fn=<NllLossBackward0>)
tensor(2.0760, grad_fn=<NllLossBackward0>)
tensor(2.61

tensor(2.9160, grad_fn=<NllLossBackward0>)
tensor(2.8729, grad_fn=<NllLossBackward0>)
tensor(2.3813, grad_fn=<NllLossBackward0>)
tensor(3.9866, grad_fn=<NllLossBackward0>)
tensor(2.0494, grad_fn=<NllLossBackward0>)
tensor(2.9234, grad_fn=<NllLossBackward0>)
tensor(3.0325, grad_fn=<NllLossBackward0>)
tensor(2.6728, grad_fn=<NllLossBackward0>)
tensor(2.4096, grad_fn=<NllLossBackward0>)
tensor(2.1804, grad_fn=<NllLossBackward0>)
tensor(3.3836, grad_fn=<NllLossBackward0>)
tensor(4.0332, grad_fn=<NllLossBackward0>)
tensor(3.0610, grad_fn=<NllLossBackward0>)
tensor(3.1296, grad_fn=<NllLossBackward0>)
tensor(1.9739, grad_fn=<NllLossBackward0>)
tensor(2.9409, grad_fn=<NllLossBackward0>)
tensor(2.0877, grad_fn=<NllLossBackward0>)
tensor(4.2828, grad_fn=<NllLossBackward0>)
tensor(2.0045, grad_fn=<NllLossBackward0>)
tensor(2.8793, grad_fn=<NllLossBackward0>)
tensor(2.5215, grad_fn=<NllLossBackward0>)
tensor(3.0333, grad_fn=<NllLossBackward0>)
tensor(3.6494, grad_fn=<NllLossBackward0>)
tensor(1.72

tensor(3.8454, grad_fn=<NllLossBackward0>)
tensor(3.1447, grad_fn=<NllLossBackward0>)
tensor(2.2250, grad_fn=<NllLossBackward0>)
tensor(2.7556, grad_fn=<NllLossBackward0>)
tensor(3.5899, grad_fn=<NllLossBackward0>)
tensor(2.8046, grad_fn=<NllLossBackward0>)
tensor(3.2138, grad_fn=<NllLossBackward0>)
tensor(3.5632, grad_fn=<NllLossBackward0>)
tensor(3.3529, grad_fn=<NllLossBackward0>)
tensor(3.9865, grad_fn=<NllLossBackward0>)
tensor(4.0066, grad_fn=<NllLossBackward0>)
tensor(2.9711, grad_fn=<NllLossBackward0>)
tensor(2.1769, grad_fn=<NllLossBackward0>)
tensor(2.6676, grad_fn=<NllLossBackward0>)
tensor(2.0023, grad_fn=<NllLossBackward0>)
tensor(3.5191, grad_fn=<NllLossBackward0>)
tensor(2.9649, grad_fn=<NllLossBackward0>)
tensor(3.0406, grad_fn=<NllLossBackward0>)
tensor(2.5531, grad_fn=<NllLossBackward0>)
tensor(1.7163, grad_fn=<NllLossBackward0>)
tensor(2.8114, grad_fn=<NllLossBackward0>)
tensor(1.9580, grad_fn=<NllLossBackward0>)
tensor(2.1970, grad_fn=<NllLossBackward0>)
tensor(3.42

tensor(1.9799, grad_fn=<NllLossBackward0>)
tensor(1.9897, grad_fn=<NllLossBackward0>)
tensor(2.3941, grad_fn=<NllLossBackward0>)
tensor(3.3539, grad_fn=<NllLossBackward0>)
tensor(2.6476, grad_fn=<NllLossBackward0>)
tensor(2.9010, grad_fn=<NllLossBackward0>)
tensor(2.4040, grad_fn=<NllLossBackward0>)
tensor(1.9952, grad_fn=<NllLossBackward0>)
tensor(2.3355, grad_fn=<NllLossBackward0>)
tensor(2.5363, grad_fn=<NllLossBackward0>)
tensor(3.3178, grad_fn=<NllLossBackward0>)
tensor(2.3528, grad_fn=<NllLossBackward0>)
tensor(3.8850, grad_fn=<NllLossBackward0>)
tensor(2.8026, grad_fn=<NllLossBackward0>)
tensor(2.1377, grad_fn=<NllLossBackward0>)
tensor(2.7728, grad_fn=<NllLossBackward0>)
tensor(3.1082, grad_fn=<NllLossBackward0>)
tensor(2.5207, grad_fn=<NllLossBackward0>)
tensor(3.0608, grad_fn=<NllLossBackward0>)
tensor(3.4847, grad_fn=<NllLossBackward0>)
tensor(2.3781, grad_fn=<NllLossBackward0>)
tensor(2.8184, grad_fn=<NllLossBackward0>)
tensor(2.4163, grad_fn=<NllLossBackward0>)
tensor(2.31

tensor(2.4568, grad_fn=<NllLossBackward0>)
tensor(3.5552, grad_fn=<NllLossBackward0>)
tensor(2.6590, grad_fn=<NllLossBackward0>)
tensor(2.7030, grad_fn=<NllLossBackward0>)
tensor(3.2875, grad_fn=<NllLossBackward0>)
tensor(3.7221, grad_fn=<NllLossBackward0>)
tensor(3.7095, grad_fn=<NllLossBackward0>)
tensor(2.8546, grad_fn=<NllLossBackward0>)
tensor(2.4186, grad_fn=<NllLossBackward0>)
tensor(2.5212, grad_fn=<NllLossBackward0>)
tensor(2.7903, grad_fn=<NllLossBackward0>)
tensor(3.4094, grad_fn=<NllLossBackward0>)
tensor(3.6424, grad_fn=<NllLossBackward0>)
tensor(3.6528, grad_fn=<NllLossBackward0>)
tensor(3.1908, grad_fn=<NllLossBackward0>)
tensor(2.3447, grad_fn=<NllLossBackward0>)
tensor(2.8339, grad_fn=<NllLossBackward0>)
tensor(2.8109, grad_fn=<NllLossBackward0>)
tensor(3.6067, grad_fn=<NllLossBackward0>)
tensor(3.7979, grad_fn=<NllLossBackward0>)
tensor(2.9021, grad_fn=<NllLossBackward0>)
tensor(3.0869, grad_fn=<NllLossBackward0>)
tensor(2.4425, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(2.7203, grad_fn=<NllLossBackward0>)
tensor(2.3668, grad_fn=<NllLossBackward0>)
tensor(2.3116, grad_fn=<NllLossBackward0>)
tensor(3.1436, grad_fn=<NllLossBackward0>)
tensor(2.8838, grad_fn=<NllLossBackward0>)
tensor(1.7410, grad_fn=<NllLossBackward0>)
tensor(2.6740, grad_fn=<NllLossBackward0>)
tensor(2.3435, grad_fn=<NllLossBackward0>)
tensor(1.7494, grad_fn=<NllLossBackward0>)
tensor(3.4291, grad_fn=<NllLossBackward0>)
tensor(2.5647, grad_fn=<NllLossBackward0>)
tensor(2.6319, grad_fn=<NllLossBackward0>)
tensor(2.7018, grad_fn=<NllLossBackward0>)
tensor(2.3146, grad_fn=<NllLossBackward0>)
tensor(2.4711, grad_fn=<NllLossBackward0>)
tensor(2.9092, grad_fn=<NllLossBackward0>)
tensor(1.9959, grad_fn=<NllLossBackward0>)
tensor(5.0085, grad_fn=<NllLossBackward0>)
tensor(2.7897, grad_fn=<NllLossBackward0>)
tensor(2.2624, grad_fn=<NllLossBackward0>)
tensor(1.7479, grad_fn=<NllLossBackward0>)
tensor(3.3900, grad_fn=<NllLossBackward0>)
tensor(3.1805, grad_fn=<NllLossBackward0>)
tensor(2.38

tensor(3.6908, grad_fn=<NllLossBackward0>)
tensor(2.0304, grad_fn=<NllLossBackward0>)
tensor(2.3103, grad_fn=<NllLossBackward0>)
tensor(2.1346, grad_fn=<NllLossBackward0>)
tensor(3.8421, grad_fn=<NllLossBackward0>)
tensor(3.1544, grad_fn=<NllLossBackward0>)
tensor(2.3935, grad_fn=<NllLossBackward0>)
tensor(3.3152, grad_fn=<NllLossBackward0>)
tensor(2.5059, grad_fn=<NllLossBackward0>)
tensor(3.8822, grad_fn=<NllLossBackward0>)
tensor(3.8716, grad_fn=<NllLossBackward0>)
tensor(1.7328, grad_fn=<NllLossBackward0>)
tensor(3.9257, grad_fn=<NllLossBackward0>)
tensor(4.2827, grad_fn=<NllLossBackward0>)
tensor(2.5600, grad_fn=<NllLossBackward0>)
tensor(3.6347, grad_fn=<NllLossBackward0>)
tensor(2.7914, grad_fn=<NllLossBackward0>)
tensor(2.8246, grad_fn=<NllLossBackward0>)
tensor(2.9784, grad_fn=<NllLossBackward0>)
tensor(1.9877, grad_fn=<NllLossBackward0>)
tensor(2.8641, grad_fn=<NllLossBackward0>)
tensor(2.2667, grad_fn=<NllLossBackward0>)
tensor(3.4017, grad_fn=<NllLossBackward0>)
tensor(2.65

tensor(2.6327, grad_fn=<NllLossBackward0>)
tensor(3.9794, grad_fn=<NllLossBackward0>)
tensor(2.5769, grad_fn=<NllLossBackward0>)
tensor(2.6354, grad_fn=<NllLossBackward0>)
tensor(2.0461, grad_fn=<NllLossBackward0>)
tensor(2.4358, grad_fn=<NllLossBackward0>)
tensor(3.9432, grad_fn=<NllLossBackward0>)
tensor(1.7307, grad_fn=<NllLossBackward0>)
tensor(2.2824, grad_fn=<NllLossBackward0>)
tensor(2.5102, grad_fn=<NllLossBackward0>)
tensor(2.9951, grad_fn=<NllLossBackward0>)
tensor(2.3745, grad_fn=<NllLossBackward0>)
tensor(2.5206, grad_fn=<NllLossBackward0>)
tensor(3.4246, grad_fn=<NllLossBackward0>)
tensor(4.0805, grad_fn=<NllLossBackward0>)
tensor(3.6021, grad_fn=<NllLossBackward0>)
tensor(2.8024, grad_fn=<NllLossBackward0>)
tensor(2.1414, grad_fn=<NllLossBackward0>)
tensor(2.3921, grad_fn=<NllLossBackward0>)
tensor(2.8821, grad_fn=<NllLossBackward0>)
tensor(2.2945, grad_fn=<NllLossBackward0>)
tensor(3.2952, grad_fn=<NllLossBackward0>)
tensor(1.9770, grad_fn=<NllLossBackward0>)
tensor(4.17

tensor(2.3903, grad_fn=<NllLossBackward0>)
tensor(2.9356, grad_fn=<NllLossBackward0>)
tensor(2.3244, grad_fn=<NllLossBackward0>)
tensor(2.6189, grad_fn=<NllLossBackward0>)
tensor(2.4827, grad_fn=<NllLossBackward0>)
tensor(1.7418, grad_fn=<NllLossBackward0>)
tensor(3.2810, grad_fn=<NllLossBackward0>)
tensor(3.5029, grad_fn=<NllLossBackward0>)
tensor(2.3477, grad_fn=<NllLossBackward0>)
tensor(2.4338, grad_fn=<NllLossBackward0>)
tensor(2.4053, grad_fn=<NllLossBackward0>)
tensor(2.1572, grad_fn=<NllLossBackward0>)
tensor(3.2346, grad_fn=<NllLossBackward0>)
tensor(3.6094, grad_fn=<NllLossBackward0>)
tensor(4.1319, grad_fn=<NllLossBackward0>)
tensor(3.0683, grad_fn=<NllLossBackward0>)
tensor(2.3206, grad_fn=<NllLossBackward0>)
tensor(2.5075, grad_fn=<NllLossBackward0>)
tensor(4.1446, grad_fn=<NllLossBackward0>)
tensor(2.3419, grad_fn=<NllLossBackward0>)
tensor(2.9861, grad_fn=<NllLossBackward0>)
tensor(2.5003, grad_fn=<NllLossBackward0>)
tensor(2.6972, grad_fn=<NllLossBackward0>)
tensor(2.58

tensor(2.3927, grad_fn=<NllLossBackward0>)
tensor(2.9163, grad_fn=<NllLossBackward0>)
tensor(2.3065, grad_fn=<NllLossBackward0>)
tensor(3.1232, grad_fn=<NllLossBackward0>)
tensor(2.4667, grad_fn=<NllLossBackward0>)
tensor(2.4689, grad_fn=<NllLossBackward0>)
tensor(2.9056, grad_fn=<NllLossBackward0>)
tensor(3.4842, grad_fn=<NllLossBackward0>)
tensor(2.4282, grad_fn=<NllLossBackward0>)
tensor(2.1337, grad_fn=<NllLossBackward0>)
tensor(2.5815, grad_fn=<NllLossBackward0>)
tensor(2.3965, grad_fn=<NllLossBackward0>)
tensor(2.8213, grad_fn=<NllLossBackward0>)
tensor(2.7376, grad_fn=<NllLossBackward0>)
tensor(2.3639, grad_fn=<NllLossBackward0>)
tensor(1.7345, grad_fn=<NllLossBackward0>)
tensor(2.2279, grad_fn=<NllLossBackward0>)
tensor(2.3946, grad_fn=<NllLossBackward0>)
tensor(3.5017, grad_fn=<NllLossBackward0>)
tensor(2.3933, grad_fn=<NllLossBackward0>)
tensor(2.3166, grad_fn=<NllLossBackward0>)
tensor(2.3468, grad_fn=<NllLossBackward0>)
tensor(1.9403, grad_fn=<NllLossBackward0>)
tensor(3.62

tensor(3.2366, grad_fn=<NllLossBackward0>)
tensor(3.4832, grad_fn=<NllLossBackward0>)
tensor(3.0873, grad_fn=<NllLossBackward0>)
tensor(2.6300, grad_fn=<NllLossBackward0>)
tensor(2.5719, grad_fn=<NllLossBackward0>)
tensor(3.0734, grad_fn=<NllLossBackward0>)
tensor(2.9898, grad_fn=<NllLossBackward0>)
tensor(3.1657, grad_fn=<NllLossBackward0>)
tensor(2.8253, grad_fn=<NllLossBackward0>)
tensor(3.3712, grad_fn=<NllLossBackward0>)
tensor(2.9791, grad_fn=<NllLossBackward0>)
tensor(2.8698, grad_fn=<NllLossBackward0>)
tensor(2.3396, grad_fn=<NllLossBackward0>)
tensor(1.7453, grad_fn=<NllLossBackward0>)
tensor(2.3616, grad_fn=<NllLossBackward0>)
tensor(2.6069, grad_fn=<NllLossBackward0>)
tensor(2.2888, grad_fn=<NllLossBackward0>)
tensor(3.0711, grad_fn=<NllLossBackward0>)
tensor(2.9253, grad_fn=<NllLossBackward0>)
tensor(1.7388, grad_fn=<NllLossBackward0>)
tensor(3.8434, grad_fn=<NllLossBackward0>)
tensor(2.3276, grad_fn=<NllLossBackward0>)
tensor(2.4011, grad_fn=<NllLossBackward0>)
tensor(2.90

tensor(3.3893, grad_fn=<NllLossBackward0>)
tensor(3.3465, grad_fn=<NllLossBackward0>)
tensor(3.0471, grad_fn=<NllLossBackward0>)
tensor(1.9624, grad_fn=<NllLossBackward0>)
tensor(3.5189, grad_fn=<NllLossBackward0>)
tensor(3.5220, grad_fn=<NllLossBackward0>)
tensor(2.3786, grad_fn=<NllLossBackward0>)
tensor(2.9874, grad_fn=<NllLossBackward0>)
tensor(2.3197, grad_fn=<NllLossBackward0>)
tensor(2.8725, grad_fn=<NllLossBackward0>)
tensor(1.9773, grad_fn=<NllLossBackward0>)
tensor(2.9774, grad_fn=<NllLossBackward0>)
tensor(2.3274, grad_fn=<NllLossBackward0>)
tensor(2.8520, grad_fn=<NllLossBackward0>)
tensor(3.2452, grad_fn=<NllLossBackward0>)
tensor(4.9914, grad_fn=<NllLossBackward0>)
tensor(3.0541, grad_fn=<NllLossBackward0>)
tensor(3.4628, grad_fn=<NllLossBackward0>)
tensor(2.4313, grad_fn=<NllLossBackward0>)
tensor(2.0087, grad_fn=<NllLossBackward0>)
tensor(2.9582, grad_fn=<NllLossBackward0>)
tensor(4.3893, grad_fn=<NllLossBackward0>)
tensor(3.4806, grad_fn=<NllLossBackward0>)
tensor(3.62

tensor(2.8346, grad_fn=<NllLossBackward0>)
tensor(2.2545, grad_fn=<NllLossBackward0>)
tensor(2.3428, grad_fn=<NllLossBackward0>)
tensor(1.9875, grad_fn=<NllLossBackward0>)
tensor(3.3856, grad_fn=<NllLossBackward0>)
tensor(2.4265, grad_fn=<NllLossBackward0>)
tensor(2.3225, grad_fn=<NllLossBackward0>)
tensor(2.3543, grad_fn=<NllLossBackward0>)
tensor(2.4430, grad_fn=<NllLossBackward0>)
tensor(2.2967, grad_fn=<NllLossBackward0>)
tensor(2.5346, grad_fn=<NllLossBackward0>)
tensor(2.8869, grad_fn=<NllLossBackward0>)
tensor(2.5181, grad_fn=<NllLossBackward0>)
tensor(2.7362, grad_fn=<NllLossBackward0>)
tensor(3.1123, grad_fn=<NllLossBackward0>)
tensor(3.0240, grad_fn=<NllLossBackward0>)
tensor(2.6076, grad_fn=<NllLossBackward0>)
tensor(3.0300, grad_fn=<NllLossBackward0>)
tensor(4.5574, grad_fn=<NllLossBackward0>)
tensor(2.4162, grad_fn=<NllLossBackward0>)
tensor(4.4784, grad_fn=<NllLossBackward0>)
tensor(2.9936, grad_fn=<NllLossBackward0>)
tensor(2.7869, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(3.4025, grad_fn=<NllLossBackward0>)
tensor(3.2402, grad_fn=<NllLossBackward0>)
tensor(2.2028, grad_fn=<NllLossBackward0>)
tensor(3.0431, grad_fn=<NllLossBackward0>)
tensor(3.1592, grad_fn=<NllLossBackward0>)
tensor(5.5177, grad_fn=<NllLossBackward0>)
tensor(3.3046, grad_fn=<NllLossBackward0>)
tensor(2.1807, grad_fn=<NllLossBackward0>)
tensor(1.9563, grad_fn=<NllLossBackward0>)
tensor(2.0643, grad_fn=<NllLossBackward0>)
tensor(3.5061, grad_fn=<NllLossBackward0>)
tensor(2.0231, grad_fn=<NllLossBackward0>)
tensor(1.9991, grad_fn=<NllLossBackward0>)
tensor(2.4056, grad_fn=<NllLossBackward0>)
tensor(2.3121, grad_fn=<NllLossBackward0>)
tensor(2.5866, grad_fn=<NllLossBackward0>)
tensor(3.0550, grad_fn=<NllLossBackward0>)
tensor(1.9713, grad_fn=<NllLossBackward0>)
tensor(3.5011, grad_fn=<NllLossBackward0>)
tensor(2.7260, grad_fn=<NllLossBackward0>)
tensor(2.7670, grad_fn=<NllLossBackward0>)
tensor(2.8334, grad_fn=<NllLossBackward0>)
tensor(2.9859, grad_fn=<NllLossBackward0>)
tensor(2.99

tensor(2.3956, grad_fn=<NllLossBackward0>)
tensor(2.3374, grad_fn=<NllLossBackward0>)
tensor(2.7964, grad_fn=<NllLossBackward0>)
tensor(3.0288, grad_fn=<NllLossBackward0>)
tensor(2.7008, grad_fn=<NllLossBackward0>)
tensor(3.6157, grad_fn=<NllLossBackward0>)
tensor(3.4902, grad_fn=<NllLossBackward0>)
tensor(1.9583, grad_fn=<NllLossBackward0>)
tensor(3.6275, grad_fn=<NllLossBackward0>)
tensor(3.5719, grad_fn=<NllLossBackward0>)
tensor(3.6006, grad_fn=<NllLossBackward0>)
tensor(3.2087, grad_fn=<NllLossBackward0>)
tensor(3.4620, grad_fn=<NllLossBackward0>)
tensor(3.2367, grad_fn=<NllLossBackward0>)
tensor(2.4033, grad_fn=<NllLossBackward0>)
tensor(3.1674, grad_fn=<NllLossBackward0>)
tensor(2.8531, grad_fn=<NllLossBackward0>)
tensor(2.9124, grad_fn=<NllLossBackward0>)
tensor(2.9252, grad_fn=<NllLossBackward0>)
tensor(3.3048, grad_fn=<NllLossBackward0>)
tensor(1.7339, grad_fn=<NllLossBackward0>)
tensor(2.5353, grad_fn=<NllLossBackward0>)
tensor(2.9180, grad_fn=<NllLossBackward0>)
tensor(3.53

tensor(2.6143, grad_fn=<NllLossBackward0>)
tensor(2.7434, grad_fn=<NllLossBackward0>)
tensor(2.6274, grad_fn=<NllLossBackward0>)
tensor(3.6409, grad_fn=<NllLossBackward0>)
tensor(3.0402, grad_fn=<NllLossBackward0>)
tensor(2.0131, grad_fn=<NllLossBackward0>)
tensor(2.4357, grad_fn=<NllLossBackward0>)
tensor(3.3513, grad_fn=<NllLossBackward0>)
tensor(2.8519, grad_fn=<NllLossBackward0>)
tensor(3.0300, grad_fn=<NllLossBackward0>)
tensor(3.9403, grad_fn=<NllLossBackward0>)
tensor(2.9129, grad_fn=<NllLossBackward0>)
tensor(2.3240, grad_fn=<NllLossBackward0>)
tensor(2.8948, grad_fn=<NllLossBackward0>)
tensor(1.9840, grad_fn=<NllLossBackward0>)
tensor(2.8950, grad_fn=<NllLossBackward0>)
tensor(2.2890, grad_fn=<NllLossBackward0>)
tensor(4.8166, grad_fn=<NllLossBackward0>)
tensor(2.3827, grad_fn=<NllLossBackward0>)
tensor(2.6897, grad_fn=<NllLossBackward0>)
tensor(2.8070, grad_fn=<NllLossBackward0>)
tensor(3.4028, grad_fn=<NllLossBackward0>)
tensor(3.0754, grad_fn=<NllLossBackward0>)
tensor(2.28

tensor(2.8767, grad_fn=<NllLossBackward0>)
tensor(2.6731, grad_fn=<NllLossBackward0>)
tensor(2.9046, grad_fn=<NllLossBackward0>)
tensor(2.3568, grad_fn=<NllLossBackward0>)
tensor(2.4659, grad_fn=<NllLossBackward0>)
tensor(2.5025, grad_fn=<NllLossBackward0>)
tensor(2.7971, grad_fn=<NllLossBackward0>)
tensor(2.6688, grad_fn=<NllLossBackward0>)
tensor(2.5086, grad_fn=<NllLossBackward0>)
tensor(3.0127, grad_fn=<NllLossBackward0>)
tensor(2.0136, grad_fn=<NllLossBackward0>)
tensor(2.9163, grad_fn=<NllLossBackward0>)
tensor(2.5480, grad_fn=<NllLossBackward0>)
tensor(2.9860, grad_fn=<NllLossBackward0>)
tensor(1.7344, grad_fn=<NllLossBackward0>)
tensor(4.3353, grad_fn=<NllLossBackward0>)
tensor(3.3346, grad_fn=<NllLossBackward0>)
tensor(2.7548, grad_fn=<NllLossBackward0>)
tensor(1.9953, grad_fn=<NllLossBackward0>)
tensor(3.0715, grad_fn=<NllLossBackward0>)
tensor(1.9977, grad_fn=<NllLossBackward0>)
tensor(2.4838, grad_fn=<NllLossBackward0>)
tensor(3.1549, grad_fn=<NllLossBackward0>)
tensor(3.43

tensor(2.3153, grad_fn=<NllLossBackward0>)
tensor(2.3580, grad_fn=<NllLossBackward0>)
tensor(2.5332, grad_fn=<NllLossBackward0>)
tensor(2.9739, grad_fn=<NllLossBackward0>)
tensor(3.6359, grad_fn=<NllLossBackward0>)
tensor(2.2741, grad_fn=<NllLossBackward0>)
tensor(3.3149, grad_fn=<NllLossBackward0>)
tensor(3.9793, grad_fn=<NllLossBackward0>)
tensor(2.5229, grad_fn=<NllLossBackward0>)
tensor(1.9778, grad_fn=<NllLossBackward0>)
tensor(2.2960, grad_fn=<NllLossBackward0>)
tensor(3.1201, grad_fn=<NllLossBackward0>)
tensor(3.4492, grad_fn=<NllLossBackward0>)
tensor(2.9777, grad_fn=<NllLossBackward0>)
tensor(2.9383, grad_fn=<NllLossBackward0>)
tensor(2.3811, grad_fn=<NllLossBackward0>)
tensor(2.9793, grad_fn=<NllLossBackward0>)
tensor(4.1535, grad_fn=<NllLossBackward0>)
tensor(2.8121, grad_fn=<NllLossBackward0>)
tensor(2.9848, grad_fn=<NllLossBackward0>)
tensor(3.2678, grad_fn=<NllLossBackward0>)
tensor(2.3420, grad_fn=<NllLossBackward0>)
tensor(2.5702, grad_fn=<NllLossBackward0>)
tensor(1.96

tensor(4.3271, grad_fn=<NllLossBackward0>)
tensor(1.9329, grad_fn=<NllLossBackward0>)
tensor(3.6242, grad_fn=<NllLossBackward0>)
tensor(2.3738, grad_fn=<NllLossBackward0>)
tensor(2.2726, grad_fn=<NllLossBackward0>)
tensor(1.7201, grad_fn=<NllLossBackward0>)
tensor(2.8226, grad_fn=<NllLossBackward0>)
tensor(2.5509, grad_fn=<NllLossBackward0>)
tensor(2.9745, grad_fn=<NllLossBackward0>)
tensor(2.3187, grad_fn=<NllLossBackward0>)
tensor(2.4001, grad_fn=<NllLossBackward0>)
tensor(3.1885, grad_fn=<NllLossBackward0>)
tensor(3.0508, grad_fn=<NllLossBackward0>)
tensor(4.1285, grad_fn=<NllLossBackward0>)
tensor(2.5357, grad_fn=<NllLossBackward0>)
tensor(3.4268, grad_fn=<NllLossBackward0>)
tensor(2.6721, grad_fn=<NllLossBackward0>)
tensor(3.0890, grad_fn=<NllLossBackward0>)
tensor(2.9027, grad_fn=<NllLossBackward0>)
tensor(4.7385, grad_fn=<NllLossBackward0>)
tensor(3.0041, grad_fn=<NllLossBackward0>)
tensor(3.0436, grad_fn=<NllLossBackward0>)
tensor(2.3774, grad_fn=<NllLossBackward0>)
tensor(2.02

tensor(2.9198, grad_fn=<NllLossBackward0>)
tensor(3.0200, grad_fn=<NllLossBackward0>)
tensor(3.2070, grad_fn=<NllLossBackward0>)
tensor(3.0503, grad_fn=<NllLossBackward0>)
tensor(2.5851, grad_fn=<NllLossBackward0>)
tensor(2.4653, grad_fn=<NllLossBackward0>)
tensor(2.9857, grad_fn=<NllLossBackward0>)
tensor(2.8121, grad_fn=<NllLossBackward0>)
tensor(2.4541, grad_fn=<NllLossBackward0>)
tensor(2.3271, grad_fn=<NllLossBackward0>)
tensor(1.7611, grad_fn=<NllLossBackward0>)
tensor(3.5395, grad_fn=<NllLossBackward0>)
tensor(4.1093, grad_fn=<NllLossBackward0>)
tensor(2.5969, grad_fn=<NllLossBackward0>)
tensor(2.3483, grad_fn=<NllLossBackward0>)
tensor(2.5910, grad_fn=<NllLossBackward0>)
tensor(2.8108, grad_fn=<NllLossBackward0>)
tensor(1.7618, grad_fn=<NllLossBackward0>)
tensor(1.7485, grad_fn=<NllLossBackward0>)
tensor(2.7962, grad_fn=<NllLossBackward0>)
tensor(2.0874, grad_fn=<NllLossBackward0>)
tensor(2.6924, grad_fn=<NllLossBackward0>)
tensor(2.6770, grad_fn=<NllLossBackward0>)
tensor(2.79

tensor(3.0106, grad_fn=<NllLossBackward0>)
tensor(1.7602, grad_fn=<NllLossBackward0>)
tensor(2.7713, grad_fn=<NllLossBackward0>)
tensor(1.9593, grad_fn=<NllLossBackward0>)
tensor(2.0455, grad_fn=<NllLossBackward0>)
tensor(2.3093, grad_fn=<NllLossBackward0>)
tensor(2.9091, grad_fn=<NllLossBackward0>)
tensor(3.2471, grad_fn=<NllLossBackward0>)
tensor(3.6838, grad_fn=<NllLossBackward0>)
tensor(3.3214, grad_fn=<NllLossBackward0>)
tensor(2.2596, grad_fn=<NllLossBackward0>)
tensor(2.8566, grad_fn=<NllLossBackward0>)
tensor(2.8046, grad_fn=<NllLossBackward0>)
tensor(2.9360, grad_fn=<NllLossBackward0>)
tensor(4.1565, grad_fn=<NllLossBackward0>)
tensor(3.1381, grad_fn=<NllLossBackward0>)
tensor(3.7453, grad_fn=<NllLossBackward0>)
tensor(2.6461, grad_fn=<NllLossBackward0>)
tensor(2.9648, grad_fn=<NllLossBackward0>)
tensor(2.6537, grad_fn=<NllLossBackward0>)
tensor(2.6273, grad_fn=<NllLossBackward0>)
tensor(2.3161, grad_fn=<NllLossBackward0>)
tensor(2.5958, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(4.4108, grad_fn=<NllLossBackward0>)
tensor(2.9954, grad_fn=<NllLossBackward0>)
tensor(3.9110, grad_fn=<NllLossBackward0>)
tensor(3.0582, grad_fn=<NllLossBackward0>)
tensor(2.2793, grad_fn=<NllLossBackward0>)
tensor(2.5402, grad_fn=<NllLossBackward0>)
tensor(3.0316, grad_fn=<NllLossBackward0>)
tensor(3.6785, grad_fn=<NllLossBackward0>)
tensor(2.4919, grad_fn=<NllLossBackward0>)
tensor(3.6113, grad_fn=<NllLossBackward0>)
tensor(3.9718, grad_fn=<NllLossBackward0>)
tensor(2.9079, grad_fn=<NllLossBackward0>)
tensor(2.6184, grad_fn=<NllLossBackward0>)
tensor(2.3683, grad_fn=<NllLossBackward0>)
tensor(3.0223, grad_fn=<NllLossBackward0>)
tensor(2.5756, grad_fn=<NllLossBackward0>)
tensor(4.5655, grad_fn=<NllLossBackward0>)
tensor(2.8714, grad_fn=<NllLossBackward0>)
tensor(2.3734, grad_fn=<NllLossBackward0>)
tensor(2.7884, grad_fn=<NllLossBackward0>)
tensor(1.8050, grad_fn=<NllLossBackward0>)
tensor(2.3500, grad_fn=<NllLossBackward0>)
tensor(2.3844, grad_fn=<NllLossBackward0>)
tensor(1.98

tensor(3.9713, grad_fn=<NllLossBackward0>)
tensor(3.3241, grad_fn=<NllLossBackward0>)
tensor(3.5818, grad_fn=<NllLossBackward0>)
tensor(2.7105, grad_fn=<NllLossBackward0>)
tensor(3.8148, grad_fn=<NllLossBackward0>)
tensor(1.9994, grad_fn=<NllLossBackward0>)
tensor(2.5848, grad_fn=<NllLossBackward0>)
tensor(2.9082, grad_fn=<NllLossBackward0>)
tensor(2.0797, grad_fn=<NllLossBackward0>)
tensor(2.8378, grad_fn=<NllLossBackward0>)
tensor(2.2948, grad_fn=<NllLossBackward0>)
tensor(2.6686, grad_fn=<NllLossBackward0>)
tensor(2.5173, grad_fn=<NllLossBackward0>)
tensor(3.2257, grad_fn=<NllLossBackward0>)
tensor(3.9101, grad_fn=<NllLossBackward0>)
tensor(2.9348, grad_fn=<NllLossBackward0>)
tensor(2.9326, grad_fn=<NllLossBackward0>)
tensor(1.7710, grad_fn=<NllLossBackward0>)
tensor(3.1057, grad_fn=<NllLossBackward0>)
tensor(2.0547, grad_fn=<NllLossBackward0>)
tensor(2.9669, grad_fn=<NllLossBackward0>)
tensor(3.3324, grad_fn=<NllLossBackward0>)
tensor(2.5119, grad_fn=<NllLossBackward0>)
tensor(4.11

tensor(2.4815, grad_fn=<NllLossBackward0>)
tensor(3.3419, grad_fn=<NllLossBackward0>)
tensor(2.4250, grad_fn=<NllLossBackward0>)
tensor(1.9846, grad_fn=<NllLossBackward0>)
tensor(2.3148, grad_fn=<NllLossBackward0>)
tensor(2.3988, grad_fn=<NllLossBackward0>)
tensor(2.6649, grad_fn=<NllLossBackward0>)
tensor(2.2912, grad_fn=<NllLossBackward0>)
tensor(2.2586, grad_fn=<NllLossBackward0>)
tensor(3.0215, grad_fn=<NllLossBackward0>)
tensor(2.3928, grad_fn=<NllLossBackward0>)
tensor(3.1935, grad_fn=<NllLossBackward0>)
tensor(2.6687, grad_fn=<NllLossBackward0>)
tensor(2.5833, grad_fn=<NllLossBackward0>)
tensor(2.6879, grad_fn=<NllLossBackward0>)
tensor(2.4783, grad_fn=<NllLossBackward0>)
tensor(3.0202, grad_fn=<NllLossBackward0>)
tensor(3.7784, grad_fn=<NllLossBackward0>)
tensor(3.5106, grad_fn=<NllLossBackward0>)
tensor(2.0523, grad_fn=<NllLossBackward0>)
tensor(2.3367, grad_fn=<NllLossBackward0>)
tensor(3.7917, grad_fn=<NllLossBackward0>)
tensor(2.3784, grad_fn=<NllLossBackward0>)
tensor(2.82

tensor(3.0391, grad_fn=<NllLossBackward0>)
tensor(2.9219, grad_fn=<NllLossBackward0>)
tensor(3.3954, grad_fn=<NllLossBackward0>)
tensor(2.5817, grad_fn=<NllLossBackward0>)
tensor(2.7776, grad_fn=<NllLossBackward0>)
tensor(3.4235, grad_fn=<NllLossBackward0>)
tensor(2.9111, grad_fn=<NllLossBackward0>)
tensor(2.7351, grad_fn=<NllLossBackward0>)
tensor(1.9844, grad_fn=<NllLossBackward0>)
tensor(3.3815, grad_fn=<NllLossBackward0>)
tensor(3.9911, grad_fn=<NllLossBackward0>)
tensor(2.0404, grad_fn=<NllLossBackward0>)
tensor(2.3987, grad_fn=<NllLossBackward0>)
tensor(3.9035, grad_fn=<NllLossBackward0>)
tensor(2.2092, grad_fn=<NllLossBackward0>)
tensor(2.4821, grad_fn=<NllLossBackward0>)
tensor(3.1731, grad_fn=<NllLossBackward0>)
tensor(4.6134, grad_fn=<NllLossBackward0>)
tensor(2.3419, grad_fn=<NllLossBackward0>)
tensor(2.9488, grad_fn=<NllLossBackward0>)
tensor(4.0866, grad_fn=<NllLossBackward0>)
tensor(3.3638, grad_fn=<NllLossBackward0>)
tensor(1.9845, grad_fn=<NllLossBackward0>)
tensor(4.43

tensor(1.9419, grad_fn=<NllLossBackward0>)
tensor(3.5292, grad_fn=<NllLossBackward0>)
tensor(2.7317, grad_fn=<NllLossBackward0>)
tensor(3.3521, grad_fn=<NllLossBackward0>)
tensor(2.3643, grad_fn=<NllLossBackward0>)
tensor(3.4112, grad_fn=<NllLossBackward0>)
tensor(3.2251, grad_fn=<NllLossBackward0>)
tensor(2.4622, grad_fn=<NllLossBackward0>)
tensor(4.5388, grad_fn=<NllLossBackward0>)
tensor(2.6376, grad_fn=<NllLossBackward0>)
tensor(2.5657, grad_fn=<NllLossBackward0>)
tensor(2.0978, grad_fn=<NllLossBackward0>)
tensor(3.7282, grad_fn=<NllLossBackward0>)
tensor(3.5128, grad_fn=<NllLossBackward0>)
tensor(3.6458, grad_fn=<NllLossBackward0>)
tensor(4.3127, grad_fn=<NllLossBackward0>)
tensor(2.9233, grad_fn=<NllLossBackward0>)
tensor(2.5841, grad_fn=<NllLossBackward0>)
tensor(3.2643, grad_fn=<NllLossBackward0>)
tensor(2.9977, grad_fn=<NllLossBackward0>)
tensor(3.2372, grad_fn=<NllLossBackward0>)
tensor(3.9869, grad_fn=<NllLossBackward0>)
tensor(2.5552, grad_fn=<NllLossBackward0>)
tensor(4.30

tensor(3.0364, grad_fn=<NllLossBackward0>)
tensor(1.9339, grad_fn=<NllLossBackward0>)
tensor(3.0314, grad_fn=<NllLossBackward0>)
tensor(2.5945, grad_fn=<NllLossBackward0>)
tensor(3.3389, grad_fn=<NllLossBackward0>)
tensor(3.5906, grad_fn=<NllLossBackward0>)
tensor(2.9260, grad_fn=<NllLossBackward0>)
tensor(3.0381, grad_fn=<NllLossBackward0>)
tensor(2.2622, grad_fn=<NllLossBackward0>)
tensor(2.3973, grad_fn=<NllLossBackward0>)
tensor(2.1874, grad_fn=<NllLossBackward0>)
tensor(3.1050, grad_fn=<NllLossBackward0>)
tensor(3.6367, grad_fn=<NllLossBackward0>)
tensor(2.8995, grad_fn=<NllLossBackward0>)
tensor(2.5191, grad_fn=<NllLossBackward0>)
tensor(1.9581, grad_fn=<NllLossBackward0>)
tensor(1.9864, grad_fn=<NllLossBackward0>)
tensor(3.6424, grad_fn=<NllLossBackward0>)
tensor(2.3034, grad_fn=<NllLossBackward0>)
tensor(3.9523, grad_fn=<NllLossBackward0>)
tensor(3.6657, grad_fn=<NllLossBackward0>)
tensor(3.9900, grad_fn=<NllLossBackward0>)
tensor(2.0959, grad_fn=<NllLossBackward0>)
tensor(4.01

tensor(2.9997, grad_fn=<NllLossBackward0>)
tensor(2.2955, grad_fn=<NllLossBackward0>)
tensor(3.2506, grad_fn=<NllLossBackward0>)
tensor(3.2015, grad_fn=<NllLossBackward0>)
tensor(2.8436, grad_fn=<NllLossBackward0>)
tensor(2.2616, grad_fn=<NllLossBackward0>)
tensor(1.7111, grad_fn=<NllLossBackward0>)
tensor(3.1992, grad_fn=<NllLossBackward0>)
tensor(2.0492, grad_fn=<NllLossBackward0>)
tensor(2.6087, grad_fn=<NllLossBackward0>)
tensor(2.2442, grad_fn=<NllLossBackward0>)
tensor(3.0596, grad_fn=<NllLossBackward0>)
tensor(2.5469, grad_fn=<NllLossBackward0>)
tensor(3.1204, grad_fn=<NllLossBackward0>)
tensor(2.8310, grad_fn=<NllLossBackward0>)
tensor(2.5538, grad_fn=<NllLossBackward0>)
tensor(2.4049, grad_fn=<NllLossBackward0>)
tensor(2.3497, grad_fn=<NllLossBackward0>)
tensor(2.4101, grad_fn=<NllLossBackward0>)
tensor(2.1604, grad_fn=<NllLossBackward0>)
tensor(3.9569, grad_fn=<NllLossBackward0>)
tensor(4.0691, grad_fn=<NllLossBackward0>)
tensor(2.8014, grad_fn=<NllLossBackward0>)
tensor(4.17

tensor(2.8949, grad_fn=<NllLossBackward0>)
tensor(1.9817, grad_fn=<NllLossBackward0>)
tensor(3.4339, grad_fn=<NllLossBackward0>)
tensor(1.9489, grad_fn=<NllLossBackward0>)
tensor(4.5666, grad_fn=<NllLossBackward0>)
tensor(1.9816, grad_fn=<NllLossBackward0>)
tensor(3.5552, grad_fn=<NllLossBackward0>)
tensor(3.1215, grad_fn=<NllLossBackward0>)
tensor(4.6216, grad_fn=<NllLossBackward0>)
tensor(2.9398, grad_fn=<NllLossBackward0>)
tensor(2.3462, grad_fn=<NllLossBackward0>)
tensor(3.6670, grad_fn=<NllLossBackward0>)
tensor(1.9065, grad_fn=<NllLossBackward0>)
tensor(1.7369, grad_fn=<NllLossBackward0>)
tensor(2.5068, grad_fn=<NllLossBackward0>)
tensor(2.7090, grad_fn=<NllLossBackward0>)
tensor(2.3725, grad_fn=<NllLossBackward0>)
tensor(2.5083, grad_fn=<NllLossBackward0>)
tensor(2.8007, grad_fn=<NllLossBackward0>)
tensor(2.4984, grad_fn=<NllLossBackward0>)
tensor(2.3511, grad_fn=<NllLossBackward0>)
tensor(1.7498, grad_fn=<NllLossBackward0>)
tensor(2.4665, grad_fn=<NllLossBackward0>)
tensor(2.53

tensor(2.9024, grad_fn=<NllLossBackward0>)
tensor(2.4594, grad_fn=<NllLossBackward0>)
tensor(4.9684, grad_fn=<NllLossBackward0>)
tensor(2.1819, grad_fn=<NllLossBackward0>)
tensor(2.8289, grad_fn=<NllLossBackward0>)
tensor(3.0042, grad_fn=<NllLossBackward0>)
tensor(2.4733, grad_fn=<NllLossBackward0>)
tensor(2.6397, grad_fn=<NllLossBackward0>)
tensor(2.2761, grad_fn=<NllLossBackward0>)
tensor(2.4335, grad_fn=<NllLossBackward0>)
tensor(3.2429, grad_fn=<NllLossBackward0>)
tensor(2.3753, grad_fn=<NllLossBackward0>)
tensor(3.3614, grad_fn=<NllLossBackward0>)
tensor(2.4855, grad_fn=<NllLossBackward0>)
tensor(2.1264, grad_fn=<NllLossBackward0>)
tensor(3.0235, grad_fn=<NllLossBackward0>)
tensor(4.0554, grad_fn=<NllLossBackward0>)
tensor(2.4959, grad_fn=<NllLossBackward0>)
tensor(4.1602, grad_fn=<NllLossBackward0>)
tensor(2.3991, grad_fn=<NllLossBackward0>)
tensor(2.9454, grad_fn=<NllLossBackward0>)
tensor(2.2329, grad_fn=<NllLossBackward0>)
tensor(2.4287, grad_fn=<NllLossBackward0>)
tensor(2.55

tensor(3.2830, grad_fn=<NllLossBackward0>)
tensor(3.4055, grad_fn=<NllLossBackward0>)
tensor(3.0796, grad_fn=<NllLossBackward0>)
tensor(3.6431, grad_fn=<NllLossBackward0>)
tensor(1.9539, grad_fn=<NllLossBackward0>)
tensor(2.2537, grad_fn=<NllLossBackward0>)
tensor(2.5079, grad_fn=<NllLossBackward0>)
tensor(2.8199, grad_fn=<NllLossBackward0>)
tensor(3.1580, grad_fn=<NllLossBackward0>)
tensor(3.4922, grad_fn=<NllLossBackward0>)
tensor(3.1700, grad_fn=<NllLossBackward0>)
tensor(2.1031, grad_fn=<NllLossBackward0>)
tensor(2.6513, grad_fn=<NllLossBackward0>)
tensor(3.1343, grad_fn=<NllLossBackward0>)
tensor(2.5590, grad_fn=<NllLossBackward0>)
tensor(3.1101, grad_fn=<NllLossBackward0>)
tensor(2.6072, grad_fn=<NllLossBackward0>)
tensor(2.4452, grad_fn=<NllLossBackward0>)
tensor(2.8197, grad_fn=<NllLossBackward0>)
tensor(2.5171, grad_fn=<NllLossBackward0>)
tensor(3.2255, grad_fn=<NllLossBackward0>)
tensor(3.9243, grad_fn=<NllLossBackward0>)
tensor(1.7276, grad_fn=<NllLossBackward0>)
tensor(2.41

tensor(2.5389, grad_fn=<NllLossBackward0>)
tensor(2.8685, grad_fn=<NllLossBackward0>)
tensor(2.4332, grad_fn=<NllLossBackward0>)
tensor(1.9677, grad_fn=<NllLossBackward0>)
tensor(3.1322, grad_fn=<NllLossBackward0>)
tensor(2.9778, grad_fn=<NllLossBackward0>)
tensor(2.6465, grad_fn=<NllLossBackward0>)
tensor(2.9594, grad_fn=<NllLossBackward0>)
tensor(3.5622, grad_fn=<NllLossBackward0>)
tensor(2.6908, grad_fn=<NllLossBackward0>)
tensor(3.0476, grad_fn=<NllLossBackward0>)
tensor(2.5137, grad_fn=<NllLossBackward0>)
tensor(2.9566, grad_fn=<NllLossBackward0>)
tensor(3.3536, grad_fn=<NllLossBackward0>)
tensor(1.9808, grad_fn=<NllLossBackward0>)
tensor(2.0016, grad_fn=<NllLossBackward0>)
tensor(3.4893, grad_fn=<NllLossBackward0>)
tensor(3.6532, grad_fn=<NllLossBackward0>)
tensor(3.5182, grad_fn=<NllLossBackward0>)
tensor(2.6161, grad_fn=<NllLossBackward0>)
tensor(3.0545, grad_fn=<NllLossBackward0>)
tensor(2.5663, grad_fn=<NllLossBackward0>)
tensor(4.4586, grad_fn=<NllLossBackward0>)
tensor(2.35

tensor(3.8764, grad_fn=<NllLossBackward0>)
tensor(2.1983, grad_fn=<NllLossBackward0>)
tensor(4.3478, grad_fn=<NllLossBackward0>)
tensor(2.5521, grad_fn=<NllLossBackward0>)
tensor(3.5933, grad_fn=<NllLossBackward0>)
tensor(3.5173, grad_fn=<NllLossBackward0>)
tensor(1.7195, grad_fn=<NllLossBackward0>)
tensor(2.8447, grad_fn=<NllLossBackward0>)
tensor(2.5755, grad_fn=<NllLossBackward0>)
tensor(2.8451, grad_fn=<NllLossBackward0>)
tensor(2.6053, grad_fn=<NllLossBackward0>)
tensor(2.4666, grad_fn=<NllLossBackward0>)
tensor(3.6027, grad_fn=<NllLossBackward0>)
tensor(3.5250, grad_fn=<NllLossBackward0>)
tensor(2.6796, grad_fn=<NllLossBackward0>)
tensor(2.6483, grad_fn=<NllLossBackward0>)
tensor(3.1492, grad_fn=<NllLossBackward0>)
tensor(2.1904, grad_fn=<NllLossBackward0>)
tensor(4.2964, grad_fn=<NllLossBackward0>)
tensor(3.0464, grad_fn=<NllLossBackward0>)
tensor(3.6700, grad_fn=<NllLossBackward0>)
tensor(2.5831, grad_fn=<NllLossBackward0>)
tensor(3.2123, grad_fn=<NllLossBackward0>)
tensor(2.49

tensor(2.8142, grad_fn=<NllLossBackward0>)
tensor(3.6674, grad_fn=<NllLossBackward0>)
tensor(2.9208, grad_fn=<NllLossBackward0>)
tensor(2.3526, grad_fn=<NllLossBackward0>)
tensor(3.4756, grad_fn=<NllLossBackward0>)
tensor(2.4799, grad_fn=<NllLossBackward0>)
tensor(3.1288, grad_fn=<NllLossBackward0>)
tensor(2.8572, grad_fn=<NllLossBackward0>)
tensor(3.1126, grad_fn=<NllLossBackward0>)
tensor(2.8539, grad_fn=<NllLossBackward0>)
tensor(3.0298, grad_fn=<NllLossBackward0>)
tensor(3.6106, grad_fn=<NllLossBackward0>)
tensor(3.8922, grad_fn=<NllLossBackward0>)
tensor(3.0472, grad_fn=<NllLossBackward0>)
tensor(2.3211, grad_fn=<NllLossBackward0>)
tensor(3.0495, grad_fn=<NllLossBackward0>)
tensor(2.6865, grad_fn=<NllLossBackward0>)
tensor(2.2973, grad_fn=<NllLossBackward0>)
tensor(2.8905, grad_fn=<NllLossBackward0>)
tensor(3.4294, grad_fn=<NllLossBackward0>)
tensor(2.3862, grad_fn=<NllLossBackward0>)
tensor(2.9509, grad_fn=<NllLossBackward0>)
tensor(2.0378, grad_fn=<NllLossBackward0>)
tensor(3.47

tensor(3.5360, grad_fn=<NllLossBackward0>)
tensor(2.0051, grad_fn=<NllLossBackward0>)
tensor(2.8692, grad_fn=<NllLossBackward0>)
tensor(3.0368, grad_fn=<NllLossBackward0>)
tensor(3.3401, grad_fn=<NllLossBackward0>)
tensor(1.9573, grad_fn=<NllLossBackward0>)
tensor(2.2852, grad_fn=<NllLossBackward0>)
tensor(3.5111, grad_fn=<NllLossBackward0>)
tensor(2.2572, grad_fn=<NllLossBackward0>)
tensor(2.7153, grad_fn=<NllLossBackward0>)
tensor(2.8249, grad_fn=<NllLossBackward0>)
tensor(3.5698, grad_fn=<NllLossBackward0>)
tensor(2.6407, grad_fn=<NllLossBackward0>)
tensor(2.4404, grad_fn=<NllLossBackward0>)
tensor(1.7334, grad_fn=<NllLossBackward0>)
tensor(3.0975, grad_fn=<NllLossBackward0>)
tensor(2.5063, grad_fn=<NllLossBackward0>)
tensor(3.6671, grad_fn=<NllLossBackward0>)
tensor(2.2177, grad_fn=<NllLossBackward0>)
tensor(3.3728, grad_fn=<NllLossBackward0>)
tensor(4.0855, grad_fn=<NllLossBackward0>)
tensor(2.3464, grad_fn=<NllLossBackward0>)
tensor(3.6237, grad_fn=<NllLossBackward0>)
tensor(2.15

tensor(3.5525, grad_fn=<NllLossBackward0>)
tensor(2.2729, grad_fn=<NllLossBackward0>)
tensor(3.3903, grad_fn=<NllLossBackward0>)
tensor(2.4405, grad_fn=<NllLossBackward0>)
tensor(3.1459, grad_fn=<NllLossBackward0>)
tensor(2.5696, grad_fn=<NllLossBackward0>)
tensor(2.8158, grad_fn=<NllLossBackward0>)
tensor(2.4874, grad_fn=<NllLossBackward0>)
tensor(4.0561, grad_fn=<NllLossBackward0>)
tensor(1.7143, grad_fn=<NllLossBackward0>)
tensor(2.6380, grad_fn=<NllLossBackward0>)
tensor(2.5865, grad_fn=<NllLossBackward0>)
tensor(3.5771, grad_fn=<NllLossBackward0>)
tensor(2.2697, grad_fn=<NllLossBackward0>)
tensor(2.3055, grad_fn=<NllLossBackward0>)
tensor(3.3550, grad_fn=<NllLossBackward0>)
tensor(2.3064, grad_fn=<NllLossBackward0>)
tensor(3.0091, grad_fn=<NllLossBackward0>)
tensor(2.5764, grad_fn=<NllLossBackward0>)
tensor(3.3768, grad_fn=<NllLossBackward0>)
tensor(3.8117, grad_fn=<NllLossBackward0>)
tensor(3.5069, grad_fn=<NllLossBackward0>)
tensor(3.5456, grad_fn=<NllLossBackward0>)
tensor(4.80

tensor(2.9052, grad_fn=<NllLossBackward0>)
tensor(3.6797, grad_fn=<NllLossBackward0>)
tensor(2.2669, grad_fn=<NllLossBackward0>)
tensor(2.3347, grad_fn=<NllLossBackward0>)
tensor(1.7398, grad_fn=<NllLossBackward0>)
tensor(2.3117, grad_fn=<NllLossBackward0>)
tensor(3.0092, grad_fn=<NllLossBackward0>)
tensor(2.6652, grad_fn=<NllLossBackward0>)
tensor(2.3542, grad_fn=<NllLossBackward0>)
tensor(2.9977, grad_fn=<NllLossBackward0>)
tensor(2.9189, grad_fn=<NllLossBackward0>)
tensor(3.1394, grad_fn=<NllLossBackward0>)
tensor(2.2775, grad_fn=<NllLossBackward0>)
tensor(2.5682, grad_fn=<NllLossBackward0>)
tensor(2.2949, grad_fn=<NllLossBackward0>)
tensor(2.9412, grad_fn=<NllLossBackward0>)
tensor(1.9720, grad_fn=<NllLossBackward0>)
tensor(3.8843, grad_fn=<NllLossBackward0>)
tensor(2.9279, grad_fn=<NllLossBackward0>)
tensor(3.3129, grad_fn=<NllLossBackward0>)
tensor(2.7390, grad_fn=<NllLossBackward0>)
tensor(3.2436, grad_fn=<NllLossBackward0>)
tensor(2.6250, grad_fn=<NllLossBackward0>)
tensor(2.50

tensor(1.9451, grad_fn=<NllLossBackward0>)
tensor(2.5587, grad_fn=<NllLossBackward0>)
tensor(2.7540, grad_fn=<NllLossBackward0>)
tensor(2.3530, grad_fn=<NllLossBackward0>)
tensor(2.9842, grad_fn=<NllLossBackward0>)
tensor(1.7445, grad_fn=<NllLossBackward0>)
tensor(2.9085, grad_fn=<NllLossBackward0>)
tensor(2.0916, grad_fn=<NllLossBackward0>)
tensor(3.6168, grad_fn=<NllLossBackward0>)
tensor(3.5307, grad_fn=<NllLossBackward0>)
tensor(2.7283, grad_fn=<NllLossBackward0>)
tensor(2.8587, grad_fn=<NllLossBackward0>)
tensor(3.0454, grad_fn=<NllLossBackward0>)
tensor(2.1192, grad_fn=<NllLossBackward0>)
tensor(2.2802, grad_fn=<NllLossBackward0>)
tensor(2.5885, grad_fn=<NllLossBackward0>)
tensor(3.3479, grad_fn=<NllLossBackward0>)
tensor(2.8167, grad_fn=<NllLossBackward0>)
tensor(3.3493, grad_fn=<NllLossBackward0>)
tensor(2.2798, grad_fn=<NllLossBackward0>)
tensor(1.9428, grad_fn=<NllLossBackward0>)
tensor(2.2101, grad_fn=<NllLossBackward0>)
tensor(4.1051, grad_fn=<NllLossBackward0>)
tensor(4.52

tensor(2.9257, grad_fn=<NllLossBackward0>)
tensor(1.9808, grad_fn=<NllLossBackward0>)
tensor(2.6005, grad_fn=<NllLossBackward0>)
tensor(2.4269, grad_fn=<NllLossBackward0>)
tensor(1.9725, grad_fn=<NllLossBackward0>)
tensor(2.4443, grad_fn=<NllLossBackward0>)
tensor(2.8604, grad_fn=<NllLossBackward0>)
tensor(2.7335, grad_fn=<NllLossBackward0>)
tensor(3.0706, grad_fn=<NllLossBackward0>)
tensor(2.9509, grad_fn=<NllLossBackward0>)
tensor(2.6301, grad_fn=<NllLossBackward0>)
tensor(2.2936, grad_fn=<NllLossBackward0>)
tensor(1.7407, grad_fn=<NllLossBackward0>)
tensor(1.9919, grad_fn=<NllLossBackward0>)
tensor(2.5458, grad_fn=<NllLossBackward0>)
tensor(2.2309, grad_fn=<NllLossBackward0>)
tensor(2.6127, grad_fn=<NllLossBackward0>)
tensor(3.6382, grad_fn=<NllLossBackward0>)
tensor(2.5784, grad_fn=<NllLossBackward0>)
tensor(2.3697, grad_fn=<NllLossBackward0>)
tensor(2.5132, grad_fn=<NllLossBackward0>)
tensor(2.3801, grad_fn=<NllLossBackward0>)
tensor(2.7525, grad_fn=<NllLossBackward0>)
tensor(3.02

tensor(3.2284, grad_fn=<NllLossBackward0>)
tensor(2.5743, grad_fn=<NllLossBackward0>)
tensor(2.6206, grad_fn=<NllLossBackward0>)
tensor(2.4197, grad_fn=<NllLossBackward0>)
tensor(4.6184, grad_fn=<NllLossBackward0>)
tensor(1.7253, grad_fn=<NllLossBackward0>)
tensor(2.5454, grad_fn=<NllLossBackward0>)
tensor(3.0300, grad_fn=<NllLossBackward0>)
tensor(2.5244, grad_fn=<NllLossBackward0>)
tensor(2.9399, grad_fn=<NllLossBackward0>)
tensor(3.5557, grad_fn=<NllLossBackward0>)
tensor(2.6466, grad_fn=<NllLossBackward0>)
tensor(3.7216, grad_fn=<NllLossBackward0>)
tensor(2.2677, grad_fn=<NllLossBackward0>)
tensor(2.6916, grad_fn=<NllLossBackward0>)
tensor(2.2999, grad_fn=<NllLossBackward0>)
tensor(2.3597, grad_fn=<NllLossBackward0>)
tensor(2.9151, grad_fn=<NllLossBackward0>)
tensor(2.5902, grad_fn=<NllLossBackward0>)
tensor(2.9019, grad_fn=<NllLossBackward0>)
tensor(2.9946, grad_fn=<NllLossBackward0>)
tensor(3.0077, grad_fn=<NllLossBackward0>)
tensor(4.5077, grad_fn=<NllLossBackward0>)
tensor(2.62

tensor(2.8118, grad_fn=<NllLossBackward0>)
tensor(2.5895, grad_fn=<NllLossBackward0>)
tensor(2.9699, grad_fn=<NllLossBackward0>)
tensor(2.3297, grad_fn=<NllLossBackward0>)
tensor(1.7237, grad_fn=<NllLossBackward0>)
tensor(2.5267, grad_fn=<NllLossBackward0>)
tensor(2.6718, grad_fn=<NllLossBackward0>)
tensor(2.6851, grad_fn=<NllLossBackward0>)
tensor(4.1343, grad_fn=<NllLossBackward0>)
tensor(2.7980, grad_fn=<NllLossBackward0>)
tensor(2.5095, grad_fn=<NllLossBackward0>)
tensor(2.8218, grad_fn=<NllLossBackward0>)
tensor(1.7276, grad_fn=<NllLossBackward0>)
tensor(3.1822, grad_fn=<NllLossBackward0>)
tensor(2.0675, grad_fn=<NllLossBackward0>)
tensor(2.9125, grad_fn=<NllLossBackward0>)
tensor(2.5964, grad_fn=<NllLossBackward0>)
tensor(2.3257, grad_fn=<NllLossBackward0>)
tensor(3.0583, grad_fn=<NllLossBackward0>)
tensor(3.6057, grad_fn=<NllLossBackward0>)
tensor(3.5748, grad_fn=<NllLossBackward0>)
tensor(2.6775, grad_fn=<NllLossBackward0>)
tensor(3.5489, grad_fn=<NllLossBackward0>)
tensor(2.07

tensor(3.0714, grad_fn=<NllLossBackward0>)
tensor(4.2114, grad_fn=<NllLossBackward0>)
tensor(3.0209, grad_fn=<NllLossBackward0>)
tensor(2.3200, grad_fn=<NllLossBackward0>)
tensor(2.8174, grad_fn=<NllLossBackward0>)
tensor(3.6086, grad_fn=<NllLossBackward0>)
tensor(2.6434, grad_fn=<NllLossBackward0>)
tensor(2.9528, grad_fn=<NllLossBackward0>)
tensor(2.3661, grad_fn=<NllLossBackward0>)
tensor(2.9411, grad_fn=<NllLossBackward0>)
tensor(3.4374, grad_fn=<NllLossBackward0>)
tensor(3.7244, grad_fn=<NllLossBackward0>)
tensor(2.4609, grad_fn=<NllLossBackward0>)
tensor(2.9113, grad_fn=<NllLossBackward0>)
tensor(2.3225, grad_fn=<NllLossBackward0>)
tensor(3.2122, grad_fn=<NllLossBackward0>)
tensor(3.5087, grad_fn=<NllLossBackward0>)
tensor(2.2582, grad_fn=<NllLossBackward0>)
tensor(3.0860, grad_fn=<NllLossBackward0>)
tensor(3.4293, grad_fn=<NllLossBackward0>)
tensor(1.8059, grad_fn=<NllLossBackward0>)
tensor(1.7777, grad_fn=<NllLossBackward0>)
tensor(1.9658, grad_fn=<NllLossBackward0>)
tensor(4.28

tensor(4.0157, grad_fn=<NllLossBackward0>)
tensor(2.9551, grad_fn=<NllLossBackward0>)
tensor(2.8627, grad_fn=<NllLossBackward0>)
tensor(1.7426, grad_fn=<NllLossBackward0>)
tensor(2.4758, grad_fn=<NllLossBackward0>)
tensor(2.7576, grad_fn=<NllLossBackward0>)
tensor(3.1417, grad_fn=<NllLossBackward0>)
tensor(2.6513, grad_fn=<NllLossBackward0>)
tensor(1.9490, grad_fn=<NllLossBackward0>)
tensor(2.4655, grad_fn=<NllLossBackward0>)
tensor(2.5664, grad_fn=<NllLossBackward0>)
tensor(2.4813, grad_fn=<NllLossBackward0>)
tensor(3.6241, grad_fn=<NllLossBackward0>)
tensor(4.2056, grad_fn=<NllLossBackward0>)
tensor(3.3700, grad_fn=<NllLossBackward0>)
tensor(1.7500, grad_fn=<NllLossBackward0>)
tensor(3.4334, grad_fn=<NllLossBackward0>)
tensor(2.8363, grad_fn=<NllLossBackward0>)
tensor(2.6667, grad_fn=<NllLossBackward0>)
tensor(2.2701, grad_fn=<NllLossBackward0>)
tensor(3.7274, grad_fn=<NllLossBackward0>)
tensor(1.9377, grad_fn=<NllLossBackward0>)
tensor(3.9607, grad_fn=<NllLossBackward0>)
tensor(2.77

tensor(3.3552, grad_fn=<NllLossBackward0>)
tensor(2.7524, grad_fn=<NllLossBackward0>)
tensor(1.7759, grad_fn=<NllLossBackward0>)
tensor(2.6853, grad_fn=<NllLossBackward0>)
tensor(2.9697, grad_fn=<NllLossBackward0>)
tensor(3.3662, grad_fn=<NllLossBackward0>)
tensor(2.6918, grad_fn=<NllLossBackward0>)
tensor(2.5705, grad_fn=<NllLossBackward0>)
tensor(2.1532, grad_fn=<NllLossBackward0>)
tensor(2.4002, grad_fn=<NllLossBackward0>)
tensor(1.7818, grad_fn=<NllLossBackward0>)
tensor(3.1541, grad_fn=<NllLossBackward0>)
tensor(2.9494, grad_fn=<NllLossBackward0>)
tensor(3.0664, grad_fn=<NllLossBackward0>)
tensor(3.1753, grad_fn=<NllLossBackward0>)
tensor(2.9196, grad_fn=<NllLossBackward0>)
tensor(2.6472, grad_fn=<NllLossBackward0>)
tensor(3.4762, grad_fn=<NllLossBackward0>)
tensor(4.3043, grad_fn=<NllLossBackward0>)
tensor(2.4932, grad_fn=<NllLossBackward0>)
tensor(3.4588, grad_fn=<NllLossBackward0>)
tensor(2.6036, grad_fn=<NllLossBackward0>)
tensor(4.0557, grad_fn=<NllLossBackward0>)
tensor(3.55

tensor(3.2621, grad_fn=<NllLossBackward0>)
tensor(2.7618, grad_fn=<NllLossBackward0>)
tensor(1.9737, grad_fn=<NllLossBackward0>)
tensor(2.0763, grad_fn=<NllLossBackward0>)
tensor(3.1682, grad_fn=<NllLossBackward0>)
tensor(2.5255, grad_fn=<NllLossBackward0>)
tensor(2.4799, grad_fn=<NllLossBackward0>)
tensor(2.5934, grad_fn=<NllLossBackward0>)
tensor(2.9759, grad_fn=<NllLossBackward0>)
tensor(2.8445, grad_fn=<NllLossBackward0>)
tensor(2.6506, grad_fn=<NllLossBackward0>)
tensor(2.7309, grad_fn=<NllLossBackward0>)
tensor(2.9231, grad_fn=<NllLossBackward0>)
tensor(2.3342, grad_fn=<NllLossBackward0>)
tensor(2.8731, grad_fn=<NllLossBackward0>)
tensor(3.5129, grad_fn=<NllLossBackward0>)
tensor(3.3335, grad_fn=<NllLossBackward0>)
tensor(2.7352, grad_fn=<NllLossBackward0>)
tensor(3.0130, grad_fn=<NllLossBackward0>)
tensor(2.2971, grad_fn=<NllLossBackward0>)
tensor(2.9080, grad_fn=<NllLossBackward0>)
tensor(3.0028, grad_fn=<NllLossBackward0>)
tensor(3.0607, grad_fn=<NllLossBackward0>)
tensor(3.52

tensor(1.9758, grad_fn=<NllLossBackward0>)
tensor(4.0683, grad_fn=<NllLossBackward0>)
tensor(3.2242, grad_fn=<NllLossBackward0>)
tensor(2.5574, grad_fn=<NllLossBackward0>)
tensor(3.4527, grad_fn=<NllLossBackward0>)
tensor(3.1646, grad_fn=<NllLossBackward0>)
tensor(2.7961, grad_fn=<NllLossBackward0>)
tensor(2.2091, grad_fn=<NllLossBackward0>)
tensor(2.6928, grad_fn=<NllLossBackward0>)
tensor(2.3566, grad_fn=<NllLossBackward0>)
tensor(3.0169, grad_fn=<NllLossBackward0>)
tensor(2.9807, grad_fn=<NllLossBackward0>)
tensor(3.0278, grad_fn=<NllLossBackward0>)
tensor(2.0220, grad_fn=<NllLossBackward0>)
tensor(2.3831, grad_fn=<NllLossBackward0>)
tensor(3.0407, grad_fn=<NllLossBackward0>)
tensor(1.7396, grad_fn=<NllLossBackward0>)
tensor(2.8654, grad_fn=<NllLossBackward0>)
tensor(2.3345, grad_fn=<NllLossBackward0>)
tensor(2.8347, grad_fn=<NllLossBackward0>)
tensor(2.3230, grad_fn=<NllLossBackward0>)
tensor(2.1724, grad_fn=<NllLossBackward0>)
tensor(2.2983, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(1.7186, grad_fn=<NllLossBackward0>)
tensor(2.2102, grad_fn=<NllLossBackward0>)
tensor(3.0376, grad_fn=<NllLossBackward0>)
tensor(2.9267, grad_fn=<NllLossBackward0>)
tensor(2.8935, grad_fn=<NllLossBackward0>)
tensor(2.5456, grad_fn=<NllLossBackward0>)
tensor(2.3525, grad_fn=<NllLossBackward0>)
tensor(1.9447, grad_fn=<NllLossBackward0>)
tensor(2.1919, grad_fn=<NllLossBackward0>)
tensor(2.8007, grad_fn=<NllLossBackward0>)
tensor(2.5821, grad_fn=<NllLossBackward0>)
tensor(3.0755, grad_fn=<NllLossBackward0>)
tensor(3.4205, grad_fn=<NllLossBackward0>)
tensor(2.9659, grad_fn=<NllLossBackward0>)
tensor(3.3459, grad_fn=<NllLossBackward0>)
tensor(2.2570, grad_fn=<NllLossBackward0>)
tensor(2.5265, grad_fn=<NllLossBackward0>)
tensor(3.5728, grad_fn=<NllLossBackward0>)
tensor(3.6587, grad_fn=<NllLossBackward0>)
tensor(2.4303, grad_fn=<NllLossBackward0>)
tensor(2.3471, grad_fn=<NllLossBackward0>)
tensor(2.9583, grad_fn=<NllLossBackward0>)
tensor(2.4198, grad_fn=<NllLossBackward0>)
tensor(2.12

tensor(2.2466, grad_fn=<NllLossBackward0>)
tensor(3.0720, grad_fn=<NllLossBackward0>)
tensor(2.7675, grad_fn=<NllLossBackward0>)
tensor(2.7556, grad_fn=<NllLossBackward0>)
tensor(3.0104, grad_fn=<NllLossBackward0>)
tensor(2.8513, grad_fn=<NllLossBackward0>)
tensor(2.9209, grad_fn=<NllLossBackward0>)
tensor(4.5383, grad_fn=<NllLossBackward0>)
tensor(3.0850, grad_fn=<NllLossBackward0>)
tensor(2.0282, grad_fn=<NllLossBackward0>)
tensor(4.0919, grad_fn=<NllLossBackward0>)
tensor(2.6043, grad_fn=<NllLossBackward0>)
tensor(2.2990, grad_fn=<NllLossBackward0>)
tensor(3.5458, grad_fn=<NllLossBackward0>)
tensor(3.0126, grad_fn=<NllLossBackward0>)
tensor(5.0661, grad_fn=<NllLossBackward0>)
tensor(2.8548, grad_fn=<NllLossBackward0>)
tensor(2.2842, grad_fn=<NllLossBackward0>)
tensor(3.0798, grad_fn=<NllLossBackward0>)
tensor(2.4816, grad_fn=<NllLossBackward0>)
tensor(2.3937, grad_fn=<NllLossBackward0>)
tensor(3.0289, grad_fn=<NllLossBackward0>)
tensor(2.5351, grad_fn=<NllLossBackward0>)
tensor(2.21

tensor(2.5788, grad_fn=<NllLossBackward0>)
tensor(3.6748, grad_fn=<NllLossBackward0>)
tensor(2.3011, grad_fn=<NllLossBackward0>)
tensor(3.5097, grad_fn=<NllLossBackward0>)
tensor(2.8849, grad_fn=<NllLossBackward0>)
tensor(3.0199, grad_fn=<NllLossBackward0>)
tensor(2.5884, grad_fn=<NllLossBackward0>)
tensor(2.8052, grad_fn=<NllLossBackward0>)
tensor(2.3219, grad_fn=<NllLossBackward0>)
tensor(3.6794, grad_fn=<NllLossBackward0>)
tensor(2.9106, grad_fn=<NllLossBackward0>)
tensor(2.2470, grad_fn=<NllLossBackward0>)
tensor(3.1966, grad_fn=<NllLossBackward0>)
tensor(2.5626, grad_fn=<NllLossBackward0>)
tensor(2.6493, grad_fn=<NllLossBackward0>)
tensor(3.4558, grad_fn=<NllLossBackward0>)
tensor(2.5101, grad_fn=<NllLossBackward0>)
tensor(2.8569, grad_fn=<NllLossBackward0>)
tensor(2.9123, grad_fn=<NllLossBackward0>)
tensor(4.1157, grad_fn=<NllLossBackward0>)
tensor(2.8263, grad_fn=<NllLossBackward0>)
tensor(3.0539, grad_fn=<NllLossBackward0>)
tensor(3.7059, grad_fn=<NllLossBackward0>)
tensor(3.03

tensor(2.3684, grad_fn=<NllLossBackward0>)
tensor(2.4790, grad_fn=<NllLossBackward0>)
tensor(2.6011, grad_fn=<NllLossBackward0>)
tensor(3.3990, grad_fn=<NllLossBackward0>)
tensor(3.2436, grad_fn=<NllLossBackward0>)
tensor(2.0605, grad_fn=<NllLossBackward0>)
tensor(4.9693, grad_fn=<NllLossBackward0>)
tensor(2.3505, grad_fn=<NllLossBackward0>)
tensor(2.3619, grad_fn=<NllLossBackward0>)
tensor(2.9079, grad_fn=<NllLossBackward0>)
tensor(2.3702, grad_fn=<NllLossBackward0>)
tensor(2.3770, grad_fn=<NllLossBackward0>)
tensor(2.8456, grad_fn=<NllLossBackward0>)
tensor(3.7409, grad_fn=<NllLossBackward0>)
tensor(3.5472, grad_fn=<NllLossBackward0>)
tensor(2.6219, grad_fn=<NllLossBackward0>)
tensor(2.6784, grad_fn=<NllLossBackward0>)
tensor(1.9394, grad_fn=<NllLossBackward0>)
tensor(3.3389, grad_fn=<NllLossBackward0>)
tensor(2.8949, grad_fn=<NllLossBackward0>)
tensor(2.2600, grad_fn=<NllLossBackward0>)
tensor(2.3441, grad_fn=<NllLossBackward0>)
tensor(2.0160, grad_fn=<NllLossBackward0>)
tensor(3.49

tensor(2.2650, grad_fn=<NllLossBackward0>)
tensor(2.9133, grad_fn=<NllLossBackward0>)
tensor(4.3276, grad_fn=<NllLossBackward0>)
tensor(3.1024, grad_fn=<NllLossBackward0>)
tensor(3.0856, grad_fn=<NllLossBackward0>)
tensor(3.3648, grad_fn=<NllLossBackward0>)
tensor(2.8895, grad_fn=<NllLossBackward0>)
tensor(3.4181, grad_fn=<NllLossBackward0>)
tensor(2.8811, grad_fn=<NllLossBackward0>)
tensor(2.3525, grad_fn=<NllLossBackward0>)
tensor(2.5456, grad_fn=<NllLossBackward0>)
tensor(2.1191, grad_fn=<NllLossBackward0>)
tensor(2.8958, grad_fn=<NllLossBackward0>)
tensor(2.6599, grad_fn=<NllLossBackward0>)
tensor(2.9124, grad_fn=<NllLossBackward0>)
tensor(3.8327, grad_fn=<NllLossBackward0>)
tensor(3.5516, grad_fn=<NllLossBackward0>)
tensor(2.4383, grad_fn=<NllLossBackward0>)
tensor(1.7345, grad_fn=<NllLossBackward0>)
tensor(3.0129, grad_fn=<NllLossBackward0>)
tensor(1.9312, grad_fn=<NllLossBackward0>)
tensor(2.8586, grad_fn=<NllLossBackward0>)
tensor(1.9717, grad_fn=<NllLossBackward0>)
tensor(2.66

tensor(2.2727, grad_fn=<NllLossBackward0>)
tensor(3.2906, grad_fn=<NllLossBackward0>)
tensor(2.3848, grad_fn=<NllLossBackward0>)
tensor(3.4845, grad_fn=<NllLossBackward0>)
tensor(2.4452, grad_fn=<NllLossBackward0>)
tensor(2.3512, grad_fn=<NllLossBackward0>)
tensor(2.6210, grad_fn=<NllLossBackward0>)
tensor(2.3039, grad_fn=<NllLossBackward0>)
tensor(3.1286, grad_fn=<NllLossBackward0>)
tensor(2.6494, grad_fn=<NllLossBackward0>)
tensor(2.8418, grad_fn=<NllLossBackward0>)
tensor(2.4044, grad_fn=<NllLossBackward0>)
tensor(2.2620, grad_fn=<NllLossBackward0>)
tensor(2.0234, grad_fn=<NllLossBackward0>)
tensor(2.9702, grad_fn=<NllLossBackward0>)
tensor(2.2834, grad_fn=<NllLossBackward0>)
tensor(2.5814, grad_fn=<NllLossBackward0>)
tensor(2.2495, grad_fn=<NllLossBackward0>)
tensor(2.7105, grad_fn=<NllLossBackward0>)
tensor(3.5978, grad_fn=<NllLossBackward0>)
tensor(2.4310, grad_fn=<NllLossBackward0>)
tensor(3.8183, grad_fn=<NllLossBackward0>)
tensor(2.9948, grad_fn=<NllLossBackward0>)
tensor(2.58

tensor(2.9702, grad_fn=<NllLossBackward0>)
tensor(2.6785, grad_fn=<NllLossBackward0>)
tensor(3.0865, grad_fn=<NllLossBackward0>)
tensor(2.9561, grad_fn=<NllLossBackward0>)
tensor(2.0053, grad_fn=<NllLossBackward0>)
tensor(2.3532, grad_fn=<NllLossBackward0>)
tensor(2.3890, grad_fn=<NllLossBackward0>)
tensor(3.5871, grad_fn=<NllLossBackward0>)
tensor(2.2840, grad_fn=<NllLossBackward0>)
tensor(2.4298, grad_fn=<NllLossBackward0>)
tensor(1.7925, grad_fn=<NllLossBackward0>)
tensor(3.2883, grad_fn=<NllLossBackward0>)
tensor(2.0475, grad_fn=<NllLossBackward0>)
tensor(2.4368, grad_fn=<NllLossBackward0>)
tensor(2.5197, grad_fn=<NllLossBackward0>)
tensor(2.5076, grad_fn=<NllLossBackward0>)
tensor(2.9885, grad_fn=<NllLossBackward0>)
tensor(2.8782, grad_fn=<NllLossBackward0>)
tensor(2.2957, grad_fn=<NllLossBackward0>)
tensor(2.5680, grad_fn=<NllLossBackward0>)
tensor(2.4130, grad_fn=<NllLossBackward0>)
tensor(3.0894, grad_fn=<NllLossBackward0>)
tensor(2.3273, grad_fn=<NllLossBackward0>)
tensor(3.02

tensor(3.9166, grad_fn=<NllLossBackward0>)
tensor(3.6600, grad_fn=<NllLossBackward0>)
tensor(3.0854, grad_fn=<NllLossBackward0>)
tensor(2.3107, grad_fn=<NllLossBackward0>)
tensor(2.2819, grad_fn=<NllLossBackward0>)
tensor(3.0060, grad_fn=<NllLossBackward0>)
tensor(2.3089, grad_fn=<NllLossBackward0>)
tensor(2.8699, grad_fn=<NllLossBackward0>)
tensor(3.6526, grad_fn=<NllLossBackward0>)
tensor(2.3504, grad_fn=<NllLossBackward0>)
tensor(2.4035, grad_fn=<NllLossBackward0>)
tensor(3.4425, grad_fn=<NllLossBackward0>)
tensor(2.4894, grad_fn=<NllLossBackward0>)
tensor(2.8704, grad_fn=<NllLossBackward0>)
tensor(2.2836, grad_fn=<NllLossBackward0>)
tensor(3.7540, grad_fn=<NllLossBackward0>)
tensor(2.2469, grad_fn=<NllLossBackward0>)
tensor(2.8276, grad_fn=<NllLossBackward0>)
tensor(2.7297, grad_fn=<NllLossBackward0>)
tensor(3.5186, grad_fn=<NllLossBackward0>)
tensor(2.7614, grad_fn=<NllLossBackward0>)
tensor(3.0221, grad_fn=<NllLossBackward0>)
tensor(2.8853, grad_fn=<NllLossBackward0>)
tensor(3.90

tensor(2.4891, grad_fn=<NllLossBackward0>)
tensor(3.5371, grad_fn=<NllLossBackward0>)
tensor(2.5817, grad_fn=<NllLossBackward0>)
tensor(2.2532, grad_fn=<NllLossBackward0>)
tensor(3.4449, grad_fn=<NllLossBackward0>)
tensor(2.4820, grad_fn=<NllLossBackward0>)
tensor(2.2746, grad_fn=<NllLossBackward0>)
tensor(3.4815, grad_fn=<NllLossBackward0>)
tensor(1.7448, grad_fn=<NllLossBackward0>)
tensor(3.5991, grad_fn=<NllLossBackward0>)
tensor(3.6219, grad_fn=<NllLossBackward0>)
tensor(2.4565, grad_fn=<NllLossBackward0>)
tensor(2.3014, grad_fn=<NllLossBackward0>)
tensor(3.9032, grad_fn=<NllLossBackward0>)
tensor(2.5213, grad_fn=<NllLossBackward0>)
tensor(2.9835, grad_fn=<NllLossBackward0>)
tensor(2.3033, grad_fn=<NllLossBackward0>)
tensor(3.4805, grad_fn=<NllLossBackward0>)
tensor(3.4305, grad_fn=<NllLossBackward0>)
tensor(3.9387, grad_fn=<NllLossBackward0>)
tensor(2.3242, grad_fn=<NllLossBackward0>)
tensor(3.3337, grad_fn=<NllLossBackward0>)
tensor(2.7128, grad_fn=<NllLossBackward0>)
tensor(4.16

tensor(2.9377, grad_fn=<NllLossBackward0>)
tensor(3.4620, grad_fn=<NllLossBackward0>)
tensor(2.3488, grad_fn=<NllLossBackward0>)
tensor(2.8135, grad_fn=<NllLossBackward0>)
tensor(3.5723, grad_fn=<NllLossBackward0>)
tensor(2.5350, grad_fn=<NllLossBackward0>)
tensor(2.4070, grad_fn=<NllLossBackward0>)
tensor(3.4918, grad_fn=<NllLossBackward0>)
tensor(3.0654, grad_fn=<NllLossBackward0>)
tensor(2.7717, grad_fn=<NllLossBackward0>)
tensor(1.9726, grad_fn=<NllLossBackward0>)
tensor(2.4192, grad_fn=<NllLossBackward0>)
tensor(2.3994, grad_fn=<NllLossBackward0>)
tensor(2.4167, grad_fn=<NllLossBackward0>)
tensor(3.9186, grad_fn=<NllLossBackward0>)
tensor(2.9437, grad_fn=<NllLossBackward0>)
tensor(1.9588, grad_fn=<NllLossBackward0>)
tensor(4.4096, grad_fn=<NllLossBackward0>)
tensor(2.3671, grad_fn=<NllLossBackward0>)
tensor(3.3957, grad_fn=<NllLossBackward0>)
tensor(2.3490, grad_fn=<NllLossBackward0>)
tensor(2.6919, grad_fn=<NllLossBackward0>)
tensor(2.9968, grad_fn=<NllLossBackward0>)
tensor(2.75

tensor(2.8834, grad_fn=<NllLossBackward0>)
tensor(2.6693, grad_fn=<NllLossBackward0>)
tensor(2.8345, grad_fn=<NllLossBackward0>)
tensor(2.1770, grad_fn=<NllLossBackward0>)
tensor(3.5676, grad_fn=<NllLossBackward0>)
tensor(3.4405, grad_fn=<NllLossBackward0>)
tensor(1.7436, grad_fn=<NllLossBackward0>)
tensor(2.7877, grad_fn=<NllLossBackward0>)
tensor(2.2600, grad_fn=<NllLossBackward0>)
tensor(1.7492, grad_fn=<NllLossBackward0>)
tensor(2.0700, grad_fn=<NllLossBackward0>)
tensor(3.4332, grad_fn=<NllLossBackward0>)
tensor(3.2321, grad_fn=<NllLossBackward0>)
tensor(2.9244, grad_fn=<NllLossBackward0>)
tensor(2.3991, grad_fn=<NllLossBackward0>)
tensor(2.2666, grad_fn=<NllLossBackward0>)
tensor(2.6388, grad_fn=<NllLossBackward0>)
tensor(2.6214, grad_fn=<NllLossBackward0>)
tensor(2.8521, grad_fn=<NllLossBackward0>)
tensor(3.2111, grad_fn=<NllLossBackward0>)
tensor(2.3249, grad_fn=<NllLossBackward0>)
tensor(3.5576, grad_fn=<NllLossBackward0>)
tensor(2.8071, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(1.9554, grad_fn=<NllLossBackward0>)
tensor(2.7881, grad_fn=<NllLossBackward0>)
tensor(4.6916, grad_fn=<NllLossBackward0>)
tensor(2.9412, grad_fn=<NllLossBackward0>)
tensor(2.6923, grad_fn=<NllLossBackward0>)
tensor(3.5308, grad_fn=<NllLossBackward0>)
tensor(3.4320, grad_fn=<NllLossBackward0>)
tensor(3.4427, grad_fn=<NllLossBackward0>)
tensor(2.4531, grad_fn=<NllLossBackward0>)
tensor(2.3424, grad_fn=<NllLossBackward0>)
tensor(3.4297, grad_fn=<NllLossBackward0>)
tensor(2.5442, grad_fn=<NllLossBackward0>)
tensor(3.5588, grad_fn=<NllLossBackward0>)
tensor(2.9622, grad_fn=<NllLossBackward0>)
tensor(2.4444, grad_fn=<NllLossBackward0>)
tensor(2.7493, grad_fn=<NllLossBackward0>)
tensor(2.3499, grad_fn=<NllLossBackward0>)
tensor(2.6853, grad_fn=<NllLossBackward0>)
tensor(3.8543, grad_fn=<NllLossBackward0>)
tensor(2.4208, grad_fn=<NllLossBackward0>)
tensor(2.8466, grad_fn=<NllLossBackward0>)
tensor(2.3476, grad_fn=<NllLossBackward0>)
tensor(2.4752, grad_fn=<NllLossBackward0>)
tensor(3.54

tensor(3.4532, grad_fn=<NllLossBackward0>)
tensor(4.2702, grad_fn=<NllLossBackward0>)
tensor(2.5241, grad_fn=<NllLossBackward0>)
tensor(2.7448, grad_fn=<NllLossBackward0>)
tensor(3.4349, grad_fn=<NllLossBackward0>)
tensor(2.7307, grad_fn=<NllLossBackward0>)
tensor(2.9100, grad_fn=<NllLossBackward0>)
tensor(2.0987, grad_fn=<NllLossBackward0>)
tensor(2.6664, grad_fn=<NllLossBackward0>)
tensor(4.4438, grad_fn=<NllLossBackward0>)
tensor(2.3256, grad_fn=<NllLossBackward0>)
tensor(2.7545, grad_fn=<NllLossBackward0>)
tensor(2.2063, grad_fn=<NllLossBackward0>)
tensor(2.9053, grad_fn=<NllLossBackward0>)
tensor(3.3483, grad_fn=<NllLossBackward0>)
tensor(2.3188, grad_fn=<NllLossBackward0>)
tensor(2.7627, grad_fn=<NllLossBackward0>)
tensor(3.4509, grad_fn=<NllLossBackward0>)
tensor(1.9640, grad_fn=<NllLossBackward0>)
tensor(3.7210, grad_fn=<NllLossBackward0>)
tensor(2.9736, grad_fn=<NllLossBackward0>)
tensor(2.4732, grad_fn=<NllLossBackward0>)
tensor(3.3654, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(2.7953, grad_fn=<NllLossBackward0>)
tensor(3.0792, grad_fn=<NllLossBackward0>)
tensor(1.8038, grad_fn=<NllLossBackward0>)
tensor(2.7938, grad_fn=<NllLossBackward0>)
tensor(2.7725, grad_fn=<NllLossBackward0>)
tensor(2.6000, grad_fn=<NllLossBackward0>)
tensor(2.3845, grad_fn=<NllLossBackward0>)
tensor(2.0213, grad_fn=<NllLossBackward0>)
tensor(3.0605, grad_fn=<NllLossBackward0>)
tensor(3.4604, grad_fn=<NllLossBackward0>)
tensor(2.3198, grad_fn=<NllLossBackward0>)
tensor(3.4280, grad_fn=<NllLossBackward0>)
tensor(2.4753, grad_fn=<NllLossBackward0>)
tensor(2.0986, grad_fn=<NllLossBackward0>)
tensor(2.0289, grad_fn=<NllLossBackward0>)
tensor(2.1287, grad_fn=<NllLossBackward0>)
tensor(2.7997, grad_fn=<NllLossBackward0>)
tensor(2.9205, grad_fn=<NllLossBackward0>)
tensor(2.5496, grad_fn=<NllLossBackward0>)
tensor(3.3180, grad_fn=<NllLossBackward0>)
tensor(3.5332, grad_fn=<NllLossBackward0>)
tensor(3.5970, grad_fn=<NllLossBackward0>)
tensor(3.5392, grad_fn=<NllLossBackward0>)
tensor(4.30

tensor(2.9490, grad_fn=<NllLossBackward0>)
tensor(3.0512, grad_fn=<NllLossBackward0>)
tensor(2.9248, grad_fn=<NllLossBackward0>)
tensor(2.4854, grad_fn=<NllLossBackward0>)
tensor(4.2917, grad_fn=<NllLossBackward0>)
tensor(3.3363, grad_fn=<NllLossBackward0>)
tensor(3.9827, grad_fn=<NllLossBackward0>)
tensor(1.7872, grad_fn=<NllLossBackward0>)
tensor(3.3956, grad_fn=<NllLossBackward0>)
tensor(2.2789, grad_fn=<NllLossBackward0>)
tensor(2.4436, grad_fn=<NllLossBackward0>)
tensor(2.8178, grad_fn=<NllLossBackward0>)
tensor(3.0828, grad_fn=<NllLossBackward0>)
tensor(3.4425, grad_fn=<NllLossBackward0>)
tensor(3.2456, grad_fn=<NllLossBackward0>)
tensor(2.9323, grad_fn=<NllLossBackward0>)
tensor(2.6646, grad_fn=<NllLossBackward0>)
tensor(4.5431, grad_fn=<NllLossBackward0>)
tensor(2.3060, grad_fn=<NllLossBackward0>)
tensor(3.1304, grad_fn=<NllLossBackward0>)
tensor(3.0011, grad_fn=<NllLossBackward0>)
tensor(2.2753, grad_fn=<NllLossBackward0>)
tensor(2.5556, grad_fn=<NllLossBackward0>)
tensor(2.32

tensor(3.9260, grad_fn=<NllLossBackward0>)
tensor(2.4426, grad_fn=<NllLossBackward0>)
tensor(3.5307, grad_fn=<NllLossBackward0>)
tensor(2.7635, grad_fn=<NllLossBackward0>)
tensor(2.4565, grad_fn=<NllLossBackward0>)
tensor(3.0315, grad_fn=<NllLossBackward0>)
tensor(3.5226, grad_fn=<NllLossBackward0>)
tensor(2.8730, grad_fn=<NllLossBackward0>)
tensor(2.1093, grad_fn=<NllLossBackward0>)
tensor(2.1810, grad_fn=<NllLossBackward0>)
tensor(1.7890, grad_fn=<NllLossBackward0>)
tensor(1.9790, grad_fn=<NllLossBackward0>)
tensor(2.9843, grad_fn=<NllLossBackward0>)
tensor(3.4242, grad_fn=<NllLossBackward0>)
tensor(2.0437, grad_fn=<NllLossBackward0>)
tensor(3.6505, grad_fn=<NllLossBackward0>)
tensor(2.8440, grad_fn=<NllLossBackward0>)
tensor(2.6374, grad_fn=<NllLossBackward0>)
tensor(4.4290, grad_fn=<NllLossBackward0>)
tensor(2.8272, grad_fn=<NllLossBackward0>)
tensor(1.9837, grad_fn=<NllLossBackward0>)
tensor(2.3656, grad_fn=<NllLossBackward0>)
tensor(3.0108, grad_fn=<NllLossBackward0>)
tensor(3.28

tensor(2.6217, grad_fn=<NllLossBackward0>)
tensor(2.4523, grad_fn=<NllLossBackward0>)
tensor(3.6209, grad_fn=<NllLossBackward0>)
tensor(2.7901, grad_fn=<NllLossBackward0>)
tensor(3.2474, grad_fn=<NllLossBackward0>)
tensor(3.3169, grad_fn=<NllLossBackward0>)
tensor(3.7009, grad_fn=<NllLossBackward0>)
tensor(3.4322, grad_fn=<NllLossBackward0>)
tensor(3.7030, grad_fn=<NllLossBackward0>)
tensor(2.9584, grad_fn=<NllLossBackward0>)
tensor(2.3555, grad_fn=<NllLossBackward0>)
tensor(2.4212, grad_fn=<NllLossBackward0>)
tensor(2.5996, grad_fn=<NllLossBackward0>)
tensor(3.0610, grad_fn=<NllLossBackward0>)
tensor(3.4142, grad_fn=<NllLossBackward0>)
tensor(2.8577, grad_fn=<NllLossBackward0>)
tensor(2.4137, grad_fn=<NllLossBackward0>)
tensor(3.5136, grad_fn=<NllLossBackward0>)
tensor(2.9499, grad_fn=<NllLossBackward0>)
tensor(2.9932, grad_fn=<NllLossBackward0>)
tensor(2.8045, grad_fn=<NllLossBackward0>)
tensor(2.9180, grad_fn=<NllLossBackward0>)
tensor(2.4997, grad_fn=<NllLossBackward0>)
tensor(2.56

tensor(2.8132, grad_fn=<NllLossBackward0>)
tensor(2.4015, grad_fn=<NllLossBackward0>)
tensor(2.0099, grad_fn=<NllLossBackward0>)
tensor(2.2592, grad_fn=<NllLossBackward0>)
tensor(2.6743, grad_fn=<NllLossBackward0>)
tensor(3.6677, grad_fn=<NllLossBackward0>)
tensor(3.3440, grad_fn=<NllLossBackward0>)
tensor(2.0630, grad_fn=<NllLossBackward0>)
tensor(2.9994, grad_fn=<NllLossBackward0>)
tensor(3.3338, grad_fn=<NllLossBackward0>)
tensor(2.4113, grad_fn=<NllLossBackward0>)
tensor(2.2128, grad_fn=<NllLossBackward0>)
tensor(4.3161, grad_fn=<NllLossBackward0>)
tensor(2.1297, grad_fn=<NllLossBackward0>)
tensor(3.9036, grad_fn=<NllLossBackward0>)
tensor(2.7800, grad_fn=<NllLossBackward0>)
tensor(2.8985, grad_fn=<NllLossBackward0>)
tensor(4.3821, grad_fn=<NllLossBackward0>)
tensor(2.4914, grad_fn=<NllLossBackward0>)
tensor(2.9300, grad_fn=<NllLossBackward0>)
tensor(2.4322, grad_fn=<NllLossBackward0>)
tensor(2.2333, grad_fn=<NllLossBackward0>)
tensor(2.7497, grad_fn=<NllLossBackward0>)
tensor(1.99

tensor(2.6771, grad_fn=<NllLossBackward0>)
tensor(3.0082, grad_fn=<NllLossBackward0>)
tensor(2.5685, grad_fn=<NllLossBackward0>)
tensor(2.4301, grad_fn=<NllLossBackward0>)
tensor(2.5768, grad_fn=<NllLossBackward0>)
tensor(2.4446, grad_fn=<NllLossBackward0>)
tensor(3.3366, grad_fn=<NllLossBackward0>)
tensor(3.0915, grad_fn=<NllLossBackward0>)
tensor(2.1293, grad_fn=<NllLossBackward0>)
tensor(2.4985, grad_fn=<NllLossBackward0>)
tensor(2.5500, grad_fn=<NllLossBackward0>)
tensor(3.1035, grad_fn=<NllLossBackward0>)
tensor(2.4300, grad_fn=<NllLossBackward0>)
tensor(2.8782, grad_fn=<NllLossBackward0>)
tensor(2.4436, grad_fn=<NllLossBackward0>)
tensor(3.7442, grad_fn=<NllLossBackward0>)
tensor(3.4236, grad_fn=<NllLossBackward0>)
tensor(2.2600, grad_fn=<NllLossBackward0>)
tensor(2.3203, grad_fn=<NllLossBackward0>)
tensor(2.5281, grad_fn=<NllLossBackward0>)
tensor(2.8928, grad_fn=<NllLossBackward0>)
tensor(2.9188, grad_fn=<NllLossBackward0>)
tensor(2.8368, grad_fn=<NllLossBackward0>)
tensor(2.03

tensor(2.4413, grad_fn=<NllLossBackward0>)
tensor(3.4151, grad_fn=<NllLossBackward0>)
tensor(3.7969, grad_fn=<NllLossBackward0>)
tensor(3.9478, grad_fn=<NllLossBackward0>)
tensor(3.0178, grad_fn=<NllLossBackward0>)
tensor(1.7670, grad_fn=<NllLossBackward0>)
tensor(3.0785, grad_fn=<NllLossBackward0>)
tensor(2.3286, grad_fn=<NllLossBackward0>)
tensor(2.3576, grad_fn=<NllLossBackward0>)
tensor(3.3108, grad_fn=<NllLossBackward0>)
tensor(2.4440, grad_fn=<NllLossBackward0>)
tensor(3.6407, grad_fn=<NllLossBackward0>)
tensor(3.0223, grad_fn=<NllLossBackward0>)
tensor(2.5486, grad_fn=<NllLossBackward0>)
tensor(3.0388, grad_fn=<NllLossBackward0>)
tensor(2.3066, grad_fn=<NllLossBackward0>)
tensor(3.2525, grad_fn=<NllLossBackward0>)
tensor(2.3132, grad_fn=<NllLossBackward0>)
tensor(2.5940, grad_fn=<NllLossBackward0>)
tensor(2.9298, grad_fn=<NllLossBackward0>)
tensor(2.1950, grad_fn=<NllLossBackward0>)
tensor(2.0385, grad_fn=<NllLossBackward0>)
tensor(2.9568, grad_fn=<NllLossBackward0>)
tensor(2.56

tensor(4.0467, grad_fn=<NllLossBackward0>)
tensor(4.3511, grad_fn=<NllLossBackward0>)
tensor(1.9937, grad_fn=<NllLossBackward0>)
tensor(2.8568, grad_fn=<NllLossBackward0>)
tensor(2.3949, grad_fn=<NllLossBackward0>)
tensor(2.3297, grad_fn=<NllLossBackward0>)
tensor(3.0038, grad_fn=<NllLossBackward0>)
tensor(3.4988, grad_fn=<NllLossBackward0>)
tensor(3.1442, grad_fn=<NllLossBackward0>)
tensor(2.4450, grad_fn=<NllLossBackward0>)
tensor(2.7224, grad_fn=<NllLossBackward0>)
tensor(2.9434, grad_fn=<NllLossBackward0>)
tensor(3.3436, grad_fn=<NllLossBackward0>)
tensor(2.3205, grad_fn=<NllLossBackward0>)
tensor(2.1352, grad_fn=<NllLossBackward0>)
tensor(2.9603, grad_fn=<NllLossBackward0>)
tensor(2.6231, grad_fn=<NllLossBackward0>)
tensor(2.4364, grad_fn=<NllLossBackward0>)
tensor(2.1286, grad_fn=<NllLossBackward0>)
tensor(2.4052, grad_fn=<NllLossBackward0>)
tensor(3.0782, grad_fn=<NllLossBackward0>)
tensor(2.7991, grad_fn=<NllLossBackward0>)
tensor(1.7475, grad_fn=<NllLossBackward0>)
tensor(2.65

tensor(2.5027, grad_fn=<NllLossBackward0>)
tensor(1.9529, grad_fn=<NllLossBackward0>)
tensor(3.6387, grad_fn=<NllLossBackward0>)
tensor(2.3456, grad_fn=<NllLossBackward0>)
tensor(2.0297, grad_fn=<NllLossBackward0>)
tensor(4.2157, grad_fn=<NllLossBackward0>)
tensor(2.1437, grad_fn=<NllLossBackward0>)
tensor(2.3022, grad_fn=<NllLossBackward0>)
tensor(2.5309, grad_fn=<NllLossBackward0>)
tensor(3.0212, grad_fn=<NllLossBackward0>)
tensor(2.9787, grad_fn=<NllLossBackward0>)
tensor(2.2557, grad_fn=<NllLossBackward0>)
tensor(2.2774, grad_fn=<NllLossBackward0>)
tensor(2.8312, grad_fn=<NllLossBackward0>)
tensor(2.4747, grad_fn=<NllLossBackward0>)
tensor(2.6166, grad_fn=<NllLossBackward0>)
tensor(2.9058, grad_fn=<NllLossBackward0>)
tensor(2.8679, grad_fn=<NllLossBackward0>)
tensor(1.9507, grad_fn=<NllLossBackward0>)
tensor(3.9785, grad_fn=<NllLossBackward0>)
tensor(2.3321, grad_fn=<NllLossBackward0>)
tensor(2.9162, grad_fn=<NllLossBackward0>)
tensor(3.6096, grad_fn=<NllLossBackward0>)
tensor(2.70

tensor(3.3771, grad_fn=<NllLossBackward0>)
tensor(2.7327, grad_fn=<NllLossBackward0>)
tensor(3.4613, grad_fn=<NllLossBackward0>)
tensor(3.0582, grad_fn=<NllLossBackward0>)
tensor(3.5697, grad_fn=<NllLossBackward0>)
tensor(2.1963, grad_fn=<NllLossBackward0>)
tensor(2.3106, grad_fn=<NllLossBackward0>)
tensor(3.7719, grad_fn=<NllLossBackward0>)
tensor(3.0029, grad_fn=<NllLossBackward0>)
tensor(2.4578, grad_fn=<NllLossBackward0>)
tensor(1.9771, grad_fn=<NllLossBackward0>)
tensor(2.7019, grad_fn=<NllLossBackward0>)
tensor(2.1894, grad_fn=<NllLossBackward0>)
tensor(2.2030, grad_fn=<NllLossBackward0>)
tensor(2.0156, grad_fn=<NllLossBackward0>)
tensor(2.2339, grad_fn=<NllLossBackward0>)
tensor(2.6673, grad_fn=<NllLossBackward0>)
tensor(2.3671, grad_fn=<NllLossBackward0>)
tensor(3.0059, grad_fn=<NllLossBackward0>)
tensor(2.2563, grad_fn=<NllLossBackward0>)
tensor(3.4487, grad_fn=<NllLossBackward0>)
tensor(4.2052, grad_fn=<NllLossBackward0>)
tensor(1.9828, grad_fn=<NllLossBackward0>)
tensor(2.35

tensor(3.1381, grad_fn=<NllLossBackward0>)
tensor(4.0445, grad_fn=<NllLossBackward0>)
tensor(2.8481, grad_fn=<NllLossBackward0>)
tensor(2.1111, grad_fn=<NllLossBackward0>)
tensor(3.4813, grad_fn=<NllLossBackward0>)
tensor(2.9226, grad_fn=<NllLossBackward0>)
tensor(3.2100, grad_fn=<NllLossBackward0>)
tensor(2.8774, grad_fn=<NllLossBackward0>)
tensor(2.6882, grad_fn=<NllLossBackward0>)
tensor(2.0456, grad_fn=<NllLossBackward0>)
tensor(3.6795, grad_fn=<NllLossBackward0>)
tensor(2.7087, grad_fn=<NllLossBackward0>)
tensor(2.5550, grad_fn=<NllLossBackward0>)
tensor(3.9421, grad_fn=<NllLossBackward0>)
tensor(2.0998, grad_fn=<NllLossBackward0>)
tensor(2.8167, grad_fn=<NllLossBackward0>)
tensor(2.5042, grad_fn=<NllLossBackward0>)
tensor(3.5124, grad_fn=<NllLossBackward0>)
tensor(3.0245, grad_fn=<NllLossBackward0>)
tensor(3.0510, grad_fn=<NllLossBackward0>)
tensor(3.7016, grad_fn=<NllLossBackward0>)
tensor(2.2621, grad_fn=<NllLossBackward0>)
tensor(2.2847, grad_fn=<NllLossBackward0>)
tensor(4.71

tensor(2.1812, grad_fn=<NllLossBackward0>)
tensor(2.2988, grad_fn=<NllLossBackward0>)
tensor(1.9462, grad_fn=<NllLossBackward0>)
tensor(1.9438, grad_fn=<NllLossBackward0>)
tensor(2.9647, grad_fn=<NllLossBackward0>)
tensor(2.3738, grad_fn=<NllLossBackward0>)
tensor(3.3042, grad_fn=<NllLossBackward0>)
tensor(2.3793, grad_fn=<NllLossBackward0>)
tensor(1.9446, grad_fn=<NllLossBackward0>)
tensor(3.3317, grad_fn=<NllLossBackward0>)
tensor(3.4987, grad_fn=<NllLossBackward0>)
tensor(2.4676, grad_fn=<NllLossBackward0>)
tensor(3.1541, grad_fn=<NllLossBackward0>)
tensor(3.5403, grad_fn=<NllLossBackward0>)
tensor(3.4951, grad_fn=<NllLossBackward0>)
tensor(2.5970, grad_fn=<NllLossBackward0>)
tensor(3.6066, grad_fn=<NllLossBackward0>)
tensor(2.0928, grad_fn=<NllLossBackward0>)
tensor(3.5317, grad_fn=<NllLossBackward0>)
tensor(2.7490, grad_fn=<NllLossBackward0>)
tensor(2.1698, grad_fn=<NllLossBackward0>)
tensor(2.2734, grad_fn=<NllLossBackward0>)
tensor(2.2921, grad_fn=<NllLossBackward0>)
tensor(3.93

tensor(2.7744, grad_fn=<NllLossBackward0>)
tensor(3.5901, grad_fn=<NllLossBackward0>)
tensor(3.6046, grad_fn=<NllLossBackward0>)
tensor(3.4288, grad_fn=<NllLossBackward0>)
tensor(2.3202, grad_fn=<NllLossBackward0>)
tensor(2.3418, grad_fn=<NllLossBackward0>)
tensor(3.3309, grad_fn=<NllLossBackward0>)
tensor(5.1370, grad_fn=<NllLossBackward0>)
tensor(1.9797, grad_fn=<NllLossBackward0>)
tensor(2.3143, grad_fn=<NllLossBackward0>)
tensor(2.3627, grad_fn=<NllLossBackward0>)
tensor(2.0197, grad_fn=<NllLossBackward0>)
tensor(1.7669, grad_fn=<NllLossBackward0>)
tensor(4.1279, grad_fn=<NllLossBackward0>)
tensor(2.1784, grad_fn=<NllLossBackward0>)
tensor(3.4622, grad_fn=<NllLossBackward0>)
tensor(2.9338, grad_fn=<NllLossBackward0>)
tensor(2.3672, grad_fn=<NllLossBackward0>)
tensor(2.6825, grad_fn=<NllLossBackward0>)
tensor(2.9182, grad_fn=<NllLossBackward0>)
tensor(2.3451, grad_fn=<NllLossBackward0>)
tensor(2.7573, grad_fn=<NllLossBackward0>)
tensor(1.9605, grad_fn=<NllLossBackward0>)
tensor(3.36

tensor(2.3492, grad_fn=<NllLossBackward0>)
tensor(3.3802, grad_fn=<NllLossBackward0>)
tensor(3.8177, grad_fn=<NllLossBackward0>)
tensor(3.8550, grad_fn=<NllLossBackward0>)
tensor(2.6778, grad_fn=<NllLossBackward0>)
tensor(2.9994, grad_fn=<NllLossBackward0>)
tensor(2.7572, grad_fn=<NllLossBackward0>)
tensor(3.3350, grad_fn=<NllLossBackward0>)
tensor(2.6114, grad_fn=<NllLossBackward0>)
tensor(2.4716, grad_fn=<NllLossBackward0>)
tensor(2.6957, grad_fn=<NllLossBackward0>)
tensor(2.8039, grad_fn=<NllLossBackward0>)
tensor(2.4827, grad_fn=<NllLossBackward0>)
tensor(2.6080, grad_fn=<NllLossBackward0>)
tensor(2.6876, grad_fn=<NllLossBackward0>)
tensor(2.3048, grad_fn=<NllLossBackward0>)
tensor(2.6080, grad_fn=<NllLossBackward0>)
tensor(2.3079, grad_fn=<NllLossBackward0>)
tensor(3.2644, grad_fn=<NllLossBackward0>)
tensor(2.3292, grad_fn=<NllLossBackward0>)
tensor(3.4792, grad_fn=<NllLossBackward0>)
tensor(3.9648, grad_fn=<NllLossBackward0>)
tensor(2.6574, grad_fn=<NllLossBackward0>)
tensor(2.30

tensor(3.5888, grad_fn=<NllLossBackward0>)
tensor(2.5448, grad_fn=<NllLossBackward0>)
tensor(3.0328, grad_fn=<NllLossBackward0>)
tensor(1.9330, grad_fn=<NllLossBackward0>)
tensor(2.4184, grad_fn=<NllLossBackward0>)
tensor(2.4422, grad_fn=<NllLossBackward0>)
tensor(3.6421, grad_fn=<NllLossBackward0>)
tensor(3.9727, grad_fn=<NllLossBackward0>)
tensor(2.5258, grad_fn=<NllLossBackward0>)
tensor(2.0355, grad_fn=<NllLossBackward0>)
tensor(2.0866, grad_fn=<NllLossBackward0>)
tensor(3.0668, grad_fn=<NllLossBackward0>)
tensor(1.9802, grad_fn=<NllLossBackward0>)
tensor(2.3910, grad_fn=<NllLossBackward0>)
tensor(3.3588, grad_fn=<NllLossBackward0>)
tensor(2.3518, grad_fn=<NllLossBackward0>)
tensor(2.1474, grad_fn=<NllLossBackward0>)
tensor(2.4403, grad_fn=<NllLossBackward0>)
tensor(2.8934, grad_fn=<NllLossBackward0>)
tensor(3.0247, grad_fn=<NllLossBackward0>)
tensor(4.4901, grad_fn=<NllLossBackward0>)
tensor(2.8508, grad_fn=<NllLossBackward0>)
tensor(2.5357, grad_fn=<NllLossBackward0>)
tensor(3.35

tensor(2.5891, grad_fn=<NllLossBackward0>)
tensor(1.9227, grad_fn=<NllLossBackward0>)
tensor(2.5112, grad_fn=<NllLossBackward0>)
tensor(2.5854, grad_fn=<NllLossBackward0>)
tensor(4.5721, grad_fn=<NllLossBackward0>)
tensor(3.2828, grad_fn=<NllLossBackward0>)
tensor(3.0780, grad_fn=<NllLossBackward0>)
tensor(4.9171, grad_fn=<NllLossBackward0>)
tensor(1.7499, grad_fn=<NllLossBackward0>)
tensor(2.8204, grad_fn=<NllLossBackward0>)
tensor(2.4117, grad_fn=<NllLossBackward0>)
tensor(2.6702, grad_fn=<NllLossBackward0>)
tensor(2.2362, grad_fn=<NllLossBackward0>)
tensor(3.2941, grad_fn=<NllLossBackward0>)
tensor(4.7803, grad_fn=<NllLossBackward0>)
tensor(2.5038, grad_fn=<NllLossBackward0>)
tensor(2.5336, grad_fn=<NllLossBackward0>)
tensor(2.6093, grad_fn=<NllLossBackward0>)
tensor(2.9102, grad_fn=<NllLossBackward0>)
tensor(2.3969, grad_fn=<NllLossBackward0>)
tensor(3.4413, grad_fn=<NllLossBackward0>)
tensor(2.7596, grad_fn=<NllLossBackward0>)
tensor(4.6099, grad_fn=<NllLossBackward0>)
tensor(2.04

tensor(2.9750, grad_fn=<NllLossBackward0>)
tensor(2.9922, grad_fn=<NllLossBackward0>)
tensor(2.9363, grad_fn=<NllLossBackward0>)
tensor(2.0899, grad_fn=<NllLossBackward0>)
tensor(2.0473, grad_fn=<NllLossBackward0>)
tensor(2.0845, grad_fn=<NllLossBackward0>)
tensor(2.4195, grad_fn=<NllLossBackward0>)
tensor(3.1677, grad_fn=<NllLossBackward0>)
tensor(2.3934, grad_fn=<NllLossBackward0>)
tensor(2.4953, grad_fn=<NllLossBackward0>)
tensor(2.7785, grad_fn=<NllLossBackward0>)
tensor(2.9198, grad_fn=<NllLossBackward0>)
tensor(2.5833, grad_fn=<NllLossBackward0>)
tensor(3.9596, grad_fn=<NllLossBackward0>)
tensor(3.9451, grad_fn=<NllLossBackward0>)
tensor(2.0440, grad_fn=<NllLossBackward0>)
tensor(2.3318, grad_fn=<NllLossBackward0>)
tensor(2.2908, grad_fn=<NllLossBackward0>)
tensor(3.5796, grad_fn=<NllLossBackward0>)
tensor(2.3751, grad_fn=<NllLossBackward0>)
tensor(2.8636, grad_fn=<NllLossBackward0>)
tensor(2.7775, grad_fn=<NllLossBackward0>)
tensor(2.7130, grad_fn=<NllLossBackward0>)
tensor(3.86

tensor(3.6973, grad_fn=<NllLossBackward0>)
tensor(1.7601, grad_fn=<NllLossBackward0>)
tensor(2.1681, grad_fn=<NllLossBackward0>)
tensor(3.6458, grad_fn=<NllLossBackward0>)
tensor(3.1395, grad_fn=<NllLossBackward0>)
tensor(3.3533, grad_fn=<NllLossBackward0>)
tensor(4.0292, grad_fn=<NllLossBackward0>)
tensor(1.7537, grad_fn=<NllLossBackward0>)
tensor(2.9068, grad_fn=<NllLossBackward0>)
tensor(2.0315, grad_fn=<NllLossBackward0>)
tensor(2.9289, grad_fn=<NllLossBackward0>)
tensor(2.6123, grad_fn=<NllLossBackward0>)
tensor(4.3449, grad_fn=<NllLossBackward0>)
tensor(2.8647, grad_fn=<NllLossBackward0>)
tensor(2.4168, grad_fn=<NllLossBackward0>)
tensor(3.2159, grad_fn=<NllLossBackward0>)
tensor(2.5854, grad_fn=<NllLossBackward0>)
tensor(2.4203, grad_fn=<NllLossBackward0>)
tensor(2.2587, grad_fn=<NllLossBackward0>)
tensor(4.5461, grad_fn=<NllLossBackward0>)
tensor(3.1910, grad_fn=<NllLossBackward0>)
tensor(2.2510, grad_fn=<NllLossBackward0>)
tensor(2.5788, grad_fn=<NllLossBackward0>)
tensor(2.49

tensor(4.5893, grad_fn=<NllLossBackward0>)
tensor(3.5558, grad_fn=<NllLossBackward0>)
tensor(3.5180, grad_fn=<NllLossBackward0>)
tensor(2.7547, grad_fn=<NllLossBackward0>)
tensor(2.7918, grad_fn=<NllLossBackward0>)
tensor(3.0480, grad_fn=<NllLossBackward0>)
tensor(2.4331, grad_fn=<NllLossBackward0>)
tensor(2.9284, grad_fn=<NllLossBackward0>)
tensor(3.3702, grad_fn=<NllLossBackward0>)
tensor(2.0738, grad_fn=<NllLossBackward0>)
tensor(2.1570, grad_fn=<NllLossBackward0>)
tensor(2.4289, grad_fn=<NllLossBackward0>)
tensor(1.7502, grad_fn=<NllLossBackward0>)
tensor(3.3623, grad_fn=<NllLossBackward0>)
tensor(1.7373, grad_fn=<NllLossBackward0>)
tensor(3.0519, grad_fn=<NllLossBackward0>)
tensor(3.5618, grad_fn=<NllLossBackward0>)
tensor(2.3820, grad_fn=<NllLossBackward0>)
tensor(2.4534, grad_fn=<NllLossBackward0>)
tensor(1.9441, grad_fn=<NllLossBackward0>)
tensor(2.1236, grad_fn=<NllLossBackward0>)
tensor(3.1073, grad_fn=<NllLossBackward0>)
tensor(3.3789, grad_fn=<NllLossBackward0>)
tensor(3.17

tensor(3.0546, grad_fn=<NllLossBackward0>)
tensor(2.4579, grad_fn=<NllLossBackward0>)
tensor(2.3678, grad_fn=<NllLossBackward0>)
tensor(1.9741, grad_fn=<NllLossBackward0>)
tensor(3.0643, grad_fn=<NllLossBackward0>)
tensor(3.6381, grad_fn=<NllLossBackward0>)
tensor(3.0021, grad_fn=<NllLossBackward0>)
tensor(3.4979, grad_fn=<NllLossBackward0>)
tensor(2.1159, grad_fn=<NllLossBackward0>)
tensor(3.1509, grad_fn=<NllLossBackward0>)
tensor(2.7095, grad_fn=<NllLossBackward0>)
tensor(3.8831, grad_fn=<NllLossBackward0>)
tensor(2.3132, grad_fn=<NllLossBackward0>)
tensor(3.0622, grad_fn=<NllLossBackward0>)
tensor(2.5530, grad_fn=<NllLossBackward0>)
tensor(2.5228, grad_fn=<NllLossBackward0>)
tensor(2.1230, grad_fn=<NllLossBackward0>)
tensor(2.6556, grad_fn=<NllLossBackward0>)
tensor(4.5746, grad_fn=<NllLossBackward0>)
tensor(4.4056, grad_fn=<NllLossBackward0>)
tensor(3.0771, grad_fn=<NllLossBackward0>)
tensor(2.7346, grad_fn=<NllLossBackward0>)
tensor(2.2982, grad_fn=<NllLossBackward0>)
tensor(1.85

tensor(2.7024, grad_fn=<NllLossBackward0>)
tensor(2.5168, grad_fn=<NllLossBackward0>)
tensor(3.7217, grad_fn=<NllLossBackward0>)
tensor(2.3313, grad_fn=<NllLossBackward0>)
tensor(2.2217, grad_fn=<NllLossBackward0>)
tensor(2.8716, grad_fn=<NllLossBackward0>)
tensor(2.7998, grad_fn=<NllLossBackward0>)
tensor(2.9134, grad_fn=<NllLossBackward0>)
tensor(2.4056, grad_fn=<NllLossBackward0>)
tensor(2.3369, grad_fn=<NllLossBackward0>)
tensor(2.2965, grad_fn=<NllLossBackward0>)
tensor(2.0351, grad_fn=<NllLossBackward0>)
tensor(3.5921, grad_fn=<NllLossBackward0>)
tensor(3.3299, grad_fn=<NllLossBackward0>)
tensor(2.8981, grad_fn=<NllLossBackward0>)
tensor(3.4462, grad_fn=<NllLossBackward0>)
tensor(3.5709, grad_fn=<NllLossBackward0>)
tensor(2.0763, grad_fn=<NllLossBackward0>)
tensor(3.4435, grad_fn=<NllLossBackward0>)
tensor(2.4399, grad_fn=<NllLossBackward0>)
tensor(2.1776, grad_fn=<NllLossBackward0>)
tensor(3.4150, grad_fn=<NllLossBackward0>)
tensor(3.5243, grad_fn=<NllLossBackward0>)
tensor(2.07

tensor(2.4918, grad_fn=<NllLossBackward0>)
tensor(1.7438, grad_fn=<NllLossBackward0>)
tensor(4.0458, grad_fn=<NllLossBackward0>)
tensor(2.9808, grad_fn=<NllLossBackward0>)
tensor(2.2856, grad_fn=<NllLossBackward0>)
tensor(3.3548, grad_fn=<NllLossBackward0>)
tensor(2.0531, grad_fn=<NllLossBackward0>)
tensor(3.6267, grad_fn=<NllLossBackward0>)
tensor(2.5824, grad_fn=<NllLossBackward0>)
tensor(3.0288, grad_fn=<NllLossBackward0>)
tensor(2.3564, grad_fn=<NllLossBackward0>)
tensor(3.3493, grad_fn=<NllLossBackward0>)
tensor(2.4620, grad_fn=<NllLossBackward0>)
tensor(2.3673, grad_fn=<NllLossBackward0>)
tensor(3.5834, grad_fn=<NllLossBackward0>)
tensor(2.5126, grad_fn=<NllLossBackward0>)
tensor(2.4987, grad_fn=<NllLossBackward0>)
tensor(2.3185, grad_fn=<NllLossBackward0>)
tensor(2.8965, grad_fn=<NllLossBackward0>)
tensor(2.9720, grad_fn=<NllLossBackward0>)
tensor(2.7459, grad_fn=<NllLossBackward0>)
tensor(2.7054, grad_fn=<NllLossBackward0>)
tensor(1.7431, grad_fn=<NllLossBackward0>)
tensor(4.08

tensor(3.3975, grad_fn=<NllLossBackward0>)
tensor(2.5062, grad_fn=<NllLossBackward0>)
tensor(3.4784, grad_fn=<NllLossBackward0>)
tensor(2.9224, grad_fn=<NllLossBackward0>)
tensor(2.3314, grad_fn=<NllLossBackward0>)
tensor(3.3312, grad_fn=<NllLossBackward0>)
tensor(4.0070, grad_fn=<NllLossBackward0>)
tensor(1.9776, grad_fn=<NllLossBackward0>)
tensor(3.5698, grad_fn=<NllLossBackward0>)
tensor(3.6016, grad_fn=<NllLossBackward0>)
tensor(3.0404, grad_fn=<NllLossBackward0>)
tensor(2.9658, grad_fn=<NllLossBackward0>)
tensor(3.8887, grad_fn=<NllLossBackward0>)
tensor(2.9092, grad_fn=<NllLossBackward0>)
tensor(3.0868, grad_fn=<NllLossBackward0>)
tensor(3.2727, grad_fn=<NllLossBackward0>)
tensor(4.5274, grad_fn=<NllLossBackward0>)
tensor(1.7708, grad_fn=<NllLossBackward0>)
tensor(3.9558, grad_fn=<NllLossBackward0>)
tensor(2.8293, grad_fn=<NllLossBackward0>)
tensor(2.6234, grad_fn=<NllLossBackward0>)
tensor(3.1468, grad_fn=<NllLossBackward0>)
tensor(2.9380, grad_fn=<NllLossBackward0>)
tensor(4.07

tensor(2.2525, grad_fn=<NllLossBackward0>)
tensor(2.9439, grad_fn=<NllLossBackward0>)
tensor(2.3401, grad_fn=<NllLossBackward0>)
tensor(2.9260, grad_fn=<NllLossBackward0>)
tensor(2.7055, grad_fn=<NllLossBackward0>)
tensor(2.9075, grad_fn=<NllLossBackward0>)
tensor(3.3507, grad_fn=<NllLossBackward0>)
tensor(2.8537, grad_fn=<NllLossBackward0>)
tensor(2.6477, grad_fn=<NllLossBackward0>)
tensor(2.0911, grad_fn=<NllLossBackward0>)
tensor(3.3599, grad_fn=<NllLossBackward0>)
tensor(4.0959, grad_fn=<NllLossBackward0>)
tensor(2.3110, grad_fn=<NllLossBackward0>)
tensor(2.4758, grad_fn=<NllLossBackward0>)
tensor(3.1105, grad_fn=<NllLossBackward0>)
tensor(3.8192, grad_fn=<NllLossBackward0>)
tensor(2.2804, grad_fn=<NllLossBackward0>)
tensor(2.3337, grad_fn=<NllLossBackward0>)
tensor(2.3172, grad_fn=<NllLossBackward0>)
tensor(2.3125, grad_fn=<NllLossBackward0>)
tensor(1.7472, grad_fn=<NllLossBackward0>)
tensor(2.6206, grad_fn=<NllLossBackward0>)
tensor(3.3785, grad_fn=<NllLossBackward0>)
tensor(3.91

tensor(3.1110, grad_fn=<NllLossBackward0>)
tensor(2.3939, grad_fn=<NllLossBackward0>)
tensor(2.8869, grad_fn=<NllLossBackward0>)
tensor(2.4592, grad_fn=<NllLossBackward0>)
tensor(3.9131, grad_fn=<NllLossBackward0>)
tensor(2.4478, grad_fn=<NllLossBackward0>)
tensor(3.1179, grad_fn=<NllLossBackward0>)
tensor(2.2364, grad_fn=<NllLossBackward0>)
tensor(2.9475, grad_fn=<NllLossBackward0>)
tensor(3.5208, grad_fn=<NllLossBackward0>)
tensor(2.2352, grad_fn=<NllLossBackward0>)
tensor(2.9653, grad_fn=<NllLossBackward0>)
tensor(2.2781, grad_fn=<NllLossBackward0>)
tensor(2.4124, grad_fn=<NllLossBackward0>)
tensor(2.2651, grad_fn=<NllLossBackward0>)
tensor(2.4958, grad_fn=<NllLossBackward0>)
tensor(2.4195, grad_fn=<NllLossBackward0>)
tensor(3.4017, grad_fn=<NllLossBackward0>)
tensor(3.5343, grad_fn=<NllLossBackward0>)
tensor(2.4651, grad_fn=<NllLossBackward0>)
tensor(2.9132, grad_fn=<NllLossBackward0>)
tensor(1.8892, grad_fn=<NllLossBackward0>)
tensor(2.7156, grad_fn=<NllLossBackward0>)
tensor(3.94

tensor(3.2921, grad_fn=<NllLossBackward0>)
tensor(3.3838, grad_fn=<NllLossBackward0>)
tensor(3.3524, grad_fn=<NllLossBackward0>)
tensor(4.4372, grad_fn=<NllLossBackward0>)
tensor(2.9508, grad_fn=<NllLossBackward0>)
tensor(3.0300, grad_fn=<NllLossBackward0>)
tensor(2.2979, grad_fn=<NllLossBackward0>)
tensor(3.6184, grad_fn=<NllLossBackward0>)
tensor(2.8659, grad_fn=<NllLossBackward0>)
tensor(2.2649, grad_fn=<NllLossBackward0>)
tensor(2.3199, grad_fn=<NllLossBackward0>)
tensor(3.3882, grad_fn=<NllLossBackward0>)
tensor(2.5323, grad_fn=<NllLossBackward0>)
tensor(2.3612, grad_fn=<NllLossBackward0>)
tensor(3.0234, grad_fn=<NllLossBackward0>)
tensor(3.3660, grad_fn=<NllLossBackward0>)
tensor(2.2913, grad_fn=<NllLossBackward0>)
tensor(2.8040, grad_fn=<NllLossBackward0>)
tensor(2.3325, grad_fn=<NllLossBackward0>)
tensor(1.9945, grad_fn=<NllLossBackward0>)
tensor(2.3460, grad_fn=<NllLossBackward0>)
tensor(3.4713, grad_fn=<NllLossBackward0>)
tensor(2.2707, grad_fn=<NllLossBackward0>)
tensor(3.44

tensor(2.4429, grad_fn=<NllLossBackward0>)
tensor(2.2048, grad_fn=<NllLossBackward0>)
tensor(1.9613, grad_fn=<NllLossBackward0>)
tensor(2.2375, grad_fn=<NllLossBackward0>)
tensor(1.9244, grad_fn=<NllLossBackward0>)
tensor(3.4339, grad_fn=<NllLossBackward0>)
tensor(2.0841, grad_fn=<NllLossBackward0>)
tensor(2.0901, grad_fn=<NllLossBackward0>)
tensor(2.8996, grad_fn=<NllLossBackward0>)
tensor(3.2200, grad_fn=<NllLossBackward0>)
tensor(2.2888, grad_fn=<NllLossBackward0>)
tensor(3.3150, grad_fn=<NllLossBackward0>)
tensor(2.4178, grad_fn=<NllLossBackward0>)
tensor(2.7896, grad_fn=<NllLossBackward0>)
tensor(2.9296, grad_fn=<NllLossBackward0>)
tensor(1.9625, grad_fn=<NllLossBackward0>)
tensor(2.9969, grad_fn=<NllLossBackward0>)
tensor(1.7355, grad_fn=<NllLossBackward0>)
tensor(2.8109, grad_fn=<NllLossBackward0>)
tensor(2.3865, grad_fn=<NllLossBackward0>)
tensor(3.7762, grad_fn=<NllLossBackward0>)
tensor(1.7719, grad_fn=<NllLossBackward0>)
tensor(2.9938, grad_fn=<NllLossBackward0>)
tensor(2.38

tensor(4.2389, grad_fn=<NllLossBackward0>)
tensor(2.0736, grad_fn=<NllLossBackward0>)
tensor(1.9769, grad_fn=<NllLossBackward0>)
tensor(3.5611, grad_fn=<NllLossBackward0>)
tensor(2.3146, grad_fn=<NllLossBackward0>)
tensor(2.3603, grad_fn=<NllLossBackward0>)
tensor(2.2752, grad_fn=<NllLossBackward0>)
tensor(2.6569, grad_fn=<NllLossBackward0>)
tensor(3.4211, grad_fn=<NllLossBackward0>)
tensor(3.0179, grad_fn=<NllLossBackward0>)
tensor(2.1813, grad_fn=<NllLossBackward0>)
tensor(3.1884, grad_fn=<NllLossBackward0>)
tensor(1.7469, grad_fn=<NllLossBackward0>)
tensor(2.3604, grad_fn=<NllLossBackward0>)
tensor(2.8802, grad_fn=<NllLossBackward0>)
tensor(2.2542, grad_fn=<NllLossBackward0>)
tensor(3.4608, grad_fn=<NllLossBackward0>)
tensor(3.5680, grad_fn=<NllLossBackward0>)
tensor(2.4328, grad_fn=<NllLossBackward0>)
tensor(2.8412, grad_fn=<NllLossBackward0>)
tensor(4.3633, grad_fn=<NllLossBackward0>)
tensor(2.0349, grad_fn=<NllLossBackward0>)
tensor(2.8654, grad_fn=<NllLossBackward0>)
tensor(2.16

tensor(2.2414, grad_fn=<NllLossBackward0>)
tensor(2.3391, grad_fn=<NllLossBackward0>)
tensor(2.9003, grad_fn=<NllLossBackward0>)
tensor(2.8033, grad_fn=<NllLossBackward0>)
tensor(2.9564, grad_fn=<NllLossBackward0>)
tensor(2.9091, grad_fn=<NllLossBackward0>)
tensor(2.6094, grad_fn=<NllLossBackward0>)
tensor(3.3694, grad_fn=<NllLossBackward0>)
tensor(2.4893, grad_fn=<NllLossBackward0>)
tensor(3.0224, grad_fn=<NllLossBackward0>)
tensor(2.4030, grad_fn=<NllLossBackward0>)
tensor(3.0142, grad_fn=<NllLossBackward0>)
tensor(2.4353, grad_fn=<NllLossBackward0>)
tensor(3.4374, grad_fn=<NllLossBackward0>)
tensor(2.5667, grad_fn=<NllLossBackward0>)
tensor(3.3538, grad_fn=<NllLossBackward0>)
tensor(3.9829, grad_fn=<NllLossBackward0>)
tensor(2.5225, grad_fn=<NllLossBackward0>)
tensor(3.3243, grad_fn=<NllLossBackward0>)
tensor(1.7319, grad_fn=<NllLossBackward0>)
tensor(1.7250, grad_fn=<NllLossBackward0>)
tensor(2.4774, grad_fn=<NllLossBackward0>)
tensor(2.4466, grad_fn=<NllLossBackward0>)
tensor(3.88

tensor(2.8122, grad_fn=<NllLossBackward0>)
tensor(2.2271, grad_fn=<NllLossBackward0>)
tensor(2.5573, grad_fn=<NllLossBackward0>)
tensor(2.3236, grad_fn=<NllLossBackward0>)
tensor(2.4687, grad_fn=<NllLossBackward0>)
tensor(3.9700, grad_fn=<NllLossBackward0>)
tensor(4.0302, grad_fn=<NllLossBackward0>)
tensor(2.7333, grad_fn=<NllLossBackward0>)
tensor(2.0672, grad_fn=<NllLossBackward0>)
tensor(3.0597, grad_fn=<NllLossBackward0>)
tensor(2.9575, grad_fn=<NllLossBackward0>)
tensor(2.3584, grad_fn=<NllLossBackward0>)
tensor(2.6983, grad_fn=<NllLossBackward0>)
tensor(3.3840, grad_fn=<NllLossBackward0>)
tensor(3.0549, grad_fn=<NllLossBackward0>)
tensor(2.0698, grad_fn=<NllLossBackward0>)
tensor(2.5809, grad_fn=<NllLossBackward0>)
tensor(3.3228, grad_fn=<NllLossBackward0>)
tensor(2.3794, grad_fn=<NllLossBackward0>)
tensor(3.9902, grad_fn=<NllLossBackward0>)
tensor(1.7649, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(2.6238, grad_fn=<NllLossBackward0>)
tensor(3.37

tensor(3.1581, grad_fn=<NllLossBackward0>)
tensor(3.2605, grad_fn=<NllLossBackward0>)
tensor(2.7864, grad_fn=<NllLossBackward0>)
tensor(2.3321, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.1646, grad_fn=<NllLossBackward0>)
tensor(2.3230, grad_fn=<NllLossBackward0>)
tensor(2.3022, grad_fn=<NllLossBackward0>)
tensor(2.4749, grad_fn=<NllLossBackward0>)
tensor(2.8787, grad_fn=<NllLossBackward0>)
tensor(4.2318, grad_fn=<NllLossBackward0>)
tensor(3.5272, grad_fn=<NllLossBackward0>)
tensor(3.8983, grad_fn=<NllLossBackward0>)
tensor(2.9053, grad_fn=<NllLossBackward0>)
tensor(3.3197, grad_fn=<NllLossBackward0>)
tensor(3.7722, grad_fn=<NllLossBackward0>)
tensor(2.2650, grad_fn=<NllLossBackward0>)
tensor(2.8468, grad_fn=<NllLossBackward0>)
tensor(3.1855, grad_fn=<NllLossBackward0>)
tensor(3.6323, grad_fn=<NllLossBackward0>)
tensor(4.5281, grad_fn=<NllLossBackward0>)
tensor(2.1694, grad_fn=<NllLossBackward0>)
tensor(2.0081, grad_fn=<NllLossBackward0>)
tensor(2.92

tensor(1.9709, grad_fn=<NllLossBackward0>)
tensor(2.9159, grad_fn=<NllLossBackward0>)
tensor(2.3766, grad_fn=<NllLossBackward0>)
tensor(2.8929, grad_fn=<NllLossBackward0>)
tensor(2.6040, grad_fn=<NllLossBackward0>)
tensor(2.3401, grad_fn=<NllLossBackward0>)
tensor(2.8354, grad_fn=<NllLossBackward0>)
tensor(2.8968, grad_fn=<NllLossBackward0>)
tensor(2.1829, grad_fn=<NllLossBackward0>)
tensor(2.9684, grad_fn=<NllLossBackward0>)
tensor(2.9728, grad_fn=<NllLossBackward0>)
tensor(2.2170, grad_fn=<NllLossBackward0>)
tensor(2.1521, grad_fn=<NllLossBackward0>)
tensor(2.4044, grad_fn=<NllLossBackward0>)
tensor(2.7033, grad_fn=<NllLossBackward0>)
tensor(2.1192, grad_fn=<NllLossBackward0>)
tensor(4.0067, grad_fn=<NllLossBackward0>)
tensor(2.8894, grad_fn=<NllLossBackward0>)
tensor(2.2795, grad_fn=<NllLossBackward0>)
tensor(3.6252, grad_fn=<NllLossBackward0>)
tensor(2.8743, grad_fn=<NllLossBackward0>)
tensor(2.6948, grad_fn=<NllLossBackward0>)
tensor(2.0910, grad_fn=<NllLossBackward0>)
tensor(1.96

tensor(4.2434, grad_fn=<NllLossBackward0>)
tensor(2.3537, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.7539, grad_fn=<NllLossBackward0>)
tensor(2.1464, grad_fn=<NllLossBackward0>)
tensor(2.4500, grad_fn=<NllLossBackward0>)
tensor(2.0005, grad_fn=<NllLossBackward0>)
tensor(2.7811, grad_fn=<NllLossBackward0>)
tensor(2.1325, grad_fn=<NllLossBackward0>)
tensor(2.8134, grad_fn=<NllLossBackward0>)
tensor(2.0958, grad_fn=<NllLossBackward0>)
tensor(2.9475, grad_fn=<NllLossBackward0>)
tensor(2.6717, grad_fn=<NllLossBackward0>)
tensor(2.5922, grad_fn=<NllLossBackward0>)
tensor(2.8822, grad_fn=<NllLossBackward0>)
tensor(3.5463, grad_fn=<NllLossBackward0>)
tensor(2.1330, grad_fn=<NllLossBackward0>)
tensor(2.2963, grad_fn=<NllLossBackward0>)
tensor(2.5379, grad_fn=<NllLossBackward0>)
tensor(3.1978, grad_fn=<NllLossBackward0>)
tensor(1.9907, grad_fn=<NllLossBackward0>)
tensor(3.6678, grad_fn=<NllLossBackward0>)
tensor(2.6188, grad_fn=<NllLossBackward0>)
tensor(2.38

tensor(3.1118, grad_fn=<NllLossBackward0>)
tensor(2.8722, grad_fn=<NllLossBackward0>)
tensor(2.3412, grad_fn=<NllLossBackward0>)
tensor(3.0017, grad_fn=<NllLossBackward0>)
tensor(2.3330, grad_fn=<NllLossBackward0>)
tensor(3.6563, grad_fn=<NllLossBackward0>)
tensor(1.9707, grad_fn=<NllLossBackward0>)
tensor(3.3252, grad_fn=<NllLossBackward0>)
tensor(2.4936, grad_fn=<NllLossBackward0>)
tensor(2.7535, grad_fn=<NllLossBackward0>)
tensor(2.7649, grad_fn=<NllLossBackward0>)
tensor(2.5115, grad_fn=<NllLossBackward0>)
tensor(2.3566, grad_fn=<NllLossBackward0>)
tensor(2.5884, grad_fn=<NllLossBackward0>)
tensor(2.3132, grad_fn=<NllLossBackward0>)
tensor(2.8324, grad_fn=<NllLossBackward0>)
tensor(2.2225, grad_fn=<NllLossBackward0>)
tensor(2.6812, grad_fn=<NllLossBackward0>)
tensor(1.7196, grad_fn=<NllLossBackward0>)
tensor(1.7116, grad_fn=<NllLossBackward0>)
tensor(3.2807, grad_fn=<NllLossBackward0>)
tensor(3.0531, grad_fn=<NllLossBackward0>)
tensor(3.9953, grad_fn=<NllLossBackward0>)
tensor(3.25

tensor(3.5584, grad_fn=<NllLossBackward0>)
tensor(2.4626, grad_fn=<NllLossBackward0>)
tensor(3.4827, grad_fn=<NllLossBackward0>)
tensor(2.3568, grad_fn=<NllLossBackward0>)
tensor(2.8328, grad_fn=<NllLossBackward0>)
tensor(1.7343, grad_fn=<NllLossBackward0>)
tensor(2.3185, grad_fn=<NllLossBackward0>)
tensor(1.7305, grad_fn=<NllLossBackward0>)
tensor(2.5635, grad_fn=<NllLossBackward0>)
tensor(2.9279, grad_fn=<NllLossBackward0>)
tensor(3.5162, grad_fn=<NllLossBackward0>)
tensor(3.3075, grad_fn=<NllLossBackward0>)
tensor(2.8202, grad_fn=<NllLossBackward0>)
tensor(3.5125, grad_fn=<NllLossBackward0>)
tensor(2.2846, grad_fn=<NllLossBackward0>)
tensor(2.4363, grad_fn=<NllLossBackward0>)
tensor(3.2939, grad_fn=<NllLossBackward0>)
tensor(2.2569, grad_fn=<NllLossBackward0>)
tensor(2.8340, grad_fn=<NllLossBackward0>)
tensor(2.4064, grad_fn=<NllLossBackward0>)
tensor(3.0341, grad_fn=<NllLossBackward0>)
tensor(2.4618, grad_fn=<NllLossBackward0>)
tensor(2.6948, grad_fn=<NllLossBackward0>)
tensor(2.52

tensor(4.2382, grad_fn=<NllLossBackward0>)
tensor(2.9333, grad_fn=<NllLossBackward0>)
tensor(2.8869, grad_fn=<NllLossBackward0>)
tensor(2.5164, grad_fn=<NllLossBackward0>)
tensor(2.8849, grad_fn=<NllLossBackward0>)
tensor(2.1998, grad_fn=<NllLossBackward0>)
tensor(2.2402, grad_fn=<NllLossBackward0>)
tensor(3.5239, grad_fn=<NllLossBackward0>)
tensor(2.3902, grad_fn=<NllLossBackward0>)
tensor(1.7569, grad_fn=<NllLossBackward0>)
tensor(2.2627, grad_fn=<NllLossBackward0>)
tensor(3.7837, grad_fn=<NllLossBackward0>)
tensor(2.3627, grad_fn=<NllLossBackward0>)
tensor(3.9167, grad_fn=<NllLossBackward0>)
tensor(2.3285, grad_fn=<NllLossBackward0>)
tensor(2.6021, grad_fn=<NllLossBackward0>)
tensor(1.9951, grad_fn=<NllLossBackward0>)
tensor(2.4094, grad_fn=<NllLossBackward0>)
tensor(3.5216, grad_fn=<NllLossBackward0>)
tensor(1.7664, grad_fn=<NllLossBackward0>)
tensor(3.5456, grad_fn=<NllLossBackward0>)
tensor(2.0890, grad_fn=<NllLossBackward0>)
tensor(3.5265, grad_fn=<NllLossBackward0>)
tensor(3.39

tensor(4.0485, grad_fn=<NllLossBackward0>)
tensor(2.3464, grad_fn=<NllLossBackward0>)
tensor(2.9031, grad_fn=<NllLossBackward0>)
tensor(2.7159, grad_fn=<NllLossBackward0>)
tensor(3.3597, grad_fn=<NllLossBackward0>)
tensor(2.3429, grad_fn=<NllLossBackward0>)
tensor(3.5339, grad_fn=<NllLossBackward0>)
tensor(2.9132, grad_fn=<NllLossBackward0>)
tensor(3.0337, grad_fn=<NllLossBackward0>)
tensor(1.7697, grad_fn=<NllLossBackward0>)
tensor(2.2257, grad_fn=<NllLossBackward0>)
tensor(3.3878, grad_fn=<NllLossBackward0>)
tensor(2.2709, grad_fn=<NllLossBackward0>)
tensor(3.5912, grad_fn=<NllLossBackward0>)
tensor(2.5406, grad_fn=<NllLossBackward0>)
tensor(2.4465, grad_fn=<NllLossBackward0>)
tensor(2.2071, grad_fn=<NllLossBackward0>)
tensor(2.4245, grad_fn=<NllLossBackward0>)
tensor(1.9290, grad_fn=<NllLossBackward0>)
tensor(3.0172, grad_fn=<NllLossBackward0>)
tensor(2.5676, grad_fn=<NllLossBackward0>)
tensor(2.8669, grad_fn=<NllLossBackward0>)
tensor(2.8540, grad_fn=<NllLossBackward0>)
tensor(2.48

tensor(2.8739, grad_fn=<NllLossBackward0>)
tensor(2.9011, grad_fn=<NllLossBackward0>)
tensor(3.5948, grad_fn=<NllLossBackward0>)
tensor(3.9149, grad_fn=<NllLossBackward0>)
tensor(2.0949, grad_fn=<NllLossBackward0>)
tensor(2.8702, grad_fn=<NllLossBackward0>)
tensor(2.8196, grad_fn=<NllLossBackward0>)
tensor(2.2543, grad_fn=<NllLossBackward0>)
tensor(3.2496, grad_fn=<NllLossBackward0>)
tensor(3.0814, grad_fn=<NllLossBackward0>)
tensor(2.4894, grad_fn=<NllLossBackward0>)
tensor(2.9819, grad_fn=<NllLossBackward0>)
tensor(3.3618, grad_fn=<NllLossBackward0>)
tensor(2.9654, grad_fn=<NllLossBackward0>)
tensor(3.9572, grad_fn=<NllLossBackward0>)
tensor(1.8502, grad_fn=<NllLossBackward0>)
tensor(2.3516, grad_fn=<NllLossBackward0>)
tensor(3.1281, grad_fn=<NllLossBackward0>)
tensor(1.9478, grad_fn=<NllLossBackward0>)
tensor(2.8990, grad_fn=<NllLossBackward0>)
tensor(2.3285, grad_fn=<NllLossBackward0>)
tensor(2.3096, grad_fn=<NllLossBackward0>)
tensor(2.3659, grad_fn=<NllLossBackward0>)
tensor(1.92

tensor(1.7921, grad_fn=<NllLossBackward0>)
tensor(2.9726, grad_fn=<NllLossBackward0>)
tensor(2.5131, grad_fn=<NllLossBackward0>)
tensor(3.3918, grad_fn=<NllLossBackward0>)
tensor(3.0142, grad_fn=<NllLossBackward0>)
tensor(2.7109, grad_fn=<NllLossBackward0>)
tensor(2.2096, grad_fn=<NllLossBackward0>)
tensor(3.3941, grad_fn=<NllLossBackward0>)
tensor(3.0127, grad_fn=<NllLossBackward0>)
tensor(4.9967, grad_fn=<NllLossBackward0>)
tensor(4.1359, grad_fn=<NllLossBackward0>)
tensor(2.8855, grad_fn=<NllLossBackward0>)
tensor(3.0631, grad_fn=<NllLossBackward0>)
tensor(3.4011, grad_fn=<NllLossBackward0>)
tensor(2.6865, grad_fn=<NllLossBackward0>)
tensor(2.3261, grad_fn=<NllLossBackward0>)
tensor(2.8548, grad_fn=<NllLossBackward0>)
tensor(2.8604, grad_fn=<NllLossBackward0>)
tensor(2.2505, grad_fn=<NllLossBackward0>)
tensor(2.8157, grad_fn=<NllLossBackward0>)
tensor(2.8907, grad_fn=<NllLossBackward0>)
tensor(2.2243, grad_fn=<NllLossBackward0>)
tensor(2.3130, grad_fn=<NllLossBackward0>)
tensor(3.02

tensor(2.4440, grad_fn=<NllLossBackward0>)
tensor(3.1063, grad_fn=<NllLossBackward0>)
tensor(2.5108, grad_fn=<NllLossBackward0>)
tensor(2.7662, grad_fn=<NllLossBackward0>)
tensor(2.6894, grad_fn=<NllLossBackward0>)
tensor(2.3407, grad_fn=<NllLossBackward0>)
tensor(2.1189, grad_fn=<NllLossBackward0>)
tensor(2.7411, grad_fn=<NllLossBackward0>)
tensor(3.9262, grad_fn=<NllLossBackward0>)
tensor(2.5084, grad_fn=<NllLossBackward0>)
tensor(2.1251, grad_fn=<NllLossBackward0>)
tensor(3.3814, grad_fn=<NllLossBackward0>)
tensor(2.0987, grad_fn=<NllLossBackward0>)
tensor(2.7005, grad_fn=<NllLossBackward0>)
tensor(3.3350, grad_fn=<NllLossBackward0>)
tensor(2.7394, grad_fn=<NllLossBackward0>)
tensor(3.8217, grad_fn=<NllLossBackward0>)
tensor(3.7488, grad_fn=<NllLossBackward0>)
tensor(2.2977, grad_fn=<NllLossBackward0>)
tensor(2.4237, grad_fn=<NllLossBackward0>)
tensor(2.5998, grad_fn=<NllLossBackward0>)
tensor(1.7806, grad_fn=<NllLossBackward0>)
tensor(2.4503, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(1.7851, grad_fn=<NllLossBackward0>)
tensor(2.3159, grad_fn=<NllLossBackward0>)
tensor(2.2508, grad_fn=<NllLossBackward0>)
tensor(3.5855, grad_fn=<NllLossBackward0>)
tensor(3.7489, grad_fn=<NllLossBackward0>)
tensor(3.1963, grad_fn=<NllLossBackward0>)
tensor(3.5815, grad_fn=<NllLossBackward0>)
tensor(2.2970, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.0835, grad_fn=<NllLossBackward0>)
tensor(1.9523, grad_fn=<NllLossBackward0>)
tensor(2.7622, grad_fn=<NllLossBackward0>)
tensor(2.3115, grad_fn=<NllLossBackward0>)
tensor(2.3760, grad_fn=<NllLossBackward0>)
tensor(3.3460, grad_fn=<NllLossBackward0>)
tensor(3.2358, grad_fn=<NllLossBackward0>)
tensor(3.4309, grad_fn=<NllLossBackward0>)
tensor(3.3747, grad_fn=<NllLossBackward0>)
tensor(2.5191, grad_fn=<NllLossBackward0>)
tensor(3.6176, grad_fn=<NllLossBackward0>)
tensor(1.8962, grad_fn=<NllLossBackward0>)
tensor(2.0382, grad_fn=<NllLossBackward0>)
tensor(3.2447, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(2.5508, grad_fn=<NllLossBackward0>)
tensor(3.0876, grad_fn=<NllLossBackward0>)
tensor(3.0663, grad_fn=<NllLossBackward0>)
tensor(2.3686, grad_fn=<NllLossBackward0>)
tensor(2.7398, grad_fn=<NllLossBackward0>)
tensor(3.1399, grad_fn=<NllLossBackward0>)
tensor(2.0545, grad_fn=<NllLossBackward0>)
tensor(2.1472, grad_fn=<NllLossBackward0>)
tensor(2.2494, grad_fn=<NllLossBackward0>)
tensor(3.3041, grad_fn=<NllLossBackward0>)
tensor(2.3349, grad_fn=<NllLossBackward0>)
tensor(2.4518, grad_fn=<NllLossBackward0>)
tensor(3.0590, grad_fn=<NllLossBackward0>)
tensor(3.4810, grad_fn=<NllLossBackward0>)
tensor(2.8732, grad_fn=<NllLossBackward0>)
tensor(2.7443, grad_fn=<NllLossBackward0>)
tensor(2.3274, grad_fn=<NllLossBackward0>)
tensor(2.0675, grad_fn=<NllLossBackward0>)
tensor(2.2523, grad_fn=<NllLossBackward0>)
tensor(2.9570, grad_fn=<NllLossBackward0>)
tensor(3.4900, grad_fn=<NllLossBackward0>)
tensor(2.4321, grad_fn=<NllLossBackward0>)
tensor(2.6557, grad_fn=<NllLossBackward0>)
tensor(2.71

tensor(1.9877, grad_fn=<NllLossBackward0>)
tensor(2.9853, grad_fn=<NllLossBackward0>)
tensor(2.7491, grad_fn=<NllLossBackward0>)
tensor(3.1470, grad_fn=<NllLossBackward0>)
tensor(2.7950, grad_fn=<NllLossBackward0>)
tensor(2.2876, grad_fn=<NllLossBackward0>)
tensor(3.1771, grad_fn=<NllLossBackward0>)
tensor(3.5168, grad_fn=<NllLossBackward0>)
tensor(4.4762, grad_fn=<NllLossBackward0>)
tensor(1.7839, grad_fn=<NllLossBackward0>)
tensor(2.9549, grad_fn=<NllLossBackward0>)
tensor(3.0763, grad_fn=<NllLossBackward0>)
tensor(2.0566, grad_fn=<NllLossBackward0>)
tensor(2.2611, grad_fn=<NllLossBackward0>)
tensor(2.2373, grad_fn=<NllLossBackward0>)
tensor(2.5220, grad_fn=<NllLossBackward0>)
tensor(2.0760, grad_fn=<NllLossBackward0>)
tensor(2.3324, grad_fn=<NllLossBackward0>)
tensor(2.8504, grad_fn=<NllLossBackward0>)
tensor(2.9745, grad_fn=<NllLossBackward0>)
tensor(2.0166, grad_fn=<NllLossBackward0>)
tensor(2.6281, grad_fn=<NllLossBackward0>)
tensor(2.6158, grad_fn=<NllLossBackward0>)
tensor(2.28

tensor(2.9296, grad_fn=<NllLossBackward0>)
tensor(4.0525, grad_fn=<NllLossBackward0>)
tensor(3.6364, grad_fn=<NllLossBackward0>)
tensor(2.5591, grad_fn=<NllLossBackward0>)
tensor(3.4442, grad_fn=<NllLossBackward0>)
tensor(2.2714, grad_fn=<NllLossBackward0>)
tensor(2.3695, grad_fn=<NllLossBackward0>)
tensor(2.4046, grad_fn=<NllLossBackward0>)
tensor(1.9930, grad_fn=<NllLossBackward0>)
tensor(2.4051, grad_fn=<NllLossBackward0>)
tensor(2.7133, grad_fn=<NllLossBackward0>)
tensor(3.6702, grad_fn=<NllLossBackward0>)
tensor(2.7044, grad_fn=<NllLossBackward0>)
tensor(2.2974, grad_fn=<NllLossBackward0>)
tensor(2.8523, grad_fn=<NllLossBackward0>)
tensor(3.0247, grad_fn=<NllLossBackward0>)
tensor(2.7608, grad_fn=<NllLossBackward0>)
tensor(2.2879, grad_fn=<NllLossBackward0>)
tensor(2.4230, grad_fn=<NllLossBackward0>)
tensor(3.0932, grad_fn=<NllLossBackward0>)
tensor(2.4871, grad_fn=<NllLossBackward0>)
tensor(4.0420, grad_fn=<NllLossBackward0>)
tensor(3.0295, grad_fn=<NllLossBackward0>)
tensor(2.31

tensor(2.9564, grad_fn=<NllLossBackward0>)
tensor(2.5818, grad_fn=<NllLossBackward0>)
tensor(3.0131, grad_fn=<NllLossBackward0>)
tensor(2.8900, grad_fn=<NllLossBackward0>)
tensor(2.6536, grad_fn=<NllLossBackward0>)
tensor(2.8163, grad_fn=<NllLossBackward0>)
tensor(3.2066, grad_fn=<NllLossBackward0>)
tensor(2.6384, grad_fn=<NllLossBackward0>)
tensor(4.1723, grad_fn=<NllLossBackward0>)
tensor(2.9316, grad_fn=<NllLossBackward0>)
tensor(2.2607, grad_fn=<NllLossBackward0>)
tensor(2.4589, grad_fn=<NllLossBackward0>)
tensor(3.1309, grad_fn=<NllLossBackward0>)
tensor(3.2217, grad_fn=<NllLossBackward0>)
tensor(2.0667, grad_fn=<NllLossBackward0>)
tensor(3.0562, grad_fn=<NllLossBackward0>)
tensor(2.5556, grad_fn=<NllLossBackward0>)
tensor(2.6158, grad_fn=<NllLossBackward0>)
tensor(3.4125, grad_fn=<NllLossBackward0>)
tensor(2.8031, grad_fn=<NllLossBackward0>)
tensor(2.0877, grad_fn=<NllLossBackward0>)
tensor(2.4350, grad_fn=<NllLossBackward0>)
tensor(2.8460, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(1.9745, grad_fn=<NllLossBackward0>)
tensor(3.9923, grad_fn=<NllLossBackward0>)
tensor(3.0023, grad_fn=<NllLossBackward0>)
tensor(2.3247, grad_fn=<NllLossBackward0>)
tensor(3.0221, grad_fn=<NllLossBackward0>)
tensor(2.1538, grad_fn=<NllLossBackward0>)
tensor(2.0852, grad_fn=<NllLossBackward0>)
tensor(3.6821, grad_fn=<NllLossBackward0>)
tensor(2.8494, grad_fn=<NllLossBackward0>)
tensor(3.1983, grad_fn=<NllLossBackward0>)
tensor(2.6416, grad_fn=<NllLossBackward0>)
tensor(3.5580, grad_fn=<NllLossBackward0>)
tensor(2.2745, grad_fn=<NllLossBackward0>)
tensor(4.6427, grad_fn=<NllLossBackward0>)
tensor(1.9489, grad_fn=<NllLossBackward0>)
tensor(4.0199, grad_fn=<NllLossBackward0>)
tensor(4.7150, grad_fn=<NllLossBackward0>)
tensor(2.4928, grad_fn=<NllLossBackward0>)
tensor(3.2672, grad_fn=<NllLossBackward0>)
tensor(3.2171, grad_fn=<NllLossBackward0>)
tensor(3.7230, grad_fn=<NllLossBackward0>)
tensor(2.3148, grad_fn=<NllLossBackward0>)
tensor(3.3997, grad_fn=<NllLossBackward0>)
tensor(2.02

tensor(2.9660, grad_fn=<NllLossBackward0>)
tensor(2.4889, grad_fn=<NllLossBackward0>)
tensor(2.4597, grad_fn=<NllLossBackward0>)
tensor(2.8408, grad_fn=<NllLossBackward0>)
tensor(2.0357, grad_fn=<NllLossBackward0>)
tensor(2.4311, grad_fn=<NllLossBackward0>)
tensor(2.2969, grad_fn=<NllLossBackward0>)
tensor(3.4808, grad_fn=<NllLossBackward0>)
tensor(2.5094, grad_fn=<NllLossBackward0>)
tensor(2.0471, grad_fn=<NllLossBackward0>)
tensor(2.4580, grad_fn=<NllLossBackward0>)
tensor(3.0425, grad_fn=<NllLossBackward0>)
tensor(2.4468, grad_fn=<NllLossBackward0>)
tensor(2.7450, grad_fn=<NllLossBackward0>)
tensor(3.7326, grad_fn=<NllLossBackward0>)
tensor(2.0378, grad_fn=<NllLossBackward0>)
tensor(2.5537, grad_fn=<NllLossBackward0>)
tensor(2.0758, grad_fn=<NllLossBackward0>)
tensor(2.0574, grad_fn=<NllLossBackward0>)
tensor(2.9842, grad_fn=<NllLossBackward0>)
tensor(3.4355, grad_fn=<NllLossBackward0>)
tensor(1.8223, grad_fn=<NllLossBackward0>)
tensor(3.0035, grad_fn=<NllLossBackward0>)
tensor(4.26

tensor(2.2916, grad_fn=<NllLossBackward0>)
tensor(3.1702, grad_fn=<NllLossBackward0>)
tensor(3.4354, grad_fn=<NllLossBackward0>)
tensor(2.8074, grad_fn=<NllLossBackward0>)
tensor(2.4800, grad_fn=<NllLossBackward0>)
tensor(2.3675, grad_fn=<NllLossBackward0>)
tensor(2.0657, grad_fn=<NllLossBackward0>)
tensor(2.8246, grad_fn=<NllLossBackward0>)
tensor(3.0312, grad_fn=<NllLossBackward0>)
tensor(2.9981, grad_fn=<NllLossBackward0>)
tensor(2.3193, grad_fn=<NllLossBackward0>)
tensor(3.3174, grad_fn=<NllLossBackward0>)
tensor(2.3215, grad_fn=<NllLossBackward0>)
tensor(2.4728, grad_fn=<NllLossBackward0>)
tensor(2.3851, grad_fn=<NllLossBackward0>)
tensor(3.8889, grad_fn=<NllLossBackward0>)
tensor(2.7611, grad_fn=<NllLossBackward0>)
tensor(2.2873, grad_fn=<NllLossBackward0>)
tensor(2.2243, grad_fn=<NllLossBackward0>)
tensor(2.5025, grad_fn=<NllLossBackward0>)
tensor(2.9104, grad_fn=<NllLossBackward0>)
tensor(2.9843, grad_fn=<NllLossBackward0>)
tensor(4.0509, grad_fn=<NllLossBackward0>)
tensor(3.50

tensor(2.8226, grad_fn=<NllLossBackward0>)
tensor(2.2747, grad_fn=<NllLossBackward0>)
tensor(2.4026, grad_fn=<NllLossBackward0>)
tensor(3.8327, grad_fn=<NllLossBackward0>)
tensor(3.7107, grad_fn=<NllLossBackward0>)
tensor(2.9297, grad_fn=<NllLossBackward0>)
tensor(2.8623, grad_fn=<NllLossBackward0>)
tensor(3.6367, grad_fn=<NllLossBackward0>)
tensor(2.4066, grad_fn=<NllLossBackward0>)
tensor(2.4035, grad_fn=<NllLossBackward0>)
tensor(2.0406, grad_fn=<NllLossBackward0>)
tensor(2.3113, grad_fn=<NllLossBackward0>)
tensor(2.5266, grad_fn=<NllLossBackward0>)
tensor(2.2462, grad_fn=<NllLossBackward0>)
tensor(3.7700, grad_fn=<NllLossBackward0>)
tensor(3.4744, grad_fn=<NllLossBackward0>)
tensor(2.4591, grad_fn=<NllLossBackward0>)
tensor(3.9754, grad_fn=<NllLossBackward0>)
tensor(2.4405, grad_fn=<NllLossBackward0>)
tensor(3.0107, grad_fn=<NllLossBackward0>)
tensor(2.9776, grad_fn=<NllLossBackward0>)
tensor(2.2918, grad_fn=<NllLossBackward0>)
tensor(2.4290, grad_fn=<NllLossBackward0>)
tensor(2.87

tensor(2.4761, grad_fn=<NllLossBackward0>)
tensor(3.5271, grad_fn=<NllLossBackward0>)
tensor(3.4750, grad_fn=<NllLossBackward0>)
tensor(1.8866, grad_fn=<NllLossBackward0>)
tensor(3.5634, grad_fn=<NllLossBackward0>)
tensor(2.9861, grad_fn=<NllLossBackward0>)
tensor(2.6959, grad_fn=<NllLossBackward0>)
tensor(2.5229, grad_fn=<NllLossBackward0>)
tensor(3.1590, grad_fn=<NllLossBackward0>)
tensor(2.8897, grad_fn=<NllLossBackward0>)
tensor(2.8062, grad_fn=<NllLossBackward0>)
tensor(1.8884, grad_fn=<NllLossBackward0>)
tensor(3.3018, grad_fn=<NllLossBackward0>)
tensor(2.9640, grad_fn=<NllLossBackward0>)
tensor(2.4399, grad_fn=<NllLossBackward0>)
tensor(3.5563, grad_fn=<NllLossBackward0>)
tensor(3.7176, grad_fn=<NllLossBackward0>)
tensor(4.5759, grad_fn=<NllLossBackward0>)
tensor(3.0499, grad_fn=<NllLossBackward0>)
tensor(2.9762, grad_fn=<NllLossBackward0>)
tensor(2.4172, grad_fn=<NllLossBackward0>)
tensor(3.3845, grad_fn=<NllLossBackward0>)
tensor(3.5129, grad_fn=<NllLossBackward0>)
tensor(2.44

tensor(3.3972, grad_fn=<NllLossBackward0>)
tensor(2.1750, grad_fn=<NllLossBackward0>)
tensor(2.4202, grad_fn=<NllLossBackward0>)
tensor(2.1638, grad_fn=<NllLossBackward0>)
tensor(2.3207, grad_fn=<NllLossBackward0>)
tensor(1.8715, grad_fn=<NllLossBackward0>)
tensor(3.4726, grad_fn=<NllLossBackward0>)
tensor(2.1259, grad_fn=<NllLossBackward0>)
tensor(2.9088, grad_fn=<NllLossBackward0>)
tensor(3.3541, grad_fn=<NllLossBackward0>)
tensor(2.9956, grad_fn=<NllLossBackward0>)
tensor(2.2877, grad_fn=<NllLossBackward0>)
tensor(2.6533, grad_fn=<NllLossBackward0>)
tensor(2.3271, grad_fn=<NllLossBackward0>)
tensor(2.9994, grad_fn=<NllLossBackward0>)
tensor(1.9555, grad_fn=<NllLossBackward0>)
tensor(2.9896, grad_fn=<NllLossBackward0>)
tensor(2.7089, grad_fn=<NllLossBackward0>)
tensor(4.2205, grad_fn=<NllLossBackward0>)
tensor(1.9453, grad_fn=<NllLossBackward0>)
tensor(2.2517, grad_fn=<NllLossBackward0>)
tensor(2.8675, grad_fn=<NllLossBackward0>)
tensor(2.3580, grad_fn=<NllLossBackward0>)
tensor(2.92

tensor(3.8262, grad_fn=<NllLossBackward0>)
tensor(3.8180, grad_fn=<NllLossBackward0>)
tensor(2.5744, grad_fn=<NllLossBackward0>)
tensor(3.1526, grad_fn=<NllLossBackward0>)
tensor(4.0766, grad_fn=<NllLossBackward0>)
tensor(2.4514, grad_fn=<NllLossBackward0>)
tensor(3.9209, grad_fn=<NllLossBackward0>)
tensor(3.2417, grad_fn=<NllLossBackward0>)
tensor(2.3056, grad_fn=<NllLossBackward0>)
tensor(1.8067, grad_fn=<NllLossBackward0>)
tensor(2.5407, grad_fn=<NllLossBackward0>)
tensor(2.3715, grad_fn=<NllLossBackward0>)
tensor(2.9296, grad_fn=<NllLossBackward0>)
tensor(3.3940, grad_fn=<NllLossBackward0>)
tensor(2.7260, grad_fn=<NllLossBackward0>)
tensor(1.8833, grad_fn=<NllLossBackward0>)
tensor(2.8683, grad_fn=<NllLossBackward0>)
tensor(2.1633, grad_fn=<NllLossBackward0>)
tensor(2.6564, grad_fn=<NllLossBackward0>)
tensor(2.5452, grad_fn=<NllLossBackward0>)
tensor(2.8045, grad_fn=<NllLossBackward0>)
tensor(3.0682, grad_fn=<NllLossBackward0>)
tensor(2.2634, grad_fn=<NllLossBackward0>)
tensor(3.38

tensor(2.3191, grad_fn=<NllLossBackward0>)
tensor(3.5596, grad_fn=<NllLossBackward0>)
tensor(3.1811, grad_fn=<NllLossBackward0>)
tensor(2.4194, grad_fn=<NllLossBackward0>)
tensor(2.7881, grad_fn=<NllLossBackward0>)
tensor(1.9368, grad_fn=<NllLossBackward0>)
tensor(2.7758, grad_fn=<NllLossBackward0>)
tensor(2.6071, grad_fn=<NllLossBackward0>)
tensor(2.9804, grad_fn=<NllLossBackward0>)
tensor(3.3738, grad_fn=<NllLossBackward0>)
tensor(2.5394, grad_fn=<NllLossBackward0>)
tensor(2.5230, grad_fn=<NllLossBackward0>)
tensor(2.4047, grad_fn=<NllLossBackward0>)
tensor(3.9825, grad_fn=<NllLossBackward0>)
tensor(2.0852, grad_fn=<NllLossBackward0>)
tensor(3.7038, grad_fn=<NllLossBackward0>)
tensor(2.3395, grad_fn=<NllLossBackward0>)
tensor(2.0744, grad_fn=<NllLossBackward0>)
tensor(2.3884, grad_fn=<NllLossBackward0>)
tensor(2.2603, grad_fn=<NllLossBackward0>)
tensor(2.9637, grad_fn=<NllLossBackward0>)
tensor(2.8931, grad_fn=<NllLossBackward0>)
tensor(2.5538, grad_fn=<NllLossBackward0>)
tensor(3.03

tensor(2.6268, grad_fn=<NllLossBackward0>)
tensor(3.5974, grad_fn=<NllLossBackward0>)
tensor(3.8358, grad_fn=<NllLossBackward0>)
tensor(2.3493, grad_fn=<NllLossBackward0>)
tensor(3.4373, grad_fn=<NllLossBackward0>)
tensor(2.9495, grad_fn=<NllLossBackward0>)
tensor(2.5062, grad_fn=<NllLossBackward0>)
tensor(2.1742, grad_fn=<NllLossBackward0>)
tensor(1.9263, grad_fn=<NllLossBackward0>)
tensor(4.1088, grad_fn=<NllLossBackward0>)
tensor(4.2987, grad_fn=<NllLossBackward0>)
tensor(2.8484, grad_fn=<NllLossBackward0>)
tensor(2.2073, grad_fn=<NllLossBackward0>)
tensor(3.2315, grad_fn=<NllLossBackward0>)
tensor(2.9220, grad_fn=<NllLossBackward0>)
tensor(2.9333, grad_fn=<NllLossBackward0>)
tensor(2.7690, grad_fn=<NllLossBackward0>)
tensor(2.2716, grad_fn=<NllLossBackward0>)
tensor(2.3796, grad_fn=<NllLossBackward0>)
tensor(2.3556, grad_fn=<NllLossBackward0>)
tensor(2.0850, grad_fn=<NllLossBackward0>)
tensor(1.9997, grad_fn=<NllLossBackward0>)
tensor(2.6172, grad_fn=<NllLossBackward0>)
tensor(4.42

tensor(1.9145, grad_fn=<NllLossBackward0>)
tensor(2.7072, grad_fn=<NllLossBackward0>)
tensor(1.9896, grad_fn=<NllLossBackward0>)
tensor(2.1792, grad_fn=<NllLossBackward0>)
tensor(2.8581, grad_fn=<NllLossBackward0>)
tensor(3.2351, grad_fn=<NllLossBackward0>)
tensor(2.3201, grad_fn=<NllLossBackward0>)
tensor(2.3549, grad_fn=<NllLossBackward0>)
tensor(1.9720, grad_fn=<NllLossBackward0>)
tensor(2.7270, grad_fn=<NllLossBackward0>)
tensor(2.2954, grad_fn=<NllLossBackward0>)
tensor(2.9728, grad_fn=<NllLossBackward0>)
tensor(1.9792, grad_fn=<NllLossBackward0>)
tensor(3.6399, grad_fn=<NllLossBackward0>)
tensor(2.2433, grad_fn=<NllLossBackward0>)
tensor(2.2488, grad_fn=<NllLossBackward0>)
tensor(2.7490, grad_fn=<NllLossBackward0>)
tensor(2.3657, grad_fn=<NllLossBackward0>)
tensor(3.4322, grad_fn=<NllLossBackward0>)
tensor(2.2517, grad_fn=<NllLossBackward0>)
tensor(2.5336, grad_fn=<NllLossBackward0>)
tensor(1.9882, grad_fn=<NllLossBackward0>)
tensor(1.9491, grad_fn=<NllLossBackward0>)
tensor(2.16

tensor(3.5628, grad_fn=<NllLossBackward0>)
tensor(2.4009, grad_fn=<NllLossBackward0>)
tensor(2.4812, grad_fn=<NllLossBackward0>)
tensor(2.4554, grad_fn=<NllLossBackward0>)
tensor(2.4738, grad_fn=<NllLossBackward0>)
tensor(2.5385, grad_fn=<NllLossBackward0>)
tensor(3.5237, grad_fn=<NllLossBackward0>)
tensor(2.4724, grad_fn=<NllLossBackward0>)
tensor(2.8095, grad_fn=<NllLossBackward0>)
tensor(2.9274, grad_fn=<NllLossBackward0>)
tensor(2.1099, grad_fn=<NllLossBackward0>)
tensor(3.0685, grad_fn=<NllLossBackward0>)
tensor(3.7002, grad_fn=<NllLossBackward0>)
tensor(2.4367, grad_fn=<NllLossBackward0>)
tensor(2.9686, grad_fn=<NllLossBackward0>)
tensor(2.3647, grad_fn=<NllLossBackward0>)
tensor(2.7178, grad_fn=<NllLossBackward0>)
tensor(2.1823, grad_fn=<NllLossBackward0>)
tensor(2.7976, grad_fn=<NllLossBackward0>)
tensor(2.6440, grad_fn=<NllLossBackward0>)
tensor(2.1293, grad_fn=<NllLossBackward0>)
tensor(2.3282, grad_fn=<NllLossBackward0>)
tensor(2.8967, grad_fn=<NllLossBackward0>)
tensor(2.95

tensor(2.6753, grad_fn=<NllLossBackward0>)
tensor(2.3157, grad_fn=<NllLossBackward0>)
tensor(2.1963, grad_fn=<NllLossBackward0>)
tensor(2.6961, grad_fn=<NllLossBackward0>)
tensor(2.9595, grad_fn=<NllLossBackward0>)
tensor(2.3860, grad_fn=<NllLossBackward0>)
tensor(2.9044, grad_fn=<NllLossBackward0>)
tensor(2.7131, grad_fn=<NllLossBackward0>)
tensor(2.9273, grad_fn=<NllLossBackward0>)
tensor(2.1790, grad_fn=<NllLossBackward0>)
tensor(2.1070, grad_fn=<NllLossBackward0>)
tensor(2.3940, grad_fn=<NllLossBackward0>)
tensor(2.3385, grad_fn=<NllLossBackward0>)
tensor(3.2433, grad_fn=<NllLossBackward0>)
tensor(2.4123, grad_fn=<NllLossBackward0>)
tensor(2.1893, grad_fn=<NllLossBackward0>)
tensor(2.3009, grad_fn=<NllLossBackward0>)
tensor(2.1604, grad_fn=<NllLossBackward0>)
tensor(2.0498, grad_fn=<NllLossBackward0>)
tensor(2.0570, grad_fn=<NllLossBackward0>)
tensor(2.6488, grad_fn=<NllLossBackward0>)
tensor(3.2599, grad_fn=<NllLossBackward0>)
tensor(2.4816, grad_fn=<NllLossBackward0>)
tensor(2.74

tensor(2.4836, grad_fn=<NllLossBackward0>)
tensor(2.5849, grad_fn=<NllLossBackward0>)
tensor(3.0634, grad_fn=<NllLossBackward0>)
tensor(2.4145, grad_fn=<NllLossBackward0>)
tensor(1.7341, grad_fn=<NllLossBackward0>)
tensor(2.8480, grad_fn=<NllLossBackward0>)
tensor(2.5854, grad_fn=<NllLossBackward0>)
tensor(2.3367, grad_fn=<NllLossBackward0>)
tensor(3.1212, grad_fn=<NllLossBackward0>)
tensor(1.8780, grad_fn=<NllLossBackward0>)
tensor(3.5447, grad_fn=<NllLossBackward0>)
tensor(2.8492, grad_fn=<NllLossBackward0>)
tensor(2.3292, grad_fn=<NllLossBackward0>)
tensor(2.9826, grad_fn=<NllLossBackward0>)
tensor(1.8874, grad_fn=<NllLossBackward0>)
tensor(3.4155, grad_fn=<NllLossBackward0>)
tensor(2.7735, grad_fn=<NllLossBackward0>)
tensor(3.0802, grad_fn=<NllLossBackward0>)
tensor(2.8338, grad_fn=<NllLossBackward0>)
tensor(3.4544, grad_fn=<NllLossBackward0>)
tensor(3.3713, grad_fn=<NllLossBackward0>)
tensor(3.1044, grad_fn=<NllLossBackward0>)
tensor(4.1136, grad_fn=<NllLossBackward0>)
tensor(2.32

tensor(3.4354, grad_fn=<NllLossBackward0>)
tensor(2.4593, grad_fn=<NllLossBackward0>)
tensor(2.2543, grad_fn=<NllLossBackward0>)
tensor(3.6175, grad_fn=<NllLossBackward0>)
tensor(4.3806, grad_fn=<NllLossBackward0>)
tensor(3.9512, grad_fn=<NllLossBackward0>)
tensor(3.0546, grad_fn=<NllLossBackward0>)
tensor(2.0129, grad_fn=<NllLossBackward0>)
tensor(3.5838, grad_fn=<NllLossBackward0>)
tensor(2.9054, grad_fn=<NllLossBackward0>)
tensor(2.7329, grad_fn=<NllLossBackward0>)
tensor(2.5774, grad_fn=<NllLossBackward0>)
tensor(2.6838, grad_fn=<NllLossBackward0>)
tensor(2.0292, grad_fn=<NllLossBackward0>)
tensor(2.2658, grad_fn=<NllLossBackward0>)
tensor(3.1235, grad_fn=<NllLossBackward0>)
tensor(2.0737, grad_fn=<NllLossBackward0>)
tensor(2.3357, grad_fn=<NllLossBackward0>)
tensor(3.7951, grad_fn=<NllLossBackward0>)
tensor(2.8121, grad_fn=<NllLossBackward0>)
tensor(3.1719, grad_fn=<NllLossBackward0>)
tensor(2.4711, grad_fn=<NllLossBackward0>)
tensor(2.1512, grad_fn=<NllLossBackward0>)
tensor(2.33

tensor(2.0033, grad_fn=<NllLossBackward0>)
tensor(2.6762, grad_fn=<NllLossBackward0>)
tensor(1.9824, grad_fn=<NllLossBackward0>)
tensor(2.3167, grad_fn=<NllLossBackward0>)
tensor(2.5401, grad_fn=<NllLossBackward0>)
tensor(1.9917, grad_fn=<NllLossBackward0>)
tensor(2.5610, grad_fn=<NllLossBackward0>)
tensor(2.9129, grad_fn=<NllLossBackward0>)
tensor(2.8139, grad_fn=<NllLossBackward0>)
tensor(4.2689, grad_fn=<NllLossBackward0>)
tensor(4.0691, grad_fn=<NllLossBackward0>)
tensor(1.7560, grad_fn=<NllLossBackward0>)
tensor(2.6329, grad_fn=<NllLossBackward0>)
tensor(2.9174, grad_fn=<NllLossBackward0>)
tensor(2.3375, grad_fn=<NllLossBackward0>)
tensor(2.8202, grad_fn=<NllLossBackward0>)
tensor(2.1785, grad_fn=<NllLossBackward0>)
tensor(4.0905, grad_fn=<NllLossBackward0>)
tensor(1.9768, grad_fn=<NllLossBackward0>)
tensor(3.5534, grad_fn=<NllLossBackward0>)
tensor(3.6037, grad_fn=<NllLossBackward0>)
tensor(2.6783, grad_fn=<NllLossBackward0>)
tensor(3.7235, grad_fn=<NllLossBackward0>)
tensor(1.89

tensor(2.4991, grad_fn=<NllLossBackward0>)
tensor(1.7199, grad_fn=<NllLossBackward0>)
tensor(2.3162, grad_fn=<NllLossBackward0>)
tensor(2.4640, grad_fn=<NllLossBackward0>)
tensor(2.7695, grad_fn=<NllLossBackward0>)
tensor(3.7582, grad_fn=<NllLossBackward0>)
tensor(3.0438, grad_fn=<NllLossBackward0>)
tensor(2.9837, grad_fn=<NllLossBackward0>)
tensor(2.5605, grad_fn=<NllLossBackward0>)
tensor(2.0792, grad_fn=<NllLossBackward0>)
tensor(2.5594, grad_fn=<NllLossBackward0>)
tensor(3.1330, grad_fn=<NllLossBackward0>)
tensor(2.0445, grad_fn=<NllLossBackward0>)
tensor(2.4616, grad_fn=<NllLossBackward0>)
tensor(3.0050, grad_fn=<NllLossBackward0>)
tensor(2.4416, grad_fn=<NllLossBackward0>)
tensor(2.4502, grad_fn=<NllLossBackward0>)
tensor(4.6410, grad_fn=<NllLossBackward0>)
tensor(3.0119, grad_fn=<NllLossBackward0>)
tensor(2.3614, grad_fn=<NllLossBackward0>)
tensor(2.1495, grad_fn=<NllLossBackward0>)
tensor(4.0071, grad_fn=<NllLossBackward0>)
tensor(2.3773, grad_fn=<NllLossBackward0>)
tensor(3.40

tensor(2.3425, grad_fn=<NllLossBackward0>)
tensor(2.2773, grad_fn=<NllLossBackward0>)
tensor(2.4091, grad_fn=<NllLossBackward0>)
tensor(2.3499, grad_fn=<NllLossBackward0>)
tensor(2.4294, grad_fn=<NllLossBackward0>)
tensor(3.4659, grad_fn=<NllLossBackward0>)
tensor(2.1942, grad_fn=<NllLossBackward0>)
tensor(2.6501, grad_fn=<NllLossBackward0>)
tensor(1.9172, grad_fn=<NllLossBackward0>)
tensor(2.5393, grad_fn=<NllLossBackward0>)
tensor(2.9609, grad_fn=<NllLossBackward0>)
tensor(4.0813, grad_fn=<NllLossBackward0>)
tensor(2.6505, grad_fn=<NllLossBackward0>)
tensor(2.4259, grad_fn=<NllLossBackward0>)
tensor(3.5217, grad_fn=<NllLossBackward0>)
tensor(4.4375, grad_fn=<NllLossBackward0>)
tensor(2.9709, grad_fn=<NllLossBackward0>)
tensor(2.9253, grad_fn=<NllLossBackward0>)
tensor(3.5861, grad_fn=<NllLossBackward0>)
tensor(2.7254, grad_fn=<NllLossBackward0>)
tensor(2.5000, grad_fn=<NllLossBackward0>)
tensor(3.5085, grad_fn=<NllLossBackward0>)
tensor(2.1213, grad_fn=<NllLossBackward0>)
tensor(1.75

tensor(2.8898, grad_fn=<NllLossBackward0>)
tensor(2.9480, grad_fn=<NllLossBackward0>)
tensor(2.9538, grad_fn=<NllLossBackward0>)
tensor(2.8850, grad_fn=<NllLossBackward0>)
tensor(2.4484, grad_fn=<NllLossBackward0>)
tensor(2.0752, grad_fn=<NllLossBackward0>)
tensor(2.8522, grad_fn=<NllLossBackward0>)
tensor(2.9614, grad_fn=<NllLossBackward0>)
tensor(3.4918, grad_fn=<NllLossBackward0>)
tensor(4.5372, grad_fn=<NllLossBackward0>)
tensor(2.3292, grad_fn=<NllLossBackward0>)
tensor(2.8062, grad_fn=<NllLossBackward0>)
tensor(2.1647, grad_fn=<NllLossBackward0>)
tensor(3.8448, grad_fn=<NllLossBackward0>)
tensor(2.3321, grad_fn=<NllLossBackward0>)
tensor(2.8382, grad_fn=<NllLossBackward0>)
tensor(4.1952, grad_fn=<NllLossBackward0>)
tensor(1.8988, grad_fn=<NllLossBackward0>)
tensor(1.9692, grad_fn=<NllLossBackward0>)
tensor(3.9568, grad_fn=<NllLossBackward0>)
tensor(3.4879, grad_fn=<NllLossBackward0>)
tensor(3.0925, grad_fn=<NllLossBackward0>)
tensor(2.8730, grad_fn=<NllLossBackward0>)
tensor(2.93

tensor(4.0741, grad_fn=<NllLossBackward0>)
tensor(4.1691, grad_fn=<NllLossBackward0>)
tensor(3.1026, grad_fn=<NllLossBackward0>)
tensor(3.4708, grad_fn=<NllLossBackward0>)
tensor(1.9863, grad_fn=<NllLossBackward0>)
tensor(3.0513, grad_fn=<NllLossBackward0>)
tensor(2.3615, grad_fn=<NllLossBackward0>)
tensor(2.7306, grad_fn=<NllLossBackward0>)
tensor(3.4476, grad_fn=<NllLossBackward0>)
tensor(2.8277, grad_fn=<NllLossBackward0>)
tensor(4.6447, grad_fn=<NllLossBackward0>)
tensor(2.5672, grad_fn=<NllLossBackward0>)
tensor(2.3441, grad_fn=<NllLossBackward0>)
tensor(2.2999, grad_fn=<NllLossBackward0>)
tensor(2.8931, grad_fn=<NllLossBackward0>)
tensor(3.0980, grad_fn=<NllLossBackward0>)
tensor(2.0900, grad_fn=<NllLossBackward0>)
tensor(2.8188, grad_fn=<NllLossBackward0>)
tensor(3.3725, grad_fn=<NllLossBackward0>)
tensor(2.2491, grad_fn=<NllLossBackward0>)
tensor(3.4210, grad_fn=<NllLossBackward0>)
tensor(4.0129, grad_fn=<NllLossBackward0>)
tensor(2.5548, grad_fn=<NllLossBackward0>)
tensor(3.96

tensor(3.3457, grad_fn=<NllLossBackward0>)
tensor(3.4962, grad_fn=<NllLossBackward0>)
tensor(3.1429, grad_fn=<NllLossBackward0>)
tensor(3.3110, grad_fn=<NllLossBackward0>)
tensor(3.3534, grad_fn=<NllLossBackward0>)
tensor(1.6960, grad_fn=<NllLossBackward0>)
tensor(2.5007, grad_fn=<NllLossBackward0>)
tensor(2.8486, grad_fn=<NllLossBackward0>)
tensor(2.4909, grad_fn=<NllLossBackward0>)
tensor(3.5775, grad_fn=<NllLossBackward0>)
tensor(2.4812, grad_fn=<NllLossBackward0>)
tensor(2.3161, grad_fn=<NllLossBackward0>)
tensor(3.4273, grad_fn=<NllLossBackward0>)
tensor(2.4520, grad_fn=<NllLossBackward0>)
tensor(2.8339, grad_fn=<NllLossBackward0>)
tensor(3.2109, grad_fn=<NllLossBackward0>)
tensor(1.9025, grad_fn=<NllLossBackward0>)
tensor(2.2564, grad_fn=<NllLossBackward0>)
tensor(2.3032, grad_fn=<NllLossBackward0>)
tensor(3.5849, grad_fn=<NllLossBackward0>)
tensor(2.5310, grad_fn=<NllLossBackward0>)
tensor(2.9291, grad_fn=<NllLossBackward0>)
tensor(2.5477, grad_fn=<NllLossBackward0>)
tensor(2.33

tensor(2.1393, grad_fn=<NllLossBackward0>)
tensor(2.4827, grad_fn=<NllLossBackward0>)
tensor(2.3425, grad_fn=<NllLossBackward0>)
tensor(1.9304, grad_fn=<NllLossBackward0>)
tensor(3.8757, grad_fn=<NllLossBackward0>)
tensor(2.5083, grad_fn=<NllLossBackward0>)
tensor(2.5844, grad_fn=<NllLossBackward0>)
tensor(3.1970, grad_fn=<NllLossBackward0>)
tensor(2.5775, grad_fn=<NllLossBackward0>)
tensor(2.6768, grad_fn=<NllLossBackward0>)
tensor(3.9985, grad_fn=<NllLossBackward0>)
tensor(3.4888, grad_fn=<NllLossBackward0>)
tensor(2.3583, grad_fn=<NllLossBackward0>)
tensor(3.2773, grad_fn=<NllLossBackward0>)
tensor(3.0477, grad_fn=<NllLossBackward0>)
tensor(2.9528, grad_fn=<NllLossBackward0>)
tensor(2.5326, grad_fn=<NllLossBackward0>)
tensor(2.4936, grad_fn=<NllLossBackward0>)
tensor(2.2170, grad_fn=<NllLossBackward0>)
tensor(1.9254, grad_fn=<NllLossBackward0>)
tensor(3.5843, grad_fn=<NllLossBackward0>)
tensor(2.3378, grad_fn=<NllLossBackward0>)
tensor(2.8025, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(2.2098, grad_fn=<NllLossBackward0>)
tensor(2.0901, grad_fn=<NllLossBackward0>)
tensor(2.9908, grad_fn=<NllLossBackward0>)
tensor(3.7971, grad_fn=<NllLossBackward0>)
tensor(2.5713, grad_fn=<NllLossBackward0>)
tensor(3.1068, grad_fn=<NllLossBackward0>)
tensor(2.1008, grad_fn=<NllLossBackward0>)
tensor(2.8334, grad_fn=<NllLossBackward0>)
tensor(3.0641, grad_fn=<NllLossBackward0>)
tensor(2.6352, grad_fn=<NllLossBackward0>)
tensor(3.4292, grad_fn=<NllLossBackward0>)
tensor(2.3163, grad_fn=<NllLossBackward0>)
tensor(2.2960, grad_fn=<NllLossBackward0>)
tensor(1.9482, grad_fn=<NllLossBackward0>)
tensor(3.3041, grad_fn=<NllLossBackward0>)
tensor(2.6471, grad_fn=<NllLossBackward0>)
tensor(2.8878, grad_fn=<NllLossBackward0>)
tensor(2.7284, grad_fn=<NllLossBackward0>)
tensor(2.7432, grad_fn=<NllLossBackward0>)
tensor(2.8720, grad_fn=<NllLossBackward0>)
tensor(3.8808, grad_fn=<NllLossBackward0>)
tensor(1.8874, grad_fn=<NllLossBackward0>)
tensor(3.4228, grad_fn=<NllLossBackward0>)
tensor(2.37

tensor(2.9455, grad_fn=<NllLossBackward0>)
tensor(2.8398, grad_fn=<NllLossBackward0>)
tensor(2.2059, grad_fn=<NllLossBackward0>)
tensor(2.9524, grad_fn=<NllLossBackward0>)
tensor(3.3377, grad_fn=<NllLossBackward0>)
tensor(3.0152, grad_fn=<NllLossBackward0>)
tensor(2.8921, grad_fn=<NllLossBackward0>)
tensor(1.7582, grad_fn=<NllLossBackward0>)
tensor(2.6550, grad_fn=<NllLossBackward0>)
tensor(3.7545, grad_fn=<NllLossBackward0>)
tensor(3.4588, grad_fn=<NllLossBackward0>)
tensor(2.2572, grad_fn=<NllLossBackward0>)
tensor(3.3574, grad_fn=<NllLossBackward0>)
tensor(2.2941, grad_fn=<NllLossBackward0>)
tensor(2.0003, grad_fn=<NllLossBackward0>)
tensor(2.9901, grad_fn=<NllLossBackward0>)
tensor(3.1690, grad_fn=<NllLossBackward0>)
tensor(4.1086, grad_fn=<NllLossBackward0>)
tensor(2.8785, grad_fn=<NllLossBackward0>)
tensor(2.1705, grad_fn=<NllLossBackward0>)
tensor(2.6855, grad_fn=<NllLossBackward0>)
tensor(2.1141, grad_fn=<NllLossBackward0>)
tensor(3.6304, grad_fn=<NllLossBackward0>)
tensor(3.53

tensor(2.6294, grad_fn=<NllLossBackward0>)
tensor(2.8879, grad_fn=<NllLossBackward0>)
tensor(2.4738, grad_fn=<NllLossBackward0>)
tensor(2.3372, grad_fn=<NllLossBackward0>)
tensor(3.7141, grad_fn=<NllLossBackward0>)
tensor(3.2055, grad_fn=<NllLossBackward0>)
tensor(2.4796, grad_fn=<NllLossBackward0>)
tensor(2.3460, grad_fn=<NllLossBackward0>)
tensor(3.1072, grad_fn=<NllLossBackward0>)
tensor(2.0848, grad_fn=<NllLossBackward0>)
tensor(2.3805, grad_fn=<NllLossBackward0>)
tensor(3.0750, grad_fn=<NllLossBackward0>)
tensor(1.9297, grad_fn=<NllLossBackward0>)
tensor(2.2805, grad_fn=<NllLossBackward0>)
tensor(1.9051, grad_fn=<NllLossBackward0>)
tensor(2.3891, grad_fn=<NllLossBackward0>)
tensor(2.3291, grad_fn=<NllLossBackward0>)
tensor(2.3370, grad_fn=<NllLossBackward0>)
tensor(2.3785, grad_fn=<NllLossBackward0>)
tensor(3.6677, grad_fn=<NllLossBackward0>)
tensor(2.7188, grad_fn=<NllLossBackward0>)
tensor(3.1796, grad_fn=<NllLossBackward0>)
tensor(2.5686, grad_fn=<NllLossBackward0>)
tensor(2.09

tensor(3.4611, grad_fn=<NllLossBackward0>)
tensor(2.0514, grad_fn=<NllLossBackward0>)
tensor(2.5528, grad_fn=<NllLossBackward0>)
tensor(2.6445, grad_fn=<NllLossBackward0>)
tensor(2.5874, grad_fn=<NllLossBackward0>)
tensor(3.4477, grad_fn=<NllLossBackward0>)
tensor(3.0802, grad_fn=<NllLossBackward0>)
tensor(2.2581, grad_fn=<NllLossBackward0>)
tensor(3.2001, grad_fn=<NllLossBackward0>)
tensor(3.6193, grad_fn=<NllLossBackward0>)
tensor(2.5273, grad_fn=<NllLossBackward0>)
tensor(1.7329, grad_fn=<NllLossBackward0>)
tensor(1.8790, grad_fn=<NllLossBackward0>)
tensor(2.3972, grad_fn=<NllLossBackward0>)
tensor(2.9563, grad_fn=<NllLossBackward0>)
tensor(2.4442, grad_fn=<NllLossBackward0>)
tensor(2.9959, grad_fn=<NllLossBackward0>)
tensor(2.0869, grad_fn=<NllLossBackward0>)
tensor(2.0931, grad_fn=<NllLossBackward0>)
tensor(4.8158, grad_fn=<NllLossBackward0>)
tensor(3.2731, grad_fn=<NllLossBackward0>)
tensor(2.9416, grad_fn=<NllLossBackward0>)
tensor(3.0003, grad_fn=<NllLossBackward0>)
tensor(2.16

tensor(2.3905, grad_fn=<NllLossBackward0>)
tensor(2.3462, grad_fn=<NllLossBackward0>)
tensor(2.4646, grad_fn=<NllLossBackward0>)
tensor(2.8414, grad_fn=<NllLossBackward0>)
tensor(2.4477, grad_fn=<NllLossBackward0>)
tensor(2.8599, grad_fn=<NllLossBackward0>)
tensor(2.7132, grad_fn=<NllLossBackward0>)
tensor(1.7411, grad_fn=<NllLossBackward0>)
tensor(2.3395, grad_fn=<NllLossBackward0>)
tensor(3.8182, grad_fn=<NllLossBackward0>)
tensor(2.6601, grad_fn=<NllLossBackward0>)
tensor(2.7777, grad_fn=<NllLossBackward0>)
tensor(1.8076, grad_fn=<NllLossBackward0>)
tensor(3.5237, grad_fn=<NllLossBackward0>)
tensor(2.5353, grad_fn=<NllLossBackward0>)
tensor(2.2163, grad_fn=<NllLossBackward0>)
tensor(3.9151, grad_fn=<NllLossBackward0>)
tensor(3.2430, grad_fn=<NllLossBackward0>)
tensor(2.0832, grad_fn=<NllLossBackward0>)
tensor(2.0489, grad_fn=<NllLossBackward0>)
tensor(2.4524, grad_fn=<NllLossBackward0>)
tensor(2.1618, grad_fn=<NllLossBackward0>)
tensor(2.3112, grad_fn=<NllLossBackward0>)
tensor(1.94

tensor(2.0345, grad_fn=<NllLossBackward0>)
tensor(3.4751, grad_fn=<NllLossBackward0>)
tensor(3.0379, grad_fn=<NllLossBackward0>)
tensor(1.9045, grad_fn=<NllLossBackward0>)
tensor(2.2993, grad_fn=<NllLossBackward0>)
tensor(2.3425, grad_fn=<NllLossBackward0>)
tensor(2.9854, grad_fn=<NllLossBackward0>)
tensor(2.6640, grad_fn=<NllLossBackward0>)
tensor(2.0735, grad_fn=<NllLossBackward0>)
tensor(2.3116, grad_fn=<NllLossBackward0>)
tensor(2.2551, grad_fn=<NllLossBackward0>)
tensor(2.0392, grad_fn=<NllLossBackward0>)
tensor(2.7018, grad_fn=<NllLossBackward0>)
tensor(2.7567, grad_fn=<NllLossBackward0>)
tensor(2.6208, grad_fn=<NllLossBackward0>)
tensor(3.2312, grad_fn=<NllLossBackward0>)
tensor(2.3682, grad_fn=<NllLossBackward0>)
tensor(2.9092, grad_fn=<NllLossBackward0>)
tensor(2.8775, grad_fn=<NllLossBackward0>)
tensor(1.9132, grad_fn=<NllLossBackward0>)
tensor(2.8467, grad_fn=<NllLossBackward0>)
tensor(2.2232, grad_fn=<NllLossBackward0>)
tensor(3.6100, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(2.8950, grad_fn=<NllLossBackward0>)
tensor(3.5255, grad_fn=<NllLossBackward0>)
tensor(3.0732, grad_fn=<NllLossBackward0>)
tensor(2.5056, grad_fn=<NllLossBackward0>)
tensor(1.7535, grad_fn=<NllLossBackward0>)
tensor(3.3881, grad_fn=<NllLossBackward0>)
tensor(3.6269, grad_fn=<NllLossBackward0>)
tensor(2.7738, grad_fn=<NllLossBackward0>)
tensor(2.9461, grad_fn=<NllLossBackward0>)
tensor(3.2897, grad_fn=<NllLossBackward0>)
tensor(2.3360, grad_fn=<NllLossBackward0>)
tensor(3.4032, grad_fn=<NllLossBackward0>)
tensor(1.9720, grad_fn=<NllLossBackward0>)
tensor(2.5173, grad_fn=<NllLossBackward0>)
tensor(1.8902, grad_fn=<NllLossBackward0>)
tensor(2.5707, grad_fn=<NllLossBackward0>)
tensor(3.3187, grad_fn=<NllLossBackward0>)
tensor(2.7365, grad_fn=<NllLossBackward0>)
tensor(4.0736, grad_fn=<NllLossBackward0>)
tensor(2.6838, grad_fn=<NllLossBackward0>)
tensor(3.4750, grad_fn=<NllLossBackward0>)
tensor(2.3484, grad_fn=<NllLossBackward0>)
tensor(3.3770, grad_fn=<NllLossBackward0>)
tensor(3.14

tensor(3.2565, grad_fn=<NllLossBackward0>)
tensor(2.3670, grad_fn=<NllLossBackward0>)
tensor(2.7461, grad_fn=<NllLossBackward0>)
tensor(2.3141, grad_fn=<NllLossBackward0>)
tensor(1.9494, grad_fn=<NllLossBackward0>)
tensor(2.1474, grad_fn=<NllLossBackward0>)
tensor(2.4036, grad_fn=<NllLossBackward0>)
tensor(1.9713, grad_fn=<NllLossBackward0>)
tensor(2.5254, grad_fn=<NllLossBackward0>)
tensor(2.8307, grad_fn=<NllLossBackward0>)
tensor(3.4546, grad_fn=<NllLossBackward0>)
tensor(3.4904, grad_fn=<NllLossBackward0>)
tensor(2.5078, grad_fn=<NllLossBackward0>)
tensor(2.6089, grad_fn=<NllLossBackward0>)
tensor(1.8498, grad_fn=<NllLossBackward0>)
tensor(4.4343, grad_fn=<NllLossBackward0>)
tensor(2.3152, grad_fn=<NllLossBackward0>)
tensor(2.6375, grad_fn=<NllLossBackward0>)
tensor(1.9484, grad_fn=<NllLossBackward0>)
tensor(2.4560, grad_fn=<NllLossBackward0>)
tensor(1.8988, grad_fn=<NllLossBackward0>)
tensor(2.1861, grad_fn=<NllLossBackward0>)
tensor(3.5116, grad_fn=<NllLossBackward0>)
tensor(3.92

tensor(2.8237, grad_fn=<NllLossBackward0>)
tensor(3.2040, grad_fn=<NllLossBackward0>)
tensor(2.5396, grad_fn=<NllLossBackward0>)
tensor(2.3026, grad_fn=<NllLossBackward0>)
tensor(3.5577, grad_fn=<NllLossBackward0>)
tensor(4.0951, grad_fn=<NllLossBackward0>)
tensor(3.4868, grad_fn=<NllLossBackward0>)
tensor(2.4385, grad_fn=<NllLossBackward0>)
tensor(2.1521, grad_fn=<NllLossBackward0>)
tensor(2.4930, grad_fn=<NllLossBackward0>)
tensor(2.8817, grad_fn=<NllLossBackward0>)
tensor(2.3751, grad_fn=<NllLossBackward0>)
tensor(3.7141, grad_fn=<NllLossBackward0>)
tensor(2.8251, grad_fn=<NllLossBackward0>)
tensor(3.2273, grad_fn=<NllLossBackward0>)
tensor(2.9214, grad_fn=<NllLossBackward0>)
tensor(3.8519, grad_fn=<NllLossBackward0>)
tensor(1.9533, grad_fn=<NllLossBackward0>)
tensor(1.7545, grad_fn=<NllLossBackward0>)
tensor(2.2739, grad_fn=<NllLossBackward0>)
tensor(2.5587, grad_fn=<NllLossBackward0>)
tensor(3.5997, grad_fn=<NllLossBackward0>)
tensor(2.2191, grad_fn=<NllLossBackward0>)
tensor(2.32

tensor(2.8433, grad_fn=<NllLossBackward0>)
tensor(2.7675, grad_fn=<NllLossBackward0>)
tensor(3.4358, grad_fn=<NllLossBackward0>)
tensor(2.9042, grad_fn=<NllLossBackward0>)
tensor(2.2908, grad_fn=<NllLossBackward0>)
tensor(3.1002, grad_fn=<NllLossBackward0>)
tensor(3.0515, grad_fn=<NllLossBackward0>)
tensor(2.6528, grad_fn=<NllLossBackward0>)
tensor(2.8162, grad_fn=<NllLossBackward0>)
tensor(2.9153, grad_fn=<NllLossBackward0>)
tensor(2.4274, grad_fn=<NllLossBackward0>)
tensor(2.8871, grad_fn=<NllLossBackward0>)
tensor(1.9666, grad_fn=<NllLossBackward0>)
tensor(3.6469, grad_fn=<NllLossBackward0>)
tensor(2.7947, grad_fn=<NllLossBackward0>)
tensor(2.7181, grad_fn=<NllLossBackward0>)
tensor(3.3466, grad_fn=<NllLossBackward0>)
tensor(2.7164, grad_fn=<NllLossBackward0>)
tensor(2.2162, grad_fn=<NllLossBackward0>)
tensor(2.9297, grad_fn=<NllLossBackward0>)
tensor(4.0832, grad_fn=<NllLossBackward0>)
tensor(2.3940, grad_fn=<NllLossBackward0>)
tensor(3.3369, grad_fn=<NllLossBackward0>)
tensor(2.10

tensor(2.4921, grad_fn=<NllLossBackward0>)
tensor(2.4800, grad_fn=<NllLossBackward0>)
tensor(2.4260, grad_fn=<NllLossBackward0>)
tensor(2.8431, grad_fn=<NllLossBackward0>)
tensor(1.8822, grad_fn=<NllLossBackward0>)
tensor(3.2469, grad_fn=<NllLossBackward0>)
tensor(2.4528, grad_fn=<NllLossBackward0>)
tensor(2.4371, grad_fn=<NllLossBackward0>)
tensor(3.7796, grad_fn=<NllLossBackward0>)
tensor(2.5918, grad_fn=<NllLossBackward0>)
tensor(2.5539, grad_fn=<NllLossBackward0>)
tensor(3.5346, grad_fn=<NllLossBackward0>)
tensor(2.9748, grad_fn=<NllLossBackward0>)
tensor(1.7670, grad_fn=<NllLossBackward0>)
tensor(2.5423, grad_fn=<NllLossBackward0>)
tensor(2.7418, grad_fn=<NllLossBackward0>)
tensor(3.8148, grad_fn=<NllLossBackward0>)
tensor(3.5297, grad_fn=<NllLossBackward0>)
tensor(3.4013, grad_fn=<NllLossBackward0>)
tensor(1.9557, grad_fn=<NllLossBackward0>)
tensor(3.1737, grad_fn=<NllLossBackward0>)
tensor(3.0000, grad_fn=<NllLossBackward0>)
tensor(1.8764, grad_fn=<NllLossBackward0>)
tensor(2.18

tensor(3.6562, grad_fn=<NllLossBackward0>)
tensor(2.4128, grad_fn=<NllLossBackward0>)
tensor(2.3998, grad_fn=<NllLossBackward0>)
tensor(2.8683, grad_fn=<NllLossBackward0>)
tensor(3.3687, grad_fn=<NllLossBackward0>)
tensor(2.1131, grad_fn=<NllLossBackward0>)
tensor(1.9972, grad_fn=<NllLossBackward0>)
tensor(2.9833, grad_fn=<NllLossBackward0>)
tensor(2.2246, grad_fn=<NllLossBackward0>)
tensor(3.2549, grad_fn=<NllLossBackward0>)
tensor(2.3797, grad_fn=<NllLossBackward0>)
tensor(2.7145, grad_fn=<NllLossBackward0>)
tensor(2.9514, grad_fn=<NllLossBackward0>)
tensor(2.5886, grad_fn=<NllLossBackward0>)
tensor(2.8894, grad_fn=<NllLossBackward0>)
tensor(2.2413, grad_fn=<NllLossBackward0>)
tensor(2.9975, grad_fn=<NllLossBackward0>)
tensor(2.3559, grad_fn=<NllLossBackward0>)
tensor(2.2224, grad_fn=<NllLossBackward0>)
tensor(3.2669, grad_fn=<NllLossBackward0>)
tensor(2.7073, grad_fn=<NllLossBackward0>)
tensor(1.8045, grad_fn=<NllLossBackward0>)
tensor(1.8999, grad_fn=<NllLossBackward0>)
tensor(2.31

tensor(4.0125, grad_fn=<NllLossBackward0>)
tensor(2.7357, grad_fn=<NllLossBackward0>)
tensor(3.3545, grad_fn=<NllLossBackward0>)
tensor(2.3183, grad_fn=<NllLossBackward0>)
tensor(2.2023, grad_fn=<NllLossBackward0>)
tensor(2.0980, grad_fn=<NllLossBackward0>)
tensor(3.5859, grad_fn=<NllLossBackward0>)
tensor(1.8767, grad_fn=<NllLossBackward0>)
tensor(2.9607, grad_fn=<NllLossBackward0>)
tensor(2.1154, grad_fn=<NllLossBackward0>)
tensor(2.9794, grad_fn=<NllLossBackward0>)
tensor(2.1204, grad_fn=<NllLossBackward0>)
tensor(2.4119, grad_fn=<NllLossBackward0>)
tensor(2.9588, grad_fn=<NllLossBackward0>)
tensor(2.8277, grad_fn=<NllLossBackward0>)
tensor(2.1581, grad_fn=<NllLossBackward0>)
tensor(2.6755, grad_fn=<NllLossBackward0>)
tensor(3.3513, grad_fn=<NllLossBackward0>)
tensor(2.9075, grad_fn=<NllLossBackward0>)
tensor(3.3517, grad_fn=<NllLossBackward0>)
tensor(2.1289, grad_fn=<NllLossBackward0>)
tensor(1.8376, grad_fn=<NllLossBackward0>)
tensor(2.6023, grad_fn=<NllLossBackward0>)
tensor(2.13

tensor(2.7528, grad_fn=<NllLossBackward0>)
tensor(2.5627, grad_fn=<NllLossBackward0>)
tensor(2.4305, grad_fn=<NllLossBackward0>)
tensor(3.0215, grad_fn=<NllLossBackward0>)
tensor(2.8066, grad_fn=<NllLossBackward0>)
tensor(3.2729, grad_fn=<NllLossBackward0>)
tensor(1.9081, grad_fn=<NllLossBackward0>)
tensor(2.6470, grad_fn=<NllLossBackward0>)
tensor(2.3307, grad_fn=<NllLossBackward0>)
tensor(2.4928, grad_fn=<NllLossBackward0>)
tensor(3.2277, grad_fn=<NllLossBackward0>)
tensor(3.5512, grad_fn=<NllLossBackward0>)
tensor(2.5037, grad_fn=<NllLossBackward0>)
tensor(2.4179, grad_fn=<NllLossBackward0>)
tensor(2.9066, grad_fn=<NllLossBackward0>)
tensor(2.5255, grad_fn=<NllLossBackward0>)
tensor(3.6336, grad_fn=<NllLossBackward0>)
tensor(1.6964, grad_fn=<NllLossBackward0>)
tensor(4.0409, grad_fn=<NllLossBackward0>)
tensor(2.7190, grad_fn=<NllLossBackward0>)
tensor(2.2123, grad_fn=<NllLossBackward0>)
tensor(2.3713, grad_fn=<NllLossBackward0>)
tensor(2.7189, grad_fn=<NllLossBackward0>)
tensor(2.44

tensor(2.2096, grad_fn=<NllLossBackward0>)
tensor(2.7381, grad_fn=<NllLossBackward0>)
tensor(2.2500, grad_fn=<NllLossBackward0>)
tensor(2.0607, grad_fn=<NllLossBackward0>)
tensor(3.9148, grad_fn=<NllLossBackward0>)
tensor(2.6705, grad_fn=<NllLossBackward0>)
tensor(3.0851, grad_fn=<NllLossBackward0>)
tensor(2.3886, grad_fn=<NllLossBackward0>)
tensor(2.2043, grad_fn=<NllLossBackward0>)
tensor(2.8676, grad_fn=<NllLossBackward0>)
tensor(2.0832, grad_fn=<NllLossBackward0>)
tensor(3.9299, grad_fn=<NllLossBackward0>)
tensor(2.3405, grad_fn=<NllLossBackward0>)
tensor(2.0560, grad_fn=<NllLossBackward0>)
tensor(2.3151, grad_fn=<NllLossBackward0>)
tensor(3.9497, grad_fn=<NllLossBackward0>)
tensor(2.4400, grad_fn=<NllLossBackward0>)
tensor(1.8534, grad_fn=<NllLossBackward0>)
tensor(2.8523, grad_fn=<NllLossBackward0>)
tensor(2.6242, grad_fn=<NllLossBackward0>)
tensor(2.6708, grad_fn=<NllLossBackward0>)
tensor(2.2182, grad_fn=<NllLossBackward0>)
tensor(2.6604, grad_fn=<NllLossBackward0>)
tensor(2.51

tensor(1.7057, grad_fn=<NllLossBackward0>)
tensor(2.0193, grad_fn=<NllLossBackward0>)
tensor(3.5196, grad_fn=<NllLossBackward0>)
tensor(2.3236, grad_fn=<NllLossBackward0>)
tensor(2.6405, grad_fn=<NllLossBackward0>)
tensor(3.1104, grad_fn=<NllLossBackward0>)
tensor(2.5476, grad_fn=<NllLossBackward0>)
tensor(2.9893, grad_fn=<NllLossBackward0>)
tensor(2.4961, grad_fn=<NllLossBackward0>)
tensor(2.4394, grad_fn=<NllLossBackward0>)
tensor(1.8577, grad_fn=<NllLossBackward0>)
tensor(1.8923, grad_fn=<NllLossBackward0>)
tensor(3.3142, grad_fn=<NllLossBackward0>)
tensor(1.9020, grad_fn=<NllLossBackward0>)
tensor(2.7202, grad_fn=<NllLossBackward0>)
tensor(2.0831, grad_fn=<NllLossBackward0>)
tensor(2.2144, grad_fn=<NllLossBackward0>)
tensor(1.8726, grad_fn=<NllLossBackward0>)
tensor(2.6179, grad_fn=<NllLossBackward0>)
tensor(3.3545, grad_fn=<NllLossBackward0>)
tensor(1.7136, grad_fn=<NllLossBackward0>)
tensor(2.1240, grad_fn=<NllLossBackward0>)
tensor(1.6962, grad_fn=<NllLossBackward0>)
tensor(4.29

tensor(2.3540, grad_fn=<NllLossBackward0>)
tensor(2.6964, grad_fn=<NllLossBackward0>)
tensor(2.1607, grad_fn=<NllLossBackward0>)
tensor(2.1990, grad_fn=<NllLossBackward0>)
tensor(1.7894, grad_fn=<NllLossBackward0>)
tensor(3.9732, grad_fn=<NllLossBackward0>)
tensor(1.9325, grad_fn=<NllLossBackward0>)
tensor(2.5524, grad_fn=<NllLossBackward0>)
tensor(2.6299, grad_fn=<NllLossBackward0>)
tensor(2.4798, grad_fn=<NllLossBackward0>)
tensor(2.9829, grad_fn=<NllLossBackward0>)
tensor(2.9106, grad_fn=<NllLossBackward0>)
tensor(3.3317, grad_fn=<NllLossBackward0>)
tensor(2.9968, grad_fn=<NllLossBackward0>)
tensor(2.6692, grad_fn=<NllLossBackward0>)
tensor(4.6705, grad_fn=<NllLossBackward0>)
tensor(2.4000, grad_fn=<NllLossBackward0>)
tensor(1.9294, grad_fn=<NllLossBackward0>)
tensor(2.5444, grad_fn=<NllLossBackward0>)
tensor(2.0631, grad_fn=<NllLossBackward0>)
tensor(2.3126, grad_fn=<NllLossBackward0>)
tensor(2.2795, grad_fn=<NllLossBackward0>)
tensor(2.3042, grad_fn=<NllLossBackward0>)
tensor(2.85

tensor(2.7704, grad_fn=<NllLossBackward0>)
tensor(3.4568, grad_fn=<NllLossBackward0>)
tensor(3.1753, grad_fn=<NllLossBackward0>)
tensor(1.9176, grad_fn=<NllLossBackward0>)
tensor(2.3215, grad_fn=<NllLossBackward0>)
tensor(2.5445, grad_fn=<NllLossBackward0>)
tensor(2.7516, grad_fn=<NllLossBackward0>)
tensor(2.8593, grad_fn=<NllLossBackward0>)
tensor(2.5345, grad_fn=<NllLossBackward0>)
tensor(2.7198, grad_fn=<NllLossBackward0>)
tensor(2.4599, grad_fn=<NllLossBackward0>)
tensor(2.4995, grad_fn=<NllLossBackward0>)
tensor(2.6299, grad_fn=<NllLossBackward0>)
tensor(3.5497, grad_fn=<NllLossBackward0>)
tensor(2.1221, grad_fn=<NllLossBackward0>)
tensor(2.3026, grad_fn=<NllLossBackward0>)
tensor(4.1354, grad_fn=<NllLossBackward0>)
tensor(3.6437, grad_fn=<NllLossBackward0>)
tensor(2.5408, grad_fn=<NllLossBackward0>)
tensor(2.0591, grad_fn=<NllLossBackward0>)
tensor(3.7732, grad_fn=<NllLossBackward0>)
tensor(3.4280, grad_fn=<NllLossBackward0>)
tensor(2.0478, grad_fn=<NllLossBackward0>)
tensor(3.53

tensor(2.2623, grad_fn=<NllLossBackward0>)
tensor(2.9350, grad_fn=<NllLossBackward0>)
tensor(3.0758, grad_fn=<NllLossBackward0>)
tensor(2.3881, grad_fn=<NllLossBackward0>)
tensor(2.8816, grad_fn=<NllLossBackward0>)
tensor(3.3423, grad_fn=<NllLossBackward0>)
tensor(1.9265, grad_fn=<NllLossBackward0>)
tensor(2.7331, grad_fn=<NllLossBackward0>)
tensor(3.6745, grad_fn=<NllLossBackward0>)
tensor(2.5285, grad_fn=<NllLossBackward0>)
tensor(2.5435, grad_fn=<NllLossBackward0>)
tensor(2.9474, grad_fn=<NllLossBackward0>)
tensor(2.3781, grad_fn=<NllLossBackward0>)
tensor(1.9793, grad_fn=<NllLossBackward0>)
tensor(2.2949, grad_fn=<NllLossBackward0>)
tensor(3.4670, grad_fn=<NllLossBackward0>)
tensor(2.7878, grad_fn=<NllLossBackward0>)
tensor(2.1056, grad_fn=<NllLossBackward0>)
tensor(2.0247, grad_fn=<NllLossBackward0>)
tensor(3.8952, grad_fn=<NllLossBackward0>)
tensor(1.8874, grad_fn=<NllLossBackward0>)
tensor(3.2685, grad_fn=<NllLossBackward0>)
tensor(2.0981, grad_fn=<NllLossBackward0>)
tensor(2.61

tensor(2.9528, grad_fn=<NllLossBackward0>)
tensor(2.5485, grad_fn=<NllLossBackward0>)
tensor(3.6091, grad_fn=<NllLossBackward0>)
tensor(2.9298, grad_fn=<NllLossBackward0>)
tensor(2.5463, grad_fn=<NllLossBackward0>)
tensor(3.7722, grad_fn=<NllLossBackward0>)
tensor(2.6820, grad_fn=<NllLossBackward0>)
tensor(2.3652, grad_fn=<NllLossBackward0>)
tensor(1.7851, grad_fn=<NllLossBackward0>)
tensor(2.9425, grad_fn=<NllLossBackward0>)
tensor(2.7206, grad_fn=<NllLossBackward0>)
tensor(2.6667, grad_fn=<NllLossBackward0>)
tensor(4.1013, grad_fn=<NllLossBackward0>)
tensor(2.0231, grad_fn=<NllLossBackward0>)
tensor(2.5401, grad_fn=<NllLossBackward0>)
tensor(2.0853, grad_fn=<NllLossBackward0>)
tensor(1.9824, grad_fn=<NllLossBackward0>)
tensor(4.0844, grad_fn=<NllLossBackward0>)
tensor(3.6662, grad_fn=<NllLossBackward0>)
tensor(2.7780, grad_fn=<NllLossBackward0>)
tensor(3.6611, grad_fn=<NllLossBackward0>)
tensor(2.6429, grad_fn=<NllLossBackward0>)
tensor(2.8802, grad_fn=<NllLossBackward0>)
tensor(3.48

tensor(2.3165, grad_fn=<NllLossBackward0>)
tensor(2.9195, grad_fn=<NllLossBackward0>)
tensor(3.0606, grad_fn=<NllLossBackward0>)
tensor(2.5813, grad_fn=<NllLossBackward0>)
tensor(2.2018, grad_fn=<NllLossBackward0>)
tensor(2.0052, grad_fn=<NllLossBackward0>)
tensor(2.0322, grad_fn=<NllLossBackward0>)
tensor(3.2311, grad_fn=<NllLossBackward0>)
tensor(3.5074, grad_fn=<NllLossBackward0>)
tensor(2.0838, grad_fn=<NllLossBackward0>)
tensor(2.5573, grad_fn=<NllLossBackward0>)
tensor(2.6069, grad_fn=<NllLossBackward0>)
tensor(3.9966, grad_fn=<NllLossBackward0>)
tensor(2.3134, grad_fn=<NllLossBackward0>)
tensor(2.8065, grad_fn=<NllLossBackward0>)
tensor(3.3450, grad_fn=<NllLossBackward0>)
tensor(2.7960, grad_fn=<NllLossBackward0>)
tensor(1.9082, grad_fn=<NllLossBackward0>)
tensor(3.1997, grad_fn=<NllLossBackward0>)
tensor(3.1224, grad_fn=<NllLossBackward0>)
tensor(2.6196, grad_fn=<NllLossBackward0>)
tensor(3.5011, grad_fn=<NllLossBackward0>)
tensor(2.7371, grad_fn=<NllLossBackward0>)
tensor(3.25

tensor(2.2710, grad_fn=<NllLossBackward0>)
tensor(1.6598, grad_fn=<NllLossBackward0>)
tensor(3.7371, grad_fn=<NllLossBackward0>)
tensor(1.9524, grad_fn=<NllLossBackward0>)
tensor(3.3389, grad_fn=<NllLossBackward0>)
tensor(2.7312, grad_fn=<NllLossBackward0>)
tensor(3.7307, grad_fn=<NllLossBackward0>)
tensor(2.8301, grad_fn=<NllLossBackward0>)
tensor(2.9252, grad_fn=<NllLossBackward0>)
tensor(2.5281, grad_fn=<NllLossBackward0>)
tensor(2.4205, grad_fn=<NllLossBackward0>)
tensor(1.7626, grad_fn=<NllLossBackward0>)
tensor(2.2019, grad_fn=<NllLossBackward0>)
tensor(2.2151, grad_fn=<NllLossBackward0>)
tensor(2.4543, grad_fn=<NllLossBackward0>)
tensor(1.8774, grad_fn=<NllLossBackward0>)
tensor(2.9645, grad_fn=<NllLossBackward0>)
tensor(3.1622, grad_fn=<NllLossBackward0>)
tensor(2.3310, grad_fn=<NllLossBackward0>)
tensor(2.0242, grad_fn=<NllLossBackward0>)
tensor(4.0163, grad_fn=<NllLossBackward0>)
tensor(4.0030, grad_fn=<NllLossBackward0>)
tensor(2.1240, grad_fn=<NllLossBackward0>)
tensor(2.11

tensor(2.6035, grad_fn=<NllLossBackward0>)
tensor(3.0283, grad_fn=<NllLossBackward0>)
tensor(2.2164, grad_fn=<NllLossBackward0>)
tensor(1.9540, grad_fn=<NllLossBackward0>)
tensor(2.5267, grad_fn=<NllLossBackward0>)
tensor(3.0052, grad_fn=<NllLossBackward0>)
tensor(2.4616, grad_fn=<NllLossBackward0>)
tensor(3.3676, grad_fn=<NllLossBackward0>)
tensor(2.1114, grad_fn=<NllLossBackward0>)
tensor(2.7429, grad_fn=<NllLossBackward0>)
tensor(2.9555, grad_fn=<NllLossBackward0>)
tensor(2.4931, grad_fn=<NllLossBackward0>)
tensor(1.9440, grad_fn=<NllLossBackward0>)
tensor(2.7929, grad_fn=<NllLossBackward0>)
tensor(2.8402, grad_fn=<NllLossBackward0>)
tensor(2.2715, grad_fn=<NllLossBackward0>)
tensor(2.1105, grad_fn=<NllLossBackward0>)
tensor(2.7594, grad_fn=<NllLossBackward0>)
tensor(2.9813, grad_fn=<NllLossBackward0>)
tensor(2.1621, grad_fn=<NllLossBackward0>)
tensor(4.0890, grad_fn=<NllLossBackward0>)
tensor(2.4298, grad_fn=<NllLossBackward0>)
tensor(4.5241, grad_fn=<NllLossBackward0>)
tensor(2.37

tensor(2.8883, grad_fn=<NllLossBackward0>)
tensor(2.5345, grad_fn=<NllLossBackward0>)
tensor(2.3234, grad_fn=<NllLossBackward0>)
tensor(2.5673, grad_fn=<NllLossBackward0>)
tensor(3.0376, grad_fn=<NllLossBackward0>)
tensor(2.8830, grad_fn=<NllLossBackward0>)
tensor(3.7914, grad_fn=<NllLossBackward0>)
tensor(3.2079, grad_fn=<NllLossBackward0>)
tensor(4.5458, grad_fn=<NllLossBackward0>)
tensor(3.6137, grad_fn=<NllLossBackward0>)
tensor(1.6891, grad_fn=<NllLossBackward0>)
tensor(1.7193, grad_fn=<NllLossBackward0>)
tensor(2.9909, grad_fn=<NllLossBackward0>)
tensor(2.2158, grad_fn=<NllLossBackward0>)
tensor(1.9844, grad_fn=<NllLossBackward0>)
tensor(2.4557, grad_fn=<NllLossBackward0>)
tensor(3.2376, grad_fn=<NllLossBackward0>)
tensor(3.3109, grad_fn=<NllLossBackward0>)
tensor(1.8467, grad_fn=<NllLossBackward0>)
tensor(2.5940, grad_fn=<NllLossBackward0>)
tensor(2.9166, grad_fn=<NllLossBackward0>)
tensor(2.2606, grad_fn=<NllLossBackward0>)
tensor(4.2192, grad_fn=<NllLossBackward0>)
tensor(1.86

tensor(1.8130, grad_fn=<NllLossBackward0>)
tensor(3.6244, grad_fn=<NllLossBackward0>)
tensor(3.9057, grad_fn=<NllLossBackward0>)
tensor(2.0663, grad_fn=<NllLossBackward0>)
tensor(2.4786, grad_fn=<NllLossBackward0>)
tensor(3.3564, grad_fn=<NllLossBackward0>)
tensor(4.0110, grad_fn=<NllLossBackward0>)
tensor(2.4006, grad_fn=<NllLossBackward0>)
tensor(2.1857, grad_fn=<NllLossBackward0>)
tensor(2.7326, grad_fn=<NllLossBackward0>)
tensor(2.5517, grad_fn=<NllLossBackward0>)
tensor(1.7689, grad_fn=<NllLossBackward0>)
tensor(3.4656, grad_fn=<NllLossBackward0>)
tensor(2.4412, grad_fn=<NllLossBackward0>)
tensor(1.7453, grad_fn=<NllLossBackward0>)
tensor(3.5637, grad_fn=<NllLossBackward0>)
tensor(1.8519, grad_fn=<NllLossBackward0>)
tensor(3.2450, grad_fn=<NllLossBackward0>)
tensor(2.2792, grad_fn=<NllLossBackward0>)
tensor(1.8980, grad_fn=<NllLossBackward0>)
tensor(2.6985, grad_fn=<NllLossBackward0>)
tensor(2.9400, grad_fn=<NllLossBackward0>)
tensor(2.0475, grad_fn=<NllLossBackward0>)
tensor(3.58

tensor(2.5300, grad_fn=<NllLossBackward0>)
tensor(1.9033, grad_fn=<NllLossBackward0>)
tensor(2.1632, grad_fn=<NllLossBackward0>)
tensor(1.8824, grad_fn=<NllLossBackward0>)
tensor(2.9535, grad_fn=<NllLossBackward0>)
tensor(2.8839, grad_fn=<NllLossBackward0>)
tensor(3.9536, grad_fn=<NllLossBackward0>)
tensor(3.3578, grad_fn=<NllLossBackward0>)
tensor(3.8546, grad_fn=<NllLossBackward0>)
tensor(3.8022, grad_fn=<NllLossBackward0>)
tensor(3.2190, grad_fn=<NllLossBackward0>)
tensor(2.9410, grad_fn=<NllLossBackward0>)
tensor(3.3647, grad_fn=<NllLossBackward0>)
tensor(2.4060, grad_fn=<NllLossBackward0>)
tensor(2.2595, grad_fn=<NllLossBackward0>)
tensor(2.0522, grad_fn=<NllLossBackward0>)
tensor(2.3247, grad_fn=<NllLossBackward0>)
tensor(3.5133, grad_fn=<NllLossBackward0>)
tensor(1.9546, grad_fn=<NllLossBackward0>)
tensor(2.9472, grad_fn=<NllLossBackward0>)
tensor(3.5566, grad_fn=<NllLossBackward0>)
tensor(2.4301, grad_fn=<NllLossBackward0>)
tensor(3.3886, grad_fn=<NllLossBackward0>)
tensor(2.04

tensor(2.2555, grad_fn=<NllLossBackward0>)
tensor(2.9257, grad_fn=<NllLossBackward0>)
tensor(2.7584, grad_fn=<NllLossBackward0>)
tensor(4.0482, grad_fn=<NllLossBackward0>)
tensor(4.4384, grad_fn=<NllLossBackward0>)
tensor(2.6008, grad_fn=<NllLossBackward0>)
tensor(3.2958, grad_fn=<NllLossBackward0>)
tensor(2.6770, grad_fn=<NllLossBackward0>)
tensor(2.6699, grad_fn=<NllLossBackward0>)
tensor(2.6055, grad_fn=<NllLossBackward0>)
tensor(2.9101, grad_fn=<NllLossBackward0>)
tensor(2.1133, grad_fn=<NllLossBackward0>)
tensor(2.2527, grad_fn=<NllLossBackward0>)
tensor(3.6487, grad_fn=<NllLossBackward0>)
tensor(2.4227, grad_fn=<NllLossBackward0>)
tensor(2.9928, grad_fn=<NllLossBackward0>)
tensor(2.3491, grad_fn=<NllLossBackward0>)
tensor(2.8300, grad_fn=<NllLossBackward0>)
tensor(2.9314, grad_fn=<NllLossBackward0>)
tensor(2.5298, grad_fn=<NllLossBackward0>)
tensor(3.0649, grad_fn=<NllLossBackward0>)
tensor(4.2968, grad_fn=<NllLossBackward0>)
tensor(3.1472, grad_fn=<NllLossBackward0>)
tensor(3.00

tensor(2.8712, grad_fn=<NllLossBackward0>)
tensor(3.5675, grad_fn=<NllLossBackward0>)
tensor(1.8975, grad_fn=<NllLossBackward0>)
tensor(2.9554, grad_fn=<NllLossBackward0>)
tensor(3.2398, grad_fn=<NllLossBackward0>)
tensor(1.9426, grad_fn=<NllLossBackward0>)
tensor(2.1805, grad_fn=<NllLossBackward0>)
tensor(2.9352, grad_fn=<NllLossBackward0>)
tensor(3.1160, grad_fn=<NllLossBackward0>)
tensor(2.2247, grad_fn=<NllLossBackward0>)
tensor(1.9960, grad_fn=<NllLossBackward0>)
tensor(2.2070, grad_fn=<NllLossBackward0>)
tensor(2.7940, grad_fn=<NllLossBackward0>)
tensor(2.8894, grad_fn=<NllLossBackward0>)
tensor(1.8011, grad_fn=<NllLossBackward0>)
tensor(2.1239, grad_fn=<NllLossBackward0>)
tensor(3.1253, grad_fn=<NllLossBackward0>)
tensor(1.9162, grad_fn=<NllLossBackward0>)
tensor(3.7062, grad_fn=<NllLossBackward0>)
tensor(2.1608, grad_fn=<NllLossBackward0>)
tensor(2.2742, grad_fn=<NllLossBackward0>)
tensor(2.8768, grad_fn=<NllLossBackward0>)
tensor(2.2238, grad_fn=<NllLossBackward0>)
tensor(3.62

tensor(2.3205, grad_fn=<NllLossBackward0>)
tensor(2.0096, grad_fn=<NllLossBackward0>)
tensor(2.2019, grad_fn=<NllLossBackward0>)
tensor(2.5361, grad_fn=<NllLossBackward0>)
tensor(2.7214, grad_fn=<NllLossBackward0>)
tensor(2.5083, grad_fn=<NllLossBackward0>)
tensor(3.0151, grad_fn=<NllLossBackward0>)
tensor(2.6252, grad_fn=<NllLossBackward0>)
tensor(2.4771, grad_fn=<NllLossBackward0>)
tensor(3.8300, grad_fn=<NllLossBackward0>)
tensor(3.0447, grad_fn=<NllLossBackward0>)
tensor(2.0474, grad_fn=<NllLossBackward0>)
tensor(2.4503, grad_fn=<NllLossBackward0>)
tensor(4.4621, grad_fn=<NllLossBackward0>)
tensor(2.9535, grad_fn=<NllLossBackward0>)
tensor(2.9649, grad_fn=<NllLossBackward0>)
tensor(1.9266, grad_fn=<NllLossBackward0>)
tensor(3.6961, grad_fn=<NllLossBackward0>)
tensor(2.2759, grad_fn=<NllLossBackward0>)
tensor(1.9542, grad_fn=<NllLossBackward0>)
tensor(1.8901, grad_fn=<NllLossBackward0>)
tensor(3.5124, grad_fn=<NllLossBackward0>)
tensor(2.1225, grad_fn=<NllLossBackward0>)
tensor(3.23

tensor(2.9970, grad_fn=<NllLossBackward0>)
tensor(2.0565, grad_fn=<NllLossBackward0>)
tensor(2.3934, grad_fn=<NllLossBackward0>)
tensor(3.4691, grad_fn=<NllLossBackward0>)
tensor(3.1796, grad_fn=<NllLossBackward0>)
tensor(1.7172, grad_fn=<NllLossBackward0>)
tensor(2.7654, grad_fn=<NllLossBackward0>)
tensor(2.7790, grad_fn=<NllLossBackward0>)
tensor(2.8643, grad_fn=<NllLossBackward0>)
tensor(3.2819, grad_fn=<NllLossBackward0>)
tensor(1.9337, grad_fn=<NllLossBackward0>)
tensor(2.3232, grad_fn=<NllLossBackward0>)
tensor(1.9198, grad_fn=<NllLossBackward0>)
tensor(2.9760, grad_fn=<NllLossBackward0>)
tensor(4.3011, grad_fn=<NllLossBackward0>)
tensor(2.2378, grad_fn=<NllLossBackward0>)
tensor(3.9446, grad_fn=<NllLossBackward0>)
tensor(2.5031, grad_fn=<NllLossBackward0>)
tensor(2.9887, grad_fn=<NllLossBackward0>)
tensor(2.2812, grad_fn=<NllLossBackward0>)
tensor(2.3104, grad_fn=<NllLossBackward0>)
tensor(1.8627, grad_fn=<NllLossBackward0>)
tensor(3.6348, grad_fn=<NllLossBackward0>)
tensor(2.44

tensor(3.7956, grad_fn=<NllLossBackward0>)
tensor(2.9119, grad_fn=<NllLossBackward0>)
tensor(3.0673, grad_fn=<NllLossBackward0>)
tensor(2.7456, grad_fn=<NllLossBackward0>)
tensor(2.7900, grad_fn=<NllLossBackward0>)
tensor(2.2072, grad_fn=<NllLossBackward0>)
tensor(2.6064, grad_fn=<NllLossBackward0>)
tensor(4.1023, grad_fn=<NllLossBackward0>)
tensor(2.7791, grad_fn=<NllLossBackward0>)
tensor(3.5412, grad_fn=<NllLossBackward0>)
tensor(2.1833, grad_fn=<NllLossBackward0>)
tensor(2.1243, grad_fn=<NllLossBackward0>)
tensor(2.5559, grad_fn=<NllLossBackward0>)
tensor(2.3513, grad_fn=<NllLossBackward0>)
tensor(4.5609, grad_fn=<NllLossBackward0>)
tensor(3.2526, grad_fn=<NllLossBackward0>)
tensor(2.3578, grad_fn=<NllLossBackward0>)
tensor(2.6789, grad_fn=<NllLossBackward0>)
tensor(2.4534, grad_fn=<NllLossBackward0>)
tensor(3.9870, grad_fn=<NllLossBackward0>)
tensor(2.4973, grad_fn=<NllLossBackward0>)
tensor(2.0266, grad_fn=<NllLossBackward0>)
tensor(2.7558, grad_fn=<NllLossBackward0>)
tensor(3.52

tensor(2.4521, grad_fn=<NllLossBackward0>)
tensor(3.3326, grad_fn=<NllLossBackward0>)
tensor(3.3283, grad_fn=<NllLossBackward0>)
tensor(3.0685, grad_fn=<NllLossBackward0>)
tensor(2.1819, grad_fn=<NllLossBackward0>)
tensor(1.9915, grad_fn=<NllLossBackward0>)
tensor(2.2143, grad_fn=<NllLossBackward0>)
tensor(4.0107, grad_fn=<NllLossBackward0>)
tensor(2.6123, grad_fn=<NllLossBackward0>)
tensor(3.5041, grad_fn=<NllLossBackward0>)
tensor(2.8644, grad_fn=<NllLossBackward0>)
tensor(2.4412, grad_fn=<NllLossBackward0>)
tensor(2.3105, grad_fn=<NllLossBackward0>)
tensor(2.9728, grad_fn=<NllLossBackward0>)
tensor(1.7002, grad_fn=<NllLossBackward0>)
tensor(2.0158, grad_fn=<NllLossBackward0>)
tensor(1.8261, grad_fn=<NllLossBackward0>)
tensor(2.5582, grad_fn=<NllLossBackward0>)
tensor(1.7672, grad_fn=<NllLossBackward0>)
tensor(2.5658, grad_fn=<NllLossBackward0>)
tensor(2.8935, grad_fn=<NllLossBackward0>)
tensor(2.9305, grad_fn=<NllLossBackward0>)
tensor(3.2799, grad_fn=<NllLossBackward0>)
tensor(3.38

tensor(2.8794, grad_fn=<NllLossBackward0>)
tensor(3.8001, grad_fn=<NllLossBackward0>)
tensor(2.8791, grad_fn=<NllLossBackward0>)
tensor(3.5303, grad_fn=<NllLossBackward0>)
tensor(2.9495, grad_fn=<NllLossBackward0>)
tensor(2.9111, grad_fn=<NllLossBackward0>)
tensor(3.0094, grad_fn=<NllLossBackward0>)
tensor(2.2254, grad_fn=<NllLossBackward0>)
tensor(3.3061, grad_fn=<NllLossBackward0>)
tensor(2.8952, grad_fn=<NllLossBackward0>)
tensor(2.1184, grad_fn=<NllLossBackward0>)
tensor(2.4755, grad_fn=<NllLossBackward0>)
tensor(1.9110, grad_fn=<NllLossBackward0>)
tensor(3.4941, grad_fn=<NllLossBackward0>)
tensor(2.7039, grad_fn=<NllLossBackward0>)
tensor(2.9747, grad_fn=<NllLossBackward0>)
tensor(2.1984, grad_fn=<NllLossBackward0>)
tensor(3.8745, grad_fn=<NllLossBackward0>)
tensor(2.3591, grad_fn=<NllLossBackward0>)
tensor(2.0170, grad_fn=<NllLossBackward0>)
tensor(2.9470, grad_fn=<NllLossBackward0>)
tensor(3.0594, grad_fn=<NllLossBackward0>)
tensor(2.7035, grad_fn=<NllLossBackward0>)
tensor(2.86

tensor(1.8220, grad_fn=<NllLossBackward0>)
tensor(2.8708, grad_fn=<NllLossBackward0>)
tensor(2.4927, grad_fn=<NllLossBackward0>)
tensor(3.6493, grad_fn=<NllLossBackward0>)
tensor(4.4900, grad_fn=<NllLossBackward0>)
tensor(2.0791, grad_fn=<NllLossBackward0>)
tensor(2.5544, grad_fn=<NllLossBackward0>)
tensor(3.2235, grad_fn=<NllLossBackward0>)
tensor(2.7008, grad_fn=<NllLossBackward0>)
tensor(1.6862, grad_fn=<NllLossBackward0>)
tensor(2.0680, grad_fn=<NllLossBackward0>)
tensor(2.8555, grad_fn=<NllLossBackward0>)
tensor(2.3249, grad_fn=<NllLossBackward0>)
tensor(1.9332, grad_fn=<NllLossBackward0>)
tensor(4.0991, grad_fn=<NllLossBackward0>)
tensor(1.7891, grad_fn=<NllLossBackward0>)
tensor(2.5361, grad_fn=<NllLossBackward0>)
tensor(2.8786, grad_fn=<NllLossBackward0>)
tensor(2.3925, grad_fn=<NllLossBackward0>)
tensor(2.3481, grad_fn=<NllLossBackward0>)
tensor(2.9942, grad_fn=<NllLossBackward0>)
tensor(2.5463, grad_fn=<NllLossBackward0>)
tensor(1.8683, grad_fn=<NllLossBackward0>)
tensor(4.50

tensor(3.6982, grad_fn=<NllLossBackward0>)
tensor(2.1804, grad_fn=<NllLossBackward0>)
tensor(2.0486, grad_fn=<NllLossBackward0>)
tensor(1.8444, grad_fn=<NllLossBackward0>)
tensor(2.8700, grad_fn=<NllLossBackward0>)
tensor(2.4086, grad_fn=<NllLossBackward0>)
tensor(2.2789, grad_fn=<NllLossBackward0>)
tensor(2.1224, grad_fn=<NllLossBackward0>)
tensor(1.8434, grad_fn=<NllLossBackward0>)
tensor(2.2849, grad_fn=<NllLossBackward0>)
tensor(2.4185, grad_fn=<NllLossBackward0>)
tensor(3.5183, grad_fn=<NllLossBackward0>)
tensor(2.7246, grad_fn=<NllLossBackward0>)
tensor(2.5842, grad_fn=<NllLossBackward0>)
tensor(2.0569, grad_fn=<NllLossBackward0>)
tensor(1.7786, grad_fn=<NllLossBackward0>)
tensor(2.3260, grad_fn=<NllLossBackward0>)
tensor(2.9644, grad_fn=<NllLossBackward0>)
tensor(5.0004, grad_fn=<NllLossBackward0>)
tensor(3.3413, grad_fn=<NllLossBackward0>)
tensor(3.2057, grad_fn=<NllLossBackward0>)
tensor(1.9390, grad_fn=<NllLossBackward0>)
tensor(2.3700, grad_fn=<NllLossBackward0>)
tensor(2.10

tensor(2.4359, grad_fn=<NllLossBackward0>)
tensor(2.3151, grad_fn=<NllLossBackward0>)
tensor(2.9631, grad_fn=<NllLossBackward0>)
tensor(2.9804, grad_fn=<NllLossBackward0>)
tensor(3.4061, grad_fn=<NllLossBackward0>)
tensor(2.2750, grad_fn=<NllLossBackward0>)
tensor(3.5146, grad_fn=<NllLossBackward0>)
tensor(2.4136, grad_fn=<NllLossBackward0>)
tensor(2.9156, grad_fn=<NllLossBackward0>)
tensor(3.5428, grad_fn=<NllLossBackward0>)
tensor(2.7539, grad_fn=<NllLossBackward0>)
tensor(2.9306, grad_fn=<NllLossBackward0>)
tensor(2.8952, grad_fn=<NllLossBackward0>)
tensor(2.2063, grad_fn=<NllLossBackward0>)
tensor(1.7605, grad_fn=<NllLossBackward0>)
tensor(2.7789, grad_fn=<NllLossBackward0>)
tensor(2.8554, grad_fn=<NllLossBackward0>)
tensor(2.3608, grad_fn=<NllLossBackward0>)
tensor(1.7758, grad_fn=<NllLossBackward0>)
tensor(2.9730, grad_fn=<NllLossBackward0>)
tensor(3.3111, grad_fn=<NllLossBackward0>)
tensor(2.4530, grad_fn=<NllLossBackward0>)
tensor(3.2524, grad_fn=<NllLossBackward0>)
tensor(1.96

tensor(3.3306, grad_fn=<NllLossBackward0>)
tensor(3.4500, grad_fn=<NllLossBackward0>)
tensor(3.7181, grad_fn=<NllLossBackward0>)
tensor(3.1697, grad_fn=<NllLossBackward0>)
tensor(2.4603, grad_fn=<NllLossBackward0>)
tensor(2.4158, grad_fn=<NllLossBackward0>)
tensor(1.9281, grad_fn=<NllLossBackward0>)
tensor(2.8280, grad_fn=<NllLossBackward0>)
tensor(3.7214, grad_fn=<NllLossBackward0>)
tensor(1.9465, grad_fn=<NllLossBackward0>)
tensor(3.2995, grad_fn=<NllLossBackward0>)
tensor(1.9841, grad_fn=<NllLossBackward0>)
tensor(2.5013, grad_fn=<NllLossBackward0>)
tensor(3.2924, grad_fn=<NllLossBackward0>)
tensor(3.5849, grad_fn=<NllLossBackward0>)
tensor(2.7949, grad_fn=<NllLossBackward0>)
tensor(4.1495, grad_fn=<NllLossBackward0>)
tensor(3.1061, grad_fn=<NllLossBackward0>)
tensor(2.3848, grad_fn=<NllLossBackward0>)
tensor(2.4879, grad_fn=<NllLossBackward0>)
tensor(1.9792, grad_fn=<NllLossBackward0>)
tensor(2.8393, grad_fn=<NllLossBackward0>)
tensor(2.3977, grad_fn=<NllLossBackward0>)
tensor(3.11

tensor(2.4948, grad_fn=<NllLossBackward0>)
tensor(5.2283, grad_fn=<NllLossBackward0>)
tensor(2.3460, grad_fn=<NllLossBackward0>)
tensor(2.3907, grad_fn=<NllLossBackward0>)
tensor(1.9311, grad_fn=<NllLossBackward0>)
tensor(2.3687, grad_fn=<NllLossBackward0>)
tensor(2.7613, grad_fn=<NllLossBackward0>)
tensor(2.1789, grad_fn=<NllLossBackward0>)
tensor(2.3321, grad_fn=<NllLossBackward0>)
tensor(4.2066, grad_fn=<NllLossBackward0>)
tensor(2.5674, grad_fn=<NllLossBackward0>)
tensor(2.0944, grad_fn=<NllLossBackward0>)
tensor(2.3179, grad_fn=<NllLossBackward0>)
tensor(2.9554, grad_fn=<NllLossBackward0>)
tensor(2.4445, grad_fn=<NllLossBackward0>)
tensor(3.5378, grad_fn=<NllLossBackward0>)
tensor(3.2461, grad_fn=<NllLossBackward0>)
tensor(2.1559, grad_fn=<NllLossBackward0>)
tensor(2.4350, grad_fn=<NllLossBackward0>)
tensor(1.9818, grad_fn=<NllLossBackward0>)
tensor(3.6018, grad_fn=<NllLossBackward0>)
tensor(2.9943, grad_fn=<NllLossBackward0>)
tensor(3.9272, grad_fn=<NllLossBackward0>)
tensor(2.29

tensor(2.7335, grad_fn=<NllLossBackward0>)
tensor(2.0839, grad_fn=<NllLossBackward0>)
tensor(2.3115, grad_fn=<NllLossBackward0>)
tensor(2.3782, grad_fn=<NllLossBackward0>)
tensor(3.6104, grad_fn=<NllLossBackward0>)
tensor(3.3125, grad_fn=<NllLossBackward0>)
tensor(2.1208, grad_fn=<NllLossBackward0>)
tensor(2.5253, grad_fn=<NllLossBackward0>)
tensor(2.2690, grad_fn=<NllLossBackward0>)
tensor(3.3619, grad_fn=<NllLossBackward0>)
tensor(2.7307, grad_fn=<NllLossBackward0>)
tensor(1.7595, grad_fn=<NllLossBackward0>)
tensor(2.3636, grad_fn=<NllLossBackward0>)
tensor(2.3625, grad_fn=<NllLossBackward0>)
tensor(2.5035, grad_fn=<NllLossBackward0>)
tensor(1.8441, grad_fn=<NllLossBackward0>)
tensor(3.3851, grad_fn=<NllLossBackward0>)
tensor(1.9786, grad_fn=<NllLossBackward0>)
tensor(2.9442, grad_fn=<NllLossBackward0>)
tensor(2.2845, grad_fn=<NllLossBackward0>)
tensor(2.3471, grad_fn=<NllLossBackward0>)
tensor(2.8132, grad_fn=<NllLossBackward0>)
tensor(2.0933, grad_fn=<NllLossBackward0>)
tensor(1.97

tensor(2.0513, grad_fn=<NllLossBackward0>)
tensor(3.4983, grad_fn=<NllLossBackward0>)
tensor(2.4648, grad_fn=<NllLossBackward0>)
tensor(2.8198, grad_fn=<NllLossBackward0>)
tensor(2.6652, grad_fn=<NllLossBackward0>)
tensor(3.1909, grad_fn=<NllLossBackward0>)
tensor(1.9821, grad_fn=<NllLossBackward0>)
tensor(2.2528, grad_fn=<NllLossBackward0>)
tensor(2.5750, grad_fn=<NllLossBackward0>)
tensor(2.7651, grad_fn=<NllLossBackward0>)
tensor(3.1155, grad_fn=<NllLossBackward0>)
tensor(1.7331, grad_fn=<NllLossBackward0>)
tensor(2.7699, grad_fn=<NllLossBackward0>)
tensor(2.3828, grad_fn=<NllLossBackward0>)
tensor(2.3582, grad_fn=<NllLossBackward0>)
tensor(4.0256, grad_fn=<NllLossBackward0>)
tensor(2.9679, grad_fn=<NllLossBackward0>)
tensor(2.1420, grad_fn=<NllLossBackward0>)
tensor(2.8403, grad_fn=<NllLossBackward0>)
tensor(2.4598, grad_fn=<NllLossBackward0>)
tensor(3.0021, grad_fn=<NllLossBackward0>)
tensor(2.0785, grad_fn=<NllLossBackward0>)
tensor(2.2827, grad_fn=<NllLossBackward0>)
tensor(2.06

tensor(2.7737, grad_fn=<NllLossBackward0>)
tensor(2.4631, grad_fn=<NllLossBackward0>)
tensor(4.0517, grad_fn=<NllLossBackward0>)
tensor(3.3983, grad_fn=<NllLossBackward0>)
tensor(2.3298, grad_fn=<NllLossBackward0>)
tensor(2.3422, grad_fn=<NllLossBackward0>)
tensor(2.8453, grad_fn=<NllLossBackward0>)
tensor(2.7593, grad_fn=<NllLossBackward0>)
tensor(3.8412, grad_fn=<NllLossBackward0>)
tensor(1.7481, grad_fn=<NllLossBackward0>)
tensor(3.5717, grad_fn=<NllLossBackward0>)
tensor(2.4235, grad_fn=<NllLossBackward0>)
tensor(3.1408, grad_fn=<NllLossBackward0>)
tensor(3.1973, grad_fn=<NllLossBackward0>)
tensor(3.5631, grad_fn=<NllLossBackward0>)
tensor(2.9002, grad_fn=<NllLossBackward0>)
tensor(2.3682, grad_fn=<NllLossBackward0>)
tensor(2.6066, grad_fn=<NllLossBackward0>)
tensor(4.2640, grad_fn=<NllLossBackward0>)
tensor(3.5083, grad_fn=<NllLossBackward0>)
tensor(3.0102, grad_fn=<NllLossBackward0>)
tensor(1.9557, grad_fn=<NllLossBackward0>)
tensor(2.6162, grad_fn=<NllLossBackward0>)
tensor(2.06

tensor(3.4709, grad_fn=<NllLossBackward0>)
tensor(2.6202, grad_fn=<NllLossBackward0>)
tensor(3.0296, grad_fn=<NllLossBackward0>)
tensor(1.9217, grad_fn=<NllLossBackward0>)
tensor(2.6947, grad_fn=<NllLossBackward0>)
tensor(3.7281, grad_fn=<NllLossBackward0>)
tensor(2.8058, grad_fn=<NllLossBackward0>)
tensor(2.6719, grad_fn=<NllLossBackward0>)
tensor(2.7790, grad_fn=<NllLossBackward0>)
tensor(4.4992, grad_fn=<NllLossBackward0>)
tensor(2.6471, grad_fn=<NllLossBackward0>)
tensor(2.9538, grad_fn=<NllLossBackward0>)
tensor(3.0683, grad_fn=<NllLossBackward0>)
tensor(1.6144, grad_fn=<NllLossBackward0>)
tensor(2.8735, grad_fn=<NllLossBackward0>)
tensor(1.7185, grad_fn=<NllLossBackward0>)
tensor(3.4297, grad_fn=<NllLossBackward0>)
tensor(2.1417, grad_fn=<NllLossBackward0>)
tensor(2.5655, grad_fn=<NllLossBackward0>)
tensor(2.3436, grad_fn=<NllLossBackward0>)
tensor(2.0180, grad_fn=<NllLossBackward0>)
tensor(2.9298, grad_fn=<NllLossBackward0>)
tensor(2.0837, grad_fn=<NllLossBackward0>)
tensor(1.80

tensor(2.4278, grad_fn=<NllLossBackward0>)
tensor(3.3441, grad_fn=<NllLossBackward0>)
tensor(3.3294, grad_fn=<NllLossBackward0>)
tensor(3.3576, grad_fn=<NllLossBackward0>)
tensor(2.9984, grad_fn=<NllLossBackward0>)
tensor(2.8669, grad_fn=<NllLossBackward0>)
tensor(3.0004, grad_fn=<NllLossBackward0>)
tensor(1.8910, grad_fn=<NllLossBackward0>)
tensor(3.3266, grad_fn=<NllLossBackward0>)
tensor(3.5726, grad_fn=<NllLossBackward0>)
tensor(2.5547, grad_fn=<NllLossBackward0>)
tensor(2.4318, grad_fn=<NllLossBackward0>)
tensor(2.2538, grad_fn=<NllLossBackward0>)
tensor(2.2367, grad_fn=<NllLossBackward0>)
tensor(2.8674, grad_fn=<NllLossBackward0>)
tensor(3.2736, grad_fn=<NllLossBackward0>)
tensor(2.3049, grad_fn=<NllLossBackward0>)
tensor(2.5842, grad_fn=<NllLossBackward0>)
tensor(2.7437, grad_fn=<NllLossBackward0>)
tensor(2.5902, grad_fn=<NllLossBackward0>)
tensor(2.7675, grad_fn=<NllLossBackward0>)
tensor(2.0130, grad_fn=<NllLossBackward0>)
tensor(2.5113, grad_fn=<NllLossBackward0>)
tensor(3.34

tensor(3.1223, grad_fn=<NllLossBackward0>)
tensor(2.4770, grad_fn=<NllLossBackward0>)
tensor(2.8509, grad_fn=<NllLossBackward0>)
tensor(2.5843, grad_fn=<NllLossBackward0>)
tensor(2.2972, grad_fn=<NllLossBackward0>)
tensor(4.0362, grad_fn=<NllLossBackward0>)
tensor(3.4194, grad_fn=<NllLossBackward0>)
tensor(2.5370, grad_fn=<NllLossBackward0>)
tensor(3.4962, grad_fn=<NllLossBackward0>)
tensor(2.8093, grad_fn=<NllLossBackward0>)
tensor(3.7859, grad_fn=<NllLossBackward0>)
tensor(2.2679, grad_fn=<NllLossBackward0>)
tensor(2.1591, grad_fn=<NllLossBackward0>)
tensor(2.5548, grad_fn=<NllLossBackward0>)
tensor(1.8394, grad_fn=<NllLossBackward0>)
tensor(2.4978, grad_fn=<NllLossBackward0>)
tensor(2.3818, grad_fn=<NllLossBackward0>)
tensor(2.2515, grad_fn=<NllLossBackward0>)
tensor(2.7240, grad_fn=<NllLossBackward0>)
tensor(3.1512, grad_fn=<NllLossBackward0>)
tensor(2.4519, grad_fn=<NllLossBackward0>)
tensor(2.8797, grad_fn=<NllLossBackward0>)
tensor(3.6753, grad_fn=<NllLossBackward0>)
tensor(1.75

tensor(1.9096, grad_fn=<NllLossBackward0>)
tensor(2.2586, grad_fn=<NllLossBackward0>)
tensor(2.6037, grad_fn=<NllLossBackward0>)
tensor(2.3987, grad_fn=<NllLossBackward0>)
tensor(2.5733, grad_fn=<NllLossBackward0>)
tensor(3.9535, grad_fn=<NllLossBackward0>)
tensor(3.0775, grad_fn=<NllLossBackward0>)
tensor(1.7394, grad_fn=<NllLossBackward0>)
tensor(4.2564, grad_fn=<NllLossBackward0>)
tensor(2.4873, grad_fn=<NllLossBackward0>)
tensor(1.7960, grad_fn=<NllLossBackward0>)
tensor(2.9733, grad_fn=<NllLossBackward0>)
tensor(2.1698, grad_fn=<NllLossBackward0>)
tensor(2.3041, grad_fn=<NllLossBackward0>)
tensor(2.5510, grad_fn=<NllLossBackward0>)
tensor(3.3878, grad_fn=<NllLossBackward0>)
tensor(1.9321, grad_fn=<NllLossBackward0>)
tensor(2.3020, grad_fn=<NllLossBackward0>)
tensor(3.1260, grad_fn=<NllLossBackward0>)
tensor(2.7608, grad_fn=<NllLossBackward0>)
tensor(2.9523, grad_fn=<NllLossBackward0>)
tensor(2.2499, grad_fn=<NllLossBackward0>)
tensor(2.4928, grad_fn=<NllLossBackward0>)
tensor(4.94

tensor(4.0341, grad_fn=<NllLossBackward0>)
tensor(1.8361, grad_fn=<NllLossBackward0>)
tensor(3.8423, grad_fn=<NllLossBackward0>)
tensor(2.0562, grad_fn=<NllLossBackward0>)
tensor(3.9707, grad_fn=<NllLossBackward0>)
tensor(2.5978, grad_fn=<NllLossBackward0>)
tensor(2.8330, grad_fn=<NllLossBackward0>)
tensor(2.5922, grad_fn=<NllLossBackward0>)
tensor(4.0391, grad_fn=<NllLossBackward0>)
tensor(3.7518, grad_fn=<NllLossBackward0>)
tensor(4.0549, grad_fn=<NllLossBackward0>)
tensor(2.5523, grad_fn=<NllLossBackward0>)
tensor(2.4812, grad_fn=<NllLossBackward0>)
tensor(2.8304, grad_fn=<NllLossBackward0>)
tensor(2.0815, grad_fn=<NllLossBackward0>)
tensor(2.7739, grad_fn=<NllLossBackward0>)
tensor(3.4649, grad_fn=<NllLossBackward0>)
tensor(1.8931, grad_fn=<NllLossBackward0>)
tensor(1.9388, grad_fn=<NllLossBackward0>)
tensor(1.8664, grad_fn=<NllLossBackward0>)
tensor(3.4580, grad_fn=<NllLossBackward0>)
tensor(2.2902, grad_fn=<NllLossBackward0>)
tensor(3.6736, grad_fn=<NllLossBackward0>)
tensor(2.35

tensor(2.7136, grad_fn=<NllLossBackward0>)
tensor(3.2811, grad_fn=<NllLossBackward0>)
tensor(2.5139, grad_fn=<NllLossBackward0>)
tensor(2.4889, grad_fn=<NllLossBackward0>)
tensor(2.2894, grad_fn=<NllLossBackward0>)
tensor(2.8661, grad_fn=<NllLossBackward0>)
tensor(4.7984, grad_fn=<NllLossBackward0>)
tensor(1.9842, grad_fn=<NllLossBackward0>)
tensor(1.8841, grad_fn=<NllLossBackward0>)
tensor(2.5239, grad_fn=<NllLossBackward0>)
tensor(2.6254, grad_fn=<NllLossBackward0>)
tensor(2.2687, grad_fn=<NllLossBackward0>)
tensor(1.9927, grad_fn=<NllLossBackward0>)
tensor(2.0078, grad_fn=<NllLossBackward0>)
tensor(2.3144, grad_fn=<NllLossBackward0>)
tensor(2.0831, grad_fn=<NllLossBackward0>)
tensor(2.9147, grad_fn=<NllLossBackward0>)
tensor(2.4232, grad_fn=<NllLossBackward0>)
tensor(3.7394, grad_fn=<NllLossBackward0>)
tensor(2.6517, grad_fn=<NllLossBackward0>)
tensor(2.2468, grad_fn=<NllLossBackward0>)
tensor(2.6196, grad_fn=<NllLossBackward0>)
tensor(2.9762, grad_fn=<NllLossBackward0>)
tensor(2.51

tensor(2.5846, grad_fn=<NllLossBackward0>)
tensor(2.5927, grad_fn=<NllLossBackward0>)
tensor(2.2986, grad_fn=<NllLossBackward0>)
tensor(4.2813, grad_fn=<NllLossBackward0>)
tensor(2.3550, grad_fn=<NllLossBackward0>)
tensor(2.1079, grad_fn=<NllLossBackward0>)
tensor(2.8956, grad_fn=<NllLossBackward0>)
tensor(3.1344, grad_fn=<NllLossBackward0>)
tensor(2.5754, grad_fn=<NllLossBackward0>)
tensor(2.5977, grad_fn=<NllLossBackward0>)
tensor(1.7258, grad_fn=<NllLossBackward0>)
tensor(2.6407, grad_fn=<NllLossBackward0>)
tensor(4.0507, grad_fn=<NllLossBackward0>)
tensor(2.5652, grad_fn=<NllLossBackward0>)
tensor(2.3186, grad_fn=<NllLossBackward0>)
tensor(2.2384, grad_fn=<NllLossBackward0>)
tensor(4.0335, grad_fn=<NllLossBackward0>)
tensor(2.4895, grad_fn=<NllLossBackward0>)
tensor(2.5108, grad_fn=<NllLossBackward0>)
tensor(2.9552, grad_fn=<NllLossBackward0>)
tensor(3.5142, grad_fn=<NllLossBackward0>)
tensor(2.7967, grad_fn=<NllLossBackward0>)
tensor(1.9881, grad_fn=<NllLossBackward0>)
tensor(2.98

tensor(2.5270, grad_fn=<NllLossBackward0>)
tensor(1.6761, grad_fn=<NllLossBackward0>)
tensor(2.0752, grad_fn=<NllLossBackward0>)
tensor(2.3876, grad_fn=<NllLossBackward0>)
tensor(3.1368, grad_fn=<NllLossBackward0>)
tensor(2.1037, grad_fn=<NllLossBackward0>)
tensor(2.6084, grad_fn=<NllLossBackward0>)
tensor(2.1256, grad_fn=<NllLossBackward0>)
tensor(2.0829, grad_fn=<NllLossBackward0>)
tensor(3.7284, grad_fn=<NllLossBackward0>)
tensor(3.3051, grad_fn=<NllLossBackward0>)
tensor(2.2894, grad_fn=<NllLossBackward0>)
tensor(2.3933, grad_fn=<NllLossBackward0>)
tensor(2.9617, grad_fn=<NllLossBackward0>)
tensor(1.9248, grad_fn=<NllLossBackward0>)
tensor(2.3127, grad_fn=<NllLossBackward0>)
tensor(2.2988, grad_fn=<NllLossBackward0>)
tensor(2.5551, grad_fn=<NllLossBackward0>)
tensor(2.5561, grad_fn=<NllLossBackward0>)
tensor(2.0461, grad_fn=<NllLossBackward0>)
tensor(2.2972, grad_fn=<NllLossBackward0>)
tensor(2.4628, grad_fn=<NllLossBackward0>)
tensor(3.2212, grad_fn=<NllLossBackward0>)
tensor(2.36

tensor(4.4684, grad_fn=<NllLossBackward0>)
tensor(1.8725, grad_fn=<NllLossBackward0>)
tensor(1.9677, grad_fn=<NllLossBackward0>)
tensor(2.1389, grad_fn=<NllLossBackward0>)
tensor(2.8345, grad_fn=<NllLossBackward0>)
tensor(3.4703, grad_fn=<NllLossBackward0>)
tensor(2.5395, grad_fn=<NllLossBackward0>)
tensor(2.6815, grad_fn=<NllLossBackward0>)
tensor(3.3175, grad_fn=<NllLossBackward0>)
tensor(3.5033, grad_fn=<NllLossBackward0>)
tensor(1.9618, grad_fn=<NllLossBackward0>)
tensor(1.8710, grad_fn=<NllLossBackward0>)
tensor(2.2074, grad_fn=<NllLossBackward0>)
tensor(2.9786, grad_fn=<NllLossBackward0>)
tensor(2.3842, grad_fn=<NllLossBackward0>)
tensor(2.8622, grad_fn=<NllLossBackward0>)
tensor(2.4669, grad_fn=<NllLossBackward0>)
tensor(2.3599, grad_fn=<NllLossBackward0>)
tensor(2.0232, grad_fn=<NllLossBackward0>)
tensor(1.9117, grad_fn=<NllLossBackward0>)
tensor(3.2019, grad_fn=<NllLossBackward0>)
tensor(2.5783, grad_fn=<NllLossBackward0>)
tensor(2.4642, grad_fn=<NllLossBackward0>)
tensor(2.30

tensor(2.3172, grad_fn=<NllLossBackward0>)
tensor(2.2930, grad_fn=<NllLossBackward0>)
tensor(3.4578, grad_fn=<NllLossBackward0>)
tensor(2.5238, grad_fn=<NllLossBackward0>)
tensor(2.0047, grad_fn=<NllLossBackward0>)
tensor(3.6586, grad_fn=<NllLossBackward0>)
tensor(1.8082, grad_fn=<NllLossBackward0>)
tensor(2.3429, grad_fn=<NllLossBackward0>)
tensor(4.4589, grad_fn=<NllLossBackward0>)
tensor(2.9105, grad_fn=<NllLossBackward0>)
tensor(2.5270, grad_fn=<NllLossBackward0>)
tensor(2.4793, grad_fn=<NllLossBackward0>)
tensor(1.7635, grad_fn=<NllLossBackward0>)
tensor(1.9847, grad_fn=<NllLossBackward0>)
tensor(2.8069, grad_fn=<NllLossBackward0>)
tensor(2.1806, grad_fn=<NllLossBackward0>)
tensor(3.1000, grad_fn=<NllLossBackward0>)
tensor(2.6593, grad_fn=<NllLossBackward0>)
tensor(2.2533, grad_fn=<NllLossBackward0>)
tensor(4.2978, grad_fn=<NllLossBackward0>)
tensor(2.7073, grad_fn=<NllLossBackward0>)
tensor(3.1048, grad_fn=<NllLossBackward0>)
tensor(3.7231, grad_fn=<NllLossBackward0>)
tensor(1.77

tensor(3.0792, grad_fn=<NllLossBackward0>)
tensor(2.1539, grad_fn=<NllLossBackward0>)
tensor(2.2330, grad_fn=<NllLossBackward0>)
tensor(3.2842, grad_fn=<NllLossBackward0>)
tensor(2.5064, grad_fn=<NllLossBackward0>)
tensor(3.4402, grad_fn=<NllLossBackward0>)
tensor(2.7982, grad_fn=<NllLossBackward0>)
tensor(3.6294, grad_fn=<NllLossBackward0>)
tensor(2.3997, grad_fn=<NllLossBackward0>)
tensor(3.0486, grad_fn=<NllLossBackward0>)
tensor(2.4727, grad_fn=<NllLossBackward0>)
tensor(2.2005, grad_fn=<NllLossBackward0>)
tensor(3.2961, grad_fn=<NllLossBackward0>)
tensor(3.2636, grad_fn=<NllLossBackward0>)
tensor(1.9819, grad_fn=<NllLossBackward0>)
tensor(4.0098, grad_fn=<NllLossBackward0>)
tensor(2.9042, grad_fn=<NllLossBackward0>)
tensor(3.2035, grad_fn=<NllLossBackward0>)
tensor(2.7128, grad_fn=<NllLossBackward0>)
tensor(3.2502, grad_fn=<NllLossBackward0>)
tensor(3.0261, grad_fn=<NllLossBackward0>)
tensor(2.4439, grad_fn=<NllLossBackward0>)
tensor(2.2997, grad_fn=<NllLossBackward0>)
tensor(2.94

tensor(3.2670, grad_fn=<NllLossBackward0>)
tensor(3.8663, grad_fn=<NllLossBackward0>)
tensor(2.2199, grad_fn=<NllLossBackward0>)
tensor(1.9882, grad_fn=<NllLossBackward0>)
tensor(1.8491, grad_fn=<NllLossBackward0>)
tensor(2.7998, grad_fn=<NllLossBackward0>)
tensor(3.9011, grad_fn=<NllLossBackward0>)
tensor(2.6302, grad_fn=<NllLossBackward0>)
tensor(2.3828, grad_fn=<NllLossBackward0>)
tensor(2.8978, grad_fn=<NllLossBackward0>)
tensor(1.7758, grad_fn=<NllLossBackward0>)
tensor(2.4121, grad_fn=<NllLossBackward0>)
tensor(2.8130, grad_fn=<NllLossBackward0>)
tensor(1.9571, grad_fn=<NllLossBackward0>)
tensor(3.8798, grad_fn=<NllLossBackward0>)
tensor(2.6813, grad_fn=<NllLossBackward0>)
tensor(1.9568, grad_fn=<NllLossBackward0>)
tensor(3.4886, grad_fn=<NllLossBackward0>)
tensor(2.4424, grad_fn=<NllLossBackward0>)
tensor(2.3576, grad_fn=<NllLossBackward0>)
tensor(2.1631, grad_fn=<NllLossBackward0>)
tensor(1.9528, grad_fn=<NllLossBackward0>)
tensor(2.5635, grad_fn=<NllLossBackward0>)
tensor(2.29

tensor(2.2324, grad_fn=<NllLossBackward0>)
tensor(4.2984, grad_fn=<NllLossBackward0>)
tensor(1.7651, grad_fn=<NllLossBackward0>)
tensor(2.2556, grad_fn=<NllLossBackward0>)
tensor(2.1454, grad_fn=<NllLossBackward0>)
tensor(2.3562, grad_fn=<NllLossBackward0>)
tensor(2.6031, grad_fn=<NllLossBackward0>)
tensor(2.7539, grad_fn=<NllLossBackward0>)
tensor(2.2558, grad_fn=<NllLossBackward0>)
tensor(2.7292, grad_fn=<NllLossBackward0>)
tensor(4.5036, grad_fn=<NllLossBackward0>)
tensor(2.8566, grad_fn=<NllLossBackward0>)
tensor(2.3337, grad_fn=<NllLossBackward0>)
tensor(2.1334, grad_fn=<NllLossBackward0>)
tensor(2.9614, grad_fn=<NllLossBackward0>)
tensor(2.4955, grad_fn=<NllLossBackward0>)
tensor(2.8791, grad_fn=<NllLossBackward0>)
tensor(2.5268, grad_fn=<NllLossBackward0>)
tensor(2.5805, grad_fn=<NllLossBackward0>)
tensor(3.9946, grad_fn=<NllLossBackward0>)
tensor(2.9194, grad_fn=<NllLossBackward0>)
tensor(2.1838, grad_fn=<NllLossBackward0>)
tensor(3.2396, grad_fn=<NllLossBackward0>)
tensor(2.86

tensor(2.0541, grad_fn=<NllLossBackward0>)
tensor(2.1854, grad_fn=<NllLossBackward0>)
tensor(2.2177, grad_fn=<NllLossBackward0>)
tensor(2.8623, grad_fn=<NllLossBackward0>)
tensor(2.8824, grad_fn=<NllLossBackward0>)
tensor(2.0233, grad_fn=<NllLossBackward0>)
tensor(2.9486, grad_fn=<NllLossBackward0>)
tensor(3.9193, grad_fn=<NllLossBackward0>)
tensor(2.4634, grad_fn=<NllLossBackward0>)
tensor(1.6686, grad_fn=<NllLossBackward0>)
tensor(2.8000, grad_fn=<NllLossBackward0>)
tensor(2.3009, grad_fn=<NllLossBackward0>)
tensor(1.8397, grad_fn=<NllLossBackward0>)
tensor(3.0495, grad_fn=<NllLossBackward0>)
tensor(3.0253, grad_fn=<NllLossBackward0>)
tensor(2.7100, grad_fn=<NllLossBackward0>)
tensor(2.1663, grad_fn=<NllLossBackward0>)
tensor(2.4657, grad_fn=<NllLossBackward0>)
tensor(2.2376, grad_fn=<NllLossBackward0>)
tensor(2.4851, grad_fn=<NllLossBackward0>)
tensor(3.0909, grad_fn=<NllLossBackward0>)
tensor(3.9314, grad_fn=<NllLossBackward0>)
tensor(3.4201, grad_fn=<NllLossBackward0>)
tensor(3.60

tensor(1.7166, grad_fn=<NllLossBackward0>)
tensor(2.5684, grad_fn=<NllLossBackward0>)
tensor(2.1765, grad_fn=<NllLossBackward0>)
tensor(2.0125, grad_fn=<NllLossBackward0>)
tensor(2.9638, grad_fn=<NllLossBackward0>)
tensor(2.8977, grad_fn=<NllLossBackward0>)
tensor(2.4726, grad_fn=<NllLossBackward0>)
tensor(2.4668, grad_fn=<NllLossBackward0>)
tensor(3.1522, grad_fn=<NllLossBackward0>)
tensor(2.3481, grad_fn=<NllLossBackward0>)
tensor(2.4571, grad_fn=<NllLossBackward0>)
tensor(2.3906, grad_fn=<NllLossBackward0>)
tensor(3.6135, grad_fn=<NllLossBackward0>)
tensor(1.9151, grad_fn=<NllLossBackward0>)
tensor(2.4348, grad_fn=<NllLossBackward0>)
tensor(2.9707, grad_fn=<NllLossBackward0>)
tensor(2.2628, grad_fn=<NllLossBackward0>)
tensor(2.8271, grad_fn=<NllLossBackward0>)
tensor(3.3222, grad_fn=<NllLossBackward0>)
tensor(3.0222, grad_fn=<NllLossBackward0>)
tensor(2.4582, grad_fn=<NllLossBackward0>)
tensor(3.2631, grad_fn=<NllLossBackward0>)
tensor(2.1195, grad_fn=<NllLossBackward0>)
tensor(3.54

tensor(2.4892, grad_fn=<NllLossBackward0>)
tensor(2.8172, grad_fn=<NllLossBackward0>)
tensor(2.6282, grad_fn=<NllLossBackward0>)
tensor(2.9397, grad_fn=<NllLossBackward0>)
tensor(3.6384, grad_fn=<NllLossBackward0>)
tensor(2.9894, grad_fn=<NllLossBackward0>)
tensor(4.5814, grad_fn=<NllLossBackward0>)
tensor(1.7517, grad_fn=<NllLossBackward0>)
tensor(2.4950, grad_fn=<NllLossBackward0>)
tensor(2.4059, grad_fn=<NllLossBackward0>)
tensor(3.1671, grad_fn=<NllLossBackward0>)
tensor(2.0416, grad_fn=<NllLossBackward0>)
tensor(2.4239, grad_fn=<NllLossBackward0>)
tensor(2.2743, grad_fn=<NllLossBackward0>)
tensor(2.8452, grad_fn=<NllLossBackward0>)
tensor(1.9919, grad_fn=<NllLossBackward0>)
tensor(2.7329, grad_fn=<NllLossBackward0>)
tensor(2.7705, grad_fn=<NllLossBackward0>)
tensor(2.1223, grad_fn=<NllLossBackward0>)
tensor(3.4819, grad_fn=<NllLossBackward0>)
tensor(2.0310, grad_fn=<NllLossBackward0>)
tensor(2.3154, grad_fn=<NllLossBackward0>)
tensor(4.9431, grad_fn=<NllLossBackward0>)
tensor(2.05

tensor(2.8150, grad_fn=<NllLossBackward0>)
tensor(2.2846, grad_fn=<NllLossBackward0>)
tensor(1.7391, grad_fn=<NllLossBackward0>)
tensor(2.0146, grad_fn=<NllLossBackward0>)
tensor(3.1339, grad_fn=<NllLossBackward0>)
tensor(2.4322, grad_fn=<NllLossBackward0>)
tensor(3.1167, grad_fn=<NllLossBackward0>)
tensor(2.5771, grad_fn=<NllLossBackward0>)
tensor(3.3225, grad_fn=<NllLossBackward0>)
tensor(3.3894, grad_fn=<NllLossBackward0>)
tensor(2.7871, grad_fn=<NllLossBackward0>)
tensor(3.6152, grad_fn=<NllLossBackward0>)
tensor(2.4086, grad_fn=<NllLossBackward0>)
tensor(2.2424, grad_fn=<NllLossBackward0>)
tensor(2.8316, grad_fn=<NllLossBackward0>)
tensor(2.1058, grad_fn=<NllLossBackward0>)
tensor(3.3192, grad_fn=<NllLossBackward0>)
tensor(2.5703, grad_fn=<NllLossBackward0>)
tensor(3.2244, grad_fn=<NllLossBackward0>)
tensor(2.2577, grad_fn=<NllLossBackward0>)
tensor(2.0609, grad_fn=<NllLossBackward0>)
tensor(2.6219, grad_fn=<NllLossBackward0>)
tensor(2.9417, grad_fn=<NllLossBackward0>)
tensor(2.77

tensor(1.8308, grad_fn=<NllLossBackward0>)
tensor(2.9194, grad_fn=<NllLossBackward0>)
tensor(4.0851, grad_fn=<NllLossBackward0>)
tensor(2.3793, grad_fn=<NllLossBackward0>)
tensor(2.4258, grad_fn=<NllLossBackward0>)
tensor(2.8600, grad_fn=<NllLossBackward0>)
tensor(1.8954, grad_fn=<NllLossBackward0>)
tensor(2.8226, grad_fn=<NllLossBackward0>)
tensor(2.2397, grad_fn=<NllLossBackward0>)
tensor(2.9228, grad_fn=<NllLossBackward0>)
tensor(2.5569, grad_fn=<NllLossBackward0>)
tensor(2.5835, grad_fn=<NllLossBackward0>)
tensor(2.9066, grad_fn=<NllLossBackward0>)
tensor(2.9673, grad_fn=<NllLossBackward0>)
tensor(2.6026, grad_fn=<NllLossBackward0>)
tensor(2.7769, grad_fn=<NllLossBackward0>)
tensor(2.2264, grad_fn=<NllLossBackward0>)
tensor(2.6400, grad_fn=<NllLossBackward0>)
tensor(3.5888, grad_fn=<NllLossBackward0>)
tensor(2.0458, grad_fn=<NllLossBackward0>)
tensor(2.3662, grad_fn=<NllLossBackward0>)
tensor(4.3923, grad_fn=<NllLossBackward0>)
tensor(2.2329, grad_fn=<NllLossBackward0>)
tensor(1.95

tensor(2.2996, grad_fn=<NllLossBackward0>)
tensor(2.8028, grad_fn=<NllLossBackward0>)
tensor(2.2346, grad_fn=<NllLossBackward0>)
tensor(3.7543, grad_fn=<NllLossBackward0>)
tensor(3.6459, grad_fn=<NllLossBackward0>)
tensor(2.2330, grad_fn=<NllLossBackward0>)
tensor(2.8411, grad_fn=<NllLossBackward0>)
tensor(2.1277, grad_fn=<NllLossBackward0>)
tensor(1.9115, grad_fn=<NllLossBackward0>)
tensor(2.2134, grad_fn=<NllLossBackward0>)
tensor(3.5134, grad_fn=<NllLossBackward0>)
tensor(2.9690, grad_fn=<NllLossBackward0>)
tensor(3.6328, grad_fn=<NllLossBackward0>)
tensor(1.7029, grad_fn=<NllLossBackward0>)
tensor(3.0492, grad_fn=<NllLossBackward0>)
tensor(2.1604, grad_fn=<NllLossBackward0>)
tensor(2.9750, grad_fn=<NllLossBackward0>)
tensor(2.5916, grad_fn=<NllLossBackward0>)
tensor(2.2475, grad_fn=<NllLossBackward0>)
tensor(3.2709, grad_fn=<NllLossBackward0>)
tensor(1.8493, grad_fn=<NllLossBackward0>)
tensor(1.9585, grad_fn=<NllLossBackward0>)
tensor(2.8638, grad_fn=<NllLossBackward0>)
tensor(2.22

tensor(2.5733, grad_fn=<NllLossBackward0>)
tensor(2.4516, grad_fn=<NllLossBackward0>)
tensor(2.2420, grad_fn=<NllLossBackward0>)
tensor(1.8682, grad_fn=<NllLossBackward0>)
tensor(3.1515, grad_fn=<NllLossBackward0>)
tensor(2.9241, grad_fn=<NllLossBackward0>)
tensor(2.5163, grad_fn=<NllLossBackward0>)
tensor(2.7615, grad_fn=<NllLossBackward0>)
tensor(3.3725, grad_fn=<NllLossBackward0>)
tensor(2.5923, grad_fn=<NllLossBackward0>)
tensor(4.8221, grad_fn=<NllLossBackward0>)
tensor(1.9567, grad_fn=<NllLossBackward0>)
tensor(2.8830, grad_fn=<NllLossBackward0>)
tensor(2.1884, grad_fn=<NllLossBackward0>)
tensor(3.4555, grad_fn=<NllLossBackward0>)
tensor(3.3412, grad_fn=<NllLossBackward0>)
tensor(2.3621, grad_fn=<NllLossBackward0>)
tensor(2.0039, grad_fn=<NllLossBackward0>)
tensor(2.5097, grad_fn=<NllLossBackward0>)
tensor(2.9043, grad_fn=<NllLossBackward0>)
tensor(2.7242, grad_fn=<NllLossBackward0>)
tensor(2.4477, grad_fn=<NllLossBackward0>)
tensor(3.5443, grad_fn=<NllLossBackward0>)
tensor(2.61

tensor(2.3845, grad_fn=<NllLossBackward0>)
tensor(1.9293, grad_fn=<NllLossBackward0>)
tensor(2.7181, grad_fn=<NllLossBackward0>)
tensor(2.3361, grad_fn=<NllLossBackward0>)
tensor(1.9014, grad_fn=<NllLossBackward0>)
tensor(4.9225, grad_fn=<NllLossBackward0>)
tensor(2.9992, grad_fn=<NllLossBackward0>)
tensor(3.3084, grad_fn=<NllLossBackward0>)
tensor(2.5041, grad_fn=<NllLossBackward0>)
tensor(3.4549, grad_fn=<NllLossBackward0>)
tensor(2.3639, grad_fn=<NllLossBackward0>)
tensor(2.1599, grad_fn=<NllLossBackward0>)
tensor(2.2074, grad_fn=<NllLossBackward0>)
tensor(1.7994, grad_fn=<NllLossBackward0>)
tensor(1.9769, grad_fn=<NllLossBackward0>)
tensor(3.2021, grad_fn=<NllLossBackward0>)
tensor(2.8907, grad_fn=<NllLossBackward0>)
tensor(2.5074, grad_fn=<NllLossBackward0>)
tensor(2.7898, grad_fn=<NllLossBackward0>)
tensor(4.0061, grad_fn=<NllLossBackward0>)
tensor(1.8251, grad_fn=<NllLossBackward0>)
tensor(2.4145, grad_fn=<NllLossBackward0>)
tensor(3.1549, grad_fn=<NllLossBackward0>)
tensor(3.60

tensor(2.1537, grad_fn=<NllLossBackward0>)
tensor(2.9116, grad_fn=<NllLossBackward0>)
tensor(2.0793, grad_fn=<NllLossBackward0>)
tensor(2.9081, grad_fn=<NllLossBackward0>)
tensor(2.5411, grad_fn=<NllLossBackward0>)
tensor(2.9451, grad_fn=<NllLossBackward0>)
tensor(2.4241, grad_fn=<NllLossBackward0>)
tensor(2.4949, grad_fn=<NllLossBackward0>)
tensor(2.0905, grad_fn=<NllLossBackward0>)
tensor(2.0929, grad_fn=<NllLossBackward0>)
tensor(2.6848, grad_fn=<NllLossBackward0>)
tensor(1.9434, grad_fn=<NllLossBackward0>)
tensor(2.4586, grad_fn=<NllLossBackward0>)
tensor(3.1167, grad_fn=<NllLossBackward0>)
tensor(2.5944, grad_fn=<NllLossBackward0>)
tensor(2.8000, grad_fn=<NllLossBackward0>)
tensor(2.9277, grad_fn=<NllLossBackward0>)
tensor(3.3255, grad_fn=<NllLossBackward0>)
tensor(2.3837, grad_fn=<NllLossBackward0>)
tensor(3.4544, grad_fn=<NllLossBackward0>)
tensor(3.4731, grad_fn=<NllLossBackward0>)
tensor(3.2926, grad_fn=<NllLossBackward0>)
tensor(1.9469, grad_fn=<NllLossBackward0>)
tensor(3.79

tensor(2.2174, grad_fn=<NllLossBackward0>)
tensor(2.1181, grad_fn=<NllLossBackward0>)
tensor(2.9331, grad_fn=<NllLossBackward0>)
tensor(2.5859, grad_fn=<NllLossBackward0>)
tensor(2.0192, grad_fn=<NllLossBackward0>)
tensor(2.5541, grad_fn=<NllLossBackward0>)
tensor(3.3129, grad_fn=<NllLossBackward0>)
tensor(2.1924, grad_fn=<NllLossBackward0>)
tensor(2.1877, grad_fn=<NllLossBackward0>)
tensor(2.3553, grad_fn=<NllLossBackward0>)
tensor(2.7943, grad_fn=<NllLossBackward0>)
tensor(2.8106, grad_fn=<NllLossBackward0>)
tensor(1.8453, grad_fn=<NllLossBackward0>)
tensor(2.1204, grad_fn=<NllLossBackward0>)
tensor(2.5117, grad_fn=<NllLossBackward0>)
tensor(3.6350, grad_fn=<NllLossBackward0>)
tensor(2.9297, grad_fn=<NllLossBackward0>)
tensor(5.5473, grad_fn=<NllLossBackward0>)
tensor(2.7306, grad_fn=<NllLossBackward0>)
tensor(2.6758, grad_fn=<NllLossBackward0>)
tensor(2.8162, grad_fn=<NllLossBackward0>)
tensor(2.5543, grad_fn=<NllLossBackward0>)
tensor(2.1967, grad_fn=<NllLossBackward0>)
tensor(2.86

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 1024])