In [1]:
import cv2 as cv
import numpy as np
import random
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets,transforms
import torch.nn.functional as F
from torchvision import models
import time
import os
%matplotlib inline
from torch.utils.checkpoint import checkpoint_sequential
import matplotlib.pyplot as plt
from PIL import Image, ImageFilter, ImageDraw, ImageStat

In [2]:
dataset_dir = './dataset/'
char_lst = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
province_lst = ['赣', '辽', '吉', '黑', '蒙', '冀', '晋', '陕', '宁', '鲁', '皖', '苏', '浙', '渝', '沪', '津', '京', '川', '贵', '鄂', '桂', '闽', '青', '琼', '湘', '新', '豫', '粤', '云', '藏']
file_names = os.listdir(dataset_dir)
print(len(file_names))

img_set = []
protag_set = []
tag_set = []

for file_name in file_names:
    with Image.open(dataset_dir + file_name).convert('L').resize((440, 140),Image.BICUBIC) as im:
        img_set.append(im)
#         print(im.size)
        raw_tag = file_name[:7]
        protag = [province_lst.index(raw_tag[0])]
        tag = [char_lst.index(raw_tag[1])]
        
        for ch in raw_tag[2:]:
            tag.append(char_lst.index(ch))
        
        protag_set.append(protag)
        tag_set.append(tag)

protag_set = np.array(protag_set)
tag_set = np.array(tag_set)
print(protag_set.shape)
print(tag_set.shape)

388
(388, 1)
(388, 6)


In [3]:
args  = {
    'embedding_feature': 50,
    'vocab_size': 66,
    'num_gru_layer': 2,
    'gru_hidden_feature': 7,
    'gpu': None,
    'epochs': 250,
    'lr': 0.002,
    'batch_size': 32,
    'workers': 4,
    'seed': time.time(),
    'print_freq': 2,
    'pre':None,
#     'pre': './best_model/CPRNet_model_best.pth.tar',
    'test_freq': 5
}

In [4]:
#utils.py
def save_net(fname, net):
    with h5py.File(fname, 'w') as h5f:
        for k, v in net.state_dict().items():
            h5f.create_dataset(k, data=v.cpu().numpy())
def load_net(fname, net):
    with h5py.File(fname, 'r') as h5f:
        for k, v in net.state_dict().items():        
            param = torch.from_numpy(np.asarray(h5f[k]))         
            v.copy_(param)
            
def save_checkpoint(state, is_best, task_id, filename='checkpoint.pth.tar'):
    if is_best:
        torch.save(state, './best_model/' + task_id + '_model_best.pth.tar')
        
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 100 epochs"""
    lr = args['lr'] * (0.1 ** (epoch // 100))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count  

In [5]:
class list_dataset():
    def __init__(self, root, shape=None, shuffle=True, transform=None, train=False, batch_size=1, num_workers=4):
        
        random.shuffle(root)
        
        self.nSamples = len(root)
        self.transform = transform
        self.line = root
        self.train = train
        self.shape = shape
        self.batch_size = batch_size
        self.num_workers = num_workers
        
    def __len__(self):
        return self.nSamples
    def __getitem__(self, index):
        assert index <= len(self), 'index range error'
        
        img = img_set[index]
#         print(img.size)
        img = self.transform(img)
        protag = protag_set[index]
        tag = tag_set[index]
#         print(img.size())
        
        return img, protag, tag

In [6]:
# model.py
class CPR_Net(nn.Module):
    def __init__(self):
        super(CPR_Net, self).__init__()
        self.gru = nn.GRU(440, args['gru_hidden_feature'], args['num_gru_layer'], batch_first=True, bidirectional=False)
        self.pro_fc = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(province_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(province_lst), len(province_lst)),
        )
        self.lic_fc1 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        self.lic_fc2 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        self.lic_fc3 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        self.lic_fc4 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        self.lic_fc5 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        self.lic_fc6 = nn.Sequential(
            nn.BatchNorm1d(140),
            nn.Linear(140, len(char_lst)),
            nn.ReLU(inplace=True),
            nn.Linear(len(char_lst), len(char_lst)),
        )
        
    def forward(self, img):
        img = img.squeeze(1)
        gru_feature, _ = self.gru(img)
        raw_province_feature = gru_feature[:,:,0]
        raw_license_feature = gru_feature[:,:,1:]
        province_feature = F.softmax(self.pro_fc(raw_province_feature), dim=1)
        
        license_feature = F.softmax(self.lic_fc1(raw_license_feature[:,:,0]), dim=1).unsqueeze(1)
        license_feature = torch.cat((license_feature, F.softmax(self.lic_fc2(raw_license_feature[:,:,1]), dim=1).unsqueeze(1)), 1)
        license_feature = torch.cat((license_feature, F.softmax(self.lic_fc3(raw_license_feature[:,:,2]), dim=1).unsqueeze(1)), 1)
        license_feature = torch.cat((license_feature, F.softmax(self.lic_fc4(raw_license_feature[:,:,3]), dim=1).unsqueeze(1)), 1)
        license_feature = torch.cat((license_feature, F.softmax(self.lic_fc5(raw_license_feature[:,:,4]), dim=1).unsqueeze(1)), 1)
        license_feature = torch.cat((license_feature, F.softmax(self.lic_fc6(raw_license_feature[:,:,5]), dim=1).unsqueeze(1)), 1)
        
        return province_feature, license_feature

In [7]:
model = CPR_Net()
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_func_protag = torch.nn.CrossEntropyLoss()
loss_func_tags = [torch.nn.CrossEntropyLoss(), torch.nn.CrossEntropyLoss(),torch.nn.CrossEntropyLoss(),torch.nn.CrossEntropyLoss(),torch.nn.CrossEntropyLoss(),torch.nn.CrossEntropyLoss()]

In [8]:
len(list(model.parameters()))

50

In [9]:
def train(train_list, model, optimizer, epoch):
    losses = AverageMeter()
    batch_time = AverageMeter()
    
    if epoch != 0:
        adjust_learning_rate(optimizer, epoch)
    
    train_loader = torch.utils.data.DataLoader(
        list_dataset(train_list,
                        shuffle=True,
                        transform=transforms.Compose([
                        transforms.ToTensor(),
                        ]), 
                        train=True, 
                        batch_size=1,
                        num_workers=args['workers']),
        batch_size=args['batch_size'],
        drop_last=True)
    
    model.train()
    end = time.time()
    

    for i, (img, protag, tag) in enumerate(train_loader):
        img = img.type(torch.FloatTensor)
        protag = protag.type(torch.LongTensor).squeeze()
        tag = tag.type(torch.LongTensor)
        
        province_feature, license_feature = model(img)
        loss_protag = loss_func_protag(province_feature, protag)
        for kk in range(6):
            if kk == 0:
                loss_tag = loss_func_tags[kk](license_feature[:, kk], tag[:, kk])
            else:
                loss_tag += loss_func_tags[kk](license_feature[:, kk], tag[:, kk])
        
        loss = loss_protag + loss_tag
        
        losses.update(loss.item(), img.size(0))
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()   
        
        batch_time.update(time.time() - end)
        end = time.time()
            
        if i % args['print_freq'] == 0:
            print(loss_protag)
            print(loss_tag)
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  .format(epoch, i, len(train_loader), batch_time=batch_time, loss=losses))

In [10]:
if args['pre']:
    if os.path.isfile(args['pre']):
        print("=> loading checkpoint '{}'".format(args['pre']))
        checkpoint = torch.load(args['pre'])
        args['start_epoch'] = checkpoint['epoch']
        best_acc = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
                .format(args['pre'], checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args['pre']))

In [11]:
train_lst = [i for i in range(len(file_names))]

for epoch in range(args["epochs"]):
    train(train_lst, model, optimizer, epoch)
    if (epoch+1) % 10 == 0:
        save_checkpoint({
        'epoch': epoch + 1,
        'arch': args['pre'],
        'state_dict': model.state_dict(),
        'best_prec1': 1,
        'optimizer' : optimizer.state_dict(),
    }, 1,str('CPRNet'))
    print('Model saved!')

tensor(3.4010, grad_fn=<NllLossBackward>)
tensor(21.5000, grad_fn=<AddBackward0>)
Epoch: [0][0/12]	Time 0.157 (0.157)	Loss 24.9009 (24.9009)	
tensor(3.4001, grad_fn=<NllLossBackward>)
tensor(21.4865, grad_fn=<AddBackward0>)
Epoch: [0][2/12]	Time 0.088 (0.116)	Loss 24.8865 (24.8937)	
tensor(3.4002, grad_fn=<NllLossBackward>)
tensor(21.4792, grad_fn=<AddBackward0>)
Epoch: [0][4/12]	Time 0.100 (0.111)	Loss 24.8794 (24.8891)	
tensor(3.3992, grad_fn=<NllLossBackward>)
tensor(21.4560, grad_fn=<AddBackward0>)
Epoch: [0][6/12]	Time 0.099 (0.108)	Loss 24.8553 (24.8823)	
tensor(3.3990, grad_fn=<NllLossBackward>)
tensor(21.4383, grad_fn=<AddBackward0>)
Epoch: [0][8/12]	Time 0.188 (0.120)	Loss 24.8373 (24.8763)	
tensor(3.4015, grad_fn=<NllLossBackward>)
tensor(21.4100, grad_fn=<AddBackward0>)
Epoch: [0][10/12]	Time 0.138 (0.125)	Loss 24.8115 (24.8642)	
Model saved!
tensor(3.3937, grad_fn=<NllLossBackward>)
tensor(21.3634, grad_fn=<AddBackward0>)
Epoch: [1][0/12]	Time 0.151 (0.151)	Loss 24.7571 (24

tensor(3.2613, grad_fn=<NllLossBackward>)
tensor(19.8705, grad_fn=<AddBackward0>)
Epoch: [9][6/12]	Time 0.080 (0.092)	Loss 23.1318 (23.3539)	
tensor(3.3222, grad_fn=<NllLossBackward>)
tensor(19.8821, grad_fn=<AddBackward0>)
Epoch: [9][8/12]	Time 0.086 (0.089)	Loss 23.2043 (23.3600)	
tensor(3.3421, grad_fn=<NllLossBackward>)
tensor(19.9368, grad_fn=<AddBackward0>)
Epoch: [9][10/12]	Time 0.145 (0.095)	Loss 23.2788 (23.3074)	
Model saved!
tensor(3.2400, grad_fn=<NllLossBackward>)
tensor(19.7351, grad_fn=<AddBackward0>)
Epoch: [10][0/12]	Time 0.113 (0.113)	Loss 22.9751 (22.9751)	
tensor(3.2599, grad_fn=<NllLossBackward>)
tensor(20.0268, grad_fn=<AddBackward0>)
Epoch: [10][2/12]	Time 0.104 (0.113)	Loss 23.2868 (23.2537)	
tensor(3.2728, grad_fn=<NllLossBackward>)
tensor(20.2315, grad_fn=<AddBackward0>)
Epoch: [10][4/12]	Time 0.073 (0.100)	Loss 23.5044 (23.2956)	
tensor(3.2568, grad_fn=<NllLossBackward>)
tensor(19.7789, grad_fn=<AddBackward0>)
Epoch: [10][6/12]	Time 0.073 (0.093)	Loss 23.0357

tensor(3.1314, grad_fn=<NllLossBackward>)
tensor(19.0783, grad_fn=<AddBackward0>)
Epoch: [19][0/12]	Time 0.082 (0.082)	Loss 22.2097 (22.2097)	
tensor(3.2050, grad_fn=<NllLossBackward>)
tensor(19.5104, grad_fn=<AddBackward0>)
Epoch: [19][2/12]	Time 0.101 (0.096)	Loss 22.7154 (22.6166)	
tensor(3.1468, grad_fn=<NllLossBackward>)
tensor(19.5476, grad_fn=<AddBackward0>)
Epoch: [19][4/12]	Time 0.106 (0.102)	Loss 22.6944 (22.6676)	
tensor(3.1695, grad_fn=<NllLossBackward>)
tensor(19.2943, grad_fn=<AddBackward0>)
Epoch: [19][6/12]	Time 0.112 (0.104)	Loss 22.4638 (22.6156)	
tensor(3.2258, grad_fn=<NllLossBackward>)
tensor(19.4245, grad_fn=<AddBackward0>)
Epoch: [19][8/12]	Time 0.088 (0.101)	Loss 22.6503 (22.6727)	
tensor(3.2802, grad_fn=<NllLossBackward>)
tensor(19.4846, grad_fn=<AddBackward0>)
Epoch: [19][10/12]	Time 0.100 (0.102)	Loss 22.7648 (22.6671)	
Model saved!
tensor(3.1179, grad_fn=<NllLossBackward>)
tensor(18.9700, grad_fn=<AddBackward0>)
Epoch: [20][0/12]	Time 0.073 (0.073)	Loss 22.0

tensor(3.1000, grad_fn=<NllLossBackward>)
tensor(18.8945, grad_fn=<AddBackward0>)
Epoch: [28][6/12]	Time 0.086 (0.088)	Loss 21.9945 (22.2441)	
tensor(3.1408, grad_fn=<NllLossBackward>)
tensor(19.2128, grad_fn=<AddBackward0>)
Epoch: [28][8/12]	Time 0.078 (0.088)	Loss 22.3536 (22.3039)	
tensor(3.1758, grad_fn=<NllLossBackward>)
tensor(19.2210, grad_fn=<AddBackward0>)
Epoch: [28][10/12]	Time 0.088 (0.089)	Loss 22.3968 (22.2872)	
Model saved!
tensor(3.0882, grad_fn=<NllLossBackward>)
tensor(18.7114, grad_fn=<AddBackward0>)
Epoch: [29][0/12]	Time 0.078 (0.078)	Loss 21.7997 (21.7997)	
tensor(3.0682, grad_fn=<NllLossBackward>)
tensor(19.0751, grad_fn=<AddBackward0>)
Epoch: [29][2/12]	Time 0.102 (0.087)	Loss 22.1433 (22.1418)	
tensor(3.0981, grad_fn=<NllLossBackward>)
tensor(19.3085, grad_fn=<AddBackward0>)
Epoch: [29][4/12]	Time 0.083 (0.086)	Loss 22.4065 (22.2353)	
tensor(3.1142, grad_fn=<NllLossBackward>)
tensor(19.0401, grad_fn=<AddBackward0>)
Epoch: [29][6/12]	Time 0.127 (0.097)	Loss 22.1

tensor(3.0209, grad_fn=<NllLossBackward>)
tensor(18.8587, grad_fn=<AddBackward0>)
Epoch: [38][2/12]	Time 0.078 (0.080)	Loss 21.8796 (21.8630)	
tensor(3.0098, grad_fn=<NllLossBackward>)
tensor(18.9212, grad_fn=<AddBackward0>)
Epoch: [38][4/12]	Time 0.073 (0.079)	Loss 21.9310 (21.9275)	
tensor(3.0725, grad_fn=<NllLossBackward>)
tensor(18.6132, grad_fn=<AddBackward0>)
Epoch: [38][6/12]	Time 0.081 (0.080)	Loss 21.6857 (21.8931)	
tensor(3.0653, grad_fn=<NllLossBackward>)
tensor(18.9576, grad_fn=<AddBackward0>)
Epoch: [38][8/12]	Time 0.083 (0.080)	Loss 22.0229 (21.9555)	
tensor(3.0657, grad_fn=<NllLossBackward>)
tensor(18.8445, grad_fn=<AddBackward0>)
Epoch: [38][10/12]	Time 0.077 (0.080)	Loss 21.9102 (21.9185)	
Model saved!
tensor(3.0523, grad_fn=<NllLossBackward>)
tensor(18.4016, grad_fn=<AddBackward0>)
Epoch: [39][0/12]	Time 0.080 (0.080)	Loss 21.4539 (21.4539)	
tensor(3.0197, grad_fn=<NllLossBackward>)
tensor(18.8590, grad_fn=<AddBackward0>)
Epoch: [39][2/12]	Time 0.085 (0.086)	Loss 21.8

tensor(3.0531, grad_fn=<NllLossBackward>)
tensor(18.7756, grad_fn=<AddBackward0>)
Epoch: [47][10/12]	Time 0.128 (0.091)	Loss 21.8286 (21.8158)	
Model saved!
tensor(3.0706, grad_fn=<NllLossBackward>)
tensor(18.3335, grad_fn=<AddBackward0>)
Epoch: [48][0/12]	Time 0.137 (0.137)	Loss 21.4042 (21.4042)	
tensor(2.9715, grad_fn=<NllLossBackward>)
tensor(18.7641, grad_fn=<AddBackward0>)
Epoch: [48][2/12]	Time 0.123 (0.131)	Loss 21.7356 (21.7457)	
tensor(3.0011, grad_fn=<NllLossBackward>)
tensor(18.8441, grad_fn=<AddBackward0>)
Epoch: [48][4/12]	Time 0.106 (0.123)	Loss 21.8452 (21.8208)	
tensor(3.0603, grad_fn=<NllLossBackward>)
tensor(18.5301, grad_fn=<AddBackward0>)
Epoch: [48][6/12]	Time 0.092 (0.116)	Loss 21.5904 (21.7788)	
tensor(3.0383, grad_fn=<NllLossBackward>)
tensor(18.8623, grad_fn=<AddBackward0>)
Epoch: [48][8/12]	Time 0.092 (0.111)	Loss 21.9005 (21.8448)	
tensor(3.0244, grad_fn=<NllLossBackward>)
tensor(18.7515, grad_fn=<AddBackward0>)
Epoch: [48][10/12]	Time 0.084 (0.107)	Loss 21.

tensor(3.0471, grad_fn=<NllLossBackward>)
tensor(18.4151, grad_fn=<AddBackward0>)
Epoch: [57][6/12]	Time 0.086 (0.089)	Loss 21.4622 (21.6339)	
tensor(3.0243, grad_fn=<NllLossBackward>)
tensor(18.7342, grad_fn=<AddBackward0>)
Epoch: [57][8/12]	Time 0.091 (0.088)	Loss 21.7586 (21.6921)	
tensor(3.0192, grad_fn=<NllLossBackward>)
tensor(18.5697, grad_fn=<AddBackward0>)
Epoch: [57][10/12]	Time 0.088 (0.089)	Loss 21.5889 (21.6474)	
Model saved!
tensor(3.0475, grad_fn=<NllLossBackward>)
tensor(18.1976, grad_fn=<AddBackward0>)
Epoch: [58][0/12]	Time 0.082 (0.082)	Loss 21.2451 (21.2451)	
tensor(2.9572, grad_fn=<NllLossBackward>)
tensor(18.7171, grad_fn=<AddBackward0>)
Epoch: [58][2/12]	Time 0.088 (0.086)	Loss 21.6744 (21.6286)	
tensor(2.9894, grad_fn=<NllLossBackward>)
tensor(18.6091, grad_fn=<AddBackward0>)
Epoch: [58][4/12]	Time 0.099 (0.089)	Loss 21.5986 (21.6638)	
tensor(3.0469, grad_fn=<NllLossBackward>)
tensor(18.4109, grad_fn=<AddBackward0>)
Epoch: [58][6/12]	Time 0.091 (0.094)	Loss 21.4

tensor(2.9565, grad_fn=<NllLossBackward>)
tensor(18.6261, grad_fn=<AddBackward0>)
Epoch: [67][2/12]	Time 0.086 (0.084)	Loss 21.5826 (21.5365)	
tensor(2.9888, grad_fn=<NllLossBackward>)
tensor(18.5635, grad_fn=<AddBackward0>)
Epoch: [67][4/12]	Time 0.109 (0.088)	Loss 21.5523 (21.5910)	
tensor(3.0460, grad_fn=<NllLossBackward>)
tensor(18.3759, grad_fn=<AddBackward0>)
Epoch: [67][6/12]	Time 0.088 (0.089)	Loss 21.4219 (21.5637)	
tensor(3.0217, grad_fn=<NllLossBackward>)
tensor(18.6587, grad_fn=<AddBackward0>)
Epoch: [67][8/12]	Time 0.079 (0.087)	Loss 21.6804 (21.6269)	
tensor(3.0180, grad_fn=<NllLossBackward>)
tensor(18.5389, grad_fn=<AddBackward0>)
Epoch: [67][10/12]	Time 0.096 (0.088)	Loss 21.5569 (21.5806)	
Model saved!
tensor(3.0467, grad_fn=<NllLossBackward>)
tensor(18.1297, grad_fn=<AddBackward0>)
Epoch: [68][0/12]	Time 0.080 (0.080)	Loss 21.1764 (21.1764)	
tensor(2.9579, grad_fn=<NllLossBackward>)
tensor(18.6250, grad_fn=<AddBackward0>)
Epoch: [68][2/12]	Time 0.074 (0.081)	Loss 21.5

tensor(3.0172, grad_fn=<NllLossBackward>)
tensor(18.5188, grad_fn=<AddBackward0>)
Epoch: [76][10/12]	Time 0.083 (0.087)	Loss 21.5360 (21.5367)	
Model saved!
tensor(3.0469, grad_fn=<NllLossBackward>)
tensor(18.0860, grad_fn=<AddBackward0>)
Epoch: [77][0/12]	Time 0.072 (0.072)	Loss 21.1329 (21.1329)	
tensor(2.9472, grad_fn=<NllLossBackward>)
tensor(18.6178, grad_fn=<AddBackward0>)
Epoch: [77][2/12]	Time 0.091 (0.087)	Loss 21.5649 (21.4980)	
tensor(2.9870, grad_fn=<NllLossBackward>)
tensor(18.5264, grad_fn=<AddBackward0>)
Epoch: [77][4/12]	Time 0.078 (0.084)	Loss 21.5134 (21.5456)	
tensor(3.0458, grad_fn=<NllLossBackward>)
tensor(18.3397, grad_fn=<AddBackward0>)
Epoch: [77][6/12]	Time 0.087 (0.086)	Loss 21.3855 (21.5226)	
tensor(3.0190, grad_fn=<NllLossBackward>)
tensor(18.5928, grad_fn=<AddBackward0>)
Epoch: [77][8/12]	Time 0.092 (0.088)	Loss 21.6118 (21.5774)	
tensor(3.0173, grad_fn=<NllLossBackward>)
tensor(18.4993, grad_fn=<AddBackward0>)
Epoch: [77][10/12]	Time 0.089 (0.089)	Loss 21.

tensor(3.0455, grad_fn=<NllLossBackward>)
tensor(18.3396, grad_fn=<AddBackward0>)
Epoch: [86][6/12]	Time 0.130 (0.105)	Loss 21.3851 (21.4699)	
tensor(3.0172, grad_fn=<NllLossBackward>)
tensor(18.6154, grad_fn=<AddBackward0>)
Epoch: [86][8/12]	Time 0.106 (0.105)	Loss 21.6326 (21.5274)	
tensor(3.0164, grad_fn=<NllLossBackward>)
tensor(18.4668, grad_fn=<AddBackward0>)
Epoch: [86][10/12]	Time 0.164 (0.112)	Loss 21.4832 (21.4855)	
Model saved!
tensor(3.0458, grad_fn=<NllLossBackward>)
tensor(18.0638, grad_fn=<AddBackward0>)
Epoch: [87][0/12]	Time 0.089 (0.089)	Loss 21.1096 (21.1096)	
tensor(2.9238, grad_fn=<NllLossBackward>)
tensor(18.5565, grad_fn=<AddBackward0>)
Epoch: [87][2/12]	Time 0.088 (0.088)	Loss 21.4802 (21.4414)	
tensor(2.9870, grad_fn=<NllLossBackward>)
tensor(18.4856, grad_fn=<AddBackward0>)
Epoch: [87][4/12]	Time 0.117 (0.095)	Loss 21.4726 (21.4827)	
tensor(3.0453, grad_fn=<NllLossBackward>)
tensor(18.3373, grad_fn=<AddBackward0>)
Epoch: [87][6/12]	Time 0.130 (0.110)	Loss 21.3

tensor(3.0441, grad_fn=<NllLossBackward>)
tensor(18.0597, grad_fn=<AddBackward0>)
Epoch: [96][0/12]	Time 0.104 (0.104)	Loss 21.1038 (21.1038)	
tensor(2.9252, grad_fn=<NllLossBackward>)
tensor(18.5230, grad_fn=<AddBackward0>)
Epoch: [96][2/12]	Time 0.114 (0.103)	Loss 21.4482 (21.4272)	
tensor(2.9874, grad_fn=<NllLossBackward>)
tensor(18.4627, grad_fn=<AddBackward0>)
Epoch: [96][4/12]	Time 0.110 (0.108)	Loss 21.4501 (21.4607)	
tensor(3.0454, grad_fn=<NllLossBackward>)
tensor(18.3077, grad_fn=<AddBackward0>)
Epoch: [96][6/12]	Time 0.088 (0.103)	Loss 21.3532 (21.4395)	
tensor(3.0168, grad_fn=<NllLossBackward>)
tensor(18.5873, grad_fn=<AddBackward0>)
Epoch: [96][8/12]	Time 0.084 (0.101)	Loss 21.6041 (21.4918)	
tensor(3.0166, grad_fn=<NllLossBackward>)
tensor(18.3709, grad_fn=<AddBackward0>)
Epoch: [96][10/12]	Time 0.083 (0.098)	Loss 21.3875 (21.4471)	
Model saved!
tensor(3.0427, grad_fn=<NllLossBackward>)
tensor(18.0594, grad_fn=<AddBackward0>)
Epoch: [97][0/12]	Time 0.076 (0.076)	Loss 21.1

tensor(3.0424, grad_fn=<NllLossBackward>)
tensor(18.3032, grad_fn=<AddBackward0>)
Epoch: [105][6/12]	Time 0.080 (0.084)	Loss 21.3456 (21.4150)	
tensor(3.0158, grad_fn=<NllLossBackward>)
tensor(18.5527, grad_fn=<AddBackward0>)
Epoch: [105][8/12]	Time 0.082 (0.084)	Loss 21.5685 (21.4668)	
tensor(3.0162, grad_fn=<NllLossBackward>)
tensor(18.3683, grad_fn=<AddBackward0>)
Epoch: [105][10/12]	Time 0.079 (0.083)	Loss 21.3845 (21.4236)	
Model saved!
tensor(3.0221, grad_fn=<NllLossBackward>)
tensor(18.0572, grad_fn=<AddBackward0>)
Epoch: [106][0/12]	Time 0.079 (0.079)	Loss 21.0793 (21.0793)	
tensor(2.9238, grad_fn=<NllLossBackward>)
tensor(18.5194, grad_fn=<AddBackward0>)
Epoch: [106][2/12]	Time 0.089 (0.086)	Loss 21.4432 (21.4162)	
tensor(2.9867, grad_fn=<NllLossBackward>)
tensor(18.4308, grad_fn=<AddBackward0>)
Epoch: [106][4/12]	Time 0.074 (0.084)	Loss 21.4175 (21.4348)	
tensor(3.0416, grad_fn=<NllLossBackward>)
tensor(18.3031, grad_fn=<AddBackward0>)
Epoch: [106][6/12]	Time 0.082 (0.084)	Lo

tensor(2.9242, grad_fn=<NllLossBackward>)
tensor(18.5003, grad_fn=<AddBackward0>)
Epoch: [115][2/12]	Time 0.077 (0.075)	Loss 21.4244 (21.4083)	
tensor(2.9863, grad_fn=<NllLossBackward>)
tensor(18.4296, grad_fn=<AddBackward0>)
Epoch: [115][4/12]	Time 0.082 (0.078)	Loss 21.4159 (21.4297)	
tensor(3.0175, grad_fn=<NllLossBackward>)
tensor(18.3027, grad_fn=<AddBackward0>)
Epoch: [115][6/12]	Time 0.083 (0.080)	Loss 21.3202 (21.4074)	
tensor(3.0157, grad_fn=<NllLossBackward>)
tensor(18.5521, grad_fn=<AddBackward0>)
Epoch: [115][8/12]	Time 0.073 (0.080)	Loss 21.5678 (21.4606)	
tensor(3.0163, grad_fn=<NllLossBackward>)
tensor(18.3694, grad_fn=<AddBackward0>)
Epoch: [115][10/12]	Time 0.084 (0.080)	Loss 21.3857 (21.4187)	
Model saved!
tensor(3.0188, grad_fn=<NllLossBackward>)
tensor(18.0560, grad_fn=<AddBackward0>)
Epoch: [116][0/12]	Time 0.084 (0.084)	Loss 21.0749 (21.0749)	
tensor(2.9241, grad_fn=<NllLossBackward>)
tensor(18.4971, grad_fn=<AddBackward0>)
Epoch: [116][2/12]	Time 0.079 (0.084)	Lo

tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.5511, grad_fn=<AddBackward0>)
Epoch: [124][8/12]	Time 0.085 (0.086)	Loss 21.5667 (21.4586)	
tensor(3.0161, grad_fn=<NllLossBackward>)
tensor(18.3680, grad_fn=<AddBackward0>)
Epoch: [124][10/12]	Time 0.072 (0.084)	Loss 21.3841 (21.4168)	
Model saved!
tensor(3.0179, grad_fn=<NllLossBackward>)
tensor(18.0555, grad_fn=<AddBackward0>)
Epoch: [125][0/12]	Time 0.070 (0.070)	Loss 21.0733 (21.0733)	
tensor(2.9239, grad_fn=<NllLossBackward>)
tensor(18.4915, grad_fn=<AddBackward0>)
Epoch: [125][2/12]	Time 0.081 (0.075)	Loss 21.4154 (21.4045)	
tensor(2.9861, grad_fn=<NllLossBackward>)
tensor(18.4289, grad_fn=<AddBackward0>)
Epoch: [125][4/12]	Time 0.074 (0.076)	Loss 21.4151 (21.4270)	
tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.3019, grad_fn=<AddBackward0>)
Epoch: [125][6/12]	Time 0.077 (0.078)	Loss 21.3172 (21.4050)	
tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.5509, grad_fn=<AddBackward0>)
Epoch: [125][8/12]	Time 0.091 (0.081)	Lo

tensor(2.9237, grad_fn=<NllLossBackward>)
tensor(18.4902, grad_fn=<AddBackward0>)
Epoch: [134][2/12]	Time 0.077 (0.077)	Loss 21.4139 (21.4035)	
tensor(2.9860, grad_fn=<NllLossBackward>)
tensor(18.4283, grad_fn=<AddBackward0>)
Epoch: [134][4/12]	Time 0.076 (0.076)	Loss 21.4143 (21.4262)	
tensor(3.0148, grad_fn=<NllLossBackward>)
tensor(18.3014, grad_fn=<AddBackward0>)
Epoch: [134][6/12]	Time 0.074 (0.077)	Loss 21.3162 (21.4042)	
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.5434, grad_fn=<AddBackward0>)
Epoch: [134][8/12]	Time 0.076 (0.077)	Loss 21.5588 (21.4569)	
tensor(3.0160, grad_fn=<NllLossBackward>)
tensor(18.3674, grad_fn=<AddBackward0>)
Epoch: [134][10/12]	Time 0.075 (0.077)	Loss 21.3834 (21.4152)	
Model saved!
tensor(3.0173, grad_fn=<NllLossBackward>)
tensor(18.0551, grad_fn=<AddBackward0>)
Epoch: [135][0/12]	Time 0.075 (0.075)	Loss 21.0724 (21.0724)	
tensor(2.9237, grad_fn=<NllLossBackward>)
tensor(18.4902, grad_fn=<AddBackward0>)
Epoch: [135][2/12]	Time 0.069 (0.074)	Lo

tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.3667, grad_fn=<AddBackward0>)
Epoch: [143][10/12]	Time 0.076 (0.080)	Loss 21.3826 (21.4119)	
Model saved!
tensor(3.0168, grad_fn=<NllLossBackward>)
tensor(18.0548, grad_fn=<AddBackward0>)
Epoch: [144][0/12]	Time 0.072 (0.072)	Loss 21.0716 (21.0716)	
tensor(2.9235, grad_fn=<NllLossBackward>)
tensor(18.4894, grad_fn=<AddBackward0>)
Epoch: [144][2/12]	Time 0.076 (0.076)	Loss 21.4130 (21.4028)	
tensor(2.9858, grad_fn=<NllLossBackward>)
tensor(18.4281, grad_fn=<AddBackward0>)
Epoch: [144][4/12]	Time 0.091 (0.090)	Loss 21.4139 (21.4256)	
tensor(3.0145, grad_fn=<NllLossBackward>)
tensor(18.3008, grad_fn=<AddBackward0>)
Epoch: [144][6/12]	Time 0.086 (0.089)	Loss 21.3154 (21.4036)	
tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.5224, grad_fn=<AddBackward0>)
Epoch: [144][8/12]	Time 0.078 (0.088)	Loss 21.5378 (21.4540)	
tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.3666, grad_fn=<AddBackward0>)
Epoch: [144][10/12]	Time 0.076 (0.086)	L

tensor(2.9234, grad_fn=<NllLossBackward>)
tensor(18.4888, grad_fn=<AddBackward0>)
Epoch: [153][2/12]	Time 0.073 (0.076)	Loss 21.4121 (21.4022)	
tensor(2.9858, grad_fn=<NllLossBackward>)
tensor(18.4278, grad_fn=<AddBackward0>)
Epoch: [153][4/12]	Time 0.079 (0.079)	Loss 21.4136 (21.4250)	
tensor(3.0144, grad_fn=<NllLossBackward>)
tensor(18.3002, grad_fn=<AddBackward0>)
Epoch: [153][6/12]	Time 0.082 (0.079)	Loss 21.3146 (21.4030)	
tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.5211, grad_fn=<AddBackward0>)
Epoch: [153][8/12]	Time 0.087 (0.080)	Loss 21.5365 (21.4530)	
tensor(3.0158, grad_fn=<NllLossBackward>)
tensor(18.3662, grad_fn=<AddBackward0>)
Epoch: [153][10/12]	Time 0.073 (0.080)	Loss 21.3820 (21.4091)	
Model saved!
tensor(3.0164, grad_fn=<NllLossBackward>)
tensor(18.0541, grad_fn=<AddBackward0>)
Epoch: [154][0/12]	Time 0.075 (0.075)	Loss 21.0706 (21.0706)	
tensor(2.9233, grad_fn=<NllLossBackward>)
tensor(18.4887, grad_fn=<AddBackward0>)
Epoch: [154][2/12]	Time 0.076 (0.077)	Lo

tensor(3.0157, grad_fn=<NllLossBackward>)
tensor(18.3662, grad_fn=<AddBackward0>)
Epoch: [162][10/12]	Time 0.077 (0.085)	Loss 21.3819 (21.4048)	
Model saved!
tensor(3.0161, grad_fn=<NllLossBackward>)
tensor(18.0249, grad_fn=<AddBackward0>)
Epoch: [163][0/12]	Time 0.086 (0.086)	Loss 21.0410 (21.0410)	
tensor(2.9232, grad_fn=<NllLossBackward>)
tensor(18.4885, grad_fn=<AddBackward0>)
Epoch: [163][2/12]	Time 0.080 (0.090)	Loss 21.4117 (21.3921)	
tensor(2.9857, grad_fn=<NllLossBackward>)
tensor(18.4273, grad_fn=<AddBackward0>)
Epoch: [163][4/12]	Time 0.084 (0.088)	Loss 21.4130 (21.4188)	
tensor(3.0142, grad_fn=<NllLossBackward>)
tensor(18.2994, grad_fn=<AddBackward0>)
Epoch: [163][6/12]	Time 0.076 (0.087)	Loss 21.3137 (21.3983)	
tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.5206, grad_fn=<AddBackward0>)
Epoch: [163][8/12]	Time 0.076 (0.084)	Loss 21.5359 (21.4475)	
tensor(3.0156, grad_fn=<NllLossBackward>)
tensor(18.3662, grad_fn=<AddBackward0>)
Epoch: [163][10/12]	Time 0.076 (0.083)	L

tensor(2.9231, grad_fn=<NllLossBackward>)
tensor(18.4820, grad_fn=<AddBackward0>)
Epoch: [172][2/12]	Time 0.087 (0.082)	Loss 21.4051 (21.3895)	
tensor(2.9857, grad_fn=<NllLossBackward>)
tensor(18.4272, grad_fn=<AddBackward0>)
Epoch: [172][4/12]	Time 0.080 (0.082)	Loss 21.4129 (21.4171)	
tensor(3.0142, grad_fn=<NllLossBackward>)
tensor(18.2917, grad_fn=<AddBackward0>)
Epoch: [172][6/12]	Time 0.077 (0.082)	Loss 21.3059 (21.3958)	
tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.5202, grad_fn=<AddBackward0>)
Epoch: [172][8/12]	Time 0.079 (0.082)	Loss 21.5357 (21.4447)	
tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.3662, grad_fn=<AddBackward0>)
Epoch: [172][10/12]	Time 0.073 (0.082)	Loss 21.3817 (21.4022)	
Model saved!
tensor(3.0160, grad_fn=<NllLossBackward>)
tensor(18.0241, grad_fn=<AddBackward0>)
Epoch: [173][0/12]	Time 0.076 (0.076)	Loss 21.0401 (21.0401)	
tensor(2.9231, grad_fn=<NllLossBackward>)
tensor(18.4706, grad_fn=<AddBackward0>)
Epoch: [173][2/12]	Time 0.079 (0.080)	Lo

tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.3660, grad_fn=<AddBackward0>)
Epoch: [181][10/12]	Time 0.090 (0.086)	Loss 21.3815 (21.3969)	
Model saved!
tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.0236, grad_fn=<AddBackward0>)
Epoch: [182][0/12]	Time 0.082 (0.082)	Loss 21.0395 (21.0395)	
tensor(2.9231, grad_fn=<NllLossBackward>)
tensor(18.4581, grad_fn=<AddBackward0>)
Epoch: [182][2/12]	Time 0.078 (0.084)	Loss 21.3812 (21.3815)	
tensor(2.9857, grad_fn=<NllLossBackward>)
tensor(18.4270, grad_fn=<AddBackward0>)
Epoch: [182][4/12]	Time 0.074 (0.080)	Loss 21.4127 (21.4121)	
tensor(3.0142, grad_fn=<NllLossBackward>)
tensor(18.2694, grad_fn=<AddBackward0>)
Epoch: [182][6/12]	Time 0.080 (0.080)	Loss 21.2836 (21.3871)	
tensor(3.0170, grad_fn=<NllLossBackward>)
tensor(18.5199, grad_fn=<AddBackward0>)
Epoch: [182][8/12]	Time 0.073 (0.080)	Loss 21.5369 (21.4379)	
tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.3660, grad_fn=<AddBackward0>)
Epoch: [182][10/12]	Time 0.077 (0.080)	L

tensor(2.9228, grad_fn=<NllLossBackward>)
tensor(18.4570, grad_fn=<AddBackward0>)
Epoch: [191][2/12]	Time 0.170 (0.127)	Loss 21.3798 (21.3807)	
tensor(2.9855, grad_fn=<NllLossBackward>)
tensor(18.4267, grad_fn=<AddBackward0>)
Epoch: [191][4/12]	Time 0.126 (0.128)	Loss 21.4122 (21.4114)	
tensor(3.0141, grad_fn=<NllLossBackward>)
tensor(18.2395, grad_fn=<AddBackward0>)
Epoch: [191][6/12]	Time 0.136 (0.132)	Loss 21.2536 (21.3808)	
tensor(3.0167, grad_fn=<NllLossBackward>)
tensor(18.5195, grad_fn=<AddBackward0>)
Epoch: [191][8/12]	Time 0.131 (0.133)	Loss 21.5361 (21.4328)	
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.3660, grad_fn=<AddBackward0>)
Epoch: [191][10/12]	Time 0.114 (0.129)	Loss 21.3814 (21.3923)	
Model saved!
tensor(3.0156, grad_fn=<NllLossBackward>)
tensor(18.0230, grad_fn=<AddBackward0>)
Epoch: [192][0/12]	Time 0.100 (0.100)	Loss 21.0387 (21.0387)	
tensor(2.9227, grad_fn=<NllLossBackward>)
tensor(18.4569, grad_fn=<AddBackward0>)
Epoch: [192][2/12]	Time 0.114 (0.107)	Lo

tensor(3.0161, grad_fn=<NllLossBackward>)
tensor(18.5194, grad_fn=<AddBackward0>)
Epoch: [200][8/12]	Time 0.101 (0.105)	Loss 21.5355 (21.4291)	
tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3658, grad_fn=<AddBackward0>)
Epoch: [200][10/12]	Time 0.099 (0.103)	Loss 21.3810 (21.3892)	
Model saved!
tensor(3.0155, grad_fn=<NllLossBackward>)
tensor(18.0229, grad_fn=<AddBackward0>)
Epoch: [201][0/12]	Time 0.084 (0.084)	Loss 21.0384 (21.0384)	
tensor(2.9226, grad_fn=<NllLossBackward>)
tensor(18.4565, grad_fn=<AddBackward0>)
Epoch: [201][2/12]	Time 0.102 (0.093)	Loss 21.3791 (21.3801)	
tensor(2.9853, grad_fn=<NllLossBackward>)
tensor(18.4266, grad_fn=<AddBackward0>)
Epoch: [201][4/12]	Time 0.115 (0.100)	Loss 21.4118 (21.4056)	
tensor(3.0139, grad_fn=<NllLossBackward>)
tensor(18.2385, grad_fn=<AddBackward0>)
Epoch: [201][6/12]	Time 0.100 (0.102)	Loss 21.2525 (21.3762)	
tensor(3.0161, grad_fn=<NllLossBackward>)
tensor(18.5194, grad_fn=<AddBackward0>)
Epoch: [201][8/12]	Time 0.082 (0.098)	Lo

tensor(2.9226, grad_fn=<NllLossBackward>)
tensor(18.4564, grad_fn=<AddBackward0>)
Epoch: [210][2/12]	Time 0.086 (0.082)	Loss 21.3790 (21.3800)	
tensor(2.9853, grad_fn=<NllLossBackward>)
tensor(18.4264, grad_fn=<AddBackward0>)
Epoch: [210][4/12]	Time 0.087 (0.083)	Loss 21.4117 (21.4053)	
tensor(3.0139, grad_fn=<NllLossBackward>)
tensor(18.2383, grad_fn=<AddBackward0>)
Epoch: [210][6/12]	Time 0.093 (0.084)	Loss 21.2521 (21.3759)	
tensor(3.0160, grad_fn=<NllLossBackward>)
tensor(18.5192, grad_fn=<AddBackward0>)
Epoch: [210][8/12]	Time 0.132 (0.092)	Loss 21.5352 (21.4288)	
tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3657, grad_fn=<AddBackward0>)
Epoch: [210][10/12]	Time 0.082 (0.091)	Loss 21.3809 (21.3890)	
Model saved!
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.0228, grad_fn=<AddBackward0>)
Epoch: [211][0/12]	Time 0.078 (0.078)	Loss 21.0383 (21.0383)	
tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4564, grad_fn=<AddBackward0>)
Epoch: [211][2/12]	Time 0.080 (0.081)	Lo

tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3656, grad_fn=<AddBackward0>)
Epoch: [219][10/12]	Time 0.071 (0.082)	Loss 21.3808 (21.3888)	
Model saved!
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.0227, grad_fn=<AddBackward0>)
Epoch: [220][0/12]	Time 0.069 (0.069)	Loss 21.0381 (21.0381)	
tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4564, grad_fn=<AddBackward0>)
Epoch: [220][2/12]	Time 0.074 (0.073)	Loss 21.3789 (21.3799)	
tensor(2.9853, grad_fn=<NllLossBackward>)
tensor(18.4263, grad_fn=<AddBackward0>)
Epoch: [220][4/12]	Time 0.073 (0.073)	Loss 21.4116 (21.4051)	
tensor(3.0139, grad_fn=<NllLossBackward>)
tensor(18.2381, grad_fn=<AddBackward0>)
Epoch: [220][6/12]	Time 0.074 (0.074)	Loss 21.2520 (21.3757)	
tensor(3.0160, grad_fn=<NllLossBackward>)
tensor(18.5189, grad_fn=<AddBackward0>)
Epoch: [220][8/12]	Time 0.072 (0.074)	Loss 21.5349 (21.4286)	
tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3656, grad_fn=<AddBackward0>)
Epoch: [220][10/12]	Time 0.079 (0.075)	L

tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4563, grad_fn=<AddBackward0>)
Epoch: [229][2/12]	Time 0.087 (0.083)	Loss 21.3788 (21.3798)	
tensor(2.9852, grad_fn=<NllLossBackward>)
tensor(18.4263, grad_fn=<AddBackward0>)
Epoch: [229][4/12]	Time 0.073 (0.079)	Loss 21.4115 (21.4050)	
tensor(3.0139, grad_fn=<NllLossBackward>)
tensor(18.2381, grad_fn=<AddBackward0>)
Epoch: [229][6/12]	Time 0.075 (0.079)	Loss 21.2519 (21.3756)	
tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.5186, grad_fn=<AddBackward0>)
Epoch: [229][8/12]	Time 0.078 (0.079)	Loss 21.5345 (21.4284)	
tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3656, grad_fn=<AddBackward0>)
Epoch: [229][10/12]	Time 0.077 (0.080)	Loss 21.3808 (21.3887)	
Model saved!
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.0226, grad_fn=<AddBackward0>)
Epoch: [230][0/12]	Time 0.074 (0.074)	Loss 21.0380 (21.0380)	
tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4563, grad_fn=<AddBackward0>)
Epoch: [230][2/12]	Time 0.092 (0.085)	Lo

tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.5182, grad_fn=<AddBackward0>)
Epoch: [238][8/12]	Time 0.083 (0.087)	Loss 21.5341 (21.4283)	
tensor(3.0152, grad_fn=<NllLossBackward>)
tensor(18.3656, grad_fn=<AddBackward0>)
Epoch: [238][10/12]	Time 0.085 (0.087)	Loss 21.3807 (21.3885)	
Model saved!
tensor(3.0154, grad_fn=<NllLossBackward>)
tensor(18.0225, grad_fn=<AddBackward0>)
Epoch: [239][0/12]	Time 0.071 (0.071)	Loss 21.0379 (21.0379)	
tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4562, grad_fn=<AddBackward0>)
Epoch: [239][2/12]	Time 0.106 (0.088)	Loss 21.3787 (21.3797)	
tensor(2.9852, grad_fn=<NllLossBackward>)
tensor(18.4262, grad_fn=<AddBackward0>)
Epoch: [239][4/12]	Time 0.081 (0.087)	Loss 21.4115 (21.4048)	
tensor(3.0138, grad_fn=<NllLossBackward>)
tensor(18.2380, grad_fn=<AddBackward0>)
Epoch: [239][6/12]	Time 0.090 (0.088)	Loss 21.2518 (21.3754)	
tensor(3.0159, grad_fn=<NllLossBackward>)
tensor(18.5181, grad_fn=<AddBackward0>)
Epoch: [239][8/12]	Time 0.076 (0.086)	Lo

tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.0224, grad_fn=<AddBackward0>)
Epoch: [248][0/12]	Time 0.153 (0.153)	Loss 21.0378 (21.0378)	
tensor(2.9225, grad_fn=<NllLossBackward>)
tensor(18.4562, grad_fn=<AddBackward0>)
Epoch: [248][2/12]	Time 0.151 (0.152)	Loss 21.3787 (21.3796)	
tensor(2.9852, grad_fn=<NllLossBackward>)
tensor(18.4262, grad_fn=<AddBackward0>)
Epoch: [248][4/12]	Time 0.118 (0.145)	Loss 21.4114 (21.4047)	
tensor(3.0138, grad_fn=<NllLossBackward>)
tensor(18.2379, grad_fn=<AddBackward0>)
Epoch: [248][6/12]	Time 0.133 (0.137)	Loss 21.2517 (21.3753)	
tensor(3.0158, grad_fn=<NllLossBackward>)
tensor(18.5172, grad_fn=<AddBackward0>)
Epoch: [248][8/12]	Time 0.090 (0.132)	Loss 21.5330 (21.4280)	
tensor(3.0151, grad_fn=<NllLossBackward>)
tensor(18.3655, grad_fn=<AddBackward0>)
Epoch: [248][10/12]	Time 0.134 (0.128)	Loss 21.3807 (21.3883)	
Model saved!
tensor(3.0153, grad_fn=<NllLossBackward>)
tensor(18.0224, grad_fn=<AddBackward0>)
Epoch: [249][0/12]	Time 0.104 (0.104)	Lo