# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework


# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [1]:
#! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip

In [2]:
#! unzip food11.zip

# Training

In [3]:
_exp_name = "sample"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [5]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [6]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=1), 
    transforms.RandomVerticalFlip(p=1),    
    transforms.RandomGrayscale(0.5), 
    transforms.RandomSolarize(threshold=192.0),
    transforms.ColorJitter(brightness=.5,hue=0.5), 
    transforms.RandomRotation(degrees=(0, 180)), 
    transforms.RandomInvert(),
    # You may add some transforms here.
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [8]:
class Residual_Block(nn.Module):
    def __init__(self, i_channel, o_channel, stride=1, down_sample=None):
        super(Residual_Block, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=i_channel, 
                    out_channels=o_channel, 
                    kernel_size=3, 
                    stride=stride, 
                    padding=1,
                    bias=False)
        self.bn1 = nn.BatchNorm2d(o_channel)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels=o_channel, 
                    out_channels=o_channel, 
                    kernel_size=3, 
                    stride=1, 
                    padding=1,
                    bias=False)
        self.bn2 = nn.BatchNorm2d(o_channel)
        self.down_sample = down_sample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        
        if self.down_sample:
            residual = self.down_sample(x)
        out += residual
        out = self.relu(out)

        return out

In [9]:
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=11):
        super(ResNet, self).__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)
        self.in_channels = 16
        self.bn = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc1 = nn.Linear(1024, 256)
        self.relu2 = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(256, num_classes)
        
    def make_layer(self, block, out_channels, blocks, stride=1):  
        down_sample = None
        if (stride != 1) or (self.in_channels != out_channels):
            down_sample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(out_channels)
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, down_sample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu1(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size()[0], -1)
        out = self.fc1(out)
        out = self.relu2(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out

In [10]:
batch_size = 128
_dataset_dir = "./food11"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11/training sample ./food11/training/0_0.jpg
One ./food11/validation sample ./food11/validation/0_0.jpg


In [11]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 500
patience = 300 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision.models as v_models

model = ResNet(Residual_Block, [2, 2, 2, 2]).to(device)


# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 001/500 ] loss = 2.29373, acc = 0.17196


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 001/500 ] loss = 2.20680, acc = 0.21097
[ Valid | 001/500 ] loss = 2.20680, acc = 0.21097 -> best
Best model found at epoch 0, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 002/500 ] loss = 2.21981, acc = 0.20843


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 002/500 ] loss = 2.58002, acc = 0.07358
[ Valid | 002/500 ] loss = 2.58002, acc = 0.07358


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 003/500 ] loss = 2.18569, acc = 0.22212


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 003/500 ] loss = 2.10909, acc = 0.26112
[ Valid | 003/500 ] loss = 2.10909, acc = 0.26112 -> best
Best model found at epoch 2, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 004/500 ] loss = 2.09323, acc = 0.25455


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 004/500 ] loss = 2.03614, acc = 0.27616
[ Valid | 004/500 ] loss = 2.03614, acc = 0.27616 -> best
Best model found at epoch 3, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 005/500 ] loss = 2.02671, acc = 0.28127


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 005/500 ] loss = 2.09548, acc = 0.25743
[ Valid | 005/500 ] loss = 2.09548, acc = 0.25743


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 006/500 ] loss = 2.01003, acc = 0.28638


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 006/500 ] loss = 2.07470, acc = 0.28954
[ Valid | 006/500 ] loss = 2.07470, acc = 0.28954 -> best
Best model found at epoch 5, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 007/500 ] loss = 1.96121, acc = 0.31082


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 007/500 ] loss = 1.94590, acc = 0.32390
[ Valid | 007/500 ] loss = 1.94590, acc = 0.32390 -> best
Best model found at epoch 6, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 008/500 ] loss = 1.93954, acc = 0.31593


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 008/500 ] loss = 1.88721, acc = 0.33670
[ Valid | 008/500 ] loss = 1.88721, acc = 0.33670 -> best
Best model found at epoch 7, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 009/500 ] loss = 1.91979, acc = 0.31595


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 009/500 ] loss = 1.99030, acc = 0.30762
[ Valid | 009/500 ] loss = 1.99030, acc = 0.30762


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 010/500 ] loss = 1.89028, acc = 0.33508


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 010/500 ] loss = 1.82868, acc = 0.35016
[ Valid | 010/500 ] loss = 1.82868, acc = 0.35016 -> best
Best model found at epoch 9, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 011/500 ] loss = 1.87564, acc = 0.33742


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 011/500 ] loss = 1.81027, acc = 0.35993
[ Valid | 011/500 ] loss = 1.81027, acc = 0.35993 -> best
Best model found at epoch 10, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 012/500 ] loss = 1.87176, acc = 0.33960


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 012/500 ] loss = 1.87392, acc = 0.34178
[ Valid | 012/500 ] loss = 1.87392, acc = 0.34178


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 013/500 ] loss = 1.83651, acc = 0.35078


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 013/500 ] loss = 2.00620, acc = 0.31218
[ Valid | 013/500 ] loss = 2.00620, acc = 0.31218


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 014/500 ] loss = 1.80654, acc = 0.35861


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 014/500 ] loss = 1.79052, acc = 0.37534
[ Valid | 014/500 ] loss = 1.79052, acc = 0.37534 -> best
Best model found at epoch 13, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 015/500 ] loss = 1.80604, acc = 0.36022


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 015/500 ] loss = 1.74227, acc = 0.39002
[ Valid | 015/500 ] loss = 1.74227, acc = 0.39002 -> best
Best model found at epoch 14, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 016/500 ] loss = 1.78535, acc = 0.37069


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 016/500 ] loss = 1.80882, acc = 0.37476
[ Valid | 016/500 ] loss = 1.80882, acc = 0.37476


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 017/500 ] loss = 1.76104, acc = 0.37666


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 017/500 ] loss = 1.74337, acc = 0.37911
[ Valid | 017/500 ] loss = 1.74337, acc = 0.37911


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 018/500 ] loss = 1.74940, acc = 0.37642


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 018/500 ] loss = 1.78065, acc = 0.38439
[ Valid | 018/500 ] loss = 1.78065, acc = 0.38439


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 019/500 ] loss = 1.73105, acc = 0.38520


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 019/500 ] loss = 1.94872, acc = 0.33822
[ Valid | 019/500 ] loss = 1.94872, acc = 0.33822


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 020/500 ] loss = 1.72570, acc = 0.38257


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 020/500 ] loss = 1.65583, acc = 0.42418
[ Valid | 020/500 ] loss = 1.65583, acc = 0.42418 -> best
Best model found at epoch 19, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 021/500 ] loss = 1.69903, acc = 0.39990


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 021/500 ] loss = 1.75068, acc = 0.39726
[ Valid | 021/500 ] loss = 1.75068, acc = 0.39726


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 022/500 ] loss = 1.70405, acc = 0.40160


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 022/500 ] loss = 1.87768, acc = 0.34886
[ Valid | 022/500 ] loss = 1.87768, acc = 0.34886


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 023/500 ] loss = 1.66277, acc = 0.41565


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 023/500 ] loss = 1.77382, acc = 0.39068
[ Valid | 023/500 ] loss = 1.77382, acc = 0.39068


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 024/500 ] loss = 1.66410, acc = 0.41621


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 024/500 ] loss = 1.75487, acc = 0.39149
[ Valid | 024/500 ] loss = 1.75487, acc = 0.39149


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 025/500 ] loss = 1.64225, acc = 0.42314


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 025/500 ] loss = 1.68578, acc = 0.41898
[ Valid | 025/500 ] loss = 1.68578, acc = 0.41898


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 026/500 ] loss = 1.62521, acc = 0.42460


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 026/500 ] loss = 1.63640, acc = 0.42281
[ Valid | 026/500 ] loss = 1.63640, acc = 0.42281


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 027/500 ] loss = 1.61450, acc = 0.43081


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 027/500 ] loss = 1.58230, acc = 0.44553
[ Valid | 027/500 ] loss = 1.58230, acc = 0.44553 -> best
Best model found at epoch 26, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 028/500 ] loss = 1.60451, acc = 0.43009


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 028/500 ] loss = 1.68389, acc = 0.40017
[ Valid | 028/500 ] loss = 1.68389, acc = 0.40017


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 029/500 ] loss = 1.61083, acc = 0.43147


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 029/500 ] loss = 1.62547, acc = 0.42325
[ Valid | 029/500 ] loss = 1.62547, acc = 0.42325


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 030/500 ] loss = 1.58963, acc = 0.44109


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 030/500 ] loss = 1.67911, acc = 0.41382
[ Valid | 030/500 ] loss = 1.67911, acc = 0.41382


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 031/500 ] loss = 1.55938, acc = 0.45495


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 031/500 ] loss = 1.57263, acc = 0.44958
[ Valid | 031/500 ] loss = 1.57263, acc = 0.44958 -> best
Best model found at epoch 30, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 032/500 ] loss = 1.56390, acc = 0.45170


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 032/500 ] loss = 1.60015, acc = 0.44762
[ Valid | 032/500 ] loss = 1.60015, acc = 0.44762


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 033/500 ] loss = 1.53721, acc = 0.45441


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 033/500 ] loss = 1.59425, acc = 0.44059
[ Valid | 033/500 ] loss = 1.59425, acc = 0.44059


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 034/500 ] loss = 1.53958, acc = 0.45645


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 034/500 ] loss = 1.64145, acc = 0.43487
[ Valid | 034/500 ] loss = 1.64145, acc = 0.43487


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 035/500 ] loss = 1.51817, acc = 0.46372


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 035/500 ] loss = 1.53785, acc = 0.47091
[ Valid | 035/500 ] loss = 1.53785, acc = 0.47091 -> best
Best model found at epoch 34, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 036/500 ] loss = 1.51714, acc = 0.46881


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 036/500 ] loss = 1.51781, acc = 0.47156
[ Valid | 036/500 ] loss = 1.51781, acc = 0.47156 -> best
Best model found at epoch 35, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 037/500 ] loss = 1.50433, acc = 0.47945


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 037/500 ] loss = 1.51872, acc = 0.47380
[ Valid | 037/500 ] loss = 1.51872, acc = 0.47380 -> best
Best model found at epoch 36, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 038/500 ] loss = 1.48752, acc = 0.47430


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 038/500 ] loss = 1.61960, acc = 0.43323
[ Valid | 038/500 ] loss = 1.61960, acc = 0.43323


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 039/500 ] loss = 1.49146, acc = 0.47977


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 039/500 ] loss = 1.53409, acc = 0.46658
[ Valid | 039/500 ] loss = 1.53409, acc = 0.46658


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 040/500 ] loss = 1.49307, acc = 0.47039


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 040/500 ] loss = 1.48360, acc = 0.48719
[ Valid | 040/500 ] loss = 1.48360, acc = 0.48719 -> best
Best model found at epoch 39, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 041/500 ] loss = 1.48690, acc = 0.47859


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 041/500 ] loss = 1.54871, acc = 0.47049
[ Valid | 041/500 ] loss = 1.54871, acc = 0.47049


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 042/500 ] loss = 1.47954, acc = 0.48452


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 042/500 ] loss = 1.51147, acc = 0.47482
[ Valid | 042/500 ] loss = 1.51147, acc = 0.47482


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 043/500 ] loss = 1.43620, acc = 0.49465


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 043/500 ] loss = 1.45977, acc = 0.49587
[ Valid | 043/500 ] loss = 1.45977, acc = 0.49587 -> best
Best model found at epoch 42, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 044/500 ] loss = 1.46162, acc = 0.48774


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 044/500 ] loss = 1.51556, acc = 0.46259
[ Valid | 044/500 ] loss = 1.51556, acc = 0.46259


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 045/500 ] loss = 1.43404, acc = 0.49848


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 045/500 ] loss = 1.59242, acc = 0.45210
[ Valid | 045/500 ] loss = 1.59242, acc = 0.45210


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 046/500 ] loss = 1.42634, acc = 0.50405


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 046/500 ] loss = 1.45003, acc = 0.48973
[ Valid | 046/500 ] loss = 1.45003, acc = 0.48973


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 047/500 ] loss = 1.41041, acc = 0.50911


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 047/500 ] loss = 1.54722, acc = 0.46217
[ Valid | 047/500 ] loss = 1.54722, acc = 0.46217


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 048/500 ] loss = 1.42031, acc = 0.50799


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 048/500 ] loss = 1.55800, acc = 0.46157
[ Valid | 048/500 ] loss = 1.55800, acc = 0.46157


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 049/500 ] loss = 1.40440, acc = 0.50633


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 049/500 ] loss = 1.48829, acc = 0.48386
[ Valid | 049/500 ] loss = 1.48829, acc = 0.48386


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 050/500 ] loss = 1.40044, acc = 0.51456


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 050/500 ] loss = 1.42069, acc = 0.50984
[ Valid | 050/500 ] loss = 1.42069, acc = 0.50984 -> best
Best model found at epoch 49, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 051/500 ] loss = 1.37871, acc = 0.51446


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 051/500 ] loss = 1.57425, acc = 0.45623
[ Valid | 051/500 ] loss = 1.57425, acc = 0.45623


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 052/500 ] loss = 1.39285, acc = 0.51166


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 052/500 ] loss = 1.47630, acc = 0.49008
[ Valid | 052/500 ] loss = 1.47630, acc = 0.49008


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 053/500 ] loss = 1.39308, acc = 0.51262


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 053/500 ] loss = 1.48109, acc = 0.49138
[ Valid | 053/500 ] loss = 1.48109, acc = 0.49138


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 054/500 ] loss = 1.37350, acc = 0.52428


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 054/500 ] loss = 1.36437, acc = 0.52685
[ Valid | 054/500 ] loss = 1.36437, acc = 0.52685 -> best
Best model found at epoch 53, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 055/500 ] loss = 1.37042, acc = 0.52324


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 055/500 ] loss = 1.67249, acc = 0.42729
[ Valid | 055/500 ] loss = 1.67249, acc = 0.42729


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 056/500 ] loss = 1.37507, acc = 0.52304


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 056/500 ] loss = 1.42348, acc = 0.50926
[ Valid | 056/500 ] loss = 1.42348, acc = 0.50926


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 057/500 ] loss = 1.36910, acc = 0.51837


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 057/500 ] loss = 1.54068, acc = 0.47222
[ Valid | 057/500 ] loss = 1.54068, acc = 0.47222


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 058/500 ] loss = 1.34989, acc = 0.53375


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 058/500 ] loss = 1.52373, acc = 0.47395
[ Valid | 058/500 ] loss = 1.52373, acc = 0.47395


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 059/500 ] loss = 1.34001, acc = 0.53433


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 059/500 ] loss = 1.37890, acc = 0.52112
[ Valid | 059/500 ] loss = 1.37890, acc = 0.52112


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 060/500 ] loss = 1.31703, acc = 0.54455


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 060/500 ] loss = 1.47379, acc = 0.51107
[ Valid | 060/500 ] loss = 1.47379, acc = 0.51107


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 061/500 ] loss = 1.32047, acc = 0.53832


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 061/500 ] loss = 1.37135, acc = 0.52177
[ Valid | 061/500 ] loss = 1.37135, acc = 0.52177


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 062/500 ] loss = 1.31742, acc = 0.53898


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 062/500 ] loss = 1.42138, acc = 0.51751
[ Valid | 062/500 ] loss = 1.42138, acc = 0.51751


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 063/500 ] loss = 1.29948, acc = 0.54467


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 063/500 ] loss = 1.37597, acc = 0.52157
[ Valid | 063/500 ] loss = 1.37597, acc = 0.52157


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 064/500 ] loss = 1.30829, acc = 0.54651


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 064/500 ] loss = 1.35580, acc = 0.53054
[ Valid | 064/500 ] loss = 1.35580, acc = 0.53054 -> best
Best model found at epoch 63, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 065/500 ] loss = 1.31106, acc = 0.54201


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 065/500 ] loss = 1.37858, acc = 0.52235
[ Valid | 065/500 ] loss = 1.37858, acc = 0.52235


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 066/500 ] loss = 1.32001, acc = 0.54269


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 066/500 ] loss = 1.40781, acc = 0.51687
[ Valid | 066/500 ] loss = 1.40781, acc = 0.51687


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 067/500 ] loss = 1.28985, acc = 0.54619


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 067/500 ] loss = 1.51053, acc = 0.49031
[ Valid | 067/500 ] loss = 1.51053, acc = 0.49031


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 068/500 ] loss = 1.28121, acc = 0.55659


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 068/500 ] loss = 1.40366, acc = 0.51924
[ Valid | 068/500 ] loss = 1.40366, acc = 0.51924


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 069/500 ] loss = 1.27071, acc = 0.56184


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 069/500 ] loss = 1.54672, acc = 0.46246
[ Valid | 069/500 ] loss = 1.54672, acc = 0.46246


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 070/500 ] loss = 1.28452, acc = 0.55284


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 070/500 ] loss = 1.38553, acc = 0.52866
[ Valid | 070/500 ] loss = 1.38553, acc = 0.52866


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 071/500 ] loss = 1.26663, acc = 0.56238


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 071/500 ] loss = 1.51131, acc = 0.48163
[ Valid | 071/500 ] loss = 1.51131, acc = 0.48163


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 072/500 ] loss = 1.26522, acc = 0.56769


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 072/500 ] loss = 1.42097, acc = 0.51804
[ Valid | 072/500 ] loss = 1.42097, acc = 0.51804


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 073/500 ] loss = 1.25713, acc = 0.56929


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 073/500 ] loss = 1.28995, acc = 0.56033
[ Valid | 073/500 ] loss = 1.28995, acc = 0.56033 -> best
Best model found at epoch 72, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 074/500 ] loss = 1.26133, acc = 0.56330


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 074/500 ] loss = 1.34004, acc = 0.55092
[ Valid | 074/500 ] loss = 1.34004, acc = 0.55092


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 075/500 ] loss = 1.24159, acc = 0.56721


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 075/500 ] loss = 1.37177, acc = 0.53538
[ Valid | 075/500 ] loss = 1.37177, acc = 0.53538


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 076/500 ] loss = 1.24617, acc = 0.56759


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 076/500 ] loss = 1.38570, acc = 0.54088
[ Valid | 076/500 ] loss = 1.38570, acc = 0.54088


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 077/500 ] loss = 1.23778, acc = 0.57314


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 077/500 ] loss = 1.28403, acc = 0.56750
[ Valid | 077/500 ] loss = 1.28403, acc = 0.56750 -> best
Best model found at epoch 76, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 078/500 ] loss = 1.21251, acc = 0.57847


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 078/500 ] loss = 1.89915, acc = 0.46021
[ Valid | 078/500 ] loss = 1.89915, acc = 0.46021


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 079/500 ] loss = 1.21349, acc = 0.58279


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 079/500 ] loss = 1.30242, acc = 0.56396
[ Valid | 079/500 ] loss = 1.30242, acc = 0.56396


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 080/500 ] loss = 1.20069, acc = 0.58622


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 080/500 ] loss = 1.34191, acc = 0.54856
[ Valid | 080/500 ] loss = 1.34191, acc = 0.54856


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 081/500 ] loss = 1.21431, acc = 0.58321


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 081/500 ] loss = 1.37232, acc = 0.53704
[ Valid | 081/500 ] loss = 1.37232, acc = 0.53704


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 082/500 ] loss = 1.21021, acc = 0.58023


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 082/500 ] loss = 1.53195, acc = 0.49522
[ Valid | 082/500 ] loss = 1.53195, acc = 0.49522


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 083/500 ] loss = 1.20616, acc = 0.57903


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 083/500 ] loss = 1.36082, acc = 0.54752
[ Valid | 083/500 ] loss = 1.36082, acc = 0.54752


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 084/500 ] loss = 1.19315, acc = 0.59229


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 084/500 ] loss = 1.26835, acc = 0.56678
[ Valid | 084/500 ] loss = 1.26835, acc = 0.56678


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 085/500 ] loss = 1.17717, acc = 0.59012


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 085/500 ] loss = 1.39841, acc = 0.54500
[ Valid | 085/500 ] loss = 1.39841, acc = 0.54500


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 086/500 ] loss = 1.17303, acc = 0.59203


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 086/500 ] loss = 1.20454, acc = 0.59405
[ Valid | 086/500 ] loss = 1.20454, acc = 0.59405 -> best
Best model found at epoch 85, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 087/500 ] loss = 1.18021, acc = 0.59573


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 087/500 ] loss = 1.32421, acc = 0.55461
[ Valid | 087/500 ] loss = 1.32421, acc = 0.55461


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 088/500 ] loss = 1.16987, acc = 0.60160


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 088/500 ] loss = 1.31336, acc = 0.54985
[ Valid | 088/500 ] loss = 1.31336, acc = 0.54985


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 089/500 ] loss = 1.18766, acc = 0.59261


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 089/500 ] loss = 1.34860, acc = 0.54427
[ Valid | 089/500 ] loss = 1.34860, acc = 0.54427


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 090/500 ] loss = 1.16180, acc = 0.59772


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 090/500 ] loss = 1.31350, acc = 0.55390
[ Valid | 090/500 ] loss = 1.31350, acc = 0.55390


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 091/500 ] loss = 1.16457, acc = 0.59673


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 091/500 ] loss = 1.29964, acc = 0.56432
[ Valid | 091/500 ] loss = 1.29964, acc = 0.56432


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 092/500 ] loss = 1.16568, acc = 0.59505


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 092/500 ] loss = 1.39073, acc = 0.54124
[ Valid | 092/500 ] loss = 1.39073, acc = 0.54124


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 093/500 ] loss = 1.16287, acc = 0.59575


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 093/500 ] loss = 1.25890, acc = 0.56975
[ Valid | 093/500 ] loss = 1.25890, acc = 0.56975


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 094/500 ] loss = 1.15507, acc = 0.60367


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 094/500 ] loss = 1.21952, acc = 0.58190
[ Valid | 094/500 ] loss = 1.21952, acc = 0.58190


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 095/500 ] loss = 1.15269, acc = 0.60152


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 095/500 ] loss = 1.28791, acc = 0.56012
[ Valid | 095/500 ] loss = 1.28791, acc = 0.56012


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 096/500 ] loss = 1.15893, acc = 0.59794


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 096/500 ] loss = 1.29633, acc = 0.56032
[ Valid | 096/500 ] loss = 1.29633, acc = 0.56032


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 097/500 ] loss = 1.13899, acc = 0.60491


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 097/500 ] loss = 1.22553, acc = 0.58147
[ Valid | 097/500 ] loss = 1.22553, acc = 0.58147


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 098/500 ] loss = 1.13959, acc = 0.60715


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 098/500 ] loss = 1.31166, acc = 0.55679
[ Valid | 098/500 ] loss = 1.31166, acc = 0.55679


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 099/500 ] loss = 1.12052, acc = 0.61144


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 099/500 ] loss = 1.27972, acc = 0.56712
[ Valid | 099/500 ] loss = 1.27972, acc = 0.56712


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 100/500 ] loss = 1.10182, acc = 0.62183


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 100/500 ] loss = 1.24894, acc = 0.57306
[ Valid | 100/500 ] loss = 1.24894, acc = 0.57306


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 101/500 ] loss = 1.10777, acc = 0.62294


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 101/500 ] loss = 1.23923, acc = 0.57639
[ Valid | 101/500 ] loss = 1.23923, acc = 0.57639


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 102/500 ] loss = 1.10419, acc = 0.62129


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 102/500 ] loss = 1.37481, acc = 0.54796
[ Valid | 102/500 ] loss = 1.37481, acc = 0.54796


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 103/500 ] loss = 1.11837, acc = 0.61687


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 103/500 ] loss = 1.23725, acc = 0.58464
[ Valid | 103/500 ] loss = 1.23725, acc = 0.58464


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 104/500 ] loss = 1.12436, acc = 0.60935


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 104/500 ] loss = 1.24198, acc = 0.59304
[ Valid | 104/500 ] loss = 1.24198, acc = 0.59304


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 105/500 ] loss = 1.10507, acc = 0.61198


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 105/500 ] loss = 1.26510, acc = 0.56771
[ Valid | 105/500 ] loss = 1.26510, acc = 0.56771


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 106/500 ] loss = 1.10201, acc = 0.62252


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 106/500 ] loss = 1.27775, acc = 0.57300
[ Valid | 106/500 ] loss = 1.27775, acc = 0.57300


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 107/500 ] loss = 1.11281, acc = 0.61663


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 107/500 ] loss = 1.35425, acc = 0.55737
[ Valid | 107/500 ] loss = 1.35425, acc = 0.55737


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 108/500 ] loss = 1.09895, acc = 0.61579


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 108/500 ] loss = 1.39356, acc = 0.52040
[ Valid | 108/500 ] loss = 1.39356, acc = 0.52040


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 109/500 ] loss = 1.08803, acc = 0.62568


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 109/500 ] loss = 1.27837, acc = 0.56815
[ Valid | 109/500 ] loss = 1.27837, acc = 0.56815


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 110/500 ] loss = 1.09012, acc = 0.62738


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 110/500 ] loss = 1.18529, acc = 0.60506
[ Valid | 110/500 ] loss = 1.18529, acc = 0.60506 -> best
Best model found at epoch 109, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 111/500 ] loss = 1.09186, acc = 0.62111


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 111/500 ] loss = 1.29429, acc = 0.55889
[ Valid | 111/500 ] loss = 1.29429, acc = 0.55889


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 112/500 ] loss = 1.09202, acc = 0.62260


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 112/500 ] loss = 1.23633, acc = 0.58812
[ Valid | 112/500 ] loss = 1.23633, acc = 0.58812


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 113/500 ] loss = 1.09259, acc = 0.62232


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 113/500 ] loss = 1.23194, acc = 0.58530
[ Valid | 113/500 ] loss = 1.23194, acc = 0.58530


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 114/500 ] loss = 1.08292, acc = 0.62121


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 114/500 ] loss = 1.20988, acc = 0.59217
[ Valid | 114/500 ] loss = 1.20988, acc = 0.59217


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 115/500 ] loss = 1.08523, acc = 0.62672


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 115/500 ] loss = 1.21561, acc = 0.59123
[ Valid | 115/500 ] loss = 1.21561, acc = 0.59123


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 116/500 ] loss = 1.06623, acc = 0.63115


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 116/500 ] loss = 1.28373, acc = 0.58110
[ Valid | 116/500 ] loss = 1.28373, acc = 0.58110


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 117/500 ] loss = 1.06316, acc = 0.63189


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 117/500 ] loss = 1.18898, acc = 0.60490
[ Valid | 117/500 ] loss = 1.18898, acc = 0.60490


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 118/500 ] loss = 1.05366, acc = 0.63742


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 118/500 ] loss = 1.16405, acc = 0.62046
[ Valid | 118/500 ] loss = 1.16405, acc = 0.62046 -> best
Best model found at epoch 117, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 119/500 ] loss = 1.05382, acc = 0.63900


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 119/500 ] loss = 1.33107, acc = 0.56699
[ Valid | 119/500 ] loss = 1.33107, acc = 0.56699


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 120/500 ] loss = 1.03861, acc = 0.63856


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 120/500 ] loss = 1.30644, acc = 0.57741
[ Valid | 120/500 ] loss = 1.30644, acc = 0.57741


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 121/500 ] loss = 1.05312, acc = 0.64311


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 121/500 ] loss = 1.20053, acc = 0.59593
[ Valid | 121/500 ] loss = 1.20053, acc = 0.59593


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 122/500 ] loss = 1.03301, acc = 0.64513


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 122/500 ] loss = 1.21762, acc = 0.59376
[ Valid | 122/500 ] loss = 1.21762, acc = 0.59376


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 123/500 ] loss = 1.04377, acc = 0.64710


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 123/500 ] loss = 1.34061, acc = 0.58277
[ Valid | 123/500 ] loss = 1.34061, acc = 0.58277


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 124/500 ] loss = 1.05354, acc = 0.64423


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 124/500 ] loss = 1.20059, acc = 0.59356
[ Valid | 124/500 ] loss = 1.20059, acc = 0.59356


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 125/500 ] loss = 1.03632, acc = 0.64231


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 125/500 ] loss = 1.16082, acc = 0.60715
[ Valid | 125/500 ] loss = 1.16082, acc = 0.60715


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 126/500 ] loss = 1.04082, acc = 0.64239


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 126/500 ] loss = 1.19189, acc = 0.59926
[ Valid | 126/500 ] loss = 1.19189, acc = 0.59926


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 127/500 ] loss = 1.01648, acc = 0.64357


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 127/500 ] loss = 1.19564, acc = 0.60122
[ Valid | 127/500 ] loss = 1.19564, acc = 0.60122


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 128/500 ] loss = 1.03163, acc = 0.64589


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 128/500 ] loss = 1.24505, acc = 0.60751
[ Valid | 128/500 ] loss = 1.24505, acc = 0.60751


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 129/500 ] loss = 1.01221, acc = 0.65244


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 129/500 ] loss = 1.16758, acc = 0.61171
[ Valid | 129/500 ] loss = 1.16758, acc = 0.61171


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 130/500 ] loss = 1.03135, acc = 0.64956


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 130/500 ] loss = 1.11573, acc = 0.62929
[ Valid | 130/500 ] loss = 1.11573, acc = 0.62929 -> best
Best model found at epoch 129, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 131/500 ] loss = 1.03620, acc = 0.64345


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 131/500 ] loss = 1.77036, acc = 0.52155
[ Valid | 131/500 ] loss = 1.77036, acc = 0.52155


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 132/500 ] loss = 1.04408, acc = 0.63840


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 132/500 ] loss = 1.30793, acc = 0.57215
[ Valid | 132/500 ] loss = 1.30793, acc = 0.57215


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 133/500 ] loss = 1.04556, acc = 0.64299


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 133/500 ] loss = 1.20553, acc = 0.59499
[ Valid | 133/500 ] loss = 1.20553, acc = 0.59499


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 134/500 ] loss = 1.01907, acc = 0.65118


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 134/500 ] loss = 1.18268, acc = 0.60273
[ Valid | 134/500 ] loss = 1.18268, acc = 0.60273


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 135/500 ] loss = 1.00675, acc = 0.65304


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 135/500 ] loss = 1.28407, acc = 0.58538
[ Valid | 135/500 ] loss = 1.28407, acc = 0.58538


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 136/500 ] loss = 1.00842, acc = 0.65407


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 136/500 ] loss = 1.24640, acc = 0.59311
[ Valid | 136/500 ] loss = 1.24640, acc = 0.59311


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 137/500 ] loss = 1.01141, acc = 0.65138


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 137/500 ] loss = 1.15791, acc = 0.61553
[ Valid | 137/500 ] loss = 1.15791, acc = 0.61553


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 138/500 ] loss = 0.98610, acc = 0.65489


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 138/500 ] loss = 1.34982, acc = 0.56585
[ Valid | 138/500 ] loss = 1.34982, acc = 0.56585


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 139/500 ] loss = 0.99443, acc = 0.65639


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 139/500 ] loss = 1.16478, acc = 0.62712
[ Valid | 139/500 ] loss = 1.16478, acc = 0.62712


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 140/500 ] loss = 0.99823, acc = 0.65689


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 140/500 ] loss = 1.17422, acc = 0.61388
[ Valid | 140/500 ] loss = 1.17422, acc = 0.61388


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 141/500 ] loss = 1.02586, acc = 0.64445


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 141/500 ] loss = 1.18582, acc = 0.62096
[ Valid | 141/500 ] loss = 1.18582, acc = 0.62096


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 142/500 ] loss = 0.99230, acc = 0.66222


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 142/500 ] loss = 1.14893, acc = 0.61698
[ Valid | 142/500 ] loss = 1.14893, acc = 0.61698


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 143/500 ] loss = 0.99663, acc = 0.65903


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 143/500 ] loss = 1.10917, acc = 0.63385
[ Valid | 143/500 ] loss = 1.10917, acc = 0.63385 -> best
Best model found at epoch 142, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 144/500 ] loss = 0.98758, acc = 0.66044


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 144/500 ] loss = 1.33698, acc = 0.58428
[ Valid | 144/500 ] loss = 1.33698, acc = 0.58428


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 145/500 ] loss = 0.98498, acc = 0.65595


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 145/500 ] loss = 1.15492, acc = 0.62141
[ Valid | 145/500 ] loss = 1.15492, acc = 0.62141


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 146/500 ] loss = 0.96730, acc = 0.67013


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 146/500 ] loss = 1.31794, acc = 0.59058
[ Valid | 146/500 ] loss = 1.31794, acc = 0.59058


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 147/500 ] loss = 1.00387, acc = 0.65855


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 147/500 ] loss = 1.21393, acc = 0.60042
[ Valid | 147/500 ] loss = 1.21393, acc = 0.60042


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 148/500 ] loss = 0.98729, acc = 0.66174


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 148/500 ] loss = 1.23217, acc = 0.61250
[ Valid | 148/500 ] loss = 1.23217, acc = 0.61250


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 149/500 ] loss = 0.98046, acc = 0.66392


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 149/500 ] loss = 1.20614, acc = 0.60165
[ Valid | 149/500 ] loss = 1.20614, acc = 0.60165


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 150/500 ] loss = 0.95417, acc = 0.67206


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 150/500 ] loss = 1.28835, acc = 0.58761
[ Valid | 150/500 ] loss = 1.28835, acc = 0.58761


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 151/500 ] loss = 0.97241, acc = 0.66286


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 151/500 ] loss = 1.19308, acc = 0.61352
[ Valid | 151/500 ] loss = 1.19308, acc = 0.61352


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 152/500 ] loss = 0.97773, acc = 0.66797


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 152/500 ] loss = 1.23479, acc = 0.60367
[ Valid | 152/500 ] loss = 1.23479, acc = 0.60367


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 153/500 ] loss = 0.96723, acc = 0.67091


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 153/500 ] loss = 1.23154, acc = 0.60324
[ Valid | 153/500 ] loss = 1.23154, acc = 0.60324


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 154/500 ] loss = 0.97581, acc = 0.66925


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 154/500 ] loss = 1.14917, acc = 0.61461
[ Valid | 154/500 ] loss = 1.14917, acc = 0.61461


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 155/500 ] loss = 0.95742, acc = 0.67254


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 155/500 ] loss = 1.15811, acc = 0.61403
[ Valid | 155/500 ] loss = 1.15811, acc = 0.61403


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 156/500 ] loss = 0.97353, acc = 0.66697


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 156/500 ] loss = 1.12004, acc = 0.62299
[ Valid | 156/500 ] loss = 1.12004, acc = 0.62299


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 157/500 ] loss = 0.97590, acc = 0.66184


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 157/500 ] loss = 1.25930, acc = 0.59543
[ Valid | 157/500 ] loss = 1.25930, acc = 0.59543


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 158/500 ] loss = 0.94992, acc = 0.67831


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 158/500 ] loss = 1.17257, acc = 0.62394
[ Valid | 158/500 ] loss = 1.17257, acc = 0.62394


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 159/500 ] loss = 0.95240, acc = 0.67434


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 159/500 ] loss = 1.26809, acc = 0.59782
[ Valid | 159/500 ] loss = 1.26809, acc = 0.59782


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 160/500 ] loss = 0.94345, acc = 0.67800


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 160/500 ] loss = 1.20082, acc = 0.61445
[ Valid | 160/500 ] loss = 1.20082, acc = 0.61445


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 161/500 ] loss = 0.96745, acc = 0.66538


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 161/500 ] loss = 1.20551, acc = 0.60562
[ Valid | 161/500 ] loss = 1.20551, acc = 0.60562


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 162/500 ] loss = 0.95787, acc = 0.67292


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 162/500 ] loss = 1.33465, acc = 0.57054
[ Valid | 162/500 ] loss = 1.33465, acc = 0.57054


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 163/500 ] loss = 0.93804, acc = 0.67927


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 163/500 ] loss = 1.15633, acc = 0.61185
[ Valid | 163/500 ] loss = 1.15633, acc = 0.61185


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 164/500 ] loss = 0.92727, acc = 0.67891


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 164/500 ] loss = 1.17566, acc = 0.61771
[ Valid | 164/500 ] loss = 1.17566, acc = 0.61771


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 165/500 ] loss = 0.93388, acc = 0.67592


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 165/500 ] loss = 1.12855, acc = 0.63623
[ Valid | 165/500 ] loss = 1.12855, acc = 0.63623 -> best
Best model found at epoch 164, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 166/500 ] loss = 0.92900, acc = 0.67794


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 166/500 ] loss = 1.18655, acc = 0.61048
[ Valid | 166/500 ] loss = 1.18655, acc = 0.61048


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 167/500 ] loss = 0.92570, acc = 0.67917


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 167/500 ] loss = 1.11368, acc = 0.63761
[ Valid | 167/500 ] loss = 1.11368, acc = 0.63761 -> best
Best model found at epoch 166, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 168/500 ] loss = 0.91181, acc = 0.68209


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 168/500 ] loss = 1.19554, acc = 0.61989
[ Valid | 168/500 ] loss = 1.19554, acc = 0.61989


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 169/500 ] loss = 0.93825, acc = 0.66987


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 169/500 ] loss = 1.13997, acc = 0.62547
[ Valid | 169/500 ] loss = 1.13997, acc = 0.62547


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 170/500 ] loss = 0.93280, acc = 0.67698


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 170/500 ] loss = 1.21133, acc = 0.60026
[ Valid | 170/500 ] loss = 1.21133, acc = 0.60026


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 171/500 ] loss = 0.90784, acc = 0.68672


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 171/500 ] loss = 1.17016, acc = 0.62175
[ Valid | 171/500 ] loss = 1.17016, acc = 0.62175


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 172/500 ] loss = 0.91393, acc = 0.68427


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 172/500 ] loss = 1.19165, acc = 0.60722
[ Valid | 172/500 ] loss = 1.19165, acc = 0.60722


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 173/500 ] loss = 0.93107, acc = 0.67897


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 173/500 ] loss = 1.21733, acc = 0.61640
[ Valid | 173/500 ] loss = 1.21733, acc = 0.61640


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 174/500 ] loss = 0.93366, acc = 0.67800


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 174/500 ] loss = 1.28446, acc = 0.59029
[ Valid | 174/500 ] loss = 1.28446, acc = 0.59029


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 175/500 ] loss = 0.90543, acc = 0.68748


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 175/500 ] loss = 1.20757, acc = 0.59992
[ Valid | 175/500 ] loss = 1.20757, acc = 0.59992


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 176/500 ] loss = 0.91258, acc = 0.68760


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 176/500 ] loss = 1.12664, acc = 0.63312
[ Valid | 176/500 ] loss = 1.12664, acc = 0.63312


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 177/500 ] loss = 0.89542, acc = 0.68956


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 177/500 ] loss = 1.17045, acc = 0.61778
[ Valid | 177/500 ] loss = 1.17045, acc = 0.61778


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 178/500 ] loss = 0.91789, acc = 0.67895


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 178/500 ] loss = 1.17439, acc = 0.63443
[ Valid | 178/500 ] loss = 1.17439, acc = 0.63443


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 179/500 ] loss = 0.91685, acc = 0.68794


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 179/500 ] loss = 1.15207, acc = 0.62646
[ Valid | 179/500 ] loss = 1.15207, acc = 0.62646


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 180/500 ] loss = 0.90306, acc = 0.69058


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 180/500 ] loss = 1.14023, acc = 0.62523
[ Valid | 180/500 ] loss = 1.14023, acc = 0.62523


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 181/500 ] loss = 0.90660, acc = 0.69169


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 181/500 ] loss = 1.31974, acc = 0.59911
[ Valid | 181/500 ] loss = 1.31974, acc = 0.59911


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 182/500 ] loss = 0.90310, acc = 0.68804


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 182/500 ] loss = 1.22682, acc = 0.61511
[ Valid | 182/500 ] loss = 1.22682, acc = 0.61511


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 183/500 ] loss = 0.89508, acc = 0.69383


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 183/500 ] loss = 1.17668, acc = 0.61858
[ Valid | 183/500 ] loss = 1.17668, acc = 0.61858


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 184/500 ] loss = 0.90540, acc = 0.69407


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 184/500 ] loss = 1.08166, acc = 0.65894
[ Valid | 184/500 ] loss = 1.08166, acc = 0.65894 -> best
Best model found at epoch 183, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 185/500 ] loss = 0.89678, acc = 0.69251


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 185/500 ] loss = 1.15919, acc = 0.61952
[ Valid | 185/500 ] loss = 1.15919, acc = 0.61952


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 186/500 ] loss = 0.90735, acc = 0.69129


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 186/500 ] loss = 1.10324, acc = 0.64686
[ Valid | 186/500 ] loss = 1.10324, acc = 0.64686


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 187/500 ] loss = 0.88065, acc = 0.69677


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 187/500 ] loss = 1.09265, acc = 0.64021
[ Valid | 187/500 ] loss = 1.09265, acc = 0.64021


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 188/500 ] loss = 0.89055, acc = 0.69555


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 188/500 ] loss = 1.25828, acc = 0.60114
[ Valid | 188/500 ] loss = 1.25828, acc = 0.60114


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 189/500 ] loss = 0.88369, acc = 0.69343


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 189/500 ] loss = 1.26634, acc = 0.60621
[ Valid | 189/500 ] loss = 1.26634, acc = 0.60621


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 190/500 ] loss = 0.90306, acc = 0.69205


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 190/500 ] loss = 1.15219, acc = 0.61770
[ Valid | 190/500 ] loss = 1.15219, acc = 0.61770


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 191/500 ] loss = 0.88255, acc = 0.69609


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 191/500 ] loss = 1.17810, acc = 0.62799
[ Valid | 191/500 ] loss = 1.17810, acc = 0.62799


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 192/500 ] loss = 0.88295, acc = 0.69211


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 192/500 ] loss = 1.14729, acc = 0.63638
[ Valid | 192/500 ] loss = 1.14729, acc = 0.63638


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 193/500 ] loss = 0.88557, acc = 0.69615


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 193/500 ] loss = 1.12283, acc = 0.63638
[ Valid | 193/500 ] loss = 1.12283, acc = 0.63638


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 194/500 ] loss = 0.88169, acc = 0.69291


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 194/500 ] loss = 1.11035, acc = 0.63319
[ Valid | 194/500 ] loss = 1.11035, acc = 0.63319


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 195/500 ] loss = 0.87549, acc = 0.70000


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 195/500 ] loss = 1.22154, acc = 0.61106
[ Valid | 195/500 ] loss = 1.22154, acc = 0.61106


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 196/500 ] loss = 0.88899, acc = 0.69559


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 196/500 ] loss = 1.09175, acc = 0.65142
[ Valid | 196/500 ] loss = 1.09175, acc = 0.65142


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 197/500 ] loss = 0.87370, acc = 0.69355


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 197/500 ] loss = 1.17721, acc = 0.60910
[ Valid | 197/500 ] loss = 1.17721, acc = 0.60910


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 198/500 ] loss = 0.88389, acc = 0.69954


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 198/500 ] loss = 1.14313, acc = 0.63486
[ Valid | 198/500 ] loss = 1.14313, acc = 0.63486


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 199/500 ] loss = 0.87500, acc = 0.70128


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 199/500 ] loss = 1.15720, acc = 0.62683
[ Valid | 199/500 ] loss = 1.15720, acc = 0.62683


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 200/500 ] loss = 0.87187, acc = 0.69722


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 200/500 ] loss = 1.13247, acc = 0.64571
[ Valid | 200/500 ] loss = 1.13247, acc = 0.64571


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 201/500 ] loss = 0.88335, acc = 0.69451


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 201/500 ] loss = 1.37451, acc = 0.57569
[ Valid | 201/500 ] loss = 1.37451, acc = 0.57569


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 202/500 ] loss = 0.86567, acc = 0.70517


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 202/500 ] loss = 1.33308, acc = 0.60222
[ Valid | 202/500 ] loss = 1.33308, acc = 0.60222


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 203/500 ] loss = 0.87944, acc = 0.69712


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 203/500 ] loss = 1.12401, acc = 0.63284
[ Valid | 203/500 ] loss = 1.12401, acc = 0.63284


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 204/500 ] loss = 0.86268, acc = 0.70645


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 204/500 ] loss = 1.30218, acc = 0.59015
[ Valid | 204/500 ] loss = 1.30218, acc = 0.59015


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 205/500 ] loss = 0.85162, acc = 0.70437


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 205/500 ] loss = 1.21007, acc = 0.61155
[ Valid | 205/500 ] loss = 1.21007, acc = 0.61155


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 206/500 ] loss = 0.85475, acc = 0.70457


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 206/500 ] loss = 1.10052, acc = 0.64304
[ Valid | 206/500 ] loss = 1.10052, acc = 0.64304


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 207/500 ] loss = 0.84281, acc = 0.70767


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 207/500 ] loss = 1.14937, acc = 0.63167
[ Valid | 207/500 ] loss = 1.14937, acc = 0.63167


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 208/500 ] loss = 0.85859, acc = 0.70541


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 208/500 ] loss = 1.12538, acc = 0.63406
[ Valid | 208/500 ] loss = 1.12538, acc = 0.63406


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 209/500 ] loss = 0.83673, acc = 0.70984


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 209/500 ] loss = 1.21262, acc = 0.61011
[ Valid | 209/500 ] loss = 1.21262, acc = 0.61011


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 210/500 ] loss = 0.85937, acc = 0.70000


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 210/500 ] loss = 1.11300, acc = 0.63926
[ Valid | 210/500 ] loss = 1.11300, acc = 0.63926


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 211/500 ] loss = 0.83699, acc = 0.70833


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 211/500 ] loss = 1.11966, acc = 0.63891
[ Valid | 211/500 ] loss = 1.11966, acc = 0.63891


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 212/500 ] loss = 0.85056, acc = 0.70799


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 212/500 ] loss = 1.18070, acc = 0.63154
[ Valid | 212/500 ] loss = 1.18070, acc = 0.63154


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 213/500 ] loss = 0.84151, acc = 0.70827


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 213/500 ] loss = 1.15467, acc = 0.62515
[ Valid | 213/500 ] loss = 1.15467, acc = 0.62515


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 214/500 ] loss = 0.83502, acc = 0.71795


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 214/500 ] loss = 1.21782, acc = 0.61699
[ Valid | 214/500 ] loss = 1.21782, acc = 0.61699


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 215/500 ] loss = 0.82374, acc = 0.71240


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 215/500 ] loss = 1.18151, acc = 0.63211
[ Valid | 215/500 ] loss = 1.18151, acc = 0.63211


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 216/500 ] loss = 0.85101, acc = 0.70323


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 216/500 ] loss = 1.17270, acc = 0.62980
[ Valid | 216/500 ] loss = 1.17270, acc = 0.62980


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 217/500 ] loss = 0.84506, acc = 0.70919


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 217/500 ] loss = 1.16799, acc = 0.63652
[ Valid | 217/500 ] loss = 1.16799, acc = 0.63652


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 218/500 ] loss = 0.83173, acc = 0.71160


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 218/500 ] loss = 1.19772, acc = 0.62654
[ Valid | 218/500 ] loss = 1.19772, acc = 0.62654


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 219/500 ] loss = 0.83652, acc = 0.71498


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 219/500 ] loss = 1.18352, acc = 0.63138
[ Valid | 219/500 ] loss = 1.18352, acc = 0.63138


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 220/500 ] loss = 0.83058, acc = 0.71230


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 220/500 ] loss = 1.17272, acc = 0.63116
[ Valid | 220/500 ] loss = 1.17272, acc = 0.63116


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 221/500 ] loss = 0.85436, acc = 0.69932


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 221/500 ] loss = 1.32519, acc = 0.58718
[ Valid | 221/500 ] loss = 1.32519, acc = 0.58718


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 222/500 ] loss = 0.82065, acc = 0.71907


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 222/500 ] loss = 1.15942, acc = 0.62487
[ Valid | 222/500 ] loss = 1.15942, acc = 0.62487


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 223/500 ] loss = 0.81222, acc = 0.72670


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 223/500 ] loss = 1.16227, acc = 0.63862
[ Valid | 223/500 ] loss = 1.16227, acc = 0.63862


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 224/500 ] loss = 0.81269, acc = 0.72222


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 224/500 ] loss = 1.08863, acc = 0.64991
[ Valid | 224/500 ] loss = 1.08863, acc = 0.64991


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 225/500 ] loss = 0.81923, acc = 0.72139


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 225/500 ] loss = 1.17898, acc = 0.63407
[ Valid | 225/500 ] loss = 1.17898, acc = 0.63407


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 226/500 ] loss = 0.84392, acc = 0.70909


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 226/500 ] loss = 1.12433, acc = 0.64123
[ Valid | 226/500 ] loss = 1.12433, acc = 0.64123


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 227/500 ] loss = 0.83418, acc = 0.71152


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 227/500 ] loss = 1.16493, acc = 0.63940
[ Valid | 227/500 ] loss = 1.16493, acc = 0.63940


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 228/500 ] loss = 0.82740, acc = 0.71617


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 228/500 ] loss = 1.14132, acc = 0.63412
[ Valid | 228/500 ] loss = 1.14132, acc = 0.63412


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 229/500 ] loss = 0.81633, acc = 0.71713


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 229/500 ] loss = 1.10367, acc = 0.65237
[ Valid | 229/500 ] loss = 1.10367, acc = 0.65237


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 230/500 ] loss = 0.83306, acc = 0.71665


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 230/500 ] loss = 1.20105, acc = 0.62474
[ Valid | 230/500 ] loss = 1.20105, acc = 0.62474


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 231/500 ] loss = 0.82911, acc = 0.71763


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 231/500 ] loss = 1.21859, acc = 0.62351
[ Valid | 231/500 ] loss = 1.21859, acc = 0.62351


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 232/500 ] loss = 0.81867, acc = 0.72208


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 232/500 ] loss = 1.27046, acc = 0.59760
[ Valid | 232/500 ] loss = 1.27046, acc = 0.59760


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 233/500 ] loss = 0.81318, acc = 0.72113


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 233/500 ] loss = 1.13537, acc = 0.63992
[ Valid | 233/500 ] loss = 1.13537, acc = 0.63992


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 234/500 ] loss = 0.82622, acc = 0.71190


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 234/500 ] loss = 1.25969, acc = 0.62632
[ Valid | 234/500 ] loss = 1.25969, acc = 0.62632


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 235/500 ] loss = 0.81363, acc = 0.71665


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 235/500 ] loss = 1.22850, acc = 0.61460
[ Valid | 235/500 ] loss = 1.22850, acc = 0.61460


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 236/500 ] loss = 0.81683, acc = 0.71326


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 236/500 ] loss = 1.29433, acc = 0.59680
[ Valid | 236/500 ] loss = 1.29433, acc = 0.59680


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 237/500 ] loss = 0.81871, acc = 0.71779


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 237/500 ] loss = 1.13503, acc = 0.64875
[ Valid | 237/500 ] loss = 1.13503, acc = 0.64875


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 238/500 ] loss = 0.79490, acc = 0.72520


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 238/500 ] loss = 1.13124, acc = 0.63651
[ Valid | 238/500 ] loss = 1.13124, acc = 0.63651


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 239/500 ] loss = 0.78941, acc = 0.72652


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 239/500 ] loss = 1.13816, acc = 0.63905
[ Valid | 239/500 ] loss = 1.13816, acc = 0.63905


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 240/500 ] loss = 0.77501, acc = 0.73135


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 240/500 ] loss = 1.09943, acc = 0.65795
[ Valid | 240/500 ] loss = 1.09943, acc = 0.65795


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 241/500 ] loss = 0.80364, acc = 0.72348


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 241/500 ] loss = 1.22026, acc = 0.61980
[ Valid | 241/500 ] loss = 1.22026, acc = 0.61980


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 242/500 ] loss = 0.81094, acc = 0.72139


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 242/500 ] loss = 1.15564, acc = 0.63717
[ Valid | 242/500 ] loss = 1.15564, acc = 0.63717


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 243/500 ] loss = 0.81185, acc = 0.72073


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 243/500 ] loss = 1.16506, acc = 0.63008
[ Valid | 243/500 ] loss = 1.16506, acc = 0.63008


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 244/500 ] loss = 0.79516, acc = 0.72899


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 244/500 ] loss = 1.16372, acc = 0.63197
[ Valid | 244/500 ] loss = 1.16372, acc = 0.63197


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 245/500 ] loss = 0.80340, acc = 0.72117


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 245/500 ] loss = 1.14543, acc = 0.63732
[ Valid | 245/500 ] loss = 1.14543, acc = 0.63732


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 246/500 ] loss = 0.80396, acc = 0.71867


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 246/500 ] loss = 1.08834, acc = 0.64803
[ Valid | 246/500 ] loss = 1.08834, acc = 0.64803


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 247/500 ] loss = 0.79986, acc = 0.72542


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 247/500 ] loss = 1.17025, acc = 0.63832
[ Valid | 247/500 ] loss = 1.17025, acc = 0.63832


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 248/500 ] loss = 0.79002, acc = 0.72700


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 248/500 ] loss = 1.24615, acc = 0.60816
[ Valid | 248/500 ] loss = 1.24615, acc = 0.60816


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 249/500 ] loss = 0.78910, acc = 0.72911


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 249/500 ] loss = 1.31807, acc = 0.59891
[ Valid | 249/500 ] loss = 1.31807, acc = 0.59891


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 250/500 ] loss = 0.78416, acc = 0.73041


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 250/500 ] loss = 1.18009, acc = 0.63109
[ Valid | 250/500 ] loss = 1.18009, acc = 0.63109


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 251/500 ] loss = 0.77236, acc = 0.73790


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 251/500 ] loss = 1.24614, acc = 0.62314
[ Valid | 251/500 ] loss = 1.24614, acc = 0.62314


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 252/500 ] loss = 0.79165, acc = 0.72518


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 252/500 ] loss = 1.38126, acc = 0.59550
[ Valid | 252/500 ] loss = 1.38126, acc = 0.59550


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 253/500 ] loss = 0.80378, acc = 0.72602


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 253/500 ] loss = 1.16798, acc = 0.64462
[ Valid | 253/500 ] loss = 1.16798, acc = 0.64462


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 254/500 ] loss = 0.79279, acc = 0.72268


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 254/500 ] loss = 1.11587, acc = 0.64752
[ Valid | 254/500 ] loss = 1.11587, acc = 0.64752


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 255/500 ] loss = 0.78973, acc = 0.72991


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 255/500 ] loss = 1.15465, acc = 0.64506
[ Valid | 255/500 ] loss = 1.15465, acc = 0.64506


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 256/500 ] loss = 0.79843, acc = 0.72582


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 256/500 ] loss = 1.18383, acc = 0.62995
[ Valid | 256/500 ] loss = 1.18383, acc = 0.62995


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 257/500 ] loss = 0.78175, acc = 0.73093


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 257/500 ] loss = 1.10157, acc = 0.65098
[ Valid | 257/500 ] loss = 1.10157, acc = 0.65098


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 258/500 ] loss = 0.76851, acc = 0.73560


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 258/500 ] loss = 1.16552, acc = 0.64238
[ Valid | 258/500 ] loss = 1.16552, acc = 0.64238


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 259/500 ] loss = 0.76395, acc = 0.73738


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 259/500 ] loss = 1.10119, acc = 0.64940
[ Valid | 259/500 ] loss = 1.10119, acc = 0.64940


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 260/500 ] loss = 0.77915, acc = 0.73051


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 260/500 ] loss = 1.13979, acc = 0.63406
[ Valid | 260/500 ] loss = 1.13979, acc = 0.63406


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 261/500 ] loss = 0.77660, acc = 0.73962


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 261/500 ] loss = 1.22049, acc = 0.61525
[ Valid | 261/500 ] loss = 1.22049, acc = 0.61525


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 262/500 ] loss = 0.78464, acc = 0.73085


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 262/500 ] loss = 1.14104, acc = 0.64035
[ Valid | 262/500 ] loss = 1.14104, acc = 0.64035


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 263/500 ] loss = 0.75439, acc = 0.73750


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 263/500 ] loss = 1.18457, acc = 0.63565
[ Valid | 263/500 ] loss = 1.18457, acc = 0.63565


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 264/500 ] loss = 0.78282, acc = 0.73197


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 264/500 ] loss = 1.23758, acc = 0.62379
[ Valid | 264/500 ] loss = 1.23758, acc = 0.62379


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 265/500 ] loss = 0.78164, acc = 0.72993


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 265/500 ] loss = 1.24536, acc = 0.62567
[ Valid | 265/500 ] loss = 1.24536, acc = 0.62567


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 266/500 ] loss = 0.79214, acc = 0.73147


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 266/500 ] loss = 1.18190, acc = 0.62799
[ Valid | 266/500 ] loss = 1.18190, acc = 0.62799


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 267/500 ] loss = 0.76919, acc = 0.73486


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 267/500 ] loss = 1.23432, acc = 0.62126
[ Valid | 267/500 ] loss = 1.23432, acc = 0.62126


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 268/500 ] loss = 0.77480, acc = 0.73229


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 268/500 ] loss = 1.17183, acc = 0.63565
[ Valid | 268/500 ] loss = 1.17183, acc = 0.63565


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 269/500 ] loss = 0.78453, acc = 0.72881


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 269/500 ] loss = 1.16777, acc = 0.64354
[ Valid | 269/500 ] loss = 1.16777, acc = 0.64354


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 270/500 ] loss = 0.77673, acc = 0.73205


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 270/500 ] loss = 1.26438, acc = 0.61048
[ Valid | 270/500 ] loss = 1.26438, acc = 0.61048


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 271/500 ] loss = 0.78256, acc = 0.73223


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 271/500 ] loss = 1.12557, acc = 0.64940
[ Valid | 271/500 ] loss = 1.12557, acc = 0.64940


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 272/500 ] loss = 0.77455, acc = 0.73303


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 272/500 ] loss = 1.14730, acc = 0.63833
[ Valid | 272/500 ] loss = 1.14730, acc = 0.63833


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 273/500 ] loss = 0.76972, acc = 0.73720


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 273/500 ] loss = 1.11794, acc = 0.63624
[ Valid | 273/500 ] loss = 1.11794, acc = 0.63624


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 274/500 ] loss = 0.75269, acc = 0.74089


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 274/500 ] loss = 1.17930, acc = 0.63082
[ Valid | 274/500 ] loss = 1.17930, acc = 0.63082


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 275/500 ] loss = 0.75721, acc = 0.73530


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 275/500 ] loss = 1.15175, acc = 0.63348
[ Valid | 275/500 ] loss = 1.15175, acc = 0.63348


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 276/500 ] loss = 0.74975, acc = 0.74543


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 276/500 ] loss = 1.16318, acc = 0.64101
[ Valid | 276/500 ] loss = 1.16318, acc = 0.64101


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 277/500 ] loss = 0.77504, acc = 0.73141


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 277/500 ] loss = 1.18786, acc = 0.63682
[ Valid | 277/500 ] loss = 1.18786, acc = 0.63682


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 278/500 ] loss = 0.76856, acc = 0.73534


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 278/500 ] loss = 1.13636, acc = 0.63247
[ Valid | 278/500 ] loss = 1.13636, acc = 0.63247


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 279/500 ] loss = 0.75493, acc = 0.73962


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 279/500 ] loss = 1.17930, acc = 0.63702
[ Valid | 279/500 ] loss = 1.17930, acc = 0.63702


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 280/500 ] loss = 0.76435, acc = 0.73347


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 280/500 ] loss = 1.08355, acc = 0.65918
[ Valid | 280/500 ] loss = 1.08355, acc = 0.65918 -> best
Best model found at epoch 279, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 281/500 ] loss = 0.75059, acc = 0.74131


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 281/500 ] loss = 1.21744, acc = 0.62321
[ Valid | 281/500 ] loss = 1.21744, acc = 0.62321


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 282/500 ] loss = 0.74890, acc = 0.74065


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 282/500 ] loss = 1.19135, acc = 0.63724
[ Valid | 282/500 ] loss = 1.19135, acc = 0.63724


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 283/500 ] loss = 0.75433, acc = 0.73806


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 283/500 ] loss = 1.09690, acc = 0.65207
[ Valid | 283/500 ] loss = 1.09690, acc = 0.65207


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 284/500 ] loss = 0.73020, acc = 0.74952


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 284/500 ] loss = 1.15887, acc = 0.64114
[ Valid | 284/500 ] loss = 1.15887, acc = 0.64114


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 285/500 ] loss = 0.72888, acc = 0.74643


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 285/500 ] loss = 1.24125, acc = 0.61604
[ Valid | 285/500 ] loss = 1.24125, acc = 0.61604


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 286/500 ] loss = 0.75731, acc = 0.73444


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 286/500 ] loss = 1.16191, acc = 0.64788
[ Valid | 286/500 ] loss = 1.16191, acc = 0.64788


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 287/500 ] loss = 0.73974, acc = 0.74289


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 287/500 ] loss = 1.21711, acc = 0.62640
[ Valid | 287/500 ] loss = 1.21711, acc = 0.62640


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 288/500 ] loss = 0.74982, acc = 0.74804


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 288/500 ] loss = 1.20363, acc = 0.62964
[ Valid | 288/500 ] loss = 1.20363, acc = 0.62964


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 289/500 ] loss = 0.73454, acc = 0.74956


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 289/500 ] loss = 1.11616, acc = 0.64714
[ Valid | 289/500 ] loss = 1.11616, acc = 0.64714


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 290/500 ] loss = 0.75066, acc = 0.73904


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 290/500 ] loss = 1.15239, acc = 0.63992
[ Valid | 290/500 ] loss = 1.15239, acc = 0.63992


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 291/500 ] loss = 0.74902, acc = 0.74249


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 291/500 ] loss = 1.12970, acc = 0.65396
[ Valid | 291/500 ] loss = 1.12970, acc = 0.65396


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 292/500 ] loss = 0.74615, acc = 0.73918


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 292/500 ] loss = 1.23881, acc = 0.62684
[ Valid | 292/500 ] loss = 1.23881, acc = 0.62684


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 293/500 ] loss = 0.73339, acc = 0.74443


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 293/500 ] loss = 1.15479, acc = 0.64383
[ Valid | 293/500 ] loss = 1.15479, acc = 0.64383


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 294/500 ] loss = 0.73503, acc = 0.74954


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 294/500 ] loss = 1.13078, acc = 0.64852
[ Valid | 294/500 ] loss = 1.13078, acc = 0.64852


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 295/500 ] loss = 0.72379, acc = 0.75174


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 295/500 ] loss = 1.13580, acc = 0.64238
[ Valid | 295/500 ] loss = 1.13580, acc = 0.64238


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 296/500 ] loss = 0.73929, acc = 0.74465


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 296/500 ] loss = 1.18513, acc = 0.64940
[ Valid | 296/500 ] loss = 1.18513, acc = 0.64940


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 297/500 ] loss = 0.73916, acc = 0.74696


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 297/500 ] loss = 1.12382, acc = 0.66039
[ Valid | 297/500 ] loss = 1.12382, acc = 0.66039 -> best
Best model found at epoch 296, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 298/500 ] loss = 0.74575, acc = 0.74079


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 298/500 ] loss = 1.14388, acc = 0.64384
[ Valid | 298/500 ] loss = 1.14388, acc = 0.64384


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 299/500 ] loss = 0.71627, acc = 0.75054


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 299/500 ] loss = 1.11154, acc = 0.65179
[ Valid | 299/500 ] loss = 1.11154, acc = 0.65179


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 300/500 ] loss = 0.70616, acc = 0.75505


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 300/500 ] loss = 1.12231, acc = 0.65706
[ Valid | 300/500 ] loss = 1.12231, acc = 0.65706


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 301/500 ] loss = 0.72623, acc = 0.74744


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 301/500 ] loss = 1.23797, acc = 0.62987
[ Valid | 301/500 ] loss = 1.23797, acc = 0.62987


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 302/500 ] loss = 0.73603, acc = 0.74966


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 302/500 ] loss = 1.17800, acc = 0.63145
[ Valid | 302/500 ] loss = 1.17800, acc = 0.63145


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 303/500 ] loss = 0.72499, acc = 0.75096


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 303/500 ] loss = 1.30573, acc = 0.60411
[ Valid | 303/500 ] loss = 1.30573, acc = 0.60411


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 304/500 ] loss = 0.73214, acc = 0.74830


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 304/500 ] loss = 1.17183, acc = 0.64347
[ Valid | 304/500 ] loss = 1.17183, acc = 0.64347


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 305/500 ] loss = 0.73880, acc = 0.74453


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 305/500 ] loss = 1.23744, acc = 0.62770
[ Valid | 305/500 ] loss = 1.23744, acc = 0.62770


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 306/500 ] loss = 0.70643, acc = 0.75637


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 306/500 ] loss = 1.31236, acc = 0.61770
[ Valid | 306/500 ] loss = 1.31236, acc = 0.61770


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 307/500 ] loss = 0.75218, acc = 0.74431


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 307/500 ] loss = 1.18904, acc = 0.63768
[ Valid | 307/500 ] loss = 1.18904, acc = 0.63768


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 308/500 ] loss = 0.73090, acc = 0.75186


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 308/500 ] loss = 1.17504, acc = 0.64273
[ Valid | 308/500 ] loss = 1.17504, acc = 0.64273


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 309/500 ] loss = 0.74499, acc = 0.73716


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 309/500 ] loss = 1.18466, acc = 0.63746
[ Valid | 309/500 ] loss = 1.18466, acc = 0.63746


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 310/500 ] loss = 0.74305, acc = 0.74677


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 310/500 ] loss = 1.24543, acc = 0.62596
[ Valid | 310/500 ] loss = 1.24543, acc = 0.62596


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 311/500 ] loss = 0.72494, acc = 0.74856


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 311/500 ] loss = 1.10579, acc = 0.65337
[ Valid | 311/500 ] loss = 1.10579, acc = 0.65337


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 312/500 ] loss = 0.73106, acc = 0.74842


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 312/500 ] loss = 1.11628, acc = 0.64898
[ Valid | 312/500 ] loss = 1.11628, acc = 0.64898


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 313/500 ] loss = 0.72066, acc = 0.75258


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 313/500 ] loss = 1.13026, acc = 0.64634
[ Valid | 313/500 ] loss = 1.13026, acc = 0.64634


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 314/500 ] loss = 0.71926, acc = 0.75465


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 314/500 ] loss = 1.22343, acc = 0.62569
[ Valid | 314/500 ] loss = 1.22343, acc = 0.62569


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 315/500 ] loss = 0.70965, acc = 0.75282


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 315/500 ] loss = 1.11152, acc = 0.65750
[ Valid | 315/500 ] loss = 1.11152, acc = 0.65750


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 316/500 ] loss = 0.71684, acc = 0.75160


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 316/500 ] loss = 1.29092, acc = 0.60743
[ Valid | 316/500 ] loss = 1.29092, acc = 0.60743


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 317/500 ] loss = 0.74383, acc = 0.74279


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 317/500 ] loss = 1.11594, acc = 0.64767
[ Valid | 317/500 ] loss = 1.11594, acc = 0.64767


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 318/500 ] loss = 0.71176, acc = 0.75124


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 318/500 ] loss = 1.10521, acc = 0.65889
[ Valid | 318/500 ] loss = 1.10521, acc = 0.65889


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 319/500 ] loss = 0.71258, acc = 0.75136


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 319/500 ] loss = 1.10961, acc = 0.65221
[ Valid | 319/500 ] loss = 1.10961, acc = 0.65221


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 320/500 ] loss = 0.70659, acc = 0.75351


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 320/500 ] loss = 1.34479, acc = 0.61338
[ Valid | 320/500 ] loss = 1.34479, acc = 0.61338


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 321/500 ] loss = 0.72457, acc = 0.74643


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 321/500 ] loss = 1.19072, acc = 0.62754
[ Valid | 321/500 ] loss = 1.19072, acc = 0.62754


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 322/500 ] loss = 0.72080, acc = 0.75008


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 322/500 ] loss = 1.14351, acc = 0.64845
[ Valid | 322/500 ] loss = 1.14351, acc = 0.64845


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 323/500 ] loss = 0.70259, acc = 0.75627


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 323/500 ] loss = 1.09927, acc = 0.65757
[ Valid | 323/500 ] loss = 1.09927, acc = 0.65757


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 324/500 ] loss = 0.69965, acc = 0.75895


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 324/500 ] loss = 1.15418, acc = 0.63891
[ Valid | 324/500 ] loss = 1.15418, acc = 0.63891


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 325/500 ] loss = 0.70177, acc = 0.76164


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 325/500 ] loss = 1.28740, acc = 0.62524
[ Valid | 325/500 ] loss = 1.28740, acc = 0.62524


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 326/500 ] loss = 0.68707, acc = 0.75577


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 326/500 ] loss = 1.14387, acc = 0.65171
[ Valid | 326/500 ] loss = 1.14387, acc = 0.65171


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 327/500 ] loss = 0.70325, acc = 0.75208


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 327/500 ] loss = 1.24110, acc = 0.62162
[ Valid | 327/500 ] loss = 1.24110, acc = 0.62162


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 328/500 ] loss = 0.69542, acc = 0.76154


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 328/500 ] loss = 1.13278, acc = 0.65013
[ Valid | 328/500 ] loss = 1.13278, acc = 0.65013


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 329/500 ] loss = 0.68707, acc = 0.76334


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 329/500 ] loss = 1.12039, acc = 0.66336
[ Valid | 329/500 ] loss = 1.12039, acc = 0.66336 -> best
Best model found at epoch 328, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 330/500 ] loss = 0.70324, acc = 0.75699


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 330/500 ] loss = 1.14310, acc = 0.65288
[ Valid | 330/500 ] loss = 1.14310, acc = 0.65288


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 331/500 ] loss = 0.70564, acc = 0.75060


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 331/500 ] loss = 1.15181, acc = 0.65005
[ Valid | 331/500 ] loss = 1.15181, acc = 0.65005


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 332/500 ] loss = 0.71819, acc = 0.75325


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 332/500 ] loss = 1.12683, acc = 0.65092
[ Valid | 332/500 ] loss = 1.12683, acc = 0.65092


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 333/500 ] loss = 0.70867, acc = 0.75915


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 333/500 ] loss = 1.16806, acc = 0.63790
[ Valid | 333/500 ] loss = 1.16806, acc = 0.63790


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 334/500 ] loss = 0.70372, acc = 0.75657


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 334/500 ] loss = 1.21649, acc = 0.63601
[ Valid | 334/500 ] loss = 1.21649, acc = 0.63601


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 335/500 ] loss = 0.69498, acc = 0.76340


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 335/500 ] loss = 1.15918, acc = 0.64650
[ Valid | 335/500 ] loss = 1.15918, acc = 0.64650


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 336/500 ] loss = 0.71244, acc = 0.75593


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 336/500 ] loss = 1.12718, acc = 0.64174
[ Valid | 336/500 ] loss = 1.12718, acc = 0.64174


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 337/500 ] loss = 0.68735, acc = 0.76593


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 337/500 ] loss = 1.21360, acc = 0.64144
[ Valid | 337/500 ] loss = 1.21360, acc = 0.64144


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 338/500 ] loss = 0.69840, acc = 0.75667


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 338/500 ] loss = 1.16739, acc = 0.64803
[ Valid | 338/500 ] loss = 1.16739, acc = 0.64803


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 339/500 ] loss = 0.69017, acc = 0.75980


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 339/500 ] loss = 1.11619, acc = 0.65960
[ Valid | 339/500 ] loss = 1.11619, acc = 0.65960


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 340/500 ] loss = 0.68432, acc = 0.76595


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 340/500 ] loss = 1.16472, acc = 0.63703
[ Valid | 340/500 ] loss = 1.16472, acc = 0.63703


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 341/500 ] loss = 0.69161, acc = 0.76472


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 341/500 ] loss = 1.20751, acc = 0.63768
[ Valid | 341/500 ] loss = 1.20751, acc = 0.63768


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 342/500 ] loss = 0.72875, acc = 0.74956


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 342/500 ] loss = 1.10666, acc = 0.65691
[ Valid | 342/500 ] loss = 1.10666, acc = 0.65691


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 343/500 ] loss = 0.69527, acc = 0.75885


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 343/500 ] loss = 1.09422, acc = 0.66149
[ Valid | 343/500 ] loss = 1.09422, acc = 0.66149


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 344/500 ] loss = 0.68164, acc = 0.76292


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 344/500 ] loss = 1.12580, acc = 0.65954
[ Valid | 344/500 ] loss = 1.12580, acc = 0.65954


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 345/500 ] loss = 0.67638, acc = 0.76607


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 345/500 ] loss = 1.18945, acc = 0.64094
[ Valid | 345/500 ] loss = 1.18945, acc = 0.64094


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 346/500 ] loss = 0.68146, acc = 0.76448


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 346/500 ] loss = 1.09538, acc = 0.65331
[ Valid | 346/500 ] loss = 1.09538, acc = 0.65331


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 347/500 ] loss = 0.67429, acc = 0.76420


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 347/500 ] loss = 1.12411, acc = 0.65816
[ Valid | 347/500 ] loss = 1.12411, acc = 0.65816


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 348/500 ] loss = 0.67159, acc = 0.76905


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 348/500 ] loss = 1.11602, acc = 0.65310
[ Valid | 348/500 ] loss = 1.11602, acc = 0.65310


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 349/500 ] loss = 0.67981, acc = 0.76366


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 349/500 ] loss = 1.17238, acc = 0.64542
[ Valid | 349/500 ] loss = 1.17238, acc = 0.64542


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 350/500 ] loss = 0.67825, acc = 0.76767


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 350/500 ] loss = 1.12589, acc = 0.66553
[ Valid | 350/500 ] loss = 1.12589, acc = 0.66553 -> best
Best model found at epoch 349, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 351/500 ] loss = 0.66005, acc = 0.77270


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 351/500 ] loss = 1.21586, acc = 0.63927
[ Valid | 351/500 ] loss = 1.21586, acc = 0.63927


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 352/500 ] loss = 0.66582, acc = 0.77370


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 352/500 ] loss = 1.12616, acc = 0.65845
[ Valid | 352/500 ] loss = 1.12616, acc = 0.65845


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 353/500 ] loss = 0.66814, acc = 0.76140


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 353/500 ] loss = 1.10732, acc = 0.66453
[ Valid | 353/500 ] loss = 1.10732, acc = 0.66453


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 354/500 ] loss = 0.71160, acc = 0.76308


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 354/500 ] loss = 1.19470, acc = 0.62907
[ Valid | 354/500 ] loss = 1.19470, acc = 0.62907


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 355/500 ] loss = 0.68228, acc = 0.76599


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 355/500 ] loss = 1.09813, acc = 0.65815
[ Valid | 355/500 ] loss = 1.09813, acc = 0.65815


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 356/500 ] loss = 0.67457, acc = 0.76657


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 356/500 ] loss = 1.20391, acc = 0.64708
[ Valid | 356/500 ] loss = 1.20391, acc = 0.64708


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 357/500 ] loss = 0.67810, acc = 0.76288


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 357/500 ] loss = 1.09545, acc = 0.66531
[ Valid | 357/500 ] loss = 1.09545, acc = 0.66531


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 358/500 ] loss = 0.67519, acc = 0.76791


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 358/500 ] loss = 1.24575, acc = 0.64586
[ Valid | 358/500 ] loss = 1.24575, acc = 0.64586


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 359/500 ] loss = 0.68286, acc = 0.75982


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 359/500 ] loss = 1.25484, acc = 0.61011
[ Valid | 359/500 ] loss = 1.25484, acc = 0.61011


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 360/500 ] loss = 0.68670, acc = 0.76090


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 360/500 ] loss = 1.26288, acc = 0.62364
[ Valid | 360/500 ] loss = 1.26288, acc = 0.62364


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 361/500 ] loss = 0.66704, acc = 0.76999


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 361/500 ] loss = 1.35198, acc = 0.62848
[ Valid | 361/500 ] loss = 1.35198, acc = 0.62848


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 362/500 ] loss = 0.69300, acc = 0.76516


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 362/500 ] loss = 1.15020, acc = 0.64918
[ Valid | 362/500 ] loss = 1.15020, acc = 0.64918


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 363/500 ] loss = 0.66703, acc = 0.76997


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 363/500 ] loss = 1.19891, acc = 0.65786
[ Valid | 363/500 ] loss = 1.19891, acc = 0.65786


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 364/500 ] loss = 0.66312, acc = 0.77330


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 364/500 ] loss = 1.15489, acc = 0.65519
[ Valid | 364/500 ] loss = 1.15489, acc = 0.65519


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 365/500 ] loss = 0.66761, acc = 0.76861


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 365/500 ] loss = 1.17397, acc = 0.65265
[ Valid | 365/500 ] loss = 1.17397, acc = 0.65265


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 366/500 ] loss = 0.66450, acc = 0.77280


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 366/500 ] loss = 1.14033, acc = 0.65974
[ Valid | 366/500 ] loss = 1.14033, acc = 0.65974


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 367/500 ] loss = 0.67319, acc = 0.77314


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 367/500 ] loss = 1.30904, acc = 0.62053
[ Valid | 367/500 ] loss = 1.30904, acc = 0.62053


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 368/500 ] loss = 0.67397, acc = 0.76811


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 368/500 ] loss = 1.15428, acc = 0.65656
[ Valid | 368/500 ] loss = 1.15428, acc = 0.65656


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 369/500 ] loss = 0.70658, acc = 0.75849


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 369/500 ] loss = 1.30861, acc = 0.63102
[ Valid | 369/500 ] loss = 1.30861, acc = 0.63102


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 370/500 ] loss = 0.68126, acc = 0.75907


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 370/500 ] loss = 1.17683, acc = 0.64180
[ Valid | 370/500 ] loss = 1.17683, acc = 0.64180


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 371/500 ] loss = 0.69345, acc = 0.75942


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 371/500 ] loss = 1.15202, acc = 0.65997
[ Valid | 371/500 ] loss = 1.15202, acc = 0.65997


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 372/500 ] loss = 0.66357, acc = 0.77099


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 372/500 ] loss = 1.16248, acc = 0.65287
[ Valid | 372/500 ] loss = 1.16248, acc = 0.65287


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 373/500 ] loss = 0.65666, acc = 0.77107


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 373/500 ] loss = 1.17048, acc = 0.65417
[ Valid | 373/500 ] loss = 1.17048, acc = 0.65417


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 374/500 ] loss = 0.65744, acc = 0.77210


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 374/500 ] loss = 1.28230, acc = 0.61511
[ Valid | 374/500 ] loss = 1.28230, acc = 0.61511


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 375/500 ] loss = 0.64595, acc = 0.77540


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 375/500 ] loss = 1.15916, acc = 0.66357
[ Valid | 375/500 ] loss = 1.15916, acc = 0.66357


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 376/500 ] loss = 0.64032, acc = 0.77686


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 376/500 ] loss = 1.12324, acc = 0.66365
[ Valid | 376/500 ] loss = 1.12324, acc = 0.66365


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 377/500 ] loss = 0.67005, acc = 0.77294


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 377/500 ] loss = 1.13276, acc = 0.64389
[ Valid | 377/500 ] loss = 1.13276, acc = 0.64389


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 378/500 ] loss = 0.67794, acc = 0.76719


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 378/500 ] loss = 1.21517, acc = 0.64144
[ Valid | 378/500 ] loss = 1.21517, acc = 0.64144


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 379/500 ] loss = 0.67744, acc = 0.76454


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 379/500 ] loss = 1.24169, acc = 0.62124
[ Valid | 379/500 ] loss = 1.24169, acc = 0.62124


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 380/500 ] loss = 0.67295, acc = 0.76456


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 380/500 ] loss = 1.16022, acc = 0.64513
[ Valid | 380/500 ] loss = 1.16022, acc = 0.64513


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 381/500 ] loss = 0.65168, acc = 0.77532


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 381/500 ] loss = 1.20712, acc = 0.63680
[ Valid | 381/500 ] loss = 1.20712, acc = 0.63680


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 382/500 ] loss = 0.66693, acc = 0.76635


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 382/500 ] loss = 1.12253, acc = 0.66162
[ Valid | 382/500 ] loss = 1.12253, acc = 0.66162


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 383/500 ] loss = 0.65624, acc = 0.77430


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 383/500 ] loss = 1.24199, acc = 0.63659
[ Valid | 383/500 ] loss = 1.24199, acc = 0.63659


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 384/500 ] loss = 0.65417, acc = 0.77318


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 384/500 ] loss = 1.13717, acc = 0.66104
[ Valid | 384/500 ] loss = 1.13717, acc = 0.66104


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 385/500 ] loss = 0.64303, acc = 0.78117


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 385/500 ] loss = 1.17554, acc = 0.65583
[ Valid | 385/500 ] loss = 1.17554, acc = 0.65583


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 386/500 ] loss = 0.62983, acc = 0.78588


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 386/500 ] loss = 1.29711, acc = 0.62509
[ Valid | 386/500 ] loss = 1.29711, acc = 0.62509


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 387/500 ] loss = 0.63884, acc = 0.77744


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 387/500 ] loss = 1.33485, acc = 0.62936
[ Valid | 387/500 ] loss = 1.33485, acc = 0.62936


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 388/500 ] loss = 0.65331, acc = 0.77460


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 388/500 ] loss = 1.06598, acc = 0.66894
[ Valid | 388/500 ] loss = 1.06598, acc = 0.66894 -> best
Best model found at epoch 387, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 389/500 ] loss = 0.65263, acc = 0.77218


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 389/500 ] loss = 1.18673, acc = 0.64390
[ Valid | 389/500 ] loss = 1.18673, acc = 0.64390


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 390/500 ] loss = 0.63857, acc = 0.77728


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 390/500 ] loss = 1.22454, acc = 0.64506
[ Valid | 390/500 ] loss = 1.22454, acc = 0.64506


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 391/500 ] loss = 0.64705, acc = 0.77410


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 391/500 ] loss = 1.17650, acc = 0.64563
[ Valid | 391/500 ] loss = 1.17650, acc = 0.64563


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 392/500 ] loss = 0.66179, acc = 0.77566


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 392/500 ] loss = 1.20391, acc = 0.63146
[ Valid | 392/500 ] loss = 1.20391, acc = 0.63146


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 393/500 ] loss = 0.63873, acc = 0.77634


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 393/500 ] loss = 1.12090, acc = 0.66574
[ Valid | 393/500 ] loss = 1.12090, acc = 0.66574


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 394/500 ] loss = 0.62924, acc = 0.78231


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 394/500 ] loss = 1.12278, acc = 0.65150
[ Valid | 394/500 ] loss = 1.12278, acc = 0.65150


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 395/500 ] loss = 0.63510, acc = 0.78161


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 395/500 ] loss = 1.25754, acc = 0.61980
[ Valid | 395/500 ] loss = 1.25754, acc = 0.61980


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 396/500 ] loss = 0.63197, acc = 0.78568


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 396/500 ] loss = 1.10978, acc = 0.65229
[ Valid | 396/500 ] loss = 1.10978, acc = 0.65229


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 397/500 ] loss = 0.62290, acc = 0.78472


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 397/500 ] loss = 1.21837, acc = 0.63595
[ Valid | 397/500 ] loss = 1.21837, acc = 0.63595


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 398/500 ] loss = 0.65902, acc = 0.77881


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 398/500 ] loss = 1.14421, acc = 0.65374
[ Valid | 398/500 ] loss = 1.14421, acc = 0.65374


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 399/500 ] loss = 0.63975, acc = 0.77710


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 399/500 ] loss = 1.17117, acc = 0.64592
[ Valid | 399/500 ] loss = 1.17117, acc = 0.64592


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 400/500 ] loss = 0.63437, acc = 0.78039


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 400/500 ] loss = 1.15117, acc = 0.66083
[ Valid | 400/500 ] loss = 1.15117, acc = 0.66083


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 401/500 ] loss = 0.65142, acc = 0.77422


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 401/500 ] loss = 1.36240, acc = 0.58689
[ Valid | 401/500 ] loss = 1.36240, acc = 0.58689


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 402/500 ] loss = 0.63483, acc = 0.78157


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 402/500 ] loss = 1.17356, acc = 0.64897
[ Valid | 402/500 ] loss = 1.17356, acc = 0.64897


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 403/500 ] loss = 0.63496, acc = 0.77612


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 403/500 ] loss = 1.29738, acc = 0.63197
[ Valid | 403/500 ] loss = 1.29738, acc = 0.63197


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 404/500 ] loss = 0.64314, acc = 0.77861


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 404/500 ] loss = 1.13266, acc = 0.67161
[ Valid | 404/500 ] loss = 1.13266, acc = 0.67161 -> best
Best model found at epoch 403, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 405/500 ] loss = 0.63281, acc = 0.78564


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 405/500 ] loss = 1.10477, acc = 0.66163
[ Valid | 405/500 ] loss = 1.10477, acc = 0.66163


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 406/500 ] loss = 0.63773, acc = 0.78267


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 406/500 ] loss = 1.27112, acc = 0.64078
[ Valid | 406/500 ] loss = 1.27112, acc = 0.64078


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 407/500 ] loss = 0.62987, acc = 0.78243


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 407/500 ] loss = 1.17607, acc = 0.65048
[ Valid | 407/500 ] loss = 1.17607, acc = 0.65048


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 408/500 ] loss = 0.63928, acc = 0.77867


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 408/500 ] loss = 1.16772, acc = 0.64882
[ Valid | 408/500 ] loss = 1.16772, acc = 0.64882


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 409/500 ] loss = 0.61919, acc = 0.78600


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 409/500 ] loss = 1.24639, acc = 0.64512
[ Valid | 409/500 ] loss = 1.24639, acc = 0.64512


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 410/500 ] loss = 0.63286, acc = 0.77985


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 410/500 ] loss = 1.21140, acc = 0.65692
[ Valid | 410/500 ] loss = 1.21140, acc = 0.65692


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 411/500 ] loss = 0.60803, acc = 0.78968


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 411/500 ] loss = 1.17345, acc = 0.64773
[ Valid | 411/500 ] loss = 1.17345, acc = 0.64773


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 412/500 ] loss = 0.65060, acc = 0.77660


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 412/500 ] loss = 1.15761, acc = 0.65823
[ Valid | 412/500 ] loss = 1.15761, acc = 0.65823


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 413/500 ] loss = 0.62375, acc = 0.78247


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 413/500 ] loss = 1.16075, acc = 0.65006
[ Valid | 413/500 ] loss = 1.16075, acc = 0.65006


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 414/500 ] loss = 0.63163, acc = 0.78269


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 414/500 ] loss = 1.23347, acc = 0.64109
[ Valid | 414/500 ] loss = 1.23347, acc = 0.64109


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 415/500 ] loss = 0.62371, acc = 0.78193


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 415/500 ] loss = 1.10522, acc = 0.65922
[ Valid | 415/500 ] loss = 1.10522, acc = 0.65922


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 416/500 ] loss = 0.63060, acc = 0.78470


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 416/500 ] loss = 1.12575, acc = 0.66214
[ Valid | 416/500 ] loss = 1.12575, acc = 0.66214


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 417/500 ] loss = 0.63852, acc = 0.77730


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 417/500 ] loss = 1.17204, acc = 0.65281
[ Valid | 417/500 ] loss = 1.17204, acc = 0.65281


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 418/500 ] loss = 0.65292, acc = 0.77708


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 418/500 ] loss = 1.22722, acc = 0.63392
[ Valid | 418/500 ] loss = 1.22722, acc = 0.63392


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 419/500 ] loss = 0.63786, acc = 0.78145


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 419/500 ] loss = 1.10508, acc = 0.65780
[ Valid | 419/500 ] loss = 1.10508, acc = 0.65780


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 420/500 ] loss = 0.61627, acc = 0.78470


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 420/500 ] loss = 1.15524, acc = 0.65462
[ Valid | 420/500 ] loss = 1.15524, acc = 0.65462


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 421/500 ] loss = 0.62688, acc = 0.78397


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 421/500 ] loss = 1.27873, acc = 0.63175
[ Valid | 421/500 ] loss = 1.27873, acc = 0.63175


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 422/500 ] loss = 0.63332, acc = 0.78159


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 422/500 ] loss = 1.24903, acc = 0.64043
[ Valid | 422/500 ] loss = 1.24903, acc = 0.64043


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 423/500 ] loss = 0.63346, acc = 0.77528


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 423/500 ] loss = 1.17099, acc = 0.67060
[ Valid | 423/500 ] loss = 1.17099, acc = 0.67060


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 424/500 ] loss = 0.62637, acc = 0.78117


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 424/500 ] loss = 1.15349, acc = 0.65142
[ Valid | 424/500 ] loss = 1.15349, acc = 0.65142


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 425/500 ] loss = 0.60963, acc = 0.78938


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 425/500 ] loss = 1.15828, acc = 0.65425
[ Valid | 425/500 ] loss = 1.15828, acc = 0.65425


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 426/500 ] loss = 0.60636, acc = 0.79359


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 426/500 ] loss = 1.15779, acc = 0.65939
[ Valid | 426/500 ] loss = 1.15779, acc = 0.65939


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 427/500 ] loss = 0.60280, acc = 0.78988


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 427/500 ] loss = 1.13234, acc = 0.66048
[ Valid | 427/500 ] loss = 1.13234, acc = 0.66048


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 428/500 ] loss = 0.62114, acc = 0.78788


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 428/500 ] loss = 1.20253, acc = 0.65461
[ Valid | 428/500 ] loss = 1.20253, acc = 0.65461


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 429/500 ] loss = 0.59368, acc = 0.79058


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 429/500 ] loss = 1.12806, acc = 0.66416
[ Valid | 429/500 ] loss = 1.12806, acc = 0.66416


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 430/500 ] loss = 0.62195, acc = 0.78838


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 430/500 ] loss = 1.24536, acc = 0.64614
[ Valid | 430/500 ] loss = 1.24536, acc = 0.64614


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 431/500 ] loss = 0.61508, acc = 0.78271


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 431/500 ] loss = 1.19084, acc = 0.65532
[ Valid | 431/500 ] loss = 1.19084, acc = 0.65532


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 432/500 ] loss = 0.59994, acc = 0.79111


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 432/500 ] loss = 1.19577, acc = 0.65071
[ Valid | 432/500 ] loss = 1.19577, acc = 0.65071


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 433/500 ] loss = 0.61537, acc = 0.78512


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 433/500 ] loss = 1.15506, acc = 0.66690
[ Valid | 433/500 ] loss = 1.15506, acc = 0.66690


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 434/500 ] loss = 0.60339, acc = 0.78932


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 434/500 ] loss = 1.15180, acc = 0.66367
[ Valid | 434/500 ] loss = 1.15180, acc = 0.66367


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 435/500 ] loss = 0.63350, acc = 0.78744


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 435/500 ] loss = 1.17373, acc = 0.64810
[ Valid | 435/500 ] loss = 1.17373, acc = 0.64810


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 436/500 ] loss = 0.62365, acc = 0.78421


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 436/500 ] loss = 1.22177, acc = 0.65672
[ Valid | 436/500 ] loss = 1.22177, acc = 0.65672


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 437/500 ] loss = 0.60913, acc = 0.78514


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 437/500 ] loss = 1.20595, acc = 0.65026
[ Valid | 437/500 ] loss = 1.20595, acc = 0.65026


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 438/500 ] loss = 0.60887, acc = 0.78534


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 438/500 ] loss = 1.20062, acc = 0.64535
[ Valid | 438/500 ] loss = 1.20062, acc = 0.64535


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 439/500 ] loss = 0.60565, acc = 0.78762


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 439/500 ] loss = 1.24117, acc = 0.63159
[ Valid | 439/500 ] loss = 1.24117, acc = 0.63159


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 440/500 ] loss = 0.61575, acc = 0.78401


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 440/500 ] loss = 1.36920, acc = 0.61308
[ Valid | 440/500 ] loss = 1.36920, acc = 0.61308


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 441/500 ] loss = 0.60507, acc = 0.78950


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 441/500 ] loss = 1.11381, acc = 0.66873
[ Valid | 441/500 ] loss = 1.11381, acc = 0.66873


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 442/500 ] loss = 0.60657, acc = 0.79409


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 442/500 ] loss = 1.26106, acc = 0.63746
[ Valid | 442/500 ] loss = 1.26106, acc = 0.63746


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 443/500 ] loss = 0.61732, acc = 0.78706


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 443/500 ] loss = 1.20542, acc = 0.65403
[ Valid | 443/500 ] loss = 1.20542, acc = 0.65403


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 444/500 ] loss = 0.64343, acc = 0.77542


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 444/500 ] loss = 1.21812, acc = 0.64672
[ Valid | 444/500 ] loss = 1.21812, acc = 0.64672


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 445/500 ] loss = 0.62233, acc = 0.78429


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 445/500 ] loss = 1.16543, acc = 0.66156
[ Valid | 445/500 ] loss = 1.16543, acc = 0.66156


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 446/500 ] loss = 0.61547, acc = 0.78870


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 446/500 ] loss = 1.16251, acc = 0.65721
[ Valid | 446/500 ] loss = 1.16251, acc = 0.65721


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 447/500 ] loss = 0.62776, acc = 0.77953


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 447/500 ] loss = 1.17549, acc = 0.66227
[ Valid | 447/500 ] loss = 1.17549, acc = 0.66227


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 448/500 ] loss = 0.59812, acc = 0.78816


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 448/500 ] loss = 1.17297, acc = 0.65931
[ Valid | 448/500 ] loss = 1.17297, acc = 0.65931


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 449/500 ] loss = 0.61543, acc = 0.78864


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 449/500 ] loss = 1.19012, acc = 0.65063
[ Valid | 449/500 ] loss = 1.19012, acc = 0.65063


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 450/500 ] loss = 0.62954, acc = 0.78053


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 450/500 ] loss = 1.28435, acc = 0.63226
[ Valid | 450/500 ] loss = 1.28435, acc = 0.63226


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 451/500 ] loss = 0.60609, acc = 0.78968


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 451/500 ] loss = 1.17520, acc = 0.64838
[ Valid | 451/500 ] loss = 1.17520, acc = 0.64838


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 452/500 ] loss = 0.59919, acc = 0.79335


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 452/500 ] loss = 1.18464, acc = 0.65042
[ Valid | 452/500 ] loss = 1.18464, acc = 0.65042


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 453/500 ] loss = 0.60493, acc = 0.79269


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 453/500 ] loss = 1.15058, acc = 0.65201
[ Valid | 453/500 ] loss = 1.15058, acc = 0.65201


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 454/500 ] loss = 0.59754, acc = 0.79363


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 454/500 ] loss = 1.12047, acc = 0.66003
[ Valid | 454/500 ] loss = 1.12047, acc = 0.66003


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 455/500 ] loss = 0.59654, acc = 0.79439


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 455/500 ] loss = 1.19958, acc = 0.64795
[ Valid | 455/500 ] loss = 1.19958, acc = 0.64795


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 456/500 ] loss = 0.59255, acc = 0.79669


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 456/500 ] loss = 1.16705, acc = 0.65460
[ Valid | 456/500 ] loss = 1.16705, acc = 0.65460


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 457/500 ] loss = 0.59712, acc = 0.79629


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 457/500 ] loss = 1.10887, acc = 0.66582
[ Valid | 457/500 ] loss = 1.10887, acc = 0.66582


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 458/500 ] loss = 0.60426, acc = 0.78958


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 458/500 ] loss = 1.11315, acc = 0.67109
[ Valid | 458/500 ] loss = 1.11315, acc = 0.67109


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 459/500 ] loss = 0.59067, acc = 0.79469


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 459/500 ] loss = 1.19666, acc = 0.64773
[ Valid | 459/500 ] loss = 1.19666, acc = 0.64773


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 460/500 ] loss = 0.59198, acc = 0.79509


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 460/500 ] loss = 1.20877, acc = 0.66055
[ Valid | 460/500 ] loss = 1.20877, acc = 0.66055


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 461/500 ] loss = 0.59840, acc = 0.79245


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 461/500 ] loss = 1.16785, acc = 0.65801
[ Valid | 461/500 ] loss = 1.16785, acc = 0.65801


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 462/500 ] loss = 0.57843, acc = 0.79902


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 462/500 ] loss = 1.19668, acc = 0.66011
[ Valid | 462/500 ] loss = 1.19668, acc = 0.66011


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 463/500 ] loss = 0.58516, acc = 0.79942


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 463/500 ] loss = 1.13161, acc = 0.66365
[ Valid | 463/500 ] loss = 1.13161, acc = 0.66365


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 464/500 ] loss = 0.58793, acc = 0.79463


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 464/500 ] loss = 1.20719, acc = 0.65598
[ Valid | 464/500 ] loss = 1.20719, acc = 0.65598


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 465/500 ] loss = 0.59774, acc = 0.79313


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 465/500 ] loss = 1.22409, acc = 0.64803
[ Valid | 465/500 ] loss = 1.22409, acc = 0.64803


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 466/500 ] loss = 0.60305, acc = 0.78844


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 466/500 ] loss = 1.11015, acc = 0.66996
[ Valid | 466/500 ] loss = 1.11015, acc = 0.66996


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 467/500 ] loss = 0.61362, acc = 0.78974


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 467/500 ] loss = 1.17119, acc = 0.65808
[ Valid | 467/500 ] loss = 1.17119, acc = 0.65808


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 468/500 ] loss = 0.62238, acc = 0.78253


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 468/500 ] loss = 1.17055, acc = 0.65713
[ Valid | 468/500 ] loss = 1.17055, acc = 0.65713


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 469/500 ] loss = 0.60249, acc = 0.79085


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 469/500 ] loss = 1.14773, acc = 0.66481
[ Valid | 469/500 ] loss = 1.14773, acc = 0.66481


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 470/500 ] loss = 0.61673, acc = 0.78444


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 470/500 ] loss = 1.16429, acc = 0.65302
[ Valid | 470/500 ] loss = 1.16429, acc = 0.65302


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 471/500 ] loss = 0.57804, acc = 0.79561


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 471/500 ] loss = 1.15843, acc = 0.66438
[ Valid | 471/500 ] loss = 1.15843, acc = 0.66438


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 472/500 ] loss = 0.60319, acc = 0.79397


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 472/500 ] loss = 1.15180, acc = 0.65670
[ Valid | 472/500 ] loss = 1.15180, acc = 0.65670


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 473/500 ] loss = 0.58288, acc = 0.80156


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 473/500 ] loss = 1.18814, acc = 0.64615
[ Valid | 473/500 ] loss = 1.18814, acc = 0.64615


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 474/500 ] loss = 0.57238, acc = 0.80351


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 474/500 ] loss = 1.19712, acc = 0.65034
[ Valid | 474/500 ] loss = 1.19712, acc = 0.65034


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 475/500 ] loss = 0.57357, acc = 0.79694


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 475/500 ] loss = 1.13084, acc = 0.66634
[ Valid | 475/500 ] loss = 1.13084, acc = 0.66634


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 476/500 ] loss = 0.57736, acc = 0.79637


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 476/500 ] loss = 1.18910, acc = 0.65468
[ Valid | 476/500 ] loss = 1.18910, acc = 0.65468


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 477/500 ] loss = 0.58692, acc = 0.79744


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 477/500 ] loss = 1.11884, acc = 0.66871
[ Valid | 477/500 ] loss = 1.11884, acc = 0.66871


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 478/500 ] loss = 0.58380, acc = 0.79932


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 478/500 ] loss = 1.28979, acc = 0.63753
[ Valid | 478/500 ] loss = 1.28979, acc = 0.63753


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 479/500 ] loss = 0.58188, acc = 0.79770


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 479/500 ] loss = 1.13804, acc = 0.67169
[ Valid | 479/500 ] loss = 1.13804, acc = 0.67169 -> best
Best model found at epoch 478, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 480/500 ] loss = 0.56812, acc = 0.80064


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 480/500 ] loss = 1.14980, acc = 0.66134
[ Valid | 480/500 ] loss = 1.14980, acc = 0.66134


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 481/500 ] loss = 0.58252, acc = 0.79832


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 481/500 ] loss = 1.44872, acc = 0.62799
[ Valid | 481/500 ] loss = 1.44872, acc = 0.62799


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 482/500 ] loss = 0.55345, acc = 0.80663


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 482/500 ] loss = 1.17720, acc = 0.66583
[ Valid | 482/500 ] loss = 1.17720, acc = 0.66583


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 483/500 ] loss = 0.58288, acc = 0.80304


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 483/500 ] loss = 1.18698, acc = 0.65858
[ Valid | 483/500 ] loss = 1.18698, acc = 0.65858


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 484/500 ] loss = 0.58557, acc = 0.79503


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 484/500 ] loss = 1.24573, acc = 0.64245
[ Valid | 484/500 ] loss = 1.24573, acc = 0.64245


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 485/500 ] loss = 0.58581, acc = 0.79375


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 485/500 ] loss = 1.18633, acc = 0.65130
[ Valid | 485/500 ] loss = 1.18633, acc = 0.65130


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 486/500 ] loss = 0.57879, acc = 0.79942


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 486/500 ] loss = 1.15809, acc = 0.66053
[ Valid | 486/500 ] loss = 1.15809, acc = 0.66053


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 487/500 ] loss = 0.57478, acc = 0.79673


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 487/500 ] loss = 1.18564, acc = 0.65286
[ Valid | 487/500 ] loss = 1.18564, acc = 0.65286


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 488/500 ] loss = 0.58491, acc = 0.80092


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 488/500 ] loss = 1.18454, acc = 0.65026
[ Valid | 488/500 ] loss = 1.18454, acc = 0.65026


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 489/500 ] loss = 0.56937, acc = 0.80002


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 489/500 ] loss = 1.22649, acc = 0.64671
[ Valid | 489/500 ] loss = 1.22649, acc = 0.64671


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 490/500 ] loss = 0.57155, acc = 0.80541


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 490/500 ] loss = 1.15175, acc = 0.67211
[ Valid | 490/500 ] loss = 1.15175, acc = 0.67211 -> best
Best model found at epoch 489, saving model


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 491/500 ] loss = 0.55846, acc = 0.80605


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 491/500 ] loss = 1.22260, acc = 0.66098
[ Valid | 491/500 ] loss = 1.22260, acc = 0.66098


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 492/500 ] loss = 0.58279, acc = 0.79844


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 492/500 ] loss = 1.22774, acc = 0.64577
[ Valid | 492/500 ] loss = 1.22774, acc = 0.64577


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 493/500 ] loss = 0.58133, acc = 0.79932


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 493/500 ] loss = 1.15638, acc = 0.65757
[ Valid | 493/500 ] loss = 1.15638, acc = 0.65757


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 494/500 ] loss = 0.57071, acc = 0.79952


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 494/500 ] loss = 1.19382, acc = 0.66626
[ Valid | 494/500 ] loss = 1.19382, acc = 0.66626


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 495/500 ] loss = 0.58439, acc = 0.79555


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 495/500 ] loss = 1.22699, acc = 0.64440
[ Valid | 495/500 ] loss = 1.22699, acc = 0.64440


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 496/500 ] loss = 0.58834, acc = 0.79766


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 496/500 ] loss = 1.23154, acc = 0.65178
[ Valid | 496/500 ] loss = 1.23154, acc = 0.65178


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 497/500 ] loss = 0.55477, acc = 0.80721


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 497/500 ] loss = 1.22541, acc = 0.64114
[ Valid | 497/500 ] loss = 1.22541, acc = 0.64114


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 498/500 ] loss = 0.58519, acc = 0.79535


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 498/500 ] loss = 1.27994, acc = 0.64397
[ Valid | 498/500 ] loss = 1.27994, acc = 0.64397


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 499/500 ] loss = 0.57448, acc = 0.80621


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 499/500 ] loss = 1.19328, acc = 0.66576
[ Valid | 499/500 ] loss = 1.19328, acc = 0.66576


  0%|          | 0/78 [00:00<?, ?it/s]

[ Train | 500/500 ] loss = 0.58226, acc = 0.79661


  0%|          | 0/27 [00:00<?, ?it/s]

[ Valid | 500/500 ] loss = 1.20091, acc = 0.65662
[ Valid | 500/500 ] loss = 1.20091, acc = 0.65662


In [12]:
class FoodDataset(Dataset):

    def __init__(self, path, mode="train", files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.mode = mode
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        if self.mode == "training":
            im = train_tfm(im)
            label = int(fname.split("/")[-1].split("_")[0])
            return im, label
        elif self.mode == "validation":
            im = test_tfm(im)
            label = -1 # test has no label
            return im, label
        else:
            ims = [test_tfm(im)]
            ims += [train_tfm(im) for _ in range(10)]
            label = -1 # test has no label
            return torch.stack(ims), label

In [13]:

test_set = FoodDataset(os.path.join(_dataset_dir,"test"), mode="test")
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11/test sample ./food11/test/0001.jpg


# Testing and generate prediction CSV

In [14]:
model_best = ResNet(Residual_Block, [2, 2, 2, 2]).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for img_list, _ in test_loader:
        # TTA
        test_pred = []
        for imgs in img_list:
            imgs_first = imgs[0].unsqueeze(0)
            origin_logit = model_best(imgs_first.to(device)).squeeze(0)
            tta_logit = model_best(imgs[1:].to(device))
            tta_logit = torch.mean(tta_logit, 0)
            logit = (0.6*origin_logit) + (0.4*tta_logit)
            test_pred.append(logit)
        test_pred = torch.stack(test_pred)
        
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))


In [15]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your  train_tfm in the report can be different from train_tfm in your training code.


In [16]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You need to add some transforms here.
    transforms.ToTensor(),
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [17]:
from torch import nn
class Residual_Network(nn.Module):
    def __init__(self):
        super(Residual_Network, self).__init__()
        
        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
        )

        self.cnn_layer4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
        )
        self.cnn_layer5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
        )
        self.cnn_layer6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(256* 32* 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]

        # Extract features by convolutional layers.
        x1 = self.cnn_layer1(x)
        
        x1 = self.relu(x1)

        Residual = x1
        
        x2 = self.cnn_layer2(x1)

        x2 = x2 + Residual
        
        x2 = self.relu(x2)
        
        x3 = self.cnn_layer3(x2)
        
        x3 = self.relu(x3)

        Residual = x3
        
        x4 = self.cnn_layer4(x3)

        x4 = x4 + Residual
        
        x4 = self.relu(x4)
        
        x5 = self.cnn_layer5(x4)
        
        x5 = self.relu(x5)

        Residual = x5
        
        x6 = self.cnn_layer6(x5)

        x6 = x6 + Residual
        
        x6 = self.relu(x6)
        
        # The extracted feature map must be flatten before going to fully-connected layers.
        xout = x6.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        xout = self.fc_layer(xout)
        return xout