# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Note: if the link below is dead, you can download the data directly from Kaggle and upload it to the workspace, or use the Kaggle API to download the data straight into Colab.


In [None]:
# Download the food11 dataset archive from the course GitHub repository and save it as food11.zip (-O sets the output file name).
! wget -O food11.zip "https://github.com/virginiakm1988/ML2022-Spring/blob/main/HW03/food11.zip?raw=true"

--2022-03-16 12:37:54--  https://github.com/virginiakm1988/ML2022-Spring/blob/main/HW03/food11.zip?raw=true
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/virginiakm1988/ML2022-Spring/raw/main/HW03/food11.zip [following]
--2022-03-16 12:37:54--  https://github.com/virginiakm1988/ML2022-Spring/raw/main/HW03/food11.zip
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://media.githubusercontent.com/media/virginiakm1988/ML2022-Spring/main/HW03/food11.zip [following]
--2022-03-16 12:37:55--  https://media.githubusercontent.com/media/virginiakm1988/ML2022-Spring/main/HW03/food11.zip
Resolving media.githubusercontent.com (media.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to media.githubusercontent.com (media.githubusercontent.com)|185

In [None]:
# Extract the downloaded archive; -q suppresses the per-file listing.
! unzip -q food11.zip

# Training

In [None]:
# Experiment name: used as the prefix of the log file ("<name>_log.txt") and of
# the best-checkpoint file ("<name>_best.ckpt") produced by the training loop.
_exp_name = "sample"

In [None]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [None]:
myseed = 880310  # set a random seed for reproducibility

# Ask cuDNN for deterministic kernels and turn off its auto-tuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Seed every random number generator this notebook imports: Python's `random`
# (previously left unseeded), NumPy, and PyTorch (CPU and, if present, all GPUs).
random.seed(myseed)
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

I tried several augmentation methods and found that the ones enabled in the cell below worked best.

In [None]:
from torchvision.transforms.transforms import RandomAdjustSharpness
# Per-channel mean and standard deviation used by Normalize in both pipelines,
# so training and evaluation inputs are scaled the same way.
_norm_mean = (0.4915, 0.4823, 0.4468)
_norm_std = (0.2470, 0.2435, 0.2616)

# Evaluation pipeline (validation / testing): no augmentation, only resize the
# PIL image, convert it to a tensor, and normalize it.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Augmentations applied to the PIL image during training, in this order.
# Candidates that were tried and are currently disabled: RandomCrop,
# GaussianBlur, RandomInvert, RandomAdjustSharpness, RandomAutocontrast,
# RandomPosterize, RandomPerspective.
_train_augmentations = [
    transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0),
    transforms.RandomAffine(degrees=(30, 70)),
    transforms.RandomResizedCrop(size=(96, 96)),
    transforms.RandomRotation(degrees=(0, 180)),
]

# Training pipeline: resize to 128x128, augment, then convert to a tensor and
# normalize (Normalize works on tensors, so it comes after ToTensor()).
# It can also be reused at test time to produce several views of an image for
# ensembling.
# NOTE(review): the final crop makes training images 96x96 while test_tfm
# yields 128x128 -- confirm this resolution gap is intended.
train_tfm = transforms.Compose(
    [transforms.Resize((128, 128))]
    + _train_augmentations
    + [transforms.ToTensor(), transforms.Normalize(_norm_mean, _norm_std)]
)


## **Datasets**
The label is encoded in each file name, so images are loaded and their labels are parsed when `__getitem__` is called.

In [None]:
class FoodDataset(Dataset):
    """Dataset of ``.jpg`` images whose label is encoded in the file name.

    The integer before the first underscore of the file name is the label
    (e.g. ``0_0.jpg`` has label 0). Files whose name does not start with an
    integer, such as unlabeled test images, get the label ``-1``.

    Args:
        path: Directory that holds the images.
        tfm: Callable applied to every loaded PIL image (defaults to ``test_tfm``).
        files: Optional explicit list of image paths. When given it is used
            as-is and ``path`` is not scanned.
    """

    def __init__(self,path,tfm=test_tfm,files = None):
        # Zero-argument super() really runs the parent initializer;
        # ``super(FoodDataset)`` only created an unbound proxy object.
        super().__init__()
        self.path = path
        if files is not None:
            self.files = files
        else:
            self.files = sorted(
                os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
            )
        # Show one example path as a sanity check; skipped for an empty list
        # so construction does not fail with an IndexError.
        if len(self.files) > 0:
            print(f"One {path} sample",self.files[0])
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Load image ``idx`` and return ``(transformed_image, label)``."""
        fname = self.files[idx]
        # The context manager releases the file handle as soon as the pixels
        # are read. Converting to RGB keeps grayscale/RGBA/CMYK files compatible
        # with the 3-channel normalization used by the transforms in this file.
        with Image.open(fname) as raw_image:
            im = self.transform(raw_image.convert("RGB"))
        try:
            # basename() handles the platform's path separator, unlike split("/").
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            label = -1 # test has no label
        return im,label



In [None]:
# Build VGG-16 with batch normalization, randomly initialized (no pretrained weights).
model = models.vgg16_bn(pretrained=False)

# Replace the last classifier layer with one that has 11 outputs. The input
# width is read from the layer being replaced rather than hard-coded.
num_classes = 11
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)

# To resume from an earlier run, load the saved parameters here:
#model.load_state_dict(torch.load("/content/sample_best.ckpt"))
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [None]:
batch_size = 64
_dataset_dir = "./food11"

# Construct datasets.
# Each FoodDataset lists the .jpg files of one folder; the transform passed in
# decides whether augmentation is applied (train_tfm) or not (test_tfm).
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
# Validation is not shuffled: a fixed order keeps the batch composition, and
# therefore any metric averaged per batch, identical from epoch to epoch for
# the same model weights.
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11/training sample ./food11/training/0_0.jpg
One ./food11/validation sample ./food11/validation/0_0.jpg


*   I manually adjust the learning rate during the training process.

*   I started with 0.0003; when the performance had not improved for a long time, I decreased the learning rate to 0.00003 or 0.000003.


*   Every time I restarted the training process, I loaded the previous model parameters.


*   The whole training process takes about 400~500 epochs.




In [None]:
# "cuda" only when GPUs are available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Epoch budget and early-stopping patience (number of epochs without a new
# best validation accuracy that is tolerated). With patience equal to
# n_epochs, early stopping cannot end the run before the final epoch.
n_epochs, patience = 300, 300

# Move the model to the selected device.
model.to(device)

# Cross-entropy on the raw logits measures classification performance.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters; learning rate and weight decay are the
# hyperparameters to tune here.
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5, lr=0.0003)

# Bookkeeping for checkpointing and early stopping -- not hyperparameters,
# leave them at zero.
best_acc, stale = 0, 0

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network to run. It is put in train mode when ``optimizer`` is
            given and in eval mode otherwise.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        criterion: Loss function returning the mean loss of a batch (as the
            default ``nn.CrossEntropyLoss()`` does).
        device: Device the batches are moved to; must be the model's device.
        optimizer: When given, parameters are updated after every batch.
            When ``None``, gradients are disabled and nothing is updated.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, averaged over samples
        rather than over batches so a smaller final batch is not over-weighted.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    # Train mode enables dropout / batch-norm updates; eval mode disables them.
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    for imgs, labels in tqdm(loader):
        # Data and model must live on the same device.
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Gradients are only tracked while training.
        with torch.set_grad_enabled(training):
            logits = model(imgs)
            # No softmax needed: cross-entropy applies it internally.
            loss = criterion(logits, labels)

        if training:
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            loss.backward()
            # Clip the gradient norm for stable training.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
            optimizer.step()

        batch_len = labels.size(0)
        # Undo the per-batch mean so the epoch average is per sample.
        loss_sum += loss.item() * batch_len
        correct += (logits.argmax(dim=-1) == labels).sum().item()
        seen += batch_len

    if seen == 0:
        raise ValueError("loader produced no samples; cannot compute metrics")
    return loss_sum / seen, correct / seen


for epoch in range(n_epochs):

    # ---------- Training ----------
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    valid_loss, valid_acc = _run_epoch(model, valid_loader, criterion, device)
    valid_line = f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}"
    print(valid_line)

    improved = valid_acc > best_acc

    # update logs
    # Write the line to the log file itself; previously the file was opened
    # but the message was printed to stdout a second time.
    with open(f"./{_exp_name}_log.txt","a") as log_file:
        print(valid_line + (" -> best" if improved else ""), file=log_file)

    # save models
    if improved:
        # Report the 1-based epoch number, consistent with the lines above.
        print(f"Best model found at epoch {epoch + 1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        # Stop once exactly `patience` consecutive epochs brought no improvement.
        if stale >= patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 001/300 ] loss = 0.50938, acc = 0.84040


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 001/300 ] loss = 0.86281, acc = 0.79193
[ Valid | 001/300 ] loss = 0.86281, acc = 0.79193 -> best
Best model found at epoch 0, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 002/300 ] loss = 0.51494, acc = 0.84593


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 002/300 ] loss = 0.88512, acc = 0.79415
[ Valid | 002/300 ] loss = 0.88512, acc = 0.79415 -> best
Best model found at epoch 1, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 003/300 ] loss = 0.50529, acc = 0.85036


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 003/300 ] loss = 0.84435, acc = 0.79799
[ Valid | 003/300 ] loss = 0.84435, acc = 0.79799 -> best
Best model found at epoch 2, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 004/300 ] loss = 0.56551, acc = 0.84323


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 004/300 ] loss = 0.86494, acc = 0.81121
[ Valid | 004/300 ] loss = 0.86494, acc = 0.81121 -> best
Best model found at epoch 3, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 005/300 ] loss = 0.50590, acc = 0.85198


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 005/300 ] loss = 0.84991, acc = 0.79558
[ Valid | 005/300 ] loss = 0.84991, acc = 0.79558


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 006/300 ] loss = 0.52223, acc = 0.84081


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 006/300 ] loss = 0.82863, acc = 0.81093
[ Valid | 006/300 ] loss = 0.82863, acc = 0.81093


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 007/300 ] loss = 0.51236, acc = 0.85014


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 007/300 ] loss = 0.86036, acc = 0.81631
[ Valid | 007/300 ] loss = 0.86036, acc = 0.81631 -> best
Best model found at epoch 6, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 008/300 ] loss = 0.47484, acc = 0.84861


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 008/300 ] loss = 0.86259, acc = 0.80214
[ Valid | 008/300 ] loss = 0.86259, acc = 0.80214


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 009/300 ] loss = 0.49322, acc = 0.84750


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 009/300 ] loss = 0.71590, acc = 0.81823
[ Valid | 009/300 ] loss = 0.71590, acc = 0.81823 -> best
Best model found at epoch 8, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 010/300 ] loss = 0.45156, acc = 0.85556


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 010/300 ] loss = 0.89437, acc = 0.80202
[ Valid | 010/300 ] loss = 0.89437, acc = 0.80202


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 011/300 ] loss = 0.50154, acc = 0.84942


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 011/300 ] loss = 0.82251, acc = 0.79742
[ Valid | 011/300 ] loss = 0.82251, acc = 0.79742


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 012/300 ] loss = 0.49524, acc = 0.84800


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 012/300 ] loss = 0.83851, acc = 0.80810
[ Valid | 012/300 ] loss = 0.83851, acc = 0.80810


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 013/300 ] loss = 0.48758, acc = 0.84873


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 013/300 ] loss = 0.99372, acc = 0.76733
[ Valid | 013/300 ] loss = 0.99372, acc = 0.76733


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 014/300 ] loss = 0.46463, acc = 0.85282


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 014/300 ] loss = 0.87492, acc = 0.77822
[ Valid | 014/300 ] loss = 0.87492, acc = 0.77822


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 015/300 ] loss = 0.45391, acc = 0.85343


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 015/300 ] loss = 0.94525, acc = 0.80339
[ Valid | 015/300 ] loss = 0.94525, acc = 0.80339


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 016/300 ] loss = 0.46814, acc = 0.84909


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 016/300 ] loss = 0.73490, acc = 0.81818
[ Valid | 016/300 ] loss = 0.73490, acc = 0.81818


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 017/300 ] loss = 0.45002, acc = 0.85649


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 017/300 ] loss = 0.69271, acc = 0.82404
[ Valid | 017/300 ] loss = 0.69271, acc = 0.82404 -> best
Best model found at epoch 16, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 018/300 ] loss = 0.46401, acc = 0.84919


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 018/300 ] loss = 0.89009, acc = 0.80330
[ Valid | 018/300 ] loss = 0.89009, acc = 0.80330


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 019/300 ] loss = 0.49075, acc = 0.85242


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 019/300 ] loss = 0.77090, acc = 0.80725
[ Valid | 019/300 ] loss = 0.77090, acc = 0.80725


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 020/300 ] loss = 0.45689, acc = 0.85405


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 020/300 ] loss = 0.84792, acc = 0.79520
[ Valid | 020/300 ] loss = 0.84792, acc = 0.79520


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 021/300 ] loss = 0.48706, acc = 0.85065


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 021/300 ] loss = 0.88274, acc = 0.80126
[ Valid | 021/300 ] loss = 0.88274, acc = 0.80126


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 022/300 ] loss = 0.47515, acc = 0.85357


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 022/300 ] loss = 0.78059, acc = 0.81315
[ Valid | 022/300 ] loss = 0.78059, acc = 0.81315


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 023/300 ] loss = 0.43657, acc = 0.85708


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 023/300 ] loss = 0.83440, acc = 0.79674
[ Valid | 023/300 ] loss = 0.83440, acc = 0.79674


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 024/300 ] loss = 0.44341, acc = 0.85698


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 024/300 ] loss = 0.83322, acc = 0.81379
[ Valid | 024/300 ] loss = 0.83322, acc = 0.81379


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 025/300 ] loss = 0.47671, acc = 0.85440


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 025/300 ] loss = 0.84280, acc = 0.81372
[ Valid | 025/300 ] loss = 0.84280, acc = 0.81372


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 026/300 ] loss = 0.45790, acc = 0.85802


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 026/300 ] loss = 0.69300, acc = 0.82491
[ Valid | 026/300 ] loss = 0.69300, acc = 0.82491 -> best
Best model found at epoch 25, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 027/300 ] loss = 0.45170, acc = 0.85845


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 027/300 ] loss = 0.74743, acc = 0.80947
[ Valid | 027/300 ] loss = 0.74743, acc = 0.80947


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 028/300 ] loss = 0.44676, acc = 0.85742


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 028/300 ] loss = 0.75000, acc = 0.80842
[ Valid | 028/300 ] loss = 0.75000, acc = 0.80842


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 029/300 ] loss = 0.41583, acc = 0.86232


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 029/300 ] loss = 0.78989, acc = 0.81168
[ Valid | 029/300 ] loss = 0.78989, acc = 0.81168


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 030/300 ] loss = 0.43626, acc = 0.86270


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 030/300 ] loss = 0.78935, acc = 0.80117
[ Valid | 030/300 ] loss = 0.78935, acc = 0.80117


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 031/300 ] loss = 0.42943, acc = 0.86185


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 031/300 ] loss = 0.72551, acc = 0.82030
[ Valid | 031/300 ] loss = 0.72551, acc = 0.82030


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 032/300 ] loss = 0.42576, acc = 0.86867


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 032/300 ] loss = 0.77055, acc = 0.81401
[ Valid | 032/300 ] loss = 0.77055, acc = 0.81401


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 033/300 ] loss = 0.45025, acc = 0.85829


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 033/300 ] loss = 0.76322, acc = 0.81786
[ Valid | 033/300 ] loss = 0.76322, acc = 0.81786


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 034/300 ] loss = 0.43994, acc = 0.86232


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 034/300 ] loss = 0.79144, acc = 0.81255
[ Valid | 034/300 ] loss = 0.79144, acc = 0.81255


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 035/300 ] loss = 0.42780, acc = 0.86317


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 035/300 ] loss = 0.76033, acc = 0.80869
[ Valid | 035/300 ] loss = 0.76033, acc = 0.80869


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 036/300 ] loss = 0.44207, acc = 0.86212


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 036/300 ] loss = 0.77270, acc = 0.81690
[ Valid | 036/300 ] loss = 0.77270, acc = 0.81690


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 037/300 ] loss = 0.41582, acc = 0.86294


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 037/300 ] loss = 0.86735, acc = 0.80630
[ Valid | 037/300 ] loss = 0.86735, acc = 0.80630


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 038/300 ] loss = 0.42113, acc = 0.86298


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 038/300 ] loss = 0.82055, acc = 0.81468
[ Valid | 038/300 ] loss = 0.82055, acc = 0.81468


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 039/300 ] loss = 0.41776, acc = 0.86633


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 039/300 ] loss = 0.78205, acc = 0.80871
[ Valid | 039/300 ] loss = 0.78205, acc = 0.80871


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 040/300 ] loss = 0.42222, acc = 0.86480


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 040/300 ] loss = 0.85713, acc = 0.79450
[ Valid | 040/300 ] loss = 0.85713, acc = 0.79450


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 041/300 ] loss = 0.44976, acc = 0.85633


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 041/300 ] loss = 1.11153, acc = 0.77312
[ Valid | 041/300 ] loss = 1.11153, acc = 0.77312


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 042/300 ] loss = 0.44864, acc = 0.85651


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 042/300 ] loss = 0.73028, acc = 0.82479
[ Valid | 042/300 ] loss = 0.73028, acc = 0.82479


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 043/300 ] loss = 0.42650, acc = 0.86143


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 043/300 ] loss = 0.82744, acc = 0.81623
[ Valid | 043/300 ] loss = 0.82744, acc = 0.81623


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 044/300 ] loss = 0.40045, acc = 0.87109


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 044/300 ] loss = 0.78288, acc = 0.82056
[ Valid | 044/300 ] loss = 0.78288, acc = 0.82056


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 045/300 ] loss = 0.43370, acc = 0.86194


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 045/300 ] loss = 0.78185, acc = 0.80600
[ Valid | 045/300 ] loss = 0.78185, acc = 0.80600


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 046/300 ] loss = 0.44055, acc = 0.85962


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 046/300 ] loss = 0.84252, acc = 0.80859
[ Valid | 046/300 ] loss = 0.84252, acc = 0.80859


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 047/300 ] loss = 0.41901, acc = 0.86712


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 047/300 ] loss = 0.82015, acc = 0.81110
[ Valid | 047/300 ] loss = 0.82015, acc = 0.81110


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 048/300 ] loss = 0.40278, acc = 0.87038


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 048/300 ] loss = 0.76553, acc = 0.82424
[ Valid | 048/300 ] loss = 0.76553, acc = 0.82424


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 049/300 ] loss = 0.41903, acc = 0.86964


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 049/300 ] loss = 0.79859, acc = 0.81140
[ Valid | 049/300 ] loss = 0.79859, acc = 0.81140


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 050/300 ] loss = 0.40386, acc = 0.87151


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 050/300 ] loss = 0.78439, acc = 0.81294
[ Valid | 050/300 ] loss = 0.78439, acc = 0.81294


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 051/300 ] loss = 0.42282, acc = 0.86488


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 051/300 ] loss = 0.76380, acc = 0.81565
[ Valid | 051/300 ] loss = 0.76380, acc = 0.81565


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 052/300 ] loss = 0.41751, acc = 0.86681


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 052/300 ] loss = 0.80757, acc = 0.80880
[ Valid | 052/300 ] loss = 0.80757, acc = 0.80880


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 053/300 ] loss = 0.39712, acc = 0.87304


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 053/300 ] loss = 0.82689, acc = 0.82010
[ Valid | 053/300 ] loss = 0.82689, acc = 0.82010


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 054/300 ] loss = 0.41867, acc = 0.86460


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 054/300 ] loss = 0.75486, acc = 0.82200
[ Valid | 054/300 ] loss = 0.75486, acc = 0.82200


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 055/300 ] loss = 0.40300, acc = 0.87234


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 055/300 ] loss = 0.82431, acc = 0.80004
[ Valid | 055/300 ] loss = 0.82431, acc = 0.80004


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 056/300 ] loss = 0.41862, acc = 0.86917


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 056/300 ] loss = 0.85993, acc = 0.80455
[ Valid | 056/300 ] loss = 0.85993, acc = 0.80455


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 057/300 ] loss = 0.38590, acc = 0.87069


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 057/300 ] loss = 0.79139, acc = 0.81912
[ Valid | 057/300 ] loss = 0.79139, acc = 0.81912


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 058/300 ] loss = 0.39788, acc = 0.87419


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 058/300 ] loss = 0.78838, acc = 0.80763
[ Valid | 058/300 ] loss = 0.78838, acc = 0.80763


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 059/300 ] loss = 0.39338, acc = 0.87173


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 059/300 ] loss = 0.80783, acc = 0.81274
[ Valid | 059/300 ] loss = 0.80783, acc = 0.81274


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 060/300 ] loss = 0.39340, acc = 0.87224


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 060/300 ] loss = 0.76690, acc = 0.81043
[ Valid | 060/300 ] loss = 0.76690, acc = 0.81043


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 061/300 ] loss = 0.40047, acc = 0.86948


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 061/300 ] loss = 0.78989, acc = 0.80397
[ Valid | 061/300 ] loss = 0.78989, acc = 0.80397


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 062/300 ] loss = 0.40529, acc = 0.87147


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 062/300 ] loss = 0.85508, acc = 0.81064
[ Valid | 062/300 ] loss = 0.85508, acc = 0.81064


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 063/300 ] loss = 0.41611, acc = 0.87272


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 063/300 ] loss = 0.79922, acc = 0.82028
[ Valid | 063/300 ] loss = 0.79922, acc = 0.82028


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 064/300 ] loss = 0.41464, acc = 0.87288


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 064/300 ] loss = 0.80063, acc = 0.82182
[ Valid | 064/300 ] loss = 0.80063, acc = 0.82182


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 065/300 ] loss = 0.41671, acc = 0.87153


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 065/300 ] loss = 0.84330, acc = 0.81063
[ Valid | 065/300 ] loss = 0.84330, acc = 0.81063


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 066/300 ] loss = 0.41471, acc = 0.87367


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 066/300 ] loss = 0.91815, acc = 0.80233
[ Valid | 066/300 ] loss = 0.91815, acc = 0.80233


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 067/300 ] loss = 0.40295, acc = 0.87232


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 067/300 ] loss = 0.95729, acc = 0.79820
[ Valid | 067/300 ] loss = 0.95729, acc = 0.79820


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 068/300 ] loss = 0.38904, acc = 0.87748


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 068/300 ] loss = 0.77251, acc = 0.82356
[ Valid | 068/300 ] loss = 0.77251, acc = 0.82356


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 069/300 ] loss = 0.39237, acc = 0.87466


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 069/300 ] loss = 0.77368, acc = 0.82586
[ Valid | 069/300 ] loss = 0.77368, acc = 0.82586 -> best
Best model found at epoch 68, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 070/300 ] loss = 0.39482, acc = 0.87119


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 070/300 ] loss = 0.86980, acc = 0.79857
[ Valid | 070/300 ] loss = 0.86980, acc = 0.79857


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 071/300 ] loss = 0.38628, acc = 0.87893


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 071/300 ] loss = 0.84384, acc = 0.80860
[ Valid | 071/300 ] loss = 0.84384, acc = 0.80860


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 072/300 ] loss = 0.37164, acc = 0.88016


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 072/300 ] loss = 0.90319, acc = 0.80920
[ Valid | 072/300 ] loss = 0.90319, acc = 0.80920


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 073/300 ] loss = 0.37042, acc = 0.88444


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 073/300 ] loss = 0.78047, acc = 0.81334
[ Valid | 073/300 ] loss = 0.78047, acc = 0.81334


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 074/300 ] loss = 0.39350, acc = 0.87841


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 074/300 ] loss = 0.77139, acc = 0.82714
[ Valid | 074/300 ] loss = 0.77139, acc = 0.82714 -> best
Best model found at epoch 73, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 075/300 ] loss = 0.38383, acc = 0.87859


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 075/300 ] loss = 0.77629, acc = 0.82404
[ Valid | 075/300 ] loss = 0.77629, acc = 0.82404


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 076/300 ] loss = 0.39497, acc = 0.87454


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 076/300 ] loss = 0.87382, acc = 0.82452
[ Valid | 076/300 ] loss = 0.87382, acc = 0.82452


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 077/300 ] loss = 0.39160, acc = 0.87137


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 077/300 ] loss = 0.78177, acc = 0.82260
[ Valid | 077/300 ] loss = 0.78177, acc = 0.82260


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 078/300 ] loss = 0.40537, acc = 0.87268


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 078/300 ] loss = 0.84463, acc = 0.80339
[ Valid | 078/300 ] loss = 0.84463, acc = 0.80339


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 079/300 ] loss = 0.36834, acc = 0.87815


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 079/300 ] loss = 0.81129, acc = 0.82153
[ Valid | 079/300 ] loss = 0.81129, acc = 0.82153


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 080/300 ] loss = 0.39307, acc = 0.87482


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 080/300 ] loss = 0.89480, acc = 0.80252
[ Valid | 080/300 ] loss = 0.89480, acc = 0.80252


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 081/300 ] loss = 0.40540, acc = 0.87657


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 081/300 ] loss = 0.86699, acc = 0.82046
[ Valid | 081/300 ] loss = 0.86699, acc = 0.82046


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 082/300 ] loss = 0.39724, acc = 0.88147


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 082/300 ] loss = 0.90049, acc = 0.80533
[ Valid | 082/300 ] loss = 0.90049, acc = 0.80533


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 083/300 ] loss = 0.38587, acc = 0.87690


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 083/300 ] loss = 0.87405, acc = 0.80620
[ Valid | 083/300 ] loss = 0.87405, acc = 0.80620


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 084/300 ] loss = 0.38098, acc = 0.87845


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 084/300 ] loss = 0.85564, acc = 0.81207
[ Valid | 084/300 ] loss = 0.85564, acc = 0.81207


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 085/300 ] loss = 0.39842, acc = 0.87054


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 085/300 ] loss = 0.75494, acc = 0.81555
[ Valid | 085/300 ] loss = 0.75494, acc = 0.81555


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 086/300 ] loss = 0.36182, acc = 0.88308


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 086/300 ] loss = 1.04028, acc = 0.80391
[ Valid | 086/300 ] loss = 1.04028, acc = 0.80391


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 087/300 ] loss = 0.40445, acc = 0.88075


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 087/300 ] loss = 0.88801, acc = 0.82115
[ Valid | 087/300 ] loss = 0.88801, acc = 0.82115


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 088/300 ] loss = 0.40529, acc = 0.87619


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 088/300 ] loss = 0.80426, acc = 0.81981
[ Valid | 088/300 ] loss = 0.80426, acc = 0.81981


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 089/300 ] loss = 0.36381, acc = 0.88579


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 089/300 ] loss = 0.88764, acc = 0.80457
[ Valid | 089/300 ] loss = 0.88764, acc = 0.80457


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 090/300 ] loss = 0.37459, acc = 0.87875


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 090/300 ] loss = 0.89290, acc = 0.80252
[ Valid | 090/300 ] loss = 0.89290, acc = 0.80252


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 091/300 ] loss = 0.40347, acc = 0.87629


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 091/300 ] loss = 0.81326, acc = 0.81178
[ Valid | 091/300 ] loss = 0.81326, acc = 0.81178


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 092/300 ] loss = 0.37307, acc = 0.88304


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 092/300 ] loss = 0.77305, acc = 0.81679
[ Valid | 092/300 ] loss = 0.77305, acc = 0.81679


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 093/300 ] loss = 0.37384, acc = 0.88450


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 093/300 ] loss = 0.83578, acc = 0.81728
[ Valid | 093/300 ] loss = 0.83578, acc = 0.81728


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 094/300 ] loss = 0.37584, acc = 0.88167


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 094/300 ] loss = 0.81159, acc = 0.82569
[ Valid | 094/300 ] loss = 0.81159, acc = 0.82569


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 095/300 ] loss = 0.38641, acc = 0.87587


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 095/300 ] loss = 0.87363, acc = 0.81457
[ Valid | 095/300 ] loss = 0.87363, acc = 0.81457


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 096/300 ] loss = 0.35767, acc = 0.88399


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 096/300 ] loss = 0.84058, acc = 0.82548
[ Valid | 096/300 ] loss = 0.84058, acc = 0.82548


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 097/300 ] loss = 0.37516, acc = 0.88244


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 097/300 ] loss = 0.75636, acc = 0.82152
[ Valid | 097/300 ] loss = 0.75636, acc = 0.82152


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 098/300 ] loss = 0.35052, acc = 0.89264


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 098/300 ] loss = 0.82673, acc = 0.81536
[ Valid | 098/300 ] loss = 0.82673, acc = 0.81536


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 099/300 ] loss = 0.37220, acc = 0.88163


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 099/300 ] loss = 0.82039, acc = 0.82365
[ Valid | 099/300 ] loss = 0.82039, acc = 0.82365


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 100/300 ] loss = 0.35803, acc = 0.88585


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 100/300 ] loss = 0.86269, acc = 0.82452
[ Valid | 100/300 ] loss = 0.86269, acc = 0.82452


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 101/300 ] loss = 0.39566, acc = 0.88308


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 101/300 ] loss = 0.81561, acc = 0.81873
[ Valid | 101/300 ] loss = 0.81561, acc = 0.81873


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 102/300 ] loss = 0.37041, acc = 0.88530


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 102/300 ] loss = 0.74798, acc = 0.82791
[ Valid | 102/300 ] loss = 0.74798, acc = 0.82791 -> best
Best model found at epoch 101, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 103/300 ] loss = 0.36561, acc = 0.88556


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 103/300 ] loss = 0.85919, acc = 0.80744
[ Valid | 103/300 ] loss = 0.85919, acc = 0.80744


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 104/300 ] loss = 0.36331, acc = 0.88504


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 104/300 ] loss = 0.84082, acc = 0.81719
[ Valid | 104/300 ] loss = 0.84082, acc = 0.81719


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 105/300 ] loss = 0.34876, acc = 0.88883


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 105/300 ] loss = 0.74644, acc = 0.82896
[ Valid | 105/300 ] loss = 0.74644, acc = 0.82896 -> best
Best model found at epoch 104, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 106/300 ] loss = 0.36347, acc = 0.88560


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 106/300 ] loss = 0.80487, acc = 0.81806
[ Valid | 106/300 ] loss = 0.80487, acc = 0.81806


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 107/300 ] loss = 0.35293, acc = 0.88607


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 107/300 ] loss = 0.92924, acc = 0.81220
[ Valid | 107/300 ] loss = 0.92924, acc = 0.81220


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 108/300 ] loss = 0.35302, acc = 0.88794


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 108/300 ] loss = 0.84447, acc = 0.81979
[ Valid | 108/300 ] loss = 0.84447, acc = 0.81979


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 109/300 ] loss = 0.35193, acc = 0.88635


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 109/300 ] loss = 0.86999, acc = 0.82345
[ Valid | 109/300 ] loss = 0.86999, acc = 0.82345


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 110/300 ] loss = 0.35600, acc = 0.88823


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 110/300 ] loss = 0.85498, acc = 0.82413
[ Valid | 110/300 ] loss = 0.85498, acc = 0.82413


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 111/300 ] loss = 0.38485, acc = 0.88738


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 111/300 ] loss = 0.90414, acc = 0.82759
[ Valid | 111/300 ] loss = 0.90414, acc = 0.82759


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 112/300 ] loss = 0.33575, acc = 0.89306


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 112/300 ] loss = 0.89430, acc = 0.81218
[ Valid | 112/300 ] loss = 0.89430, acc = 0.81218


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 113/300 ] loss = 0.36158, acc = 0.88532


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 113/300 ] loss = 0.91950, acc = 0.81960
[ Valid | 113/300 ] loss = 0.91950, acc = 0.81960


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 114/300 ] loss = 0.36972, acc = 0.88345


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 114/300 ] loss = 0.83470, acc = 0.81969
[ Valid | 114/300 ] loss = 0.83470, acc = 0.81969


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 115/300 ] loss = 0.37885, acc = 0.88480


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 115/300 ] loss = 0.93163, acc = 0.79732
[ Valid | 115/300 ] loss = 0.93163, acc = 0.79732


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 116/300 ] loss = 0.37315, acc = 0.88137


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 116/300 ] loss = 0.82667, acc = 0.81979
[ Valid | 116/300 ] loss = 0.82667, acc = 0.81979


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 117/300 ] loss = 0.36214, acc = 0.88585


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 117/300 ] loss = 0.81374, acc = 0.82704
[ Valid | 117/300 ] loss = 0.81374, acc = 0.82704


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 118/300 ] loss = 0.36128, acc = 0.88786


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 118/300 ] loss = 0.88004, acc = 0.81207
[ Valid | 118/300 ] loss = 0.88004, acc = 0.81207


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 119/300 ] loss = 0.33632, acc = 0.89300


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 119/300 ] loss = 0.94690, acc = 0.81209
[ Valid | 119/300 ] loss = 0.94690, acc = 0.81209


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 120/300 ] loss = 0.34649, acc = 0.89038


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 120/300 ] loss = 0.88798, acc = 0.81273
[ Valid | 120/300 ] loss = 0.88798, acc = 0.81273


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 121/300 ] loss = 0.35644, acc = 0.88581


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 121/300 ] loss = 0.80346, acc = 0.82366
[ Valid | 121/300 ] loss = 0.80346, acc = 0.82366


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 122/300 ] loss = 0.33767, acc = 0.89280


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 122/300 ] loss = 0.88597, acc = 0.80910
[ Valid | 122/300 ] loss = 0.88597, acc = 0.80910


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 123/300 ] loss = 0.33707, acc = 0.89262


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 123/300 ] loss = 0.91309, acc = 0.81999
[ Valid | 123/300 ] loss = 0.91309, acc = 0.81999


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 124/300 ] loss = 0.34851, acc = 0.88853


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 124/300 ] loss = 0.75034, acc = 0.82975
[ Valid | 124/300 ] loss = 0.75034, acc = 0.82975 -> best
Best model found at epoch 123, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 125/300 ] loss = 0.37856, acc = 0.88054


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 125/300 ] loss = 0.91852, acc = 0.81689
[ Valid | 125/300 ] loss = 0.91852, acc = 0.81689


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 126/300 ] loss = 0.36779, acc = 0.88462


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 126/300 ] loss = 0.86245, acc = 0.82211
[ Valid | 126/300 ] loss = 0.86245, acc = 0.82211


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 127/300 ] loss = 0.33882, acc = 0.88988


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 127/300 ] loss = 0.80388, acc = 0.83193
[ Valid | 127/300 ] loss = 0.80388, acc = 0.83193 -> best
Best model found at epoch 126, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 128/300 ] loss = 0.34839, acc = 0.88903


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 128/300 ] loss = 0.98816, acc = 0.80417
[ Valid | 128/300 ] loss = 0.98816, acc = 0.80417


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 129/300 ] loss = 0.33431, acc = 0.89337


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 129/300 ] loss = 0.88839, acc = 0.81314
[ Valid | 129/300 ] loss = 0.88839, acc = 0.81314


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 130/300 ] loss = 0.35729, acc = 0.89238


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 130/300 ] loss = 0.93481, acc = 0.80869
[ Valid | 130/300 ] loss = 0.93481, acc = 0.80869


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 131/300 ] loss = 0.32545, acc = 0.89760


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 131/300 ] loss = 0.98257, acc = 0.81468
[ Valid | 131/300 ] loss = 0.98257, acc = 0.81468


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 132/300 ] loss = 0.34114, acc = 0.89256


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 132/300 ] loss = 1.01652, acc = 0.81565
[ Valid | 132/300 ] loss = 1.01652, acc = 0.81565


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 133/300 ] loss = 0.36598, acc = 0.88813


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 133/300 ] loss = 0.82648, acc = 0.81856
[ Valid | 133/300 ] loss = 0.82648, acc = 0.81856


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 134/300 ] loss = 0.37898, acc = 0.88649


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 134/300 ] loss = 0.97691, acc = 0.81536
[ Valid | 134/300 ] loss = 0.97691, acc = 0.81536


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 135/300 ] loss = 0.34768, acc = 0.88925


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 135/300 ] loss = 0.88750, acc = 0.81314
[ Valid | 135/300 ] loss = 0.88750, acc = 0.81314


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 136/300 ] loss = 0.34210, acc = 0.88976


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 136/300 ] loss = 0.91989, acc = 0.81439
[ Valid | 136/300 ] loss = 0.91989, acc = 0.81439


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 137/300 ] loss = 0.32803, acc = 0.89194


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 137/300 ] loss = 1.07701, acc = 0.79327
[ Valid | 137/300 ] loss = 1.07701, acc = 0.79327


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 138/300 ] loss = 0.34213, acc = 0.89123


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 138/300 ] loss = 1.04905, acc = 0.80062
[ Valid | 138/300 ] loss = 1.04905, acc = 0.80062


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 139/300 ] loss = 0.43468, acc = 0.88829


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 139/300 ] loss = 0.87916, acc = 0.81632
[ Valid | 139/300 ] loss = 0.87916, acc = 0.81632


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 140/300 ] loss = 0.35924, acc = 0.89347


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 140/300 ] loss = 0.87363, acc = 0.80945
[ Valid | 140/300 ] loss = 0.87363, acc = 0.80945


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 141/300 ] loss = 0.33467, acc = 0.89365


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 141/300 ] loss = 0.91177, acc = 0.81178
[ Valid | 141/300 ] loss = 0.91177, acc = 0.81178


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 142/300 ] loss = 0.33129, acc = 0.89520


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 142/300 ] loss = 0.89331, acc = 0.82589
[ Valid | 142/300 ] loss = 0.89331, acc = 0.82589


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 143/300 ] loss = 0.33274, acc = 0.89131


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 143/300 ] loss = 0.94819, acc = 0.80216
[ Valid | 143/300 ] loss = 0.94819, acc = 0.80216


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 144/300 ] loss = 0.37460, acc = 0.89083


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 144/300 ] loss = 0.86940, acc = 0.82018
[ Valid | 144/300 ] loss = 0.86940, acc = 0.82018


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 145/300 ] loss = 0.31316, acc = 0.89986


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 145/300 ] loss = 0.76154, acc = 0.82898
[ Valid | 145/300 ] loss = 0.76154, acc = 0.82898


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 146/300 ] loss = 0.34475, acc = 0.89532


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 146/300 ] loss = 0.99120, acc = 0.79212
[ Valid | 146/300 ] loss = 0.99120, acc = 0.79212


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 147/300 ] loss = 0.33583, acc = 0.89472


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 147/300 ] loss = 0.76905, acc = 0.82541
[ Valid | 147/300 ] loss = 0.76905, acc = 0.82541


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 148/300 ] loss = 0.33500, acc = 0.89252


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 148/300 ] loss = 0.80492, acc = 0.83397
[ Valid | 148/300 ] loss = 0.80492, acc = 0.83397 -> best
Best model found at epoch 147, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 149/300 ] loss = 0.32950, acc = 0.89024


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 149/300 ] loss = 0.84553, acc = 0.81969
[ Valid | 149/300 ] loss = 0.84553, acc = 0.81969


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 150/300 ] loss = 0.33681, acc = 0.89266


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 150/300 ] loss = 0.82918, acc = 0.81576
[ Valid | 150/300 ] loss = 0.82918, acc = 0.81576


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 151/300 ] loss = 0.32403, acc = 0.89891


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 151/300 ] loss = 0.89164, acc = 0.81545
[ Valid | 151/300 ] loss = 0.89164, acc = 0.81545


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 152/300 ] loss = 0.35187, acc = 0.89385


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 152/300 ] loss = 1.08159, acc = 0.81690
[ Valid | 152/300 ] loss = 1.08159, acc = 0.81690


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 153/300 ] loss = 0.37722, acc = 0.88831


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 153/300 ] loss = 0.84959, acc = 0.82450
[ Valid | 153/300 ] loss = 0.84959, acc = 0.82450


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 154/300 ] loss = 0.30641, acc = 0.90153


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 154/300 ] loss = 0.94560, acc = 0.81469
[ Valid | 154/300 ] loss = 0.94560, acc = 0.81469


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 155/300 ] loss = 0.34257, acc = 0.89524


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 155/300 ] loss = 0.81440, acc = 0.82357
[ Valid | 155/300 ] loss = 0.81440, acc = 0.82357


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 156/300 ] loss = 0.33358, acc = 0.89635


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 156/300 ] loss = 0.87778, acc = 0.81373
[ Valid | 156/300 ] loss = 0.87778, acc = 0.81373


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 157/300 ] loss = 0.32915, acc = 0.89442


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 157/300 ] loss = 0.85559, acc = 0.82887
[ Valid | 157/300 ] loss = 0.85559, acc = 0.82887


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 158/300 ] loss = 0.31777, acc = 0.89677


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 158/300 ] loss = 0.85403, acc = 0.82799
[ Valid | 158/300 ] loss = 0.85403, acc = 0.82799


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 159/300 ] loss = 0.32098, acc = 0.89740


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 159/300 ] loss = 0.83104, acc = 0.82375
[ Valid | 159/300 ] loss = 0.83104, acc = 0.82375


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 160/300 ] loss = 0.32136, acc = 0.89962


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 160/300 ] loss = 0.97415, acc = 0.81584
[ Valid | 160/300 ] loss = 0.97415, acc = 0.81584


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 161/300 ] loss = 0.33609, acc = 0.89714


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 161/300 ] loss = 0.77199, acc = 0.83557
[ Valid | 161/300 ] loss = 0.77199, acc = 0.83557 -> best
Best model found at epoch 160, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 162/300 ] loss = 0.33979, acc = 0.88974


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 162/300 ] loss = 0.86250, acc = 0.81351
[ Valid | 162/300 ] loss = 0.86250, acc = 0.81351


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 163/300 ] loss = 0.31276, acc = 0.90262


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 163/300 ] loss = 0.77415, acc = 0.82943
[ Valid | 163/300 ] loss = 0.77415, acc = 0.82943


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 164/300 ] loss = 0.37915, acc = 0.89601


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 164/300 ] loss = 0.86877, acc = 0.82423
[ Valid | 164/300 ] loss = 0.86877, acc = 0.82423


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 165/300 ] loss = 0.30338, acc = 0.90460


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 165/300 ] loss = 0.92238, acc = 0.83851
[ Valid | 165/300 ] loss = 0.92238, acc = 0.83851 -> best
Best model found at epoch 164, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 166/300 ] loss = 0.33017, acc = 0.89228


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 166/300 ] loss = 0.94521, acc = 0.81507
[ Valid | 166/300 ] loss = 0.94521, acc = 0.81507


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 167/300 ] loss = 0.33371, acc = 0.90127


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 167/300 ] loss = 0.98405, acc = 0.82366
[ Valid | 167/300 ] loss = 0.98405, acc = 0.82366


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 168/300 ] loss = 0.33144, acc = 0.89296


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 168/300 ] loss = 0.91314, acc = 0.81227
[ Valid | 168/300 ] loss = 0.91314, acc = 0.81227


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 169/300 ] loss = 0.32279, acc = 0.90028


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 169/300 ] loss = 0.87065, acc = 0.80728
[ Valid | 169/300 ] loss = 0.87065, acc = 0.80728


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 170/300 ] loss = 0.31586, acc = 0.89778


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 170/300 ] loss = 0.75866, acc = 0.82056
[ Valid | 170/300 ] loss = 0.75866, acc = 0.82056


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 171/300 ] loss = 0.31011, acc = 0.90129


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 171/300 ] loss = 0.85356, acc = 0.81967
[ Valid | 171/300 ] loss = 0.85356, acc = 0.81967


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 172/300 ] loss = 0.31899, acc = 0.89683


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 172/300 ] loss = 0.84750, acc = 0.82279
[ Valid | 172/300 ] loss = 0.84750, acc = 0.82279


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 173/300 ] loss = 0.32984, acc = 0.89687


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 173/300 ] loss = 0.94259, acc = 0.80849
[ Valid | 173/300 ] loss = 0.94259, acc = 0.80849


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 174/300 ] loss = 0.30468, acc = 0.90379


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 174/300 ] loss = 0.94241, acc = 0.81564
[ Valid | 174/300 ] loss = 0.94241, acc = 0.81564


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 175/300 ] loss = 0.30156, acc = 0.90274


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 175/300 ] loss = 0.91423, acc = 0.81893
[ Valid | 175/300 ] loss = 0.91423, acc = 0.81893


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 176/300 ] loss = 0.31331, acc = 0.89790


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 176/300 ] loss = 0.98558, acc = 0.82308
[ Valid | 176/300 ] loss = 0.98558, acc = 0.82308


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 177/300 ] loss = 0.33043, acc = 0.89587


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 177/300 ] loss = 0.97698, acc = 0.81303
[ Valid | 177/300 ] loss = 0.97698, acc = 0.81303


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 178/300 ] loss = 0.33930, acc = 0.89649


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 178/300 ] loss = 0.84165, acc = 0.83310
[ Valid | 178/300 ] loss = 0.84165, acc = 0.83310


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 179/300 ] loss = 0.30963, acc = 0.90194


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 179/300 ] loss = 0.93968, acc = 0.82482
[ Valid | 179/300 ] loss = 0.93968, acc = 0.82482


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 180/300 ] loss = 0.31808, acc = 0.90159


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 180/300 ] loss = 0.82255, acc = 0.82385
[ Valid | 180/300 ] loss = 0.82255, acc = 0.82385


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 181/300 ] loss = 0.31693, acc = 0.90446


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 181/300 ] loss = 0.84359, acc = 0.82742
[ Valid | 181/300 ] loss = 0.84359, acc = 0.82742


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 182/300 ] loss = 0.29077, acc = 0.90712


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 182/300 ] loss = 0.89678, acc = 0.80766
[ Valid | 182/300 ] loss = 0.89678, acc = 0.80766


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 183/300 ] loss = 0.33391, acc = 0.89980


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 183/300 ] loss = 0.79357, acc = 0.83505
[ Valid | 183/300 ] loss = 0.79357, acc = 0.83505


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 184/300 ] loss = 0.30643, acc = 0.90395


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 184/300 ] loss = 0.85816, acc = 0.81896
[ Valid | 184/300 ] loss = 0.85816, acc = 0.81896


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 185/300 ] loss = 0.32022, acc = 0.89762


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 185/300 ] loss = 0.88767, acc = 0.82135
[ Valid | 185/300 ] loss = 0.88767, acc = 0.82135


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 186/300 ] loss = 0.31351, acc = 0.90179


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 186/300 ] loss = 0.97430, acc = 0.81547
[ Valid | 186/300 ] loss = 0.97430, acc = 0.81547


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 187/300 ] loss = 0.29798, acc = 0.90494


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 187/300 ] loss = 0.84172, acc = 0.82435
[ Valid | 187/300 ] loss = 0.84172, acc = 0.82435


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 188/300 ] loss = 0.37036, acc = 0.89855


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 188/300 ] loss = 1.02964, acc = 0.81419
[ Valid | 188/300 ] loss = 1.02964, acc = 0.81419


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 189/300 ] loss = 0.32162, acc = 0.89845


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 189/300 ] loss = 0.86510, acc = 0.81873
[ Valid | 189/300 ] loss = 0.86510, acc = 0.81873


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 190/300 ] loss = 0.31703, acc = 0.89861


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 190/300 ] loss = 0.81929, acc = 0.82666
[ Valid | 190/300 ] loss = 0.81929, acc = 0.82666


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 191/300 ] loss = 0.29686, acc = 0.90306


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 191/300 ] loss = 0.89100, acc = 0.83245
[ Valid | 191/300 ] loss = 0.89100, acc = 0.83245


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 192/300 ] loss = 0.29834, acc = 0.90476


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 192/300 ] loss = 0.92971, acc = 0.81833
[ Valid | 192/300 ] loss = 0.92971, acc = 0.81833


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 193/300 ] loss = 0.29314, acc = 0.90591


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 193/300 ] loss = 0.85246, acc = 0.82548
[ Valid | 193/300 ] loss = 0.85246, acc = 0.82548


  0%|          | 0/155 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Test split wrapped with `test_tfm`. Shuffling stays off so that the order of
# the predictions matches the order of the files in the dataset; the submission
# Ids written later are assigned purely by position.
test_set = FoodDataset(
    os.path.join(_dataset_dir, "test"),
    tfm=test_tfm,
)
test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

One ./food11/test sample ./food11/test/0001.jpg


# Testing and generating the prediction CSV

In [None]:
# Rebuild the architecture the checkpoint is loaded into: VGG16-BN with the last
# classifier layer replaced by an 11-way linear head.
model_best = models.vgg16_bn(pretrained=False)
model_best.classifier[6] = nn.Linear(4096, 11)

# Load the weights onto the CPU first so the checkpoint can be restored even on
# a runtime without a GPU; the model is moved to `device` in the inference cell.
# NOTE(review): the path is Colab-specific ("/content"); adjust it when running
# elsewhere.
model_best.load_state_dict(
    torch.load("/content/sample_best.ckpt", map_location="cpu")
)

# Show the model that was just loaded (not the training-time `model` variable,
# which may not exist on a fresh kernel).
print(model_best)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [None]:
# For TTA (test-time augmentation):
# the final logits are a weighted blend of one pass over the `test_tfm` loader
# and several passes over the same images transformed with `train_tfm`.
NUM_TTA_ROUNDS = 5     # number of `train_tfm` passes
BASE_WEIGHT = 0.5      # weight of the `test_tfm` prediction
AUG_WEIGHT = 0.1       # weight of each `train_tfm` prediction


def predict_batches(model, loader):
    """Run `model` over `loader` and return one logits array per batch.

    Args:
        model: network to evaluate; it is switched to eval mode here.
        loader: iterable yielding `(data, _)` pairs; only `data` is used.

    Returns:
        list of numpy arrays, one per batch, in loader order.
    """
    model.eval()
    batch_logits = []
    with torch.no_grad():
        for data, _ in loader:
            batch_logits.append(model(data.to(device)).cpu().numpy())
    return batch_logits


def pad4(i):
    """Return `i` as a string left-padded with zeros to at least 4 characters."""
    return str(i).zfill(4)


model_best.to(device)

# Baseline pass on the loader built with `test_tfm`.
prediction = predict_batches(model_best, test_loader)

# Augmented passes. Separate names are used so `test_set` / `test_loader` are
# not overwritten, which keeps this cell re-runnable with the same baseline.
# The dataset is rebuilt each round, as in the original cell.
aug_predictions = []
for _ in range(NUM_TTA_ROUNDS):
    tta_set = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=train_tfm)
    tta_loader = DataLoader(tta_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    aug_predictions.append(predict_batches(model_best, tta_loader))

# Blend batch by batch into new arrays (the baseline `prediction` is left
# untouched). The number of batches comes from the data, not a constant.
final_pred = []
for batch_idx, base_logits in enumerate(prediction):
    blended = BASE_WEIGHT * base_logits
    for aug_round in aug_predictions:
        blended = blended + AUG_WEIGHT * aug_round[batch_idx]
    final_pred.append(blended)

# Predicted class per sample: argmax over the class axis, then flatten batches.
labels = [np.argmax(batch_logits, axis=1) for batch_logits in final_pred]
pred = [label for batch_labels in labels for label in batch_labels]

# Every test sample must have exactly one prediction before writing the CSV.
assert len(pred) == len(test_set), "prediction count does not match test set size"

# Create the submission CSV; Ids are 1-based positions, zero-padded to 4 digits.
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1, len(test_set) + 1)]
df["Category"] = pred
df.to_csv("submission_vgg16bn.csv", index=False)

One ./food11/test sample ./food11/test/0001.jpg
One ./food11/test sample ./food11/test/0001.jpg
One ./food11/test sample ./food11/test/0001.jpg
One ./food11/test sample ./food11/test/0001.jpg
One ./food11/test sample ./food11/test/0001.jpg
