# HW3 Image Classification
## We strongly recommend that you run with [Kaggle](https://www.kaggle.com/t/86ca241732c04da99aca6490080bae73) for this homework

If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

# Check GPU Type

In [1]:
!nvidia-smi

Fri Mar 17 20:10:30 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 516.94       Driver Version: 516.94       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   34C    P8    10W / 170W |   8461MiB / 12288MiB |      5%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
#!pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117

# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [3]:
# Download Link
# Link 1 (Dropbox): https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0
# Link 2 (Google Drive): https://drive.google.com/file/d/1tbGNwk1yGoCBdu4Gi_Cia7EJ9OhubYD9/view?usp=share_link
# Link 3: Kaggle Competition.

# (1) dropbox link
#!wget -O food11.zip https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0

# (2) google drive link
# !gdown --id '1tbGNwk1yGoCBdu4Gi_Cia7EJ9OhubYD9' --output food11.zip

In [4]:
#! unzip food11.zip

# Import Packages

In [1]:
_exp_name = "sample"

In [2]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [3]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

# Transforms
Torchvision provides lots of useful utilities for image preprocessing, data *wrapping* as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [4]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    #https://blog.csdn.net/weixin_42369818/article/details/123796214
    #https://blog.csdn.net/Miao_sin/article/details/122474968
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(128),
    transforms.RandomHorizontalFlip(),
    
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])


# Datasets
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [5]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
            
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            #print(fname)
            #print(fname.split("/")[-1].split("_"))
            # label = int(fname.split("/")[-1].split("_")[0])
            label = int(fname.split("\\")[-1].split("_")[0])
        except:
            label = -1 # test has no label
            
        return im,label

# Model

In [6]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
             
            #https://github.com/Aleadinglight/Pytorch-VGG-19/blob/master/VGG_19.ipynb
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2, 0),
            nn.Dropout(0.5),
            
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2, 2, 0),
            nn.Dropout(0.5),
            
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(2, 2, 0),
            nn.Dropout(0.5),
            
            nn.Conv2d(256, 512,3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(2, 2 , 0),
            nn.Dropout(0.5),
            
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.MaxPool2d(2, 2, 0),
            nn.Dropout(0.5),
            
            
            
            #nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            #nn.BatchNorm2d(64),
            #nn.ReLU(),
            #nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            #nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            #nn.BatchNorm2d(128),
            #nn.ReLU(),
            #nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            #nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            #nn.BatchNorm2d(256),
            #nn.ReLU(),
            #nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            #nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            #nn.BatchNorm2d(512),
            #nn.ReLU(),
            #nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            #nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            #nn.BatchNorm2d(512),
            #nn.ReLU(),
            #nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
            
            
            
            
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.5),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# Configurations

In [6]:
import torchvision.models as models
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = 'cpu'
# Initialize a model, and put it on the device specified.
#model = models.resnet18(weights=False).to(device)
#model = models.resnet50(weights=False).to(device)
#model = models.vgg16(weights=False).to(device)
model = Classifier().to(device)

# The number of batch size.
batch_size = 256
#batch_size = 64

# The number of training epochs.
n_epochs = 200
#n_epochs = 8

# If no improvement in 'patience' epochs, early stop.
patience = 300

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()
#criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)



# Dataloader

In [7]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("train", tfm=train_tfm)
# train_set = FoodDataset("./train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
# valid_set = FoodDataset(".valid", tfm=test_tfm)
valid_set = FoodDataset("valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# Start Training

In [8]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        #print(labels)
        # labels = torch.zeros_like(labels)
        #print(logits.size(), torch.max(labels))
        
        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))
        # print(labels)
        # print(logits.shape)
        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 6.46554, acc = 0.11709


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 2.31611, acc = 0.15027
[ Valid | 001/200 ] loss = 2.31611, acc = 0.15027 -> best
Best model found at epoch 0, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 2.32471, acc = 0.13955


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 2.29030, acc = 0.13456
[ Valid | 002/200 ] loss = 2.29030, acc = 0.13456


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 2.29696, acc = 0.14639


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 2.27115, acc = 0.13334
[ Valid | 003/200 ] loss = 2.27115, acc = 0.13334


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 2.26927, acc = 0.17305


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 2.20404, acc = 0.20601
[ Valid | 004/200 ] loss = 2.20404, acc = 0.20601 -> best
Best model found at epoch 3, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 2.22996, acc = 0.20068


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 2.19873, acc = 0.20063
[ Valid | 005/200 ] loss = 2.19873, acc = 0.20063


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 2.19276, acc = 0.20908


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 2.15924, acc = 0.22511
[ Valid | 006/200 ] loss = 2.15924, acc = 0.22511 -> best
Best model found at epoch 5, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 2.17983, acc = 0.21172


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 2.22308, acc = 0.16625
[ Valid | 007/200 ] loss = 2.22308, acc = 0.16625


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 2.15943, acc = 0.22412


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 2.11918, acc = 0.24429
[ Valid | 008/200 ] loss = 2.11918, acc = 0.24429 -> best
Best model found at epoch 7, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 2.09167, acc = 0.25830


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 2.07121, acc = 0.28127
[ Valid | 009/200 ] loss = 2.07121, acc = 0.28127 -> best
Best model found at epoch 8, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 2.03661, acc = 0.27959


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 2.17567, acc = 0.25401
[ Valid | 010/200 ] loss = 2.17567, acc = 0.25401


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 2.01488, acc = 0.28320


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.90257, acc = 0.32946
[ Valid | 011/200 ] loss = 1.90257, acc = 0.32946 -> best
Best model found at epoch 10, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 1.97201, acc = 0.30479


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 2.06584, acc = 0.28648
[ Valid | 012/200 ] loss = 2.06584, acc = 0.28648


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 1.98149, acc = 0.31201


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.87334, acc = 0.34638
[ Valid | 013/200 ] loss = 1.87334, acc = 0.34638 -> best
Best model found at epoch 12, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 1.90379, acc = 0.32822


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 2.04345, acc = 0.29881
[ Valid | 014/200 ] loss = 2.04345, acc = 0.29881


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 1.90049, acc = 0.33125


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 1.78126, acc = 0.38033
[ Valid | 015/200 ] loss = 1.78126, acc = 0.38033 -> best
Best model found at epoch 14, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 1.85296, acc = 0.35918


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 1.86018, acc = 0.38206
[ Valid | 016/200 ] loss = 1.86018, acc = 0.38206 -> best
Best model found at epoch 15, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 1.83063, acc = 0.36201


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 1.72401, acc = 0.40386
[ Valid | 017/200 ] loss = 1.72401, acc = 0.40386 -> best
Best model found at epoch 16, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 1.82138, acc = 0.37148


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 1.69080, acc = 0.42174
[ Valid | 018/200 ] loss = 1.69080, acc = 0.42174 -> best
Best model found at epoch 17, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 1.76786, acc = 0.38770


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 1.68798, acc = 0.41375
[ Valid | 019/200 ] loss = 1.68798, acc = 0.41375


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 1.73250, acc = 0.39707


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 1.78996, acc = 0.39361
[ Valid | 020/200 ] loss = 1.78996, acc = 0.39361


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 1.70744, acc = 0.40986


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 1.65265, acc = 0.42877
[ Valid | 021/200 ] loss = 1.65265, acc = 0.42877 -> best
Best model found at epoch 20, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 1.67384, acc = 0.42695


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 1.64533, acc = 0.43901
[ Valid | 022/200 ] loss = 1.64533, acc = 0.43901 -> best
Best model found at epoch 21, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 1.63219, acc = 0.44355


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 1.57162, acc = 0.46471
[ Valid | 023/200 ] loss = 1.57162, acc = 0.46471 -> best
Best model found at epoch 22, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 1.63352, acc = 0.44375


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 1.58804, acc = 0.43926
[ Valid | 024/200 ] loss = 1.58804, acc = 0.43926


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 1.59841, acc = 0.45352


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 1.66854, acc = 0.44082
[ Valid | 025/200 ] loss = 1.66854, acc = 0.44082


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 1.57340, acc = 0.46191


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 1.56576, acc = 0.47174
[ Valid | 026/200 ] loss = 1.56576, acc = 0.47174 -> best
Best model found at epoch 25, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 1.53575, acc = 0.47451


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 1.41157, acc = 0.51073
[ Valid | 027/200 ] loss = 1.41157, acc = 0.51073 -> best
Best model found at epoch 26, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 1.47531, acc = 0.49541


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 1.42409, acc = 0.51220
[ Valid | 028/200 ] loss = 1.42409, acc = 0.51220 -> best
Best model found at epoch 27, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 1.45261, acc = 0.51279


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 1.41155, acc = 0.51836
[ Valid | 029/200 ] loss = 1.41155, acc = 0.51836 -> best
Best model found at epoch 28, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 030/200 ] loss = 1.43170, acc = 0.51260


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 030/200 ] loss = 1.36564, acc = 0.53365
[ Valid | 030/200 ] loss = 1.36564, acc = 0.53365 -> best
Best model found at epoch 29, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 031/200 ] loss = 1.42391, acc = 0.51357


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 031/200 ] loss = 1.43053, acc = 0.52400
[ Valid | 031/200 ] loss = 1.43053, acc = 0.52400


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 032/200 ] loss = 1.35767, acc = 0.54727


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 032/200 ] loss = 1.50449, acc = 0.50238
[ Valid | 032/200 ] loss = 1.50449, acc = 0.50238


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 033/200 ] loss = 1.36109, acc = 0.54268


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 033/200 ] loss = 1.31421, acc = 0.55213
[ Valid | 033/200 ] loss = 1.31421, acc = 0.55213 -> best
Best model found at epoch 32, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 034/200 ] loss = 1.32803, acc = 0.55137


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 034/200 ] loss = 1.28155, acc = 0.56898
[ Valid | 034/200 ] loss = 1.28155, acc = 0.56898 -> best
Best model found at epoch 33, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 035/200 ] loss = 1.28243, acc = 0.56914


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 035/200 ] loss = 1.30969, acc = 0.56229
[ Valid | 035/200 ] loss = 1.30969, acc = 0.56229


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 036/200 ] loss = 1.28052, acc = 0.58008


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 036/200 ] loss = 1.26629, acc = 0.57236
[ Valid | 036/200 ] loss = 1.26629, acc = 0.57236 -> best
Best model found at epoch 35, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 037/200 ] loss = 1.25866, acc = 0.57559


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 037/200 ] loss = 1.50602, acc = 0.52904
[ Valid | 037/200 ] loss = 1.50602, acc = 0.52904


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 038/200 ] loss = 1.23654, acc = 0.58271


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 038/200 ] loss = 1.36058, acc = 0.56117
[ Valid | 038/200 ] loss = 1.36058, acc = 0.56117


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 039/200 ] loss = 1.20302, acc = 0.59805


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 039/200 ] loss = 1.21199, acc = 0.59519
[ Valid | 039/200 ] loss = 1.21199, acc = 0.59519 -> best
Best model found at epoch 38, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 040/200 ] loss = 1.18202, acc = 0.60820


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 040/200 ] loss = 1.26306, acc = 0.58018
[ Valid | 040/200 ] loss = 1.26306, acc = 0.58018


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 041/200 ] loss = 1.19452, acc = 0.59404


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 041/200 ] loss = 1.31010, acc = 0.58756
[ Valid | 041/200 ] loss = 1.31010, acc = 0.58756


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 042/200 ] loss = 1.16203, acc = 0.61123


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 042/200 ] loss = 1.12792, acc = 0.61933
[ Valid | 042/200 ] loss = 1.12792, acc = 0.61933 -> best
Best model found at epoch 41, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 043/200 ] loss = 1.12569, acc = 0.61641


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 043/200 ] loss = 1.13944, acc = 0.61297
[ Valid | 043/200 ] loss = 1.13944, acc = 0.61297


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 044/200 ] loss = 1.11974, acc = 0.61455


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 044/200 ] loss = 1.32763, acc = 0.57288
[ Valid | 044/200 ] loss = 1.32763, acc = 0.57288


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 045/200 ] loss = 1.09194, acc = 0.63232


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 045/200 ] loss = 1.18253, acc = 0.61186
[ Valid | 045/200 ] loss = 1.18253, acc = 0.61186


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 046/200 ] loss = 1.08206, acc = 0.63672


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 046/200 ] loss = 1.36823, acc = 0.58008
[ Valid | 046/200 ] loss = 1.36823, acc = 0.58008


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 047/200 ] loss = 1.08607, acc = 0.63184


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 047/200 ] loss = 1.32589, acc = 0.59364
[ Valid | 047/200 ] loss = 1.32589, acc = 0.59364


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 048/200 ] loss = 1.07364, acc = 0.64150


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 048/200 ] loss = 1.14558, acc = 0.61072
[ Valid | 048/200 ] loss = 1.14558, acc = 0.61072


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 049/200 ] loss = 1.01558, acc = 0.65928


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 049/200 ] loss = 1.08348, acc = 0.65067
[ Valid | 049/200 ] loss = 1.08348, acc = 0.65067 -> best
Best model found at epoch 48, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 050/200 ] loss = 1.03740, acc = 0.65020


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 050/200 ] loss = 1.14879, acc = 0.62107
[ Valid | 050/200 ] loss = 1.14879, acc = 0.62107


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 051/200 ] loss = 1.03535, acc = 0.65352


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 051/200 ] loss = 1.10161, acc = 0.63287
[ Valid | 051/200 ] loss = 1.10161, acc = 0.63287


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 052/200 ] loss = 0.99144, acc = 0.67061


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 052/200 ] loss = 1.05636, acc = 0.66005
[ Valid | 052/200 ] loss = 1.05636, acc = 0.66005 -> best
Best model found at epoch 51, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 053/200 ] loss = 0.98452, acc = 0.66631


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 053/200 ] loss = 1.12993, acc = 0.63096
[ Valid | 053/200 ] loss = 1.12993, acc = 0.63096


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 054/200 ] loss = 0.97673, acc = 0.67148


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 054/200 ] loss = 1.18702, acc = 0.64885
[ Valid | 054/200 ] loss = 1.18702, acc = 0.64885


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 055/200 ] loss = 0.95629, acc = 0.67334


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 055/200 ] loss = 1.15318, acc = 0.63182
[ Valid | 055/200 ] loss = 1.15318, acc = 0.63182


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 056/200 ] loss = 0.98045, acc = 0.67119


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 056/200 ] loss = 1.08091, acc = 0.65631
[ Valid | 056/200 ] loss = 1.08091, acc = 0.65631


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 057/200 ] loss = 0.91041, acc = 0.69043


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 057/200 ] loss = 1.04846, acc = 0.66959
[ Valid | 057/200 ] loss = 1.04846, acc = 0.66959 -> best
Best model found at epoch 56, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 058/200 ] loss = 0.91276, acc = 0.69385


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 058/200 ] loss = 0.95525, acc = 0.68149
[ Valid | 058/200 ] loss = 0.95525, acc = 0.68149 -> best
Best model found at epoch 57, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 059/200 ] loss = 0.91884, acc = 0.68975


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 059/200 ] loss = 1.21635, acc = 0.62965
[ Valid | 059/200 ] loss = 1.21635, acc = 0.62965


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 060/200 ] loss = 0.90125, acc = 0.69600


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 060/200 ] loss = 1.23641, acc = 0.63712
[ Valid | 060/200 ] loss = 1.23641, acc = 0.63712


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 061/200 ] loss = 0.90785, acc = 0.69668


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 061/200 ] loss = 1.30715, acc = 0.61047
[ Valid | 061/200 ] loss = 1.30715, acc = 0.61047


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 062/200 ] loss = 0.86076, acc = 0.71816


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 062/200 ] loss = 1.12909, acc = 0.65510
[ Valid | 062/200 ] loss = 1.12909, acc = 0.65510


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 063/200 ] loss = 0.83533, acc = 0.71758


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 063/200 ] loss = 1.24786, acc = 0.63652
[ Valid | 063/200 ] loss = 1.24786, acc = 0.63652


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 064/200 ] loss = 0.83867, acc = 0.72578


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 064/200 ] loss = 1.02036, acc = 0.68070
[ Valid | 064/200 ] loss = 1.02036, acc = 0.68070


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 065/200 ] loss = 0.84135, acc = 0.71572


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 065/200 ] loss = 1.03678, acc = 0.68384
[ Valid | 065/200 ] loss = 1.03678, acc = 0.68384 -> best
Best model found at epoch 64, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 066/200 ] loss = 0.81942, acc = 0.72871


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 066/200 ] loss = 1.03825, acc = 0.67427
[ Valid | 066/200 ] loss = 1.03825, acc = 0.67427


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 067/200 ] loss = 0.80540, acc = 0.73076


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 067/200 ] loss = 1.11150, acc = 0.67983
[ Valid | 067/200 ] loss = 1.11150, acc = 0.67983


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 068/200 ] loss = 0.79580, acc = 0.73838


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 068/200 ] loss = 1.11300, acc = 0.66759
[ Valid | 068/200 ] loss = 1.11300, acc = 0.66759


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 069/200 ] loss = 0.81217, acc = 0.73242


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 069/200 ] loss = 1.14921, acc = 0.66620
[ Valid | 069/200 ] loss = 1.14921, acc = 0.66620


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 070/200 ] loss = 0.79042, acc = 0.73867


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 070/200 ] loss = 0.95219, acc = 0.69338
[ Valid | 070/200 ] loss = 0.95219, acc = 0.69338 -> best
Best model found at epoch 69, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 071/200 ] loss = 0.75014, acc = 0.74814


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 071/200 ] loss = 0.99185, acc = 0.70580
[ Valid | 071/200 ] loss = 0.99185, acc = 0.70580 -> best
Best model found at epoch 70, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 072/200 ] loss = 0.75608, acc = 0.74463


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 072/200 ] loss = 0.99681, acc = 0.69112
[ Valid | 072/200 ] loss = 0.99681, acc = 0.69112


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 073/200 ] loss = 0.79093, acc = 0.73613


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 073/200 ] loss = 0.96065, acc = 0.70561
[ Valid | 073/200 ] loss = 0.96065, acc = 0.70561


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 074/200 ] loss = 0.72087, acc = 0.75830


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 074/200 ] loss = 0.94729, acc = 0.71387
[ Valid | 074/200 ] loss = 0.94729, acc = 0.71387 -> best
Best model found at epoch 73, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 075/200 ] loss = 0.73709, acc = 0.75410


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 075/200 ] loss = 0.98318, acc = 0.70544
[ Valid | 075/200 ] loss = 0.98318, acc = 0.70544


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 076/200 ] loss = 0.72345, acc = 0.75967


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 076/200 ] loss = 0.97143, acc = 0.71656
[ Valid | 076/200 ] loss = 0.97143, acc = 0.71656 -> best
Best model found at epoch 75, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 077/200 ] loss = 0.72145, acc = 0.76084


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 077/200 ] loss = 1.11615, acc = 0.68165
[ Valid | 077/200 ] loss = 1.11615, acc = 0.68165


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 078/200 ] loss = 0.71751, acc = 0.76445


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 078/200 ] loss = 0.96457, acc = 0.70292
[ Valid | 078/200 ] loss = 0.96457, acc = 0.70292


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 079/200 ] loss = 0.69396, acc = 0.76982


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 079/200 ] loss = 0.93515, acc = 0.71882
[ Valid | 079/200 ] loss = 0.93515, acc = 0.71882 -> best
Best model found at epoch 78, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 080/200 ] loss = 0.67150, acc = 0.78086


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 080/200 ] loss = 1.02865, acc = 0.69434
[ Valid | 080/200 ] loss = 1.02865, acc = 0.69434


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 081/200 ] loss = 0.66720, acc = 0.78154


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 081/200 ] loss = 0.98063, acc = 0.71672
[ Valid | 081/200 ] loss = 0.98063, acc = 0.71672


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 082/200 ] loss = 0.66929, acc = 0.77852


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 082/200 ] loss = 1.03765, acc = 0.70761
[ Valid | 082/200 ] loss = 1.03765, acc = 0.70761


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 083/200 ] loss = 0.66030, acc = 0.77949


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 083/200 ] loss = 1.05181, acc = 0.70240
[ Valid | 083/200 ] loss = 1.05181, acc = 0.70240


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 084/200 ] loss = 0.64761, acc = 0.78584


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 084/200 ] loss = 1.01973, acc = 0.71039
[ Valid | 084/200 ] loss = 1.01973, acc = 0.71039


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 085/200 ] loss = 0.64890, acc = 0.78467


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 085/200 ] loss = 1.03695, acc = 0.70527
[ Valid | 085/200 ] loss = 1.03695, acc = 0.70527


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 086/200 ] loss = 0.63418, acc = 0.79316


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 086/200 ] loss = 1.07673, acc = 0.70874
[ Valid | 086/200 ] loss = 1.07673, acc = 0.70874


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 087/200 ] loss = 0.62234, acc = 0.79463


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 087/200 ] loss = 1.04252, acc = 0.71369
[ Valid | 087/200 ] loss = 1.04252, acc = 0.71369


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 088/200 ] loss = 0.61565, acc = 0.79766


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 088/200 ] loss = 1.01930, acc = 0.71083
[ Valid | 088/200 ] loss = 1.01930, acc = 0.71083


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 089/200 ] loss = 0.60212, acc = 0.79961


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 089/200 ] loss = 1.07625, acc = 0.72107
[ Valid | 089/200 ] loss = 1.07625, acc = 0.72107 -> best
Best model found at epoch 88, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 090/200 ] loss = 0.60881, acc = 0.79541


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 090/200 ] loss = 1.00341, acc = 0.72072
[ Valid | 090/200 ] loss = 1.00341, acc = 0.72072


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 091/200 ] loss = 0.59018, acc = 0.80527


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 091/200 ] loss = 1.01267, acc = 0.71829
[ Valid | 091/200 ] loss = 1.01267, acc = 0.71829


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 092/200 ] loss = 0.59287, acc = 0.80313


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 092/200 ] loss = 1.06396, acc = 0.71673
[ Valid | 092/200 ] loss = 1.06396, acc = 0.71673


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 093/200 ] loss = 0.60752, acc = 0.79688


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 093/200 ] loss = 1.04094, acc = 0.71986
[ Valid | 093/200 ] loss = 1.04094, acc = 0.71986


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 094/200 ] loss = 0.57710, acc = 0.80967


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 094/200 ] loss = 1.04493, acc = 0.72801
[ Valid | 094/200 ] loss = 1.04493, acc = 0.72801 -> best
Best model found at epoch 93, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 095/200 ] loss = 0.56919, acc = 0.81543


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 095/200 ] loss = 1.02216, acc = 0.73209
[ Valid | 095/200 ] loss = 1.02216, acc = 0.73209 -> best
Best model found at epoch 94, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 096/200 ] loss = 0.56192, acc = 0.80879


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 096/200 ] loss = 1.21786, acc = 0.69450
[ Valid | 096/200 ] loss = 1.21786, acc = 0.69450


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 097/200 ] loss = 0.59145, acc = 0.79893


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 097/200 ] loss = 1.03256, acc = 0.72029
[ Valid | 097/200 ] loss = 1.03256, acc = 0.72029


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 098/200 ] loss = 0.55330, acc = 0.81738


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 098/200 ] loss = 1.23657, acc = 0.69051
[ Valid | 098/200 ] loss = 1.23657, acc = 0.69051


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 099/200 ] loss = 0.58369, acc = 0.81172


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 099/200 ] loss = 1.18783, acc = 0.70414
[ Valid | 099/200 ] loss = 1.18783, acc = 0.70414


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 100/200 ] loss = 0.55427, acc = 0.81875


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 100/200 ] loss = 1.03294, acc = 0.72680
[ Valid | 100/200 ] loss = 1.03294, acc = 0.72680


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 101/200 ] loss = 0.55135, acc = 0.81768


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 101/200 ] loss = 0.99437, acc = 0.73044
[ Valid | 101/200 ] loss = 0.99437, acc = 0.73044


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 102/200 ] loss = 0.49661, acc = 0.83369


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 102/200 ] loss = 1.14621, acc = 0.71854
[ Valid | 102/200 ] loss = 1.14621, acc = 0.71854


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 103/200 ] loss = 0.52056, acc = 0.82598


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 103/200 ] loss = 1.22543, acc = 0.70223
[ Valid | 103/200 ] loss = 1.22543, acc = 0.70223


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 104/200 ] loss = 0.53781, acc = 0.82305


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 104/200 ] loss = 1.05524, acc = 0.72359
[ Valid | 104/200 ] loss = 1.05524, acc = 0.72359


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 105/200 ] loss = 0.51461, acc = 0.83418


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 105/200 ] loss = 1.20358, acc = 0.69728
[ Valid | 105/200 ] loss = 1.20358, acc = 0.69728


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 106/200 ] loss = 0.48484, acc = 0.84189


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 106/200 ] loss = 1.03386, acc = 0.72967
[ Valid | 106/200 ] loss = 1.03386, acc = 0.72967


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 107/200 ] loss = 0.49859, acc = 0.83887


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 107/200 ] loss = 1.03859, acc = 0.73860
[ Valid | 107/200 ] loss = 1.03859, acc = 0.73860 -> best
Best model found at epoch 106, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 108/200 ] loss = 0.53997, acc = 0.82070


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 108/200 ] loss = 1.18939, acc = 0.72558
[ Valid | 108/200 ] loss = 1.18939, acc = 0.72558


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 109/200 ] loss = 0.53070, acc = 0.82334


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 109/200 ] loss = 1.18974, acc = 0.70276
[ Valid | 109/200 ] loss = 1.18974, acc = 0.70276


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 110/200 ] loss = 0.51910, acc = 0.82783


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 110/200 ] loss = 1.10680, acc = 0.72428
[ Valid | 110/200 ] loss = 1.10680, acc = 0.72428


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 111/200 ] loss = 0.47937, acc = 0.84707


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 111/200 ] loss = 1.31277, acc = 0.71048
[ Valid | 111/200 ] loss = 1.31277, acc = 0.71048


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 112/200 ] loss = 0.49960, acc = 0.83877


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 112/200 ] loss = 1.07365, acc = 0.72967
[ Valid | 112/200 ] loss = 1.07365, acc = 0.72967


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 113/200 ] loss = 0.46542, acc = 0.84873


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 113/200 ] loss = 1.15273, acc = 0.73122
[ Valid | 113/200 ] loss = 1.15273, acc = 0.73122


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 114/200 ] loss = 0.47515, acc = 0.84561


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 114/200 ] loss = 1.21241, acc = 0.71439
[ Valid | 114/200 ] loss = 1.21241, acc = 0.71439


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 115/200 ] loss = 0.47515, acc = 0.84629


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 115/200 ] loss = 1.03273, acc = 0.73861
[ Valid | 115/200 ] loss = 1.03273, acc = 0.73861 -> best
Best model found at epoch 114, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 116/200 ] loss = 0.45992, acc = 0.84502


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 116/200 ] loss = 1.14895, acc = 0.71126
[ Valid | 116/200 ] loss = 1.14895, acc = 0.71126


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 117/200 ] loss = 0.47966, acc = 0.84639


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 117/200 ] loss = 1.26525, acc = 0.72038
[ Valid | 117/200 ] loss = 1.26525, acc = 0.72038


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 118/200 ] loss = 0.45614, acc = 0.84912


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 118/200 ] loss = 1.09284, acc = 0.72584
[ Valid | 118/200 ] loss = 1.09284, acc = 0.72584


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 119/200 ] loss = 0.44364, acc = 0.85400


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 119/200 ] loss = 1.23303, acc = 0.72775
[ Valid | 119/200 ] loss = 1.23303, acc = 0.72775


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 120/200 ] loss = 0.45943, acc = 0.84922


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 120/200 ] loss = 1.28679, acc = 0.71221
[ Valid | 120/200 ] loss = 1.28679, acc = 0.71221


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 121/200 ] loss = 0.44072, acc = 0.85264


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 121/200 ] loss = 1.22704, acc = 0.72880
[ Valid | 121/200 ] loss = 1.22704, acc = 0.72880


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 122/200 ] loss = 0.44873, acc = 0.85742


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 122/200 ] loss = 1.18700, acc = 0.71013
[ Valid | 122/200 ] loss = 1.18700, acc = 0.71013


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 123/200 ] loss = 0.44865, acc = 0.85254


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 123/200 ] loss = 1.13448, acc = 0.73522
[ Valid | 123/200 ] loss = 1.13448, acc = 0.73522


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 124/200 ] loss = 0.43685, acc = 0.85898


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 124/200 ] loss = 1.08752, acc = 0.73938
[ Valid | 124/200 ] loss = 1.08752, acc = 0.73938 -> best
Best model found at epoch 123, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 125/200 ] loss = 0.45187, acc = 0.85029


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 125/200 ] loss = 1.06034, acc = 0.75189
[ Valid | 125/200 ] loss = 1.06034, acc = 0.75189 -> best
Best model found at epoch 124, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 126/200 ] loss = 0.45998, acc = 0.85117


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 126/200 ] loss = 1.09884, acc = 0.73235
[ Valid | 126/200 ] loss = 1.09884, acc = 0.73235


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 127/200 ] loss = 0.44501, acc = 0.85361


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 127/200 ] loss = 1.17328, acc = 0.72090
[ Valid | 127/200 ] loss = 1.17328, acc = 0.72090


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 128/200 ] loss = 0.45003, acc = 0.85195


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 128/200 ] loss = 1.31704, acc = 0.72811
[ Valid | 128/200 ] loss = 1.31704, acc = 0.72811


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 129/200 ] loss = 0.43422, acc = 0.85947


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 129/200 ] loss = 1.12687, acc = 0.74451
[ Valid | 129/200 ] loss = 1.12687, acc = 0.74451


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 130/200 ] loss = 0.42203, acc = 0.86279


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 130/200 ] loss = 1.34742, acc = 0.71586
[ Valid | 130/200 ] loss = 1.34742, acc = 0.71586


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 131/200 ] loss = 0.42853, acc = 0.85947


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 131/200 ] loss = 1.28456, acc = 0.72394
[ Valid | 131/200 ] loss = 1.28456, acc = 0.72394


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 132/200 ] loss = 0.41956, acc = 0.86553


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 132/200 ] loss = 1.15718, acc = 0.74677
[ Valid | 132/200 ] loss = 1.15718, acc = 0.74677


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 133/200 ] loss = 0.41699, acc = 0.86436


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 133/200 ] loss = 1.09111, acc = 0.74720
[ Valid | 133/200 ] loss = 1.09111, acc = 0.74720


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 134/200 ] loss = 0.38444, acc = 0.87734


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 134/200 ] loss = 1.08802, acc = 0.74772
[ Valid | 134/200 ] loss = 1.08802, acc = 0.74772


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 135/200 ] loss = 0.40278, acc = 0.87246


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 135/200 ] loss = 1.25961, acc = 0.70770
[ Valid | 135/200 ] loss = 1.25961, acc = 0.70770


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 136/200 ] loss = 0.39156, acc = 0.87256


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 136/200 ] loss = 1.26257, acc = 0.73175
[ Valid | 136/200 ] loss = 1.26257, acc = 0.73175


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 137/200 ] loss = 0.41180, acc = 0.86689


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 137/200 ] loss = 1.14346, acc = 0.73487
[ Valid | 137/200 ] loss = 1.14346, acc = 0.73487


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 138/200 ] loss = 0.40439, acc = 0.87012


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 138/200 ] loss = 1.26552, acc = 0.73244
[ Valid | 138/200 ] loss = 1.26552, acc = 0.73244


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 139/200 ] loss = 0.37521, acc = 0.87637


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 139/200 ] loss = 1.12899, acc = 0.73756
[ Valid | 139/200 ] loss = 1.12899, acc = 0.73756


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 140/200 ] loss = 0.39044, acc = 0.87764


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 140/200 ] loss = 1.13108, acc = 0.74190
[ Valid | 140/200 ] loss = 1.13108, acc = 0.74190


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 141/200 ] loss = 0.40418, acc = 0.87295


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 141/200 ] loss = 1.21013, acc = 0.72662
[ Valid | 141/200 ] loss = 1.21013, acc = 0.72662


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 142/200 ] loss = 0.39373, acc = 0.87412


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 142/200 ] loss = 1.19444, acc = 0.74061
[ Valid | 142/200 ] loss = 1.19444, acc = 0.74061


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 143/200 ] loss = 0.38196, acc = 0.87861


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 143/200 ] loss = 1.17867, acc = 0.73582
[ Valid | 143/200 ] loss = 1.17867, acc = 0.73582


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 144/200 ] loss = 0.37953, acc = 0.87461


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 144/200 ] loss = 1.16869, acc = 0.73461
[ Valid | 144/200 ] loss = 1.16869, acc = 0.73461


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 145/200 ] loss = 0.39778, acc = 0.87187


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 145/200 ] loss = 1.14099, acc = 0.74572
[ Valid | 145/200 ] loss = 1.14099, acc = 0.74572


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 146/200 ] loss = 0.38678, acc = 0.87383


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 146/200 ] loss = 1.29528, acc = 0.71882
[ Valid | 146/200 ] loss = 1.29528, acc = 0.71882


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 147/200 ] loss = 0.39735, acc = 0.87383


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 147/200 ] loss = 1.18988, acc = 0.73557
[ Valid | 147/200 ] loss = 1.18988, acc = 0.73557


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 148/200 ] loss = 0.36463, acc = 0.88066


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 148/200 ] loss = 1.14069, acc = 0.74894
[ Valid | 148/200 ] loss = 1.14069, acc = 0.74894


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 149/200 ] loss = 0.37740, acc = 0.87656


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 149/200 ] loss = 1.17009, acc = 0.74234
[ Valid | 149/200 ] loss = 1.17009, acc = 0.74234


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 150/200 ] loss = 0.37787, acc = 0.87832


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 150/200 ] loss = 1.20199, acc = 0.73739
[ Valid | 150/200 ] loss = 1.20199, acc = 0.73739


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 151/200 ] loss = 0.39077, acc = 0.87422


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 151/200 ] loss = 1.25005, acc = 0.72775
[ Valid | 151/200 ] loss = 1.25005, acc = 0.72775


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 152/200 ] loss = 0.37774, acc = 0.87656


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 152/200 ] loss = 1.15933, acc = 0.73088
[ Valid | 152/200 ] loss = 1.15933, acc = 0.73088


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 153/200 ] loss = 0.38870, acc = 0.87197


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 153/200 ] loss = 1.19870, acc = 0.73261
[ Valid | 153/200 ] loss = 1.19870, acc = 0.73261


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 154/200 ] loss = 0.37270, acc = 0.87969


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 154/200 ] loss = 1.17802, acc = 0.73678
[ Valid | 154/200 ] loss = 1.17802, acc = 0.73678


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 155/200 ] loss = 0.36350, acc = 0.87939


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 155/200 ] loss = 1.13889, acc = 0.75163
[ Valid | 155/200 ] loss = 1.13889, acc = 0.75163


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 156/200 ] loss = 0.36389, acc = 0.88232


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 156/200 ] loss = 1.23837, acc = 0.73383
[ Valid | 156/200 ] loss = 1.23837, acc = 0.73383


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 157/200 ] loss = 0.36055, acc = 0.88281


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 157/200 ] loss = 1.24293, acc = 0.73757
[ Valid | 157/200 ] loss = 1.24293, acc = 0.73757


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 158/200 ] loss = 0.35067, acc = 0.88740


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 158/200 ] loss = 1.14372, acc = 0.74728
[ Valid | 158/200 ] loss = 1.14372, acc = 0.74728


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 159/200 ] loss = 0.35906, acc = 0.88545


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 159/200 ] loss = 1.17218, acc = 0.74156
[ Valid | 159/200 ] loss = 1.17218, acc = 0.74156


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 160/200 ] loss = 0.36577, acc = 0.88486


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 160/200 ] loss = 1.28765, acc = 0.72349
[ Valid | 160/200 ] loss = 1.28765, acc = 0.72349


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 161/200 ] loss = 0.34593, acc = 0.88867


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 161/200 ] loss = 1.25988, acc = 0.73253
[ Valid | 161/200 ] loss = 1.25988, acc = 0.73253


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 162/200 ] loss = 0.35052, acc = 0.88711


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 162/200 ] loss = 1.23668, acc = 0.75154
[ Valid | 162/200 ] loss = 1.23668, acc = 0.75154


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 163/200 ] loss = 0.33455, acc = 0.89033


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 163/200 ] loss = 1.34658, acc = 0.73939
[ Valid | 163/200 ] loss = 1.34658, acc = 0.73939


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 164/200 ] loss = 0.33105, acc = 0.89463


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 164/200 ] loss = 1.37135, acc = 0.71803
[ Valid | 164/200 ] loss = 1.37135, acc = 0.71803


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 165/200 ] loss = 0.36230, acc = 0.88398


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 165/200 ] loss = 1.36407, acc = 0.73305
[ Valid | 165/200 ] loss = 1.36407, acc = 0.73305


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 166/200 ] loss = 0.33145, acc = 0.88848


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 166/200 ] loss = 1.30521, acc = 0.73504
[ Valid | 166/200 ] loss = 1.30521, acc = 0.73504


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 167/200 ] loss = 0.33713, acc = 0.89131


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 167/200 ] loss = 1.23542, acc = 0.74529
[ Valid | 167/200 ] loss = 1.23542, acc = 0.74529


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 168/200 ] loss = 0.34060, acc = 0.88955


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 168/200 ] loss = 1.10364, acc = 0.75623
[ Valid | 168/200 ] loss = 1.10364, acc = 0.75623 -> best
Best model found at epoch 167, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 169/200 ] loss = 0.33418, acc = 0.89326


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 169/200 ] loss = 1.31351, acc = 0.74295
[ Valid | 169/200 ] loss = 1.31351, acc = 0.74295


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 170/200 ] loss = 0.33592, acc = 0.89092


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 170/200 ] loss = 1.22511, acc = 0.75528
[ Valid | 170/200 ] loss = 1.22511, acc = 0.75528


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 171/200 ] loss = 0.33129, acc = 0.89678


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 171/200 ] loss = 1.26067, acc = 0.74876
[ Valid | 171/200 ] loss = 1.26067, acc = 0.74876


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 172/200 ] loss = 0.35131, acc = 0.88428


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 172/200 ] loss = 1.28268, acc = 0.73184
[ Valid | 172/200 ] loss = 1.28268, acc = 0.73184


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 173/200 ] loss = 0.33687, acc = 0.89053


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 173/200 ] loss = 1.13245, acc = 0.75840
[ Valid | 173/200 ] loss = 1.13245, acc = 0.75840 -> best
Best model found at epoch 172, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 174/200 ] loss = 0.30567, acc = 0.90273


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 174/200 ] loss = 1.19990, acc = 0.75076
[ Valid | 174/200 ] loss = 1.19990, acc = 0.75076


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 175/200 ] loss = 0.34177, acc = 0.88984


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 175/200 ] loss = 1.32073, acc = 0.73869
[ Valid | 175/200 ] loss = 1.32073, acc = 0.73869


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 176/200 ] loss = 0.36797, acc = 0.88135


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 176/200 ] loss = 1.25145, acc = 0.72324
[ Valid | 176/200 ] loss = 1.25145, acc = 0.72324


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 177/200 ] loss = 0.33820, acc = 0.89229


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 177/200 ] loss = 1.21309, acc = 0.75111
[ Valid | 177/200 ] loss = 1.21309, acc = 0.75111


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 178/200 ] loss = 0.34005, acc = 0.89287


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 178/200 ] loss = 1.18000, acc = 0.74895
[ Valid | 178/200 ] loss = 1.18000, acc = 0.74895


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 179/200 ] loss = 0.31453, acc = 0.89844


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 179/200 ] loss = 1.40140, acc = 0.72567
[ Valid | 179/200 ] loss = 1.40140, acc = 0.72567


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 180/200 ] loss = 0.31553, acc = 0.90098


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 180/200 ] loss = 1.50587, acc = 0.72949
[ Valid | 180/200 ] loss = 1.50587, acc = 0.72949


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 181/200 ] loss = 0.32420, acc = 0.89541


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 181/200 ] loss = 1.31895, acc = 0.73522
[ Valid | 181/200 ] loss = 1.31895, acc = 0.73522


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 182/200 ] loss = 0.33247, acc = 0.88994


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 182/200 ] loss = 1.29790, acc = 0.73973
[ Valid | 182/200 ] loss = 1.29790, acc = 0.73973


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 183/200 ] loss = 0.32443, acc = 0.89639


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 183/200 ] loss = 1.18514, acc = 0.74659
[ Valid | 183/200 ] loss = 1.18514, acc = 0.74659


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 184/200 ] loss = 0.31705, acc = 0.89785


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 184/200 ] loss = 1.14616, acc = 0.75119
[ Valid | 184/200 ] loss = 1.14616, acc = 0.75119


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 185/200 ] loss = 0.32528, acc = 0.89668


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 185/200 ] loss = 1.18264, acc = 0.74650
[ Valid | 185/200 ] loss = 1.18264, acc = 0.74650


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 186/200 ] loss = 0.33319, acc = 0.88945


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 186/200 ] loss = 1.23461, acc = 0.74929
[ Valid | 186/200 ] loss = 1.23461, acc = 0.74929


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 187/200 ] loss = 0.31431, acc = 0.90000


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 187/200 ] loss = 1.11587, acc = 0.74929
[ Valid | 187/200 ] loss = 1.11587, acc = 0.74929


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 188/200 ] loss = 0.30661, acc = 0.90615


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 188/200 ] loss = 1.63578, acc = 0.71830
[ Valid | 188/200 ] loss = 1.63578, acc = 0.71830


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 189/200 ] loss = 0.31834, acc = 0.89482


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 189/200 ] loss = 1.24141, acc = 0.73669
[ Valid | 189/200 ] loss = 1.24141, acc = 0.73669


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 190/200 ] loss = 0.33704, acc = 0.89150


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 190/200 ] loss = 1.19937, acc = 0.74789
[ Valid | 190/200 ] loss = 1.19937, acc = 0.74789


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 191/200 ] loss = 0.31287, acc = 0.90303


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 191/200 ] loss = 1.17068, acc = 0.73487
[ Valid | 191/200 ] loss = 1.17068, acc = 0.73487


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 192/200 ] loss = 0.30114, acc = 0.90420


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 192/200 ] loss = 1.20883, acc = 0.75102
[ Valid | 192/200 ] loss = 1.20883, acc = 0.75102


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 193/200 ] loss = 0.31657, acc = 0.89912


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 193/200 ] loss = 1.33561, acc = 0.74911
[ Valid | 193/200 ] loss = 1.33561, acc = 0.74911


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 194/200 ] loss = 0.31018, acc = 0.90029


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 194/200 ] loss = 1.24795, acc = 0.74251
[ Valid | 194/200 ] loss = 1.24795, acc = 0.74251


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 195/200 ] loss = 0.33532, acc = 0.89092


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 195/200 ] loss = 1.27135, acc = 0.74260
[ Valid | 195/200 ] loss = 1.27135, acc = 0.74260


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 196/200 ] loss = 0.31540, acc = 0.89727


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 196/200 ] loss = 1.18495, acc = 0.74676
[ Valid | 196/200 ] loss = 1.18495, acc = 0.74676


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 197/200 ] loss = 0.31406, acc = 0.89922


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 197/200 ] loss = 1.18972, acc = 0.75388
[ Valid | 197/200 ] loss = 1.18972, acc = 0.75388


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 198/200 ] loss = 0.31708, acc = 0.89668


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 198/200 ] loss = 1.17156, acc = 0.73938
[ Valid | 198/200 ] loss = 1.17156, acc = 0.73938


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 199/200 ] loss = 0.30369, acc = 0.89932


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 199/200 ] loss = 1.31890, acc = 0.75840
[ Valid | 199/200 ] loss = 1.31890, acc = 0.75840 -> best
Best model found at epoch 198, saving model


  0%|          | 0/40 [00:00<?, ?it/s]

[ Train | 200/200 ] loss = 0.29596, acc = 0.90420


  0%|          | 0/15 [00:00<?, ?it/s]

[ Valid | 200/200 ] loss = 1.34637, acc = 0.73097
[ Valid | 200/200 ] loss = 1.34637, acc = 0.73097


# Dataloader for test

In [9]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("./test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [11]:
model_best = Classifier().to(device)
#model_best = models.vgg16(weights=False).to(device)
#model_best = models.resnet18().to(device)
#model_best = models.resnet50().to(device)

model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  0%|          | 0/12 [00:00<?, ?it/s]

In [12]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)

# ensemble

In [13]:
# ensemble
import os
import csv
import pandas as pd
import numpy as np
from scipy import stats
from collections import Counter
#result_list = os.listdir('./')
#result_list
candidate_1= pd.read_csv("submission1_kaggle0.706.csv") #validation accuracy: 0.83067
category_1 = candidate_1['Category']
id_1=candidate_1['Id']
candidate_2 = pd.read_csv("submission2_kaggle0.82733.csv") #validation accuracy: 0.83733
category_2 = candidate_2['Category']
id_2=candidate_2['Id']
candidate_3 = pd.read_csv("submission3_.csv") #validation accuracy: 0.85267
category_3 = candidate_3['Category']
id_3=candidate_3['Id']
with open("final.csv", "w") as f:
    f.write("Id,Category\n")
    for i in range(len(category_1)):
        id_num = [category_1[i], category_2[i], category_3[i]]
        #https://www.796t.com/content/1545109566.html
        count = np.bincount(id_num)
        #https://www.delftstack.com/zh-tw/api/numpy/python-numpy-argmax/
        #https://www.cjavapy.com/article/1021/
        #https://zhuanlan.zhihu.com/p/364342349
        result = np.argmax(count)
        f.write(f"{i},{result}\n")

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your  train_tfm in the report can be different from train_tfm in your training code.


In [None]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You can add some transforms here.
    
    #ref:https://zhuanlan.zhihu.com/p/483694635
    #https://blog.csdn.net/Miao_sin/article/details/122474968
    transforms.RandomHorizontalFlip(p=1),   # 随机水平翻转
    transforms.RandomVerticalFlip(p=1),     # 随机上下翻转
    transforms.RandomGrayscale(0.5), # 随机灰度化
    transforms.RandomSolarize(threshold=192.0),
    transforms.ColorJitter(brightness=.5,hue=0.5), # 改变图像的亮度和饱和度
    transforms.RandomRotation(degrees=(0, 180)), # 图像随机旋转
    transforms.RandomInvert(),# 改变图像的颜色
    
    transforms.ToTensor(),
])

# Q2. Visual Representations Implementation
## Visualize the learned visual representations of the CNN model on the validation set by implementing t-SNE (t-distributed Stochastic Neighbor Embedding) on the output of both top & mid layers (You need to submit 2 images). 


In [None]:
import torch
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from tqdm import tqdm
import matplotlib.cm as cm
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the trained model
model = Classifier().to(device)
#model = models.resnet18().to(device)

state_dict = torch.load(f"{_exp_name}_best.ckpt")
model.load_state_dict(state_dict)
model.eval()

print(model)

In [None]:
# Load the vaildation set defined by TA
valid_set = FoodDataset("./valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=64, shuffle=False, num_workers=0, pin_memory=True)

# Extract the representations for the specific layer of model
index = 10 # You should find out the index of layer which is defined as "top" or 'mid' layer of your model.
features = []
labels = []
for batch in tqdm(valid_loader):
    imgs, lbls = batch
    with torch.no_grad():
        logits = model.cnn[:index](imgs.to(device))
        logits = logits.view(logits.size()[0], -1)
    labels.extend(lbls.cpu().numpy())
    logits = np.squeeze(logits.cpu().numpy())
    features.extend(logits)
    
features = np.array(features)
colors_per_class = cm.rainbow(np.linspace(0, 1, 11))

# Apply t-SNE to the features
features_tsne = TSNE(n_components=2, init='pca', random_state=42).fit_transform(features)

# Plot the t-SNE visualization
plt.figure(figsize=(10, 8))
for label in np.unique(labels):
    plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=label, s=5)
plt.legend()
plt.show()

In [None]:
#ref:
#https://zhuanlan.zhihu.com/p/488970185
#