In [29]:
import torch 
from transformers import ViTFeatureExtractor, ViTForImageClassification
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np 
from PIL import Image
import pandas as pd
import os 
# import wandb
from tqdm import tqdm
from functools import partial
classes = os.listdir("../input/stanford-car-dataset-by-classes-folder/car_data/car_data/train")
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

In [30]:
feature_extractor

ViTFeatureExtractor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "ViTFeatureExtractor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

In [31]:
train_dir = "../input/stanford-car-dataset-by-classes-folder/car_data/car_data/train"
test_dir = "../input/stanford-car-dataset-by-classes-folder/car_data/car_data/test"

In [32]:
class Standford_Car_Dataset(Dataset):
    
    def __init__(self, root, feature_extractor, labels):
        self.feature_extractor = feature_extractor 
        self.files = []
        self.labels = labels
        
        subdirs = sorted(os.listdir(root))
        
        for subdir in subdirs:
            for path, _, files in os.walk(os.path.join(root, subdir)):
                for file in files:
                    self.files.append((os.path.join(root, subdir, file), subdir))
    
    def __getitem__(self, index):
        img, label = self.files[index % len(self.files)]
        img = Image.open(img).convert("RGB")
        img = self.feature_extractor(img, return_tensors="pt")
        img["labels"] = torch.tensor(self.labels.index(label))
        return img
    
    def __len__(self):
        return len(self.files)

In [33]:
train_dataset = Standford_Car_Dataset(train_dir, feature_extractor, classes)
test_dataset = Standford_Car_Dataset(test_dir, feature_extractor, classes)
test_dataset, val_dataset = torch.utils.data.random_split(test_dataset, [6041, 2000])

In [34]:
batch_size = 32
num_workers = 2

train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)

In [35]:
batch = next(iter(train_loader))
batch_images = batch["pixel_values"]
batch_labels = batch["labels"]

In [36]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
num_epochs = 10
lr = 2e-4
eval_steps = 100
record_steps= 10
save_checkpoint = 5

model_V = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224', num_labels=len(classes),
                                                   id2label={str(i): c for i, c in enumerate(classes)},
                                                   label2id={c: str(i) for i, c in enumerate(classes)},
                                                   ignore_mismatched_sizes=True).to(device)

optim = torch.optim.AdamW(model_V.parameters(), lr=lr)

training_stats_step = 10

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([196, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([196]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [37]:
from torch.optim.lr_scheduler import StepLR

scheduler = StepLR(optim, step_size=1, gamma=0.6)

In [38]:
def evaluate(model, eval_loader, eval_dataset):
    correct = 0
    eval_loss = []
    
    model.eval()
    for batch in tqdm(eval_loader):
        # extracting images and labels from batch 
        batch_images = batch["pixel_values"].squeeze(1).to(device)
        batch_labels = batch["labels"].to(device)
        
        # not training the model
        with torch.no_grad():
            outputs = model(pixel_values=batch_images, labels=batch_labels)
            loss = outputs[0]
            eval_loss.append(loss.item())
            correct += (torch.argmax(outputs["logits"], dim=1) == batch_labels).sum().item()
    
    # return eval accuracy and loss
    accuracy = (100 * correct / len(eval_dataset))
    avg_loss = (sum(eval_loss) / len(epoch_loss))
    
    return accuracy, avg_loss

In [39]:
tqdm = partial(tqdm, position=0, leave=True)

In [40]:
for epoch in range(1, num_epochs+1):
    # storing loss and accuracy across the epoch
    epoch_loss = []
    epoch_acc = []
    
    print(f"Epoch {epoch}")
    for index, batch in enumerate(tqdm(train_loader)):
        model_V.train()
        optim.zero_grad()

        # extract images and labels from batch
        batch_images = batch["pixel_values"].squeeze(1).to(device)
        batch_labels = batch["labels"].to(device)
        size = len(batch_images)
        
        outputs = model_V(pixel_values=batch_images, labels=batch_labels)
        
        loss = outputs[0]
        epoch_loss.append(loss.item())
        loss.backward()
        optim.step()
        
        correct = (torch.argmax(outputs["logits"], dim=1) == batch_labels).sum().item()
        acc = (100 * correct) / size
        epoch_acc.append(acc)
        
        # log the training metrics
        if index % record_steps == 0:
            print({'loss': loss, "acc" : acc})
        
    
    # calculate summary stats for each epoch 
    avg_accuracy = (sum(epoch_acc) / len(epoch_acc))
    avg_loss = (sum(epoch_loss) / len(epoch_loss))
    
    # we decay the loss over time 
    scheduler.step()
    
    # save checkpoints using torchscript 
    if epoch % save_checkpoint == 0:
        model_V.save_pretrained(f"Epoch {epoch}")
    
    # finding validation accuracy and loss
    val_acc, val_loss = evaluate(model_V, val_loader, val_dataset)
    print({'validation accuracy': val_acc, "validation loss" : val_loss})
    
    # summary stats at the end of the episode
    print("evaluating on validation set")
    print(f"val loss: {round(val_loss, 4)}, val acc: {round(val_acc, 4)}%")
    print(f"Epoch: {epoch}  avg loss: {round(avg_loss, 4)} avg acc: {round(avg_accuracy, 4)}%")

Epoch 1


  0%|          | 1/255 [00:01<07:54,  1.87s/it]

{'loss': tensor(5.3355, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 0.0}


  4%|▍         | 11/255 [00:12<04:23,  1.08s/it]

{'loss': tensor(5.6026, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 0.0}


  8%|▊         | 21/255 [00:23<04:17,  1.10s/it]

{'loss': tensor(5.3620, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 0.0}


 12%|█▏        | 31/255 [00:34<04:12,  1.13s/it]

{'loss': tensor(5.1223, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 0.0}


 16%|█▌        | 41/255 [00:46<04:07,  1.16s/it]

{'loss': tensor(4.7281, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 3.125}


 20%|██        | 51/255 [00:57<03:46,  1.11s/it]

{'loss': tensor(4.8372, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 3.125}


 24%|██▍       | 61/255 [01:08<03:29,  1.08s/it]

{'loss': tensor(4.7392, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 0.0}


 28%|██▊       | 71/255 [01:18<03:19,  1.08s/it]

{'loss': tensor(4.4828, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 12.5}


 32%|███▏      | 81/255 [01:29<03:06,  1.07s/it]

{'loss': tensor(4.2196, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 12.5}


 36%|███▌      | 91/255 [01:40<02:55,  1.07s/it]

{'loss': tensor(3.9833, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 12.5}


 40%|███▉      | 101/255 [01:51<02:49,  1.10s/it]

{'loss': tensor(3.7568, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 18.75}


 44%|████▎     | 111/255 [02:02<02:38,  1.10s/it]

{'loss': tensor(3.8740, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 9.375}


 47%|████▋     | 121/255 [02:13<02:27,  1.10s/it]

{'loss': tensor(3.4481, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 25.0}


 51%|█████▏    | 131/255 [02:24<02:16,  1.10s/it]

{'loss': tensor(3.8111, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 9.375}


 55%|█████▌    | 141/255 [02:35<02:03,  1.08s/it]

{'loss': tensor(3.0796, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 31.25}


 59%|█████▉    | 151/255 [02:45<01:52,  1.08s/it]

{'loss': tensor(2.6918, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 34.375}


 63%|██████▎   | 161/255 [02:56<01:42,  1.09s/it]

{'loss': tensor(3.0471, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 31.25}


 67%|██████▋   | 171/255 [03:07<01:30,  1.08s/it]

{'loss': tensor(2.7752, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 46.875}


 71%|███████   | 181/255 [03:18<01:20,  1.08s/it]

{'loss': tensor(2.7600, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 28.125}


 75%|███████▍  | 191/255 [03:29<01:10,  1.10s/it]

{'loss': tensor(2.3012, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 40.625}


 79%|███████▉  | 201/255 [03:40<00:58,  1.09s/it]

{'loss': tensor(2.3409, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 40.625}


 83%|████████▎ | 211/255 [03:51<00:47,  1.08s/it]

{'loss': tensor(2.1870, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 56.25}


 87%|████████▋ | 221/255 [04:02<00:36,  1.09s/it]

{'loss': tensor(2.2173, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 56.25}


 91%|█████████ | 231/255 [04:12<00:26,  1.08s/it]

{'loss': tensor(1.6308, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 75.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.08s/it]

{'loss': tensor(1.9984, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 59.375}


 98%|█████████▊| 251/255 [04:34<00:04,  1.08s/it]

{'loss': tensor(1.9375, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 65.625}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:32<00:00,  1.94it/s]


{'validation accuracy': 57.1, 'validation loss': 0.46106116163964367}
evaluating on validation set
val loss: 0.4611, val acc: 57.1%
Epoch: 1  avg loss: 3.5172 avg acc: 26.1642%
Epoch 2


  0%|          | 1/255 [00:02<08:30,  2.01s/it]

{'loss': tensor(1.3903, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 87.5}


  4%|▍         | 11/255 [00:13<04:31,  1.11s/it]

{'loss': tensor(1.1380, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 75.0}


  8%|▊         | 21/255 [00:24<04:21,  1.12s/it]

{'loss': tensor(1.2982, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 59.375}


 12%|█▏        | 31/255 [00:35<04:05,  1.10s/it]

{'loss': tensor(1.0852, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 81.25}


 16%|█▌        | 41/255 [00:45<03:50,  1.08s/it]

{'loss': tensor(1.2575, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 65.625}


 20%|██        | 51/255 [00:56<03:41,  1.09s/it]

{'loss': tensor(1.1635, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 81.25}


 24%|██▍       | 61/255 [01:07<03:28,  1.08s/it]

{'loss': tensor(0.8605, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 84.375}


 28%|██▊       | 71/255 [01:18<03:19,  1.08s/it]

{'loss': tensor(0.7890, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 84.375}


 32%|███▏      | 81/255 [01:29<03:11,  1.10s/it]

{'loss': tensor(0.8717, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 84.375}


 36%|███▌      | 91/255 [01:40<02:59,  1.09s/it]

{'loss': tensor(0.7813, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 90.625}


 40%|███▉      | 101/255 [01:51<02:47,  1.09s/it]

{'loss': tensor(0.6735, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:02<02:38,  1.10s/it]

{'loss': tensor(1.0801, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 78.125}


 47%|████▋     | 121/255 [02:13<02:26,  1.09s/it]

{'loss': tensor(1.0012, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 81.25}


 51%|█████▏    | 131/255 [02:23<02:14,  1.08s/it]

{'loss': tensor(0.6965, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 87.5}


 55%|█████▌    | 141/255 [02:34<02:03,  1.09s/it]

{'loss': tensor(1.0741, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 78.125}


 59%|█████▉    | 151/255 [02:45<01:52,  1.08s/it]

{'loss': tensor(0.7803, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 84.375}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.5639, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 67%|██████▋   | 171/255 [03:07<01:31,  1.09s/it]

{'loss': tensor(0.4945, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:18<01:19,  1.08s/it]

{'loss': tensor(0.6914, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 81.25}


 75%|███████▍  | 191/255 [03:29<01:09,  1.09s/it]

{'loss': tensor(0.5057, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 93.75}


 79%|███████▉  | 201/255 [03:40<00:59,  1.10s/it]

{'loss': tensor(0.4846, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 90.625}


 83%|████████▎ | 211/255 [03:51<00:48,  1.10s/it]

{'loss': tensor(0.5472, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 87.5}


 87%|████████▋ | 221/255 [04:02<00:37,  1.09s/it]

{'loss': tensor(0.8602, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 81.25}


 91%|█████████ | 231/255 [04:13<00:26,  1.09s/it]

{'loss': tensor(0.6885, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 87.5}


 95%|█████████▍| 241/255 [04:24<00:15,  1.10s/it]

{'loss': tensor(0.4457, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.09s/it]

{'loss': tensor(0.4654, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 90.625}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:30<00:00,  2.04it/s]


{'validation accuracy': 77.55, 'validation loss': 0.24101463042053523}
evaluating on validation set
val loss: 0.241, val acc: 77.55%
Epoch: 2  avg loss: 0.8461 avg acc: 85.0123%
Epoch 3


  0%|          | 1/255 [00:02<08:52,  2.09s/it]

{'loss': tensor(0.2285, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


  4%|▍         | 11/255 [00:12<04:28,  1.10s/it]

{'loss': tensor(0.1903, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:23<04:15,  1.09s/it]

{'loss': tensor(0.2679, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 12%|█▏        | 31/255 [00:34<04:06,  1.10s/it]

{'loss': tensor(0.2195, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 16%|█▌        | 41/255 [00:45<03:52,  1.09s/it]

{'loss': tensor(0.2156, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:39,  1.08s/it]

{'loss': tensor(0.2150, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 93.75}


 24%|██▍       | 61/255 [01:07<03:30,  1.08s/it]

{'loss': tensor(0.1968, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 28%|██▊       | 71/255 [01:18<03:19,  1.08s/it]

{'loss': tensor(0.1959, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:09,  1.09s/it]

{'loss': tensor(0.2300, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 36%|███▌      | 91/255 [01:40<03:00,  1.10s/it]

{'loss': tensor(0.1321, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 40%|███▉      | 101/255 [01:51<02:47,  1.09s/it]

{'loss': tensor(0.2313, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.1345, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:25,  1.09s/it]

{'loss': tensor(0.1603, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:13,  1.08s/it]

{'loss': tensor(0.1686, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 55%|█████▌    | 141/255 [02:34<02:03,  1.09s/it]

{'loss': tensor(0.1687, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:51,  1.08s/it]

{'loss': tensor(0.0891, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.2583, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 93.75}


 67%|██████▋   | 171/255 [03:06<01:30,  1.08s/it]

{'loss': tensor(0.1711, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:19,  1.08s/it]

{'loss': tensor(0.1489, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:28<01:09,  1.08s/it]

{'loss': tensor(0.1260, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:59,  1.10s/it]

{'loss': tensor(0.1337, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:50<00:47,  1.09s/it]

{'loss': tensor(0.0929, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:37,  1.09s/it]

{'loss': tensor(0.1386, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.1431, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.10s/it]

{'loss': tensor(0.1317, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.09s/it]

{'loss': tensor(0.0979, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


{'validation accuracy': 82.6, 'validation loss': 0.18519987580822964}
evaluating on validation set
val loss: 0.1852, val acc: 82.6%
Epoch: 3  avg loss: 0.1685 avg acc: 98.3211%
Epoch 4


  0%|          | 1/255 [00:02<08:31,  2.01s/it]

{'loss': tensor(0.0827, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:13<04:33,  1.12s/it]

{'loss': tensor(0.0762, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:24<04:15,  1.09s/it]

{'loss': tensor(0.0606, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:34<04:02,  1.08s/it]

{'loss': tensor(0.0598, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:54,  1.09s/it]

{'loss': tensor(0.0562, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:42,  1.09s/it]

{'loss': tensor(0.0536, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:29,  1.08s/it]

{'loss': tensor(0.0586, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:19,  1.09s/it]

{'loss': tensor(0.0485, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:09,  1.09s/it]

{'loss': tensor(0.0431, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:39<02:58,  1.09s/it]

{'loss': tensor(0.1063, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 40%|███▉      | 101/255 [01:50<02:47,  1.09s/it]

{'loss': tensor(0.0516, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0739, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:26,  1.09s/it]

{'loss': tensor(0.0940, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:16,  1.10s/it]

{'loss': tensor(0.0555, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:03,  1.08s/it]

{'loss': tensor(0.0519, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:53,  1.09s/it]

{'loss': tensor(0.0649, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.0494, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:30,  1.08s/it]

{'loss': tensor(0.0614, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:20,  1.09s/it]

{'loss': tensor(0.0700, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 75%|███████▍  | 191/255 [03:28<01:09,  1.08s/it]

{'loss': tensor(0.0407, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:58,  1.09s/it]

{'loss': tensor(0.0610, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:50<00:47,  1.07s/it]

{'loss': tensor(0.0701, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:36,  1.09s/it]

{'loss': tensor(0.0431, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.0617, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.08s/it]

{'loss': tensor(0.0724, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:33<00:04,  1.08s/it]

{'loss': tensor(0.0592, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:37<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.02it/s]


{'validation accuracy': 83.25, 'validation loss': 0.17475214460316826}
evaluating on validation set
val loss: 0.1748, val acc: 83.25%
Epoch: 4  avg loss: 0.0603 avg acc: 99.7181%
Epoch 5


  0%|          | 1/255 [00:01<08:05,  1.91s/it]

{'loss': tensor(0.0326, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:12<04:28,  1.10s/it]

{'loss': tensor(0.0758, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


  8%|▊         | 21/255 [00:23<04:18,  1.11s/it]

{'loss': tensor(0.0557, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:34<04:04,  1.09s/it]

{'loss': tensor(0.1066, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 16%|█▌        | 41/255 [00:45<03:53,  1.09s/it]

{'loss': tensor(0.0472, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:40,  1.08s/it]

{'loss': tensor(0.0442, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:32,  1.09s/it]

{'loss': tensor(0.0375, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:18,  1.08s/it]

{'loss': tensor(0.0383, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:09,  1.09s/it]

{'loss': tensor(0.0449, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:40<02:58,  1.09s/it]

{'loss': tensor(0.0350, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:50<02:47,  1.09s/it]

{'loss': tensor(0.0364, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0336, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:26,  1.09s/it]

{'loss': tensor(0.0977, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 51%|█████▏    | 131/255 [02:23<02:14,  1.09s/it]

{'loss': tensor(0.0429, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:04,  1.09s/it]

{'loss': tensor(0.0323, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:51,  1.07s/it]

{'loss': tensor(0.0442, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.0391, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:31,  1.09s/it]

{'loss': tensor(0.0340, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:20,  1.09s/it]

{'loss': tensor(0.0359, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:28<01:09,  1.08s/it]

{'loss': tensor(0.0307, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:59,  1.09s/it]

{'loss': tensor(0.0691, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 83%|████████▎ | 211/255 [03:50<00:47,  1.09s/it]

{'loss': tensor(0.0365, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:37,  1.09s/it]

{'loss': tensor(0.0488, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.0328, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.08s/it]

{'loss': tensor(0.0549, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.09s/it]

{'loss': tensor(0.0317, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.00it/s]


{'validation accuracy': 83.65, 'validation loss': 0.17033157594063703}
evaluating on validation set
val loss: 0.1703, val acc: 83.65%
Epoch: 5  avg loss: 0.0405 avg acc: 99.7549%
Epoch 6


  0%|          | 1/255 [00:02<08:37,  2.04s/it]

{'loss': tensor(0.0376, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:13<04:30,  1.11s/it]

{'loss': tensor(0.0380, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:24<04:17,  1.10s/it]

{'loss': tensor(0.0280, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:35<04:05,  1.10s/it]

{'loss': tensor(0.0454, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:52,  1.09s/it]

{'loss': tensor(0.0260, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:39,  1.08s/it]

{'loss': tensor(0.0695, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 24%|██▍       | 61/255 [01:07<03:31,  1.09s/it]

{'loss': tensor(0.0298, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:17,  1.08s/it]

{'loss': tensor(0.0339, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:08,  1.08s/it]

{'loss': tensor(0.0284, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:40<02:58,  1.09s/it]

{'loss': tensor(0.0303, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:50<02:47,  1.09s/it]

{'loss': tensor(0.0361, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0476, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:25,  1.09s/it]

{'loss': tensor(0.0280, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:14,  1.08s/it]

{'loss': tensor(0.0411, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:03,  1.08s/it]

{'loss': tensor(0.0543, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 59%|█████▉    | 151/255 [02:45<01:52,  1.09s/it]

{'loss': tensor(0.0263, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.0313, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:30,  1.08s/it]

{'loss': tensor(0.0251, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:19,  1.08s/it]

{'loss': tensor(0.0344, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:28<01:09,  1.09s/it]

{'loss': tensor(0.0336, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:58,  1.08s/it]

{'loss': tensor(0.0305, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:50<00:48,  1.10s/it]

{'loss': tensor(0.0374, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:36,  1.08s/it]

{'loss': tensor(0.0474, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.0292, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.08s/it]

{'loss': tensor(0.0201, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:33<00:04,  1.08s/it]

{'loss': tensor(0.0329, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:37<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.00it/s]


{'validation accuracy': 84.0, 'validation loss': 0.1681934411619224}
evaluating on validation set
val loss: 0.1682, val acc: 84.0%
Epoch: 6  avg loss: 0.033 avg acc: 99.8039%
Epoch 7


  0%|          | 1/255 [00:02<09:27,  2.23s/it]

{'loss': tensor(0.0277, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:13<04:28,  1.10s/it]

{'loss': tensor(0.0590, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


  8%|▊         | 21/255 [00:24<04:16,  1.10s/it]

{'loss': tensor(0.0263, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:35<04:03,  1.09s/it]

{'loss': tensor(0.0269, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:51,  1.08s/it]

{'loss': tensor(0.0253, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:40,  1.08s/it]

{'loss': tensor(0.0257, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:30,  1.09s/it]

{'loss': tensor(0.0240, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:19,  1.08s/it]

{'loss': tensor(0.0500, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 32%|███▏      | 81/255 [01:29<03:08,  1.09s/it]

{'loss': tensor(0.0348, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:40<02:57,  1.08s/it]

{'loss': tensor(0.0264, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:51<02:47,  1.09s/it]

{'loss': tensor(0.0289, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:37,  1.09s/it]

{'loss': tensor(0.0265, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:25,  1.09s/it]

{'loss': tensor(0.0224, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:15,  1.09s/it]

{'loss': tensor(0.0265, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:03,  1.09s/it]

{'loss': tensor(0.0305, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:52,  1.08s/it]

{'loss': tensor(0.0294, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:41,  1.08s/it]

{'loss': tensor(0.0252, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:30,  1.07s/it]

{'loss': tensor(0.0236, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:19,  1.08s/it]

{'loss': tensor(0.0321, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:28<01:09,  1.09s/it]

{'loss': tensor(0.0246, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:58,  1.08s/it]

{'loss': tensor(0.0223, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:50<00:47,  1.09s/it]

{'loss': tensor(0.0251, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:37,  1.09s/it]

{'loss': tensor(0.0261, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.0631, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 95%|█████████▍| 241/255 [04:23<00:15,  1.09s/it]

{'loss': tensor(0.0248, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:33<00:04,  1.09s/it]

{'loss': tensor(0.0273, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:37<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


{'validation accuracy': 83.9, 'validation loss': 0.16752131628055197}
evaluating on validation set
val loss: 0.1675, val acc: 83.9%
Epoch: 7  avg loss: 0.0292 avg acc: 99.8039%
Epoch 8


  0%|          | 1/255 [00:01<08:24,  1.98s/it]

{'loss': tensor(0.0197, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:12<04:28,  1.10s/it]

{'loss': tensor(0.0462, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:23<04:16,  1.10s/it]

{'loss': tensor(0.0212, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:34<04:03,  1.09s/it]

{'loss': tensor(0.0262, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:50,  1.08s/it]

{'loss': tensor(0.0236, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:42,  1.09s/it]

{'loss': tensor(0.0321, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:30,  1.08s/it]

{'loss': tensor(0.0200, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:20,  1.09s/it]

{'loss': tensor(0.0261, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:09,  1.09s/it]

{'loss': tensor(0.0250, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:39<02:57,  1.08s/it]

{'loss': tensor(0.0309, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:50<02:48,  1.10s/it]

{'loss': tensor(0.0256, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0276, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:24,  1.08s/it]

{'loss': tensor(0.0217, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:14,  1.09s/it]

{'loss': tensor(0.0245, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:03,  1.09s/it]

{'loss': tensor(0.0231, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:51,  1.08s/it]

{'loss': tensor(0.0225, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:42,  1.09s/it]

{'loss': tensor(0.0215, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:06<01:31,  1.09s/it]

{'loss': tensor(0.0236, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:17<01:20,  1.09s/it]

{'loss': tensor(0.0270, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:28<01:09,  1.09s/it]

{'loss': tensor(0.0241, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:39<00:58,  1.08s/it]

{'loss': tensor(0.0264, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:50<00:47,  1.08s/it]

{'loss': tensor(0.0208, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:36,  1.08s/it]

{'loss': tensor(0.0229, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 91%|█████████ | 231/255 [04:12<00:26,  1.09s/it]

{'loss': tensor(0.0263, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.09s/it]

{'loss': tensor(0.0288, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.09s/it]

{'loss': tensor(0.0239, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:37<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.01it/s]


{'validation accuracy': 84.05, 'validation loss': 0.16714772570366954}
evaluating on validation set
val loss: 0.1671, val acc: 84.05%
Epoch: 8  avg loss: 0.027 avg acc: 99.8039%
Epoch 9


  0%|          | 1/255 [00:02<08:32,  2.02s/it]

{'loss': tensor(0.0260, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:12<04:28,  1.10s/it]

{'loss': tensor(0.0229, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:24<04:18,  1.10s/it]

{'loss': tensor(0.0212, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 12%|█▏        | 31/255 [00:35<04:05,  1.10s/it]

{'loss': tensor(0.0251, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:51,  1.08s/it]

{'loss': tensor(0.0237, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:39,  1.08s/it]

{'loss': tensor(0.0185, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:31,  1.09s/it]

{'loss': tensor(0.0193, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:18,  1.08s/it]

{'loss': tensor(0.0222, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:08,  1.09s/it]

{'loss': tensor(0.0231, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:40<02:58,  1.09s/it]

{'loss': tensor(0.0196, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:51<02:48,  1.10s/it]

{'loss': tensor(0.0254, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0223, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:25,  1.09s/it]

{'loss': tensor(0.0291, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:13,  1.08s/it]

{'loss': tensor(0.0245, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:04,  1.09s/it]

{'loss': tensor(0.0536, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 59%|█████▉    | 151/255 [02:45<01:53,  1.09s/it]

{'loss': tensor(0.0246, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:42,  1.09s/it]

{'loss': tensor(0.0242, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:31,  1.09s/it]

{'loss': tensor(0.0233, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:18<01:20,  1.09s/it]

{'loss': tensor(0.0233, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:29<01:09,  1.09s/it]

{'loss': tensor(0.0251, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:40<00:59,  1.10s/it]

{'loss': tensor(0.0199, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 83%|████████▎ | 211/255 [03:51<00:48,  1.10s/it]

{'loss': tensor(0.0210, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:37,  1.09s/it]

{'loss': tensor(0.0250, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 91%|█████████ | 231/255 [04:12<00:26,  1.11s/it]

{'loss': tensor(0.0233, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.09s/it]

{'loss': tensor(0.0270, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.09s/it]

{'loss': tensor(0.0229, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  1.99it/s]


{'validation accuracy': 84.0, 'validation loss': 0.1673577254893733}
evaluating on validation set
val loss: 0.1674, val acc: 84.0%
Epoch: 9  avg loss: 0.0257 avg acc: 99.7917%
Epoch 10


  0%|          | 1/255 [00:02<08:33,  2.02s/it]

{'loss': tensor(0.0180, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  4%|▍         | 11/255 [00:12<04:28,  1.10s/it]

{'loss': tensor(0.0243, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


  8%|▊         | 21/255 [00:23<04:17,  1.10s/it]

{'loss': tensor(0.0538, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 12%|█▏        | 31/255 [00:34<04:03,  1.09s/it]

{'loss': tensor(0.0228, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 16%|█▌        | 41/255 [00:45<03:51,  1.08s/it]

{'loss': tensor(0.0225, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 20%|██        | 51/255 [00:56<03:40,  1.08s/it]

{'loss': tensor(0.0232, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 24%|██▍       | 61/255 [01:07<03:29,  1.08s/it]

{'loss': tensor(0.0224, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 28%|██▊       | 71/255 [01:18<03:20,  1.09s/it]

{'loss': tensor(0.0243, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 32%|███▏      | 81/255 [01:29<03:08,  1.09s/it]

{'loss': tensor(0.0278, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 36%|███▌      | 91/255 [01:40<02:57,  1.08s/it]

{'loss': tensor(0.0225, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 40%|███▉      | 101/255 [01:51<02:48,  1.09s/it]

{'loss': tensor(0.0211, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 44%|████▎     | 111/255 [02:01<02:36,  1.09s/it]

{'loss': tensor(0.0257, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 47%|████▋     | 121/255 [02:12<02:27,  1.10s/it]

{'loss': tensor(0.0212, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 51%|█████▏    | 131/255 [02:23<02:15,  1.09s/it]

{'loss': tensor(0.0203, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 55%|█████▌    | 141/255 [02:34<02:05,  1.10s/it]

{'loss': tensor(0.0213, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 59%|█████▉    | 151/255 [02:45<01:53,  1.09s/it]

{'loss': tensor(0.0238, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 63%|██████▎   | 161/255 [02:56<01:42,  1.09s/it]

{'loss': tensor(0.0232, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 67%|██████▋   | 171/255 [03:07<01:31,  1.08s/it]

{'loss': tensor(0.0238, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 71%|███████   | 181/255 [03:18<01:20,  1.09s/it]

{'loss': tensor(0.0199, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 75%|███████▍  | 191/255 [03:29<01:09,  1.09s/it]

{'loss': tensor(0.0211, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 79%|███████▉  | 201/255 [03:40<00:59,  1.09s/it]

{'loss': tensor(0.0459, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 83%|████████▎ | 211/255 [03:50<00:48,  1.09s/it]

{'loss': tensor(0.0208, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 87%|████████▋ | 221/255 [04:01<00:37,  1.09s/it]

{'loss': tensor(0.0458, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 96.875}


 91%|█████████ | 231/255 [04:12<00:26,  1.10s/it]

{'loss': tensor(0.0215, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 95%|█████████▍| 241/255 [04:23<00:15,  1.10s/it]

{'loss': tensor(0.0259, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


 98%|█████████▊| 251/255 [04:34<00:04,  1.10s/it]

{'loss': tensor(0.0277, device='cuda:0', grad_fn=<NllLossBackward0>), 'acc': 100.0}


100%|██████████| 255/255 [04:38<00:00,  1.09s/it]
100%|██████████| 63/63 [00:31<00:00,  2.03it/s]

{'validation accuracy': 84.05, 'validation loss': 0.16716072594418246}
evaluating on validation set
val loss: 0.1672, val acc: 84.05%
Epoch: 10  avg loss: 0.0249 avg acc: 99.7794%





In [41]:
torch.save(model_V.state_dict(), "./trained_ViTForImageClassification_new.pt")
torch.save(model_V.state_dict(), 'ViTForImageClassification_new.h5')