In [1]:
import torch
from torch import nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.nn import functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import time
import numpy as np
import json, os, math
from tqdm import tqdm

## 1. PATCH EMBEDDING


In [2]:
LEARNING_RATE = 0.003
BATCHSIZE = 512
PATCH_SIZE = 4
EMBED_DIM = 512
NUM_BLOCKS = 8
INTERMEDIATE_SIZE = 1 * EMBED_DIM
IMG_SIZE = 32
DROPOUT = 0.2
NUM_ATTENTION_HEADS = 8
NUM_CLASSES = 100
INIT_WEIGHT_RANGE = 0.02
NUM_EPOCHS = 100

In [3]:
class PatchEmbedding(nn.Module):
    def __init__(self):
        super().__init__()
        self.patch_size = PATCH_SIZE
        self.embed_dim = EMBED_DIM
        self.num_patch = int((IMG_SIZE * IMG_SIZE) / (PATCH_SIZE * PATCH_SIZE))
        self.projection = nn.Conv2d(in_channels=3, out_channels=EMBED_DIM, kernel_size=PATCH_SIZE, stride=PATCH_SIZE)
    def forward(self, x):
        B, C, H, W = x.shape
        x = self.projection(x).flatten(2).transpose(1, 2) # (B C H W), flatten thanh (B,C,H*W), transpose(1, 2) doi vi tri thanh B H*W C
        return x

## 2. POSITIONAL EMBEDDING

In [4]:
class PositionalEmbedding(nn.Module):
    def __init__(self):
        super().__init__()
        self.cls_token = nn.Parameter(torch.randn(1, 1, EMBED_DIM))
        self.patch_embed = PatchEmbedding()
        self.pos_embed = nn.Parameter(torch.randn(1, self.patch_embed.num_patch + 1, EMBED_DIM)) # N chieu + 1 chieu cho cls token
        self.dropout = nn.Dropout(DROPOUT)
        
    def forward(self, x):
        patch_embed = self.patch_embed(x) # B N D
        batch_size, _, _ = patch_embed.shape
        cls_token = self.cls_token.expand(batch_size, -1, -1) # nhân bản lên batchsize lần và giữ nguyên 2 chiều còn lại B, 1, D
        x = torch.cat((cls_token, patch_embed), dim = 1) # B, N + 1, D, trộn 2 cái vô theo chiều = 1
        x = x + self.pos_embed # pos + patch
        x = self.dropout(x)
        return x

## 3. ATTENTION HEAD

In [5]:
class AttentionHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.num_attention_size = EMBED_DIM // NUM_ATTENTION_HEADS # 768 / 12 = 64
        self.query = nn.Linear(EMBED_DIM, self.num_attention_size) # 768 -> 64
        self.key = nn.Linear(EMBED_DIM, self.num_attention_size) # 768 -> 64
        self.value = nn.Linear(EMBED_DIM, self.num_attention_size) # 768 -> 64
        self.dropout = nn.Dropout(DROPOUT)
        
    def forward(self, x):
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)
        
        attention_w = torch.matmul(Q, K.transpose(-2, - 1)) / (self.num_attention_size ** 0.5)
        attention_w = torch.softmax(attention_w, dim=-1)
        attention_w = self.dropout(attention_w)
        output = torch.matmul(attention_w, V)
        return output, attention_w

## 4. MULTIHEAD ATTENTION

In [6]:
class MultiHeadAttention(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed_dim = EMBED_DIM
        self.attention_head_size = EMBED_DIM // NUM_ATTENTION_HEADS # 768 / 12 = 64
        self.all_head_size = self.attention_head_size * NUM_ATTENTION_HEADS  # = 768
        self.heads = nn.ModuleList([])
        for _ in range(NUM_ATTENTION_HEADS):
            head = AttentionHead()
            self.heads.append(head)
        self.output_projection = nn.Linear(self.all_head_size, EMBED_DIM)
        self.output_dropout = nn.Dropout(DROPOUT)
    def forward(self, x):
        attention_outputs = [head(x) for head in self.heads]
        attention_output = torch.cat([attention_output for attention_output, _ in attention_outputs], dim=-1)
        attention_output = self.output_projection(attention_output)
        attention_output = self.output_dropout(attention_output)
        return attention_output

## 5. MLP

In [7]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(EMBED_DIM, INTERMEDIATE_SIZE)
        self.gelu = nn.GELU()
        self.fc2 = nn.Linear(INTERMEDIATE_SIZE, EMBED_DIM)
        self.dropout = nn.Dropout(DROPOUT)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.gelu(x)
        x = self.fc2(x)
        x = self.dropout(x)
        return x

## 6. BLOCK

In [8]:
class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.norm1 = nn.LayerNorm(EMBED_DIM)
        self.mha = MultiHeadAttention()
        self.norm2 = nn.LayerNorm(EMBED_DIM)
        self.mlp = MLP()
        
    def forward(self, x):
        x_norm = self.norm1(x)
        attention_output = self.mha(x_norm)
        x = x + attention_output  # Residual connection
        # LayerNorm trước MLP
        x_norm = self.norm2(x)
        mlp_output = self.mlp(x_norm)
        x = x + mlp_output  # Residual connection
        return x

## 7. ENCODER

In [9]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.ModuleList([])
        for _ in range(NUM_BLOCKS):
            block = Block()
            self.encoder.append(block)
    
    def forward(self, x):
        for block in self.encoder:
            x = block(x) # cho qua 12 block
        return x # B N + 1 D(embed dim)

## 8. ViT

In [10]:
class ViTClassification(nn.Module):
    def __init__(self):
        super().__init__()
        self.embeddings = PositionalEmbedding()
        self.encoder = Encoder()
        self.norm = nn.LayerNorm(EMBED_DIM)  # Thêm LayerNorm trước classifier
        self.classifier = nn.Linear(EMBED_DIM, NUM_CLASSES)
        
    def forward(self, x):
        x = self.embeddings(x)
        x = self.encoder(x)
        logits = self.classifier(x[:, 0, :]) # lay toan bo batch, phan tu 0 dau tien la cls token, toan bo dimension embed dim
        return logits

    def _init_weights(self, module):
        if isinstance(module, nn.Linear, nn.Conv2d):
            torch.nn.init.normal_(module.weight, mean=0.0, std=INIT_WEIGHT_RANGE)
        elif isinstance(module, nn.LayerNorm):
            module.weight.data.fill_(1.0)
        elif isinstance(module, PositionalEmbedding):
            module.position_embeddings.data = nn.init.trunc_normal_(
                module.position_embeddings.data.to(torch.float32),
                mean=0.0,
                std=INIT_WEIGHT_RANGE,
            ).to(module.position_embeddings.dtype)

            module.cls_token.data = nn.init.trunc_normal_(
                module.cls_token.data.to(torch.float32),
                mean=0.0,
                std=INIT_WEIGHT_RANGE,
            ).to(module.cls_token.dtype)

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [12]:
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])

train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_set, batch_size=BATCHSIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=BATCHSIZE, shuffle=False, num_workers=2)
classes = tuple(train_set.classes)
print(classes)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:02<00:00, 79.0MB/s] 


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
('apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 't

In [13]:
model = ViTClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=NUM_EPOCHS)

training_loss = []
training_acc = []
model.train()
# Training
for ep in tqdm(range(NUM_EPOCHS)):
    correct = 0
    total = 0
    running_loss = 0.0
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        
        output = model(inputs)
        
        loss = criterion(output, labels)
        # backpropagation
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = torch.max(output, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0) # cong voi so mau trong batch(batchsize)
        
    training_loss.append(running_loss/len(train_loader))
    training_acc.append((correct/total) * 100)
    scheduler.step()
    print(f"Epoch {ep + 1}: Training Loss: {running_loss/len(train_loader):.4f}, Training acc: {((correct/total) * 100):.2f}%")

  1%|          | 1/100 [01:55<3:11:17, 115.94s/it]

Epoch 1: Training Loss: 4.3994, Training acc: 4.10%


  2%|▏         | 2/100 [04:02<3:19:29, 122.14s/it]

Epoch 2: Training Loss: 4.0479, Training acc: 8.21%


  3%|▎         | 3/100 [06:08<3:20:39, 124.12s/it]

Epoch 3: Training Loss: 3.8016, Training acc: 11.90%


  4%|▍         | 4/100 [08:16<3:20:46, 125.48s/it]

Epoch 4: Training Loss: 3.6256, Training acc: 14.83%


  5%|▌         | 5/100 [10:24<3:19:51, 126.22s/it]

Epoch 5: Training Loss: 3.4825, Training acc: 17.29%


  6%|▌         | 6/100 [12:31<3:18:27, 126.67s/it]

Epoch 6: Training Loss: 3.3722, Training acc: 19.09%


  7%|▋         | 7/100 [14:39<3:16:46, 126.95s/it]

Epoch 7: Training Loss: 3.2746, Training acc: 20.81%


  8%|▊         | 8/100 [16:46<3:14:55, 127.12s/it]

Epoch 8: Training Loss: 3.2006, Training acc: 22.12%


  9%|▉         | 9/100 [18:53<3:12:56, 127.21s/it]

Epoch 9: Training Loss: 3.1254, Training acc: 23.40%


 10%|█         | 10/100 [21:01<3:10:52, 127.26s/it]

Epoch 10: Training Loss: 3.0678, Training acc: 24.62%


 11%|█         | 11/100 [23:08<3:08:49, 127.30s/it]

Epoch 11: Training Loss: 3.0073, Training acc: 25.88%


 12%|█▏        | 12/100 [25:16<3:06:46, 127.35s/it]

Epoch 12: Training Loss: 2.9570, Training acc: 27.09%


 13%|█▎        | 13/100 [27:23<3:04:43, 127.40s/it]

Epoch 13: Training Loss: 2.8968, Training acc: 27.95%


 14%|█▍        | 14/100 [29:31<3:02:41, 127.45s/it]

Epoch 14: Training Loss: 2.8443, Training acc: 28.75%


 15%|█▌        | 15/100 [31:38<3:00:37, 127.50s/it]

Epoch 15: Training Loss: 2.8052, Training acc: 29.76%


 16%|█▌        | 16/100 [33:46<2:58:31, 127.52s/it]

Epoch 16: Training Loss: 2.7740, Training acc: 30.28%


 17%|█▋        | 17/100 [35:53<2:56:23, 127.51s/it]

Epoch 17: Training Loss: 2.7227, Training acc: 31.21%


 18%|█▊        | 18/100 [38:01<2:54:10, 127.45s/it]

Epoch 18: Training Loss: 2.6910, Training acc: 32.08%


 19%|█▉        | 19/100 [40:08<2:52:03, 127.45s/it]

Epoch 19: Training Loss: 2.6495, Training acc: 32.88%


 20%|██        | 20/100 [42:16<2:50:01, 127.52s/it]

Epoch 20: Training Loss: 2.6218, Training acc: 33.41%


 21%|██        | 21/100 [44:23<2:47:52, 127.49s/it]

Epoch 21: Training Loss: 2.5829, Training acc: 34.15%


 22%|██▏       | 22/100 [46:31<2:45:43, 127.48s/it]

Epoch 22: Training Loss: 2.5543, Training acc: 34.76%


 23%|██▎       | 23/100 [48:38<2:43:20, 127.28s/it]

Epoch 23: Training Loss: 2.5204, Training acc: 35.45%


 24%|██▍       | 24/100 [50:45<2:41:09, 127.23s/it]

Epoch 24: Training Loss: 2.4904, Training acc: 36.25%


 25%|██▌       | 25/100 [52:52<2:38:59, 127.19s/it]

Epoch 25: Training Loss: 2.4557, Training acc: 36.64%


 26%|██▌       | 26/100 [54:59<2:36:54, 127.22s/it]

Epoch 26: Training Loss: 2.4268, Training acc: 37.42%


 27%|██▋       | 27/100 [57:07<2:34:52, 127.30s/it]

Epoch 27: Training Loss: 2.4039, Training acc: 37.74%


 28%|██▊       | 28/100 [59:14<2:32:51, 127.38s/it]

Epoch 28: Training Loss: 2.3743, Training acc: 38.55%


 29%|██▉       | 29/100 [1:01:22<2:30:47, 127.43s/it]

Epoch 29: Training Loss: 2.3389, Training acc: 39.00%


 30%|███       | 30/100 [1:03:29<2:28:38, 127.41s/it]

Epoch 30: Training Loss: 2.3120, Training acc: 39.99%


 31%|███       | 31/100 [1:05:36<2:26:31, 127.41s/it]

Epoch 31: Training Loss: 2.2831, Training acc: 40.09%


 32%|███▏      | 32/100 [1:07:44<2:24:23, 127.41s/it]

Epoch 32: Training Loss: 2.2561, Training acc: 40.62%


 33%|███▎      | 33/100 [1:09:51<2:22:15, 127.39s/it]

Epoch 33: Training Loss: 2.2347, Training acc: 41.16%


 34%|███▍      | 34/100 [1:11:59<2:20:10, 127.43s/it]

Epoch 34: Training Loss: 2.2049, Training acc: 41.97%


 35%|███▌      | 35/100 [1:14:05<2:17:43, 127.14s/it]

Epoch 35: Training Loss: 2.1828, Training acc: 42.50%


 36%|███▌      | 36/100 [1:16:12<2:15:28, 127.01s/it]

Epoch 36: Training Loss: 2.1531, Training acc: 42.91%


 37%|███▋      | 37/100 [1:18:19<2:13:31, 127.17s/it]

Epoch 37: Training Loss: 2.1339, Training acc: 43.33%


 38%|███▊      | 38/100 [1:20:27<2:11:26, 127.21s/it]

Epoch 38: Training Loss: 2.0978, Training acc: 44.18%


 39%|███▉      | 39/100 [1:22:34<2:09:21, 127.23s/it]

Epoch 39: Training Loss: 2.0801, Training acc: 44.51%


 40%|████      | 40/100 [1:24:41<2:07:12, 127.22s/it]

Epoch 40: Training Loss: 2.0533, Training acc: 45.17%


 41%|████      | 41/100 [1:26:49<2:05:08, 127.26s/it]

Epoch 41: Training Loss: 2.0352, Training acc: 45.48%


 42%|████▏     | 42/100 [1:28:56<2:03:05, 127.34s/it]

Epoch 42: Training Loss: 2.0031, Training acc: 46.13%


 43%|████▎     | 43/100 [1:31:03<2:00:58, 127.34s/it]

Epoch 43: Training Loss: 1.9905, Training acc: 46.65%


 44%|████▍     | 44/100 [1:33:11<1:58:52, 127.37s/it]

Epoch 44: Training Loss: 1.9678, Training acc: 46.84%


 45%|████▌     | 45/100 [1:35:18<1:56:49, 127.45s/it]

Epoch 45: Training Loss: 1.9387, Training acc: 47.57%


 46%|████▌     | 46/100 [1:37:26<1:54:44, 127.48s/it]

Epoch 46: Training Loss: 1.9218, Training acc: 48.14%


 47%|████▋     | 47/100 [1:39:34<1:52:38, 127.52s/it]

Epoch 47: Training Loss: 1.8951, Training acc: 48.57%


 48%|████▊     | 48/100 [1:41:41<1:50:28, 127.46s/it]

Epoch 48: Training Loss: 1.8780, Training acc: 48.96%


 49%|████▉     | 49/100 [1:43:48<1:48:21, 127.47s/it]

Epoch 49: Training Loss: 1.8473, Training acc: 49.64%


 50%|█████     | 50/100 [1:45:56<1:46:11, 127.44s/it]

Epoch 50: Training Loss: 1.8260, Training acc: 50.32%


 51%|█████     | 51/100 [1:48:03<1:44:04, 127.44s/it]

Epoch 51: Training Loss: 1.8086, Training acc: 50.57%


 52%|█████▏    | 52/100 [1:50:11<1:41:57, 127.45s/it]

Epoch 52: Training Loss: 1.7848, Training acc: 51.10%


 53%|█████▎    | 53/100 [1:52:18<1:39:52, 127.51s/it]

Epoch 53: Training Loss: 1.7605, Training acc: 51.63%


 54%|█████▍    | 54/100 [1:54:26<1:37:45, 127.52s/it]

Epoch 54: Training Loss: 1.7349, Training acc: 51.98%


 55%|█████▌    | 55/100 [1:56:34<1:35:39, 127.54s/it]

Epoch 55: Training Loss: 1.7202, Training acc: 52.49%


 56%|█████▌    | 56/100 [1:58:40<1:33:17, 127.23s/it]

Epoch 56: Training Loss: 1.7016, Training acc: 52.74%


 57%|█████▋    | 57/100 [2:00:47<1:31:09, 127.20s/it]

Epoch 57: Training Loss: 1.6801, Training acc: 53.38%


 58%|█████▊    | 58/100 [2:02:54<1:28:56, 127.06s/it]

Epoch 58: Training Loss: 1.6618, Training acc: 53.91%


 59%|█████▉    | 59/100 [2:05:01<1:26:51, 127.11s/it]

Epoch 59: Training Loss: 1.6332, Training acc: 54.72%


 60%|██████    | 60/100 [2:07:09<1:24:47, 127.19s/it]

Epoch 60: Training Loss: 1.6178, Training acc: 54.78%


 61%|██████    | 61/100 [2:09:16<1:22:45, 127.33s/it]

Epoch 61: Training Loss: 1.6077, Training acc: 54.97%


 62%|██████▏   | 62/100 [2:11:24<1:20:40, 127.38s/it]

Epoch 62: Training Loss: 1.5820, Training acc: 55.67%


 63%|██████▎   | 63/100 [2:13:31<1:18:35, 127.44s/it]

Epoch 63: Training Loss: 1.5718, Training acc: 55.78%


 64%|██████▍   | 64/100 [2:15:39<1:16:28, 127.46s/it]

Epoch 64: Training Loss: 1.5427, Training acc: 56.72%


 65%|██████▌   | 65/100 [2:17:46<1:14:22, 127.51s/it]

Epoch 65: Training Loss: 1.5193, Training acc: 57.20%


 66%|██████▌   | 66/100 [2:19:54<1:12:12, 127.42s/it]

Epoch 66: Training Loss: 1.5128, Training acc: 57.46%


 67%|██████▋   | 67/100 [2:22:01<1:10:03, 127.37s/it]

Epoch 67: Training Loss: 1.4912, Training acc: 57.77%


 68%|██████▊   | 68/100 [2:24:08<1:07:55, 127.35s/it]

Epoch 68: Training Loss: 1.4694, Training acc: 58.27%


 69%|██████▉   | 69/100 [2:26:15<1:05:47, 127.34s/it]

Epoch 69: Training Loss: 1.4457, Training acc: 59.02%


 70%|███████   | 70/100 [2:28:23<1:03:40, 127.36s/it]

Epoch 70: Training Loss: 1.4270, Training acc: 59.38%


 71%|███████   | 71/100 [2:30:30<1:01:34, 127.38s/it]

Epoch 71: Training Loss: 1.4200, Training acc: 59.49%


 72%|███████▏  | 72/100 [2:32:38<59:27, 127.40s/it]  

Epoch 72: Training Loss: 1.3994, Training acc: 60.02%


 73%|███████▎  | 73/100 [2:34:45<57:20, 127.42s/it]

Epoch 73: Training Loss: 1.3737, Training acc: 60.41%


 74%|███████▍  | 74/100 [2:36:53<55:13, 127.44s/it]

Epoch 74: Training Loss: 1.3539, Training acc: 61.02%


 75%|███████▌  | 75/100 [2:39:00<53:07, 127.51s/it]

Epoch 75: Training Loss: 1.3354, Training acc: 61.70%


 76%|███████▌  | 76/100 [2:41:08<51:01, 127.56s/it]

Epoch 76: Training Loss: 1.3180, Training acc: 61.98%


 77%|███████▋  | 77/100 [2:43:15<48:51, 127.46s/it]

Epoch 77: Training Loss: 1.2994, Training acc: 62.21%


 78%|███████▊  | 78/100 [2:45:23<46:43, 127.41s/it]

Epoch 78: Training Loss: 1.2855, Training acc: 62.69%


 79%|███████▉  | 79/100 [2:47:30<44:36, 127.47s/it]

Epoch 79: Training Loss: 1.2697, Training acc: 63.17%


 80%|████████  | 80/100 [2:49:37<42:28, 127.43s/it]

Epoch 80: Training Loss: 1.2508, Training acc: 63.42%


 81%|████████  | 81/100 [2:51:45<40:20, 127.40s/it]

Epoch 81: Training Loss: 1.2306, Training acc: 64.09%


 82%|████████▏ | 82/100 [2:53:52<38:13, 127.41s/it]

Epoch 82: Training Loss: 1.2105, Training acc: 64.60%


 83%|████████▎ | 83/100 [2:56:00<36:05, 127.41s/it]

Epoch 83: Training Loss: 1.2039, Training acc: 64.77%


 84%|████████▍ | 84/100 [2:58:07<33:58, 127.41s/it]

Epoch 84: Training Loss: 1.1859, Training acc: 65.04%


 85%|████████▌ | 85/100 [3:00:14<31:49, 127.33s/it]

Epoch 85: Training Loss: 1.1688, Training acc: 65.56%


 86%|████████▌ | 86/100 [3:02:21<29:42, 127.30s/it]

Epoch 86: Training Loss: 1.1525, Training acc: 66.06%


 87%|████████▋ | 87/100 [3:04:29<27:35, 127.35s/it]

Epoch 87: Training Loss: 1.1267, Training acc: 66.51%


 88%|████████▊ | 88/100 [3:06:37<25:29, 127.43s/it]

Epoch 88: Training Loss: 1.1175, Training acc: 67.06%


 89%|████████▉ | 89/100 [3:08:44<23:22, 127.50s/it]

Epoch 89: Training Loss: 1.0987, Training acc: 67.30%


 90%|█████████ | 90/100 [3:10:52<21:15, 127.56s/it]

Epoch 90: Training Loss: 1.0833, Training acc: 67.65%


 91%|█████████ | 91/100 [3:13:00<19:08, 127.58s/it]

Epoch 91: Training Loss: 1.0657, Training acc: 68.05%


 92%|█████████▏| 92/100 [3:15:07<17:00, 127.58s/it]

Epoch 92: Training Loss: 1.0497, Training acc: 68.73%


 93%|█████████▎| 93/100 [3:17:15<14:53, 127.61s/it]

Epoch 93: Training Loss: 1.0404, Training acc: 68.99%


 94%|█████████▍| 94/100 [3:19:22<12:45, 127.61s/it]

Epoch 94: Training Loss: 1.0201, Training acc: 69.27%


 95%|█████████▌| 95/100 [3:21:30<10:38, 127.62s/it]

Epoch 95: Training Loss: 0.9991, Training acc: 70.02%


 96%|█████████▌| 96/100 [3:23:38<08:30, 127.61s/it]

Epoch 96: Training Loss: 0.9757, Training acc: 70.43%


 97%|█████████▋| 97/100 [3:25:45<06:22, 127.52s/it]

Epoch 97: Training Loss: 0.9756, Training acc: 70.58%


 98%|█████████▊| 98/100 [3:27:52<04:14, 127.45s/it]

Epoch 98: Training Loss: 0.9614, Training acc: 71.09%


 99%|█████████▉| 99/100 [3:30:00<02:07, 127.45s/it]

Epoch 99: Training Loss: 0.9392, Training acc: 71.27%


100%|██████████| 100/100 [3:32:07<00:00, 127.28s/it]

Epoch 100: Training Loss: 0.9270, Training acc: 71.75%





In [14]:
torch.save(model.state_dict(), "bestweight.pth")

In [16]:
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
    
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)

print(f"Test_loss: {test_loss/len(test_loader):.4f}, Test_acc: {(test_correct/test_total)*100 :.2f}")

Test_loss: 2.0845, Test_acc: 53.57
