<a href="https://colab.research.google.com/github/aumkeshchaudhary/AIRL_IISc/blob/main/q1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# installs & imports
!pip install -q torch torchvision tqdm einops

import math, os, random, time
from pathlib import Path
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchvision.transforms import AutoAugmentPolicy

from einops import rearrange

In [2]:
# config
# Prefer the GPU when one is visible; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:", device)

# Single place for every model / optimisation hyper-parameter used below.
cfg = dict(
    # --- input & tokenisation ---
    image_size=32,
    patch_size=4,      # 4x4-pixel patches -> (32 // 4) ** 2 = 64 patch tokens per image
    in_channels=3,
    num_classes=10,
    # --- transformer size ---
    emb_dim=192,       # token width
    num_heads=6,       # 192 / 6 = 32 channels per attention head
    depth=8,           # number of transformer blocks
    mlp_ratio=4.0,     # MLP hidden width as a multiple of emb_dim
    drop=0.1,          # dropout probability
    # --- optimisation ---
    batch_size=128,
    epochs=300,
    lr=3e-4,           # peak learning rate reached at the end of warmup
    weight_decay=0.05,
    warmup_epochs=5,
    seed=42,
)

device: cuda


In [3]:
# reproducibility
# Seed both random number generators used in this notebook from the one
# configured value: PyTorch first, then Python's stdlib `random`.
seed = cfg["seed"]
torch.manual_seed(seed)
random.seed(seed)

In [4]:
# data loaders (augmentations)
# Per-channel statistics handed to `transforms.Normalize` in both pipelines.
mean = (0.4914, 0.4822, 0.4465)
std  = (0.247, 0.243, 0.261)


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps (the tail of both pipelines)."""
    return [transforms.ToTensor(), transforms.Normalize(mean, std)]


# Training images are randomly cropped (with 4px padding), flipped and passed
# through the CIFAR10 AutoAugment policy before being converted to tensors.
train_augmentations = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(policy=AutoAugmentPolicy.CIFAR10),
]
train_transform = transforms.Compose(train_augmentations + _to_normalized_tensor())

# Held-out images are only converted and normalised: no augmentation.
test_transform = transforms.Compose(_to_normalized_tensor())

# Both splits live under ./data and are downloaded when missing.
dataset_kwargs = dict(root="data", download=True)
train_ds = datasets.CIFAR10(train=True, transform=train_transform, **dataset_kwargs)
test_ds  = datasets.CIFAR10(train=False, transform=test_transform, **dataset_kwargs)

# Worker count and memory pinning are identical for both loaders; only the
# batch size and shuffling differ.
loader_kwargs = dict(num_workers=2, pin_memory=True)
train_loader = DataLoader(train_ds, batch_size=cfg["batch_size"], shuffle=True, **loader_kwargs)
test_loader  = DataLoader(test_ds, batch_size=256, shuffle=False, **loader_kwargs)

100%|██████████| 170M/170M [00:13<00:00, 12.3MB/s]


In [5]:
# ViT model implementation
class PatchEmbed(nn.Module):
    """Turn an image into a sequence of patch embeddings.

    A convolution whose kernel size and stride both equal ``patch_size`` maps
    each non-overlapping patch to one ``embed_dim``-dimensional vector.

    Args:
        img_size: side length of the (square) input image, in pixels.
        patch_size: side length of each square patch; must divide ``img_size``.
        in_chans: number of input channels.
        embed_dim: size of each patch embedding.

    Attributes:
        patch_size: the patch side length passed in.
        n_patches: ``(img_size // patch_size) ** 2``.
        proj: the patch-projection convolution.
    """

    def __init__(self, img_size=32, patch_size=4, in_chans=3, embed_dim=192):
        super().__init__()
        patches_per_side, leftover = divmod(img_size, patch_size)
        assert leftover == 0
        self.patch_size = patch_size
        self.n_patches = patches_per_side * patches_per_side
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        """Map ``(B, C, H, W)`` images to ``(B, N, E)`` patch tokens."""
        # conv -> (B, E, H/ps, W/ps); flatten the grid -> (B, E, N); swap -> (B, N, E)
        return self.proj(x).flatten(2).transpose(1, 2)

class MLP(nn.Module):
    """Two-layer feed-forward network: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        in_features: size of the input and of the output.
        hidden_features: width of the hidden layer; falls back to
            ``in_features`` when omitted (or otherwise falsy).
        drop: dropout probability, applied after the activation and again
            after the output projection (the same ``nn.Dropout`` module).
    """

    def __init__(self, in_features, hidden_features=None, drop=0.):
        super().__init__()
        width = hidden_features if hidden_features else in_features
        self.fc1 = nn.Linear(in_features, width)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(width, in_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """Apply the feed-forward network over the last dimension of ``x``."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: token embedding size; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        qkv_bias: whether the fused query/key/value projection has a bias.
        attn_drop: dropout probability applied to the attention weights.
        proj_drop: dropout probability applied after the output projection.

    Raises:
        ValueError: if ``num_heads`` is not positive or does not divide ``dim``.
    """

    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        # Fail at construction time: otherwise an incompatible pair only
        # surfaces later as an opaque reshape error inside forward().
        if num_heads <= 0 or dim % num_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5  # 1 / sqrt(head_dim)

        # One linear layer produces q, k and v together.
        self.qkv = nn.Linear(dim, dim*3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        """Apply self-attention to ``x`` of shape ``(B, N, C)``; returns ``(B, N, C)``."""
        B, N, C = x.shape
        # (B, N, 3C) -> (B, N, 3, heads, head_dim) -> (3, B, heads, N, head_dim)
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]   # each: (B, heads, N, head_dim)
        attn = (q @ k.transpose(-2, -1)) * self.scale   # (B, heads, N, N)
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)
        # Merge the heads back into the channel dimension: (B, N, C).
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

class Block(nn.Module):
    """Pre-norm transformer block: attention and MLP sub-layers, each residual.

    Args:
        dim: token embedding size.
        num_heads: number of attention heads.
        mlp_ratio: MLP hidden width as a multiple of ``dim`` (truncated to int).
        drop: dropout probability for the attention output projection and the MLP.
        attn_drop: dropout probability for the attention weights.
        drop_path: stochastic-depth probability applied to both residual
            branches; exactly ``0.`` disables it (an ``nn.Identity`` is used).
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., drop=0., attn_drop=0., drop_path=0.):
        super().__init__()
        hidden_dim = int(dim * mlp_ratio)

        # Attention sub-layer.
        self.norm1 = nn.LayerNorm(dim)
        self.attn = Attention(dim, num_heads=num_heads, attn_drop=attn_drop, proj_drop=drop)

        # A single drop-path module is shared by both residual branches.
        if drop_path == 0.:
            self.drop_path = nn.Identity()
        else:
            self.drop_path = _StochasticDepth(drop_path)

        # Feed-forward sub-layer.
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim, hidden_dim, drop=drop)

    def forward(self, x):
        """Return ``x`` after the attention and MLP residual updates."""
        attn_branch = self.attn(self.norm1(x))
        x = x + self.drop_path(attn_branch)
        mlp_branch = self.mlp(self.norm2(x))
        return x + self.drop_path(mlp_branch)

# Simple implementation of stochastic depth
class _StochasticDepth(nn.Module):
    def __init__(self, p):
        super().__init__()
        self.p = p
    def forward(self, x):
        if not self.training or self.p == 0.:
            return x
        keep = torch.rand(x.shape[0], 1, 1, device=x.device) >= self.p
        return x * keep / (1 - self.p)

class ViT(nn.Module):
    """Vision Transformer classifier built from a configuration mapping.

    The image is split into patch tokens, a learnable class token is
    prepended, learnable position embeddings are added, the sequence goes
    through ``depth`` transformer blocks, and the normalised class token is fed
    to a linear classification head.

    Args:
        cfg: mapping with the keys ``image_size``, ``patch_size``,
            ``in_channels``, ``emb_dim``, ``num_heads``, ``depth``,
            ``mlp_ratio``, ``drop`` and ``num_classes``. The optional key
            ``drop_path_rate`` (default ``0.1``) sets the stochastic-depth rate
            of the last block; rates grow linearly from 0 at the first block.
    """

    def __init__(self, cfg):
        super().__init__()
        img_size, patch_size = cfg["image_size"], cfg["patch_size"]
        emb_dim, depth = cfg["emb_dim"], cfg["depth"]

        self.patch_embed = PatchEmbed(img_size, patch_size, cfg["in_channels"], emb_dim)
        n_patches = self.patch_embed.n_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, emb_dim))
        # One position embedding per token: the class token plus every patch.
        self.pos_embed = nn.Parameter(torch.zeros(1, 1 + n_patches, emb_dim))
        self.pos_drop = nn.Dropout(p=cfg["drop"])

        # Stochastic-depth schedule: linearly increasing drop probability per
        # block. The maximum used to be hard-coded to 0.1 and is now tunable.
        max_drop_path = cfg.get("drop_path_rate", 0.1)
        dpr = [rate.item() for rate in torch.linspace(0, max_drop_path, depth)]
        self.blocks = nn.ModuleList([
            Block(emb_dim, num_heads=cfg["num_heads"], mlp_ratio=cfg["mlp_ratio"], drop=cfg["drop"], drop_path=dpr[i])
            for i in range(depth)
        ])
        self.norm = nn.LayerNorm(emb_dim)
        self.head = nn.Linear(emb_dim, cfg["num_classes"])

        # Initialisation: truncated normal for the embeddings, then
        # _init_weights for every Linear and LayerNorm submodule.
        nn.init.trunc_normal_(self.pos_embed, std=.02)
        nn.init.trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Xavier-uniform weights / zero bias for Linear; identity-like LayerNorm."""
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            nn.init.zeros_(m.bias)
            nn.init.ones_(m.weight)

    def forward(self, x):
        """Classify a batch of images ``(B, C, H, W)``; returns logits ``(B, num_classes)``."""
        B = x.shape[0]
        x = self.patch_embed(x)             # (B, N, E)
        # expand() repeats the single learned class token across the batch.
        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)   # (B, 1+N, E)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        for blk in self.blocks:
            x = blk(x)

        x = self.norm(x)
        cls = x[:, 0]                       # only the class token is classified
        out = self.head(cls)
        return out


In [6]:
# create model, optimizer, scheduler, loss
# Build the network from the config, then move its parameters to the target device.
model = ViT(cfg)
model = model.to(device)

# AdamW over every model parameter. The learning rate given here is only the
# initial value: the training loop overwrites it each epoch with get_lr().
optimizer = AdamW(
    params=model.parameters(),
    lr=cfg["lr"],
    weight_decay=cfg["weight_decay"],
)

# Simple cosine scheduler with warmup implemented manually
def get_lr(epoch, total_epochs, base_lr, warmup_epochs):
    """Learning rate for a 0-based ``epoch``: linear warmup, then cosine decay.

    Args:
        epoch: 0-based epoch index.
        total_epochs: total number of epochs in the schedule.
        base_lr: peak learning rate, reached on the last warmup epoch.
        warmup_epochs: number of epochs over which the rate ramps up linearly.

    Returns:
        ``base_lr * (epoch + 1) / warmup_epochs`` during warmup, then a
        half-cosine from ``base_lr`` down to 0 over the remaining epochs.
        Epochs at or past ``total_epochs`` stay at 0. If no epochs are left
        for decay (``total_epochs <= warmup_epochs``), ``base_lr`` is held.
    """
    if epoch < warmup_epochs:
        return base_lr * (epoch + 1) / warmup_epochs

    decay_epochs = total_epochs - warmup_epochs
    if decay_epochs <= 0:
        # Nothing left to decay over; avoids dividing by zero below.
        return base_lr

    # Clamp progress to 1 so the cosine cannot swing back up when the caller
    # keeps training past total_epochs.
    t = min(1.0, (epoch - warmup_epochs) / decay_epochs)
    return base_lr * 0.5 * (1.0 + math.cos(math.pi * t))

# Cross-entropy loss on the model's outputs; read as a module-level global by
# both train_one_epoch() and evaluate().
criterion = nn.CrossEntropyLoss()

In [7]:
# train & eval loops
def train_one_epoch(model, loader, optimizer, epoch):
    """Run one optimisation pass over ``loader`` and report running metrics.

    Uses the module-level ``device`` and ``criterion``.

    Args:
        model: network to train; switched to training mode.
        loader: iterable of ``(images, targets)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only in the progress-bar label.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples seen, or
        ``(0.0, 0.0)`` if the loader yielded no samples.
    """
    model.train()
    running_loss = 0.0
    total = 0
    correct = 0
    pbar = tqdm(loader, desc=f"Train Epoch {epoch}")
    for images, targets in pbar:
        # non_blocking lets the copy overlap with compute when the batch comes
        # from pinned host memory; it is a no-op otherwise.
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; re-weight by batch size so the
        # epoch average stays correct when the last batch is smaller.
        running_loss += loss.item() * images.size(0)
        _, preds = outputs.max(1)
        total += targets.size(0)
        correct += preds.eq(targets).sum().item()
        pbar.set_postfix(loss=running_loss/total, acc=100.*correct/total)

    if total == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0
    return running_loss/total, 100.*correct/total

@torch.no_grad()
def evaluate(model, loader):
    """Compute mean loss and accuracy of ``model`` over ``loader``, without gradients.

    Uses the module-level ``device`` and ``criterion``. The model is switched
    to eval mode and left there.

    Args:
        model: network to evaluate.
        loader: iterable of ``(images, targets)`` batches.

    Returns:
        ``(mean_loss, accuracy_percent)`` over all samples, or ``(0.0, 0.0)``
        if the loader yielded no samples.
    """
    model.eval()
    total, correct = 0, 0
    losses = 0.0
    for images, targets in loader:
        # non_blocking lets the copy overlap with compute when the batch comes
        # from pinned host memory; it is a no-op otherwise.
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        outputs = model(images)
        loss = criterion(outputs, targets)
        # Re-weight the batch-mean loss by batch size for a per-sample average.
        losses += loss.item() * images.size(0)
        _, preds = outputs.max(1)
        total += targets.size(0)
        correct += preds.eq(targets).sum().item()

    if total == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0
    return losses/total, 100.*correct/total

In [8]:
# main training loop
total_epochs = cfg["epochs"]
best_acc = 0.0   # highest accuracy seen so far on test_loader

for epoch in range(total_epochs):
    # The schedule is evaluated once per epoch and written into every
    # parameter group before any batch of that epoch is processed.
    lr = get_lr(epoch, total_epochs, cfg["lr"], cfg["warmup_epochs"])
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr

    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, epoch)
    val_loss, val_acc = evaluate(model, test_loader)
    print(f"Epoch {epoch}: train_loss={train_loss:.4f} train_acc={train_acc:.2f} val_loss={val_loss:.4f} val_acc={val_acc:.2f} lr={lr:.6f}")

    # Checkpoint only on a strict improvement over the best accuracy so far.
    if not val_acc > best_acc:
        continue
    best_acc = val_acc
    torch.save(model.state_dict(), "best_vit_cifar10.pt")
    print("Saved best model:", best_acc)

print("Best test accuracy: %.2f%%" % (best_acc))

Train Epoch 0: 100%|██████████| 391/391 [00:20<00:00, 18.86it/s, acc=20.4, loss=2.16]


Epoch 0: train_loss=2.1608 train_acc=20.39 val_loss=1.8286 val_acc=32.28 lr=0.000060
Saved best model: 32.28


Train Epoch 1: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=27.2, loss=1.96]


Epoch 1: train_loss=1.9605 train_acc=27.17 val_loss=1.7375 val_acc=37.21 lr=0.000120
Saved best model: 37.21


Train Epoch 2: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=32.2, loss=1.84]


Epoch 2: train_loss=1.8409 train_acc=32.19 val_loss=1.5922 val_acc=41.75 lr=0.000180
Saved best model: 41.75


Train Epoch 3: 100%|██████████| 391/391 [00:20<00:00, 19.45it/s, acc=36.7, loss=1.73]


Epoch 3: train_loss=1.7294 train_acc=36.72 val_loss=1.4877 val_acc=46.34 lr=0.000240
Saved best model: 46.34


Train Epoch 4: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=39.8, loss=1.65]


Epoch 4: train_loss=1.6531 train_acc=39.83 val_loss=1.4149 val_acc=48.31 lr=0.000300
Saved best model: 48.31


Train Epoch 5: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=42.7, loss=1.57]


Epoch 5: train_loss=1.5734 train_acc=42.71 val_loss=1.3171 val_acc=52.25 lr=0.000300
Saved best model: 52.25


Train Epoch 6: 100%|██████████| 391/391 [00:20<00:00, 19.01it/s, acc=45, loss=1.52]


Epoch 6: train_loss=1.5163 train_acc=45.04 val_loss=1.1940 val_acc=56.51 lr=0.000300
Saved best model: 56.51


Train Epoch 7: 100%|██████████| 391/391 [00:20<00:00, 19.06it/s, acc=46.6, loss=1.47]


Epoch 7: train_loss=1.4740 train_acc=46.57 val_loss=1.1924 val_acc=56.82 lr=0.000300
Saved best model: 56.82


Train Epoch 8: 100%|██████████| 391/391 [00:20<00:00, 18.87it/s, acc=48.5, loss=1.43]


Epoch 8: train_loss=1.4349 train_acc=48.50 val_loss=1.1444 val_acc=59.17 lr=0.000300
Saved best model: 59.17


Train Epoch 9: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=49.5, loss=1.4]


Epoch 9: train_loss=1.3988 train_acc=49.45 val_loss=1.1352 val_acc=57.88 lr=0.000300


Train Epoch 10: 100%|██████████| 391/391 [00:20<00:00, 19.15it/s, acc=50.6, loss=1.37]


Epoch 10: train_loss=1.3695 train_acc=50.59 val_loss=1.0648 val_acc=60.67 lr=0.000300
Saved best model: 60.67


Train Epoch 11: 100%|██████████| 391/391 [00:20<00:00, 19.49it/s, acc=51.9, loss=1.34]


Epoch 11: train_loss=1.3426 train_acc=51.89 val_loss=1.0523 val_acc=62.07 lr=0.000300
Saved best model: 62.07


Train Epoch 12: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=52.4, loss=1.32]


Epoch 12: train_loss=1.3180 train_acc=52.40 val_loss=0.9949 val_acc=64.21 lr=0.000300
Saved best model: 64.21


Train Epoch 13: 100%|██████████| 391/391 [00:20<00:00, 19.01it/s, acc=54, loss=1.29]


Epoch 13: train_loss=1.2871 train_acc=53.97 val_loss=1.0069 val_acc=63.43 lr=0.000299


Train Epoch 14: 100%|██████████| 391/391 [00:20<00:00, 19.14it/s, acc=54.5, loss=1.27]


Epoch 14: train_loss=1.2656 train_acc=54.51 val_loss=0.9458 val_acc=64.99 lr=0.000299
Saved best model: 64.99


Train Epoch 15: 100%|██████████| 391/391 [00:20<00:00, 18.93it/s, acc=55.5, loss=1.24]


Epoch 15: train_loss=1.2395 train_acc=55.49 val_loss=0.9268 val_acc=66.78 lr=0.000299
Saved best model: 66.78


Train Epoch 16: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=56.5, loss=1.21]


Epoch 16: train_loss=1.2115 train_acc=56.54 val_loss=0.9119 val_acc=67.33 lr=0.000299
Saved best model: 67.33


Train Epoch 17: 100%|██████████| 391/391 [00:20<00:00, 19.35it/s, acc=57.4, loss=1.19]


Epoch 17: train_loss=1.1892 train_acc=57.39 val_loss=0.8966 val_acc=68.07 lr=0.000299
Saved best model: 68.07


Train Epoch 18: 100%|██████████| 391/391 [00:19<00:00, 19.61it/s, acc=58.5, loss=1.17]


Epoch 18: train_loss=1.1666 train_acc=58.50 val_loss=0.8773 val_acc=68.11 lr=0.000299
Saved best model: 68.11


Train Epoch 19: 100%|██████████| 391/391 [00:20<00:00, 19.26it/s, acc=59.1, loss=1.14]


Epoch 19: train_loss=1.1443 train_acc=59.08 val_loss=0.8826 val_acc=68.63 lr=0.000298
Saved best model: 68.63


Train Epoch 20: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=60.1, loss=1.12]


Epoch 20: train_loss=1.1241 train_acc=60.07 val_loss=0.8584 val_acc=70.17 lr=0.000298
Saved best model: 70.17


Train Epoch 21: 100%|██████████| 391/391 [00:20<00:00, 19.43it/s, acc=60.9, loss=1.1]


Epoch 21: train_loss=1.1031 train_acc=60.86 val_loss=0.7921 val_acc=72.13 lr=0.000298
Saved best model: 72.13


Train Epoch 22: 100%|██████████| 391/391 [00:20<00:00, 19.33it/s, acc=61.3, loss=1.09]


Epoch 22: train_loss=1.0867 train_acc=61.34 val_loss=0.8279 val_acc=70.39 lr=0.000298


Train Epoch 23: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=61.8, loss=1.07]


Epoch 23: train_loss=1.0680 train_acc=61.82 val_loss=0.7670 val_acc=73.05 lr=0.000297
Saved best model: 73.05


Train Epoch 24: 100%|██████████| 391/391 [00:20<00:00, 19.46it/s, acc=62.5, loss=1.05]


Epoch 24: train_loss=1.0514 train_acc=62.53 val_loss=0.7695 val_acc=73.01 lr=0.000297


Train Epoch 25: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=63.2, loss=1.04]


Epoch 25: train_loss=1.0364 train_acc=63.19 val_loss=0.7372 val_acc=73.81 lr=0.000297
Saved best model: 73.81


Train Epoch 26: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=63.6, loss=1.02]


Epoch 26: train_loss=1.0239 train_acc=63.57 val_loss=0.7536 val_acc=73.36 lr=0.000296


Train Epoch 27: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=64.4, loss=1.01]


Epoch 27: train_loss=1.0070 train_acc=64.36 val_loss=0.7289 val_acc=74.23 lr=0.000296
Saved best model: 74.23


Train Epoch 28: 100%|██████████| 391/391 [00:20<00:00, 18.94it/s, acc=64.6, loss=0.998]


Epoch 28: train_loss=0.9976 train_acc=64.65 val_loss=0.6899 val_acc=75.69 lr=0.000296
Saved best model: 75.69


Train Epoch 29: 100%|██████████| 391/391 [00:20<00:00, 19.52it/s, acc=65.2, loss=0.984]


Epoch 29: train_loss=0.9837 train_acc=65.16 val_loss=0.7101 val_acc=74.41 lr=0.000295


Train Epoch 30: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=65.2, loss=0.977]


Epoch 30: train_loss=0.9766 train_acc=65.24 val_loss=0.7303 val_acc=73.94 lr=0.000295


Train Epoch 31: 100%|██████████| 391/391 [00:19<00:00, 19.57it/s, acc=66, loss=0.962]


Epoch 31: train_loss=0.9619 train_acc=65.97 val_loss=0.6911 val_acc=75.80 lr=0.000294
Saved best model: 75.8


Train Epoch 32: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=66.8, loss=0.941]


Epoch 32: train_loss=0.9409 train_acc=66.75 val_loss=0.6708 val_acc=76.17 lr=0.000294
Saved best model: 76.17


Train Epoch 33: 100%|██████████| 391/391 [00:20<00:00, 19.36it/s, acc=66.8, loss=0.936]


Epoch 33: train_loss=0.9362 train_acc=66.78 val_loss=0.6446 val_acc=77.51 lr=0.000293
Saved best model: 77.51


Train Epoch 34: 100%|██████████| 391/391 [00:20<00:00, 18.96it/s, acc=67.5, loss=0.917]


Epoch 34: train_loss=0.9168 train_acc=67.47 val_loss=0.6835 val_acc=76.00 lr=0.000293


Train Epoch 35: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=67.8, loss=0.907]


Epoch 35: train_loss=0.9068 train_acc=67.80 val_loss=0.6381 val_acc=78.15 lr=0.000292
Saved best model: 78.15


Train Epoch 36: 100%|██████████| 391/391 [00:20<00:00, 19.39it/s, acc=67.9, loss=0.908]


Epoch 36: train_loss=0.9075 train_acc=67.94 val_loss=0.6712 val_acc=76.37 lr=0.000292


Train Epoch 37: 100%|██████████| 391/391 [00:20<00:00, 19.09it/s, acc=68.3, loss=0.895]


Epoch 37: train_loss=0.8954 train_acc=68.32 val_loss=0.6228 val_acc=77.83 lr=0.000291


Train Epoch 38: 100%|██████████| 391/391 [00:20<00:00, 19.11it/s, acc=68.9, loss=0.885]


Epoch 38: train_loss=0.8847 train_acc=68.89 val_loss=0.6499 val_acc=77.44 lr=0.000291


Train Epoch 39: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=69.3, loss=0.871]


Epoch 39: train_loss=0.8715 train_acc=69.29 val_loss=0.6363 val_acc=77.39 lr=0.000290


Train Epoch 40: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=69.4, loss=0.864]


Epoch 40: train_loss=0.8644 train_acc=69.42 val_loss=0.6041 val_acc=78.74 lr=0.000290
Saved best model: 78.74


Train Epoch 41: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=69.9, loss=0.854]


Epoch 41: train_loss=0.8543 train_acc=69.88 val_loss=0.6047 val_acc=78.61 lr=0.000289


Train Epoch 42: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=70, loss=0.847]


Epoch 42: train_loss=0.8472 train_acc=70.01 val_loss=0.5866 val_acc=79.41 lr=0.000289
Saved best model: 79.41


Train Epoch 43: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=70.5, loss=0.836]


Epoch 43: train_loss=0.8363 train_acc=70.54 val_loss=0.5591 val_acc=80.64 lr=0.000288
Saved best model: 80.64


Train Epoch 44: 100%|██████████| 391/391 [00:20<00:00, 19.16it/s, acc=70.7, loss=0.829]


Epoch 44: train_loss=0.8292 train_acc=70.74 val_loss=0.5712 val_acc=80.12 lr=0.000287


Train Epoch 45: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=71.1, loss=0.818]


Epoch 45: train_loss=0.8183 train_acc=71.09 val_loss=0.5590 val_acc=80.36 lr=0.000287


Train Epoch 46: 100%|██████████| 391/391 [00:20<00:00, 19.48it/s, acc=71, loss=0.813]


Epoch 46: train_loss=0.8133 train_acc=71.02 val_loss=0.5925 val_acc=79.07 lr=0.000286


Train Epoch 47: 100%|██████████| 391/391 [00:20<00:00, 19.11it/s, acc=71.1, loss=0.815]


Epoch 47: train_loss=0.8150 train_acc=71.07 val_loss=0.5620 val_acc=80.35 lr=0.000285


Train Epoch 48: 100%|██████████| 391/391 [00:20<00:00, 19.36it/s, acc=71.8, loss=0.797]


Epoch 48: train_loss=0.7973 train_acc=71.76 val_loss=0.5722 val_acc=79.31 lr=0.000285


Train Epoch 49: 100%|██████████| 391/391 [00:20<00:00, 19.47it/s, acc=71.7, loss=0.796]


Epoch 49: train_loss=0.7956 train_acc=71.69 val_loss=0.5940 val_acc=79.40 lr=0.000284


Train Epoch 50: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=72.5, loss=0.782]


Epoch 50: train_loss=0.7818 train_acc=72.45 val_loss=0.5411 val_acc=80.93 lr=0.000283
Saved best model: 80.93


Train Epoch 51: 100%|██████████| 391/391 [00:20<00:00, 19.46it/s, acc=72.3, loss=0.782]


Epoch 51: train_loss=0.7824 train_acc=72.29 val_loss=0.5535 val_acc=80.89 lr=0.000282


Train Epoch 52: 100%|██████████| 391/391 [00:20<00:00, 19.35it/s, acc=72.8, loss=0.77]


Epoch 52: train_loss=0.7704 train_acc=72.79 val_loss=0.5461 val_acc=81.14 lr=0.000282
Saved best model: 81.14


Train Epoch 53: 100%|██████████| 391/391 [00:20<00:00, 19.38it/s, acc=73.2, loss=0.755]


Epoch 53: train_loss=0.7545 train_acc=73.20 val_loss=0.5290 val_acc=81.50 lr=0.000281
Saved best model: 81.5


Train Epoch 54: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=73.4, loss=0.757]


Epoch 54: train_loss=0.7568 train_acc=73.45 val_loss=0.5308 val_acc=81.56 lr=0.000280
Saved best model: 81.56


Train Epoch 55: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=73.6, loss=0.75]


Epoch 55: train_loss=0.7502 train_acc=73.57 val_loss=0.5258 val_acc=81.41 lr=0.000279


Train Epoch 56: 100%|██████████| 391/391 [00:20<00:00, 18.92it/s, acc=74, loss=0.739]


Epoch 56: train_loss=0.7394 train_acc=73.99 val_loss=0.5539 val_acc=80.72 lr=0.000278


Train Epoch 57: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=74.1, loss=0.735]


Epoch 57: train_loss=0.7354 train_acc=74.10 val_loss=0.5343 val_acc=81.66 lr=0.000278
Saved best model: 81.66


Train Epoch 58: 100%|██████████| 391/391 [00:20<00:00, 18.98it/s, acc=74.3, loss=0.728]


Epoch 58: train_loss=0.7283 train_acc=74.30 val_loss=0.5149 val_acc=81.91 lr=0.000277
Saved best model: 81.91


Train Epoch 59: 100%|██████████| 391/391 [00:20<00:00, 19.41it/s, acc=74.4, loss=0.729]


Epoch 59: train_loss=0.7291 train_acc=74.36 val_loss=0.5116 val_acc=82.45 lr=0.000276
Saved best model: 82.45


Train Epoch 60: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=74.6, loss=0.719]


Epoch 60: train_loss=0.7194 train_acc=74.56 val_loss=0.5068 val_acc=82.32 lr=0.000275


Train Epoch 61: 100%|██████████| 391/391 [00:20<00:00, 18.94it/s, acc=75.4, loss=0.7]


Epoch 61: train_loss=0.6996 train_acc=75.36 val_loss=0.5270 val_acc=82.16 lr=0.000274


Train Epoch 62: 100%|██████████| 391/391 [00:20<00:00, 19.33it/s, acc=75.2, loss=0.702]


Epoch 62: train_loss=0.7016 train_acc=75.18 val_loss=0.5069 val_acc=82.36 lr=0.000273


Train Epoch 63: 100%|██████████| 391/391 [00:19<00:00, 19.58it/s, acc=75.4, loss=0.696]


Epoch 63: train_loss=0.6961 train_acc=75.43 val_loss=0.5575 val_acc=81.00 lr=0.000272


Train Epoch 64: 100%|██████████| 391/391 [00:20<00:00, 18.80it/s, acc=75.6, loss=0.688]


Epoch 64: train_loss=0.6880 train_acc=75.60 val_loss=0.4876 val_acc=83.16 lr=0.000271
Saved best model: 83.16


Train Epoch 65: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=75.6, loss=0.688]


Epoch 65: train_loss=0.6877 train_acc=75.62 val_loss=0.4960 val_acc=82.43 lr=0.000270


Train Epoch 66: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=76.2, loss=0.672]


Epoch 66: train_loss=0.6718 train_acc=76.22 val_loss=0.4919 val_acc=83.07 lr=0.000269


Train Epoch 67: 100%|██████████| 391/391 [00:20<00:00, 19.52it/s, acc=76.1, loss=0.676]


Epoch 67: train_loss=0.6758 train_acc=76.13 val_loss=0.4775 val_acc=83.58 lr=0.000268
Saved best model: 83.58


Train Epoch 68: 100%|██████████| 391/391 [00:20<00:00, 18.66it/s, acc=76.6, loss=0.666]


Epoch 68: train_loss=0.6655 train_acc=76.57 val_loss=0.4847 val_acc=83.32 lr=0.000267


Train Epoch 69: 100%|██████████| 391/391 [00:20<00:00, 18.88it/s, acc=76.6, loss=0.66]


Epoch 69: train_loss=0.6598 train_acc=76.63 val_loss=0.4885 val_acc=83.42 lr=0.000266


Train Epoch 70: 100%|██████████| 391/391 [00:20<00:00, 19.45it/s, acc=77, loss=0.653]


Epoch 70: train_loss=0.6532 train_acc=76.95 val_loss=0.4790 val_acc=83.52 lr=0.000265


Train Epoch 71: 100%|██████████| 391/391 [00:20<00:00, 18.92it/s, acc=77.5, loss=0.644]


Epoch 71: train_loss=0.6444 train_acc=77.46 val_loss=0.4884 val_acc=82.82 lr=0.000264


Train Epoch 72: 100%|██████████| 391/391 [00:20<00:00, 19.29it/s, acc=77.3, loss=0.644]


Epoch 72: train_loss=0.6437 train_acc=77.32 val_loss=0.4817 val_acc=83.20 lr=0.000263


Train Epoch 73: 100%|██████████| 391/391 [00:20<00:00, 19.36it/s, acc=77.4, loss=0.643]


Epoch 73: train_loss=0.6426 train_acc=77.44 val_loss=0.4746 val_acc=83.58 lr=0.000262


Train Epoch 74: 100%|██████████| 391/391 [00:19<00:00, 19.57it/s, acc=77.6, loss=0.637]


Epoch 74: train_loss=0.6371 train_acc=77.59 val_loss=0.4998 val_acc=82.94 lr=0.000261


Train Epoch 75: 100%|██████████| 391/391 [00:20<00:00, 18.96it/s, acc=77.8, loss=0.624]


Epoch 75: train_loss=0.6241 train_acc=77.82 val_loss=0.4700 val_acc=83.23 lr=0.000260


Train Epoch 76: 100%|██████████| 391/391 [00:20<00:00, 19.32it/s, acc=78.3, loss=0.615]


Epoch 76: train_loss=0.6154 train_acc=78.34 val_loss=0.4715 val_acc=83.81 lr=0.000259
Saved best model: 83.81


Train Epoch 77: 100%|██████████| 391/391 [00:20<00:00, 19.38it/s, acc=78.2, loss=0.611]


Epoch 77: train_loss=0.6108 train_acc=78.23 val_loss=0.4480 val_acc=84.82 lr=0.000258
Saved best model: 84.82


Train Epoch 78: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=78.4, loss=0.61]


Epoch 78: train_loss=0.6102 train_acc=78.41 val_loss=0.4657 val_acc=84.52 lr=0.000257


Train Epoch 79: 100%|██████████| 391/391 [00:20<00:00, 19.54it/s, acc=78.5, loss=0.61]


Epoch 79: train_loss=0.6100 train_acc=78.50 val_loss=0.4560 val_acc=84.43 lr=0.000256


Train Epoch 80: 100%|██████████| 391/391 [00:20<00:00, 19.29it/s, acc=78.8, loss=0.601]


Epoch 80: train_loss=0.6014 train_acc=78.79 val_loss=0.4612 val_acc=84.33 lr=0.000255


Train Epoch 81: 100%|██████████| 391/391 [00:20<00:00, 19.16it/s, acc=78.9, loss=0.597]


Epoch 81: train_loss=0.5969 train_acc=78.86 val_loss=0.4682 val_acc=84.19 lr=0.000253


Train Epoch 82: 100%|██████████| 391/391 [00:20<00:00, 19.38it/s, acc=78.8, loss=0.596]


Epoch 82: train_loss=0.5956 train_acc=78.83 val_loss=0.4751 val_acc=84.08 lr=0.000252


Train Epoch 83: 100%|██████████| 391/391 [00:20<00:00, 19.53it/s, acc=79.4, loss=0.586]


Epoch 83: train_loss=0.5858 train_acc=79.41 val_loss=0.4548 val_acc=84.59 lr=0.000251


Train Epoch 84: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=79.2, loss=0.591]


Epoch 84: train_loss=0.5912 train_acc=79.17 val_loss=0.4514 val_acc=84.84 lr=0.000250
Saved best model: 84.84


Train Epoch 85: 100%|██████████| 391/391 [00:20<00:00, 18.73it/s, acc=79.6, loss=0.578]


Epoch 85: train_loss=0.5778 train_acc=79.59 val_loss=0.4425 val_acc=84.95 lr=0.000249
Saved best model: 84.95


Train Epoch 86: 100%|██████████| 391/391 [00:20<00:00, 19.39it/s, acc=79.5, loss=0.58]


Epoch 86: train_loss=0.5800 train_acc=79.54 val_loss=0.4501 val_acc=84.80 lr=0.000248


Train Epoch 87: 100%|██████████| 391/391 [00:20<00:00, 19.52it/s, acc=80, loss=0.566]


Epoch 87: train_loss=0.5660 train_acc=80.00 val_loss=0.4497 val_acc=84.87 lr=0.000246


Train Epoch 88: 100%|██████████| 391/391 [00:20<00:00, 19.35it/s, acc=79.9, loss=0.567]


Epoch 88: train_loss=0.5666 train_acc=79.91 val_loss=0.4361 val_acc=85.35 lr=0.000245
Saved best model: 85.35


Train Epoch 89: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=80.1, loss=0.564]


Epoch 89: train_loss=0.5640 train_acc=80.08 val_loss=0.4386 val_acc=85.80 lr=0.000244
Saved best model: 85.8


Train Epoch 90: 100%|██████████| 391/391 [00:20<00:00, 19.39it/s, acc=80.4, loss=0.554]


Epoch 90: train_loss=0.5539 train_acc=80.45 val_loss=0.4525 val_acc=85.26 lr=0.000243


Train Epoch 91: 100%|██████████| 391/391 [00:20<00:00, 19.29it/s, acc=80.5, loss=0.552]


Epoch 91: train_loss=0.5524 train_acc=80.46 val_loss=0.4211 val_acc=85.88 lr=0.000241
Saved best model: 85.88


Train Epoch 92: 100%|██████████| 391/391 [00:20<00:00, 19.40it/s, acc=80.9, loss=0.545]


Epoch 92: train_loss=0.5447 train_acc=80.91 val_loss=0.4378 val_acc=85.02 lr=0.000240


Train Epoch 93: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=81, loss=0.538]


Epoch 93: train_loss=0.5382 train_acc=80.98 val_loss=0.4288 val_acc=86.10 lr=0.000239
Saved best model: 86.1


Train Epoch 94: 100%|██████████| 391/391 [00:20<00:00, 18.93it/s, acc=81.2, loss=0.534]


Epoch 94: train_loss=0.5341 train_acc=81.19 val_loss=0.4498 val_acc=85.25 lr=0.000238


Train Epoch 95: 100%|██████████| 391/391 [00:20<00:00, 19.17it/s, acc=81.1, loss=0.537]


Epoch 95: train_loss=0.5367 train_acc=81.07 val_loss=0.4348 val_acc=85.60 lr=0.000236


Train Epoch 96: 100%|██████████| 391/391 [00:20<00:00, 19.38it/s, acc=81.1, loss=0.532]


Epoch 96: train_loss=0.5324 train_acc=81.13 val_loss=0.4320 val_acc=85.68 lr=0.000235


Train Epoch 97: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=81.3, loss=0.531]


Epoch 97: train_loss=0.5314 train_acc=81.28 val_loss=0.4369 val_acc=85.74 lr=0.000234


Train Epoch 98: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=81.4, loss=0.526]


Epoch 98: train_loss=0.5262 train_acc=81.43 val_loss=0.4102 val_acc=86.24 lr=0.000232
Saved best model: 86.24


Train Epoch 99: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=81.5, loss=0.519]


Epoch 99: train_loss=0.5191 train_acc=81.54 val_loss=0.4194 val_acc=85.95 lr=0.000231


Train Epoch 100: 100%|██████████| 391/391 [00:20<00:00, 19.48it/s, acc=81.6, loss=0.518]


Epoch 100: train_loss=0.5178 train_acc=81.62 val_loss=0.4174 val_acc=86.32 lr=0.000230
Saved best model: 86.32


Train Epoch 101: 100%|██████████| 391/391 [00:20<00:00, 19.15it/s, acc=82.3, loss=0.504]


Epoch 101: train_loss=0.5040 train_acc=82.31 val_loss=0.4302 val_acc=85.99 lr=0.000228


Train Epoch 102: 100%|██████████| 391/391 [00:20<00:00, 19.30it/s, acc=82.1, loss=0.511]


Epoch 102: train_loss=0.5114 train_acc=82.09 val_loss=0.4122 val_acc=86.33 lr=0.000227
Saved best model: 86.33


Train Epoch 103: 100%|██████████| 391/391 [00:20<00:00, 19.03it/s, acc=82.1, loss=0.507]


Epoch 103: train_loss=0.5072 train_acc=82.11 val_loss=0.4653 val_acc=85.53 lr=0.000225


Train Epoch 104: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=81.8, loss=0.514]


Epoch 104: train_loss=0.5140 train_acc=81.81 val_loss=0.4215 val_acc=86.04 lr=0.000224


Train Epoch 105: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=82.4, loss=0.501]


Epoch 105: train_loss=0.5005 train_acc=82.36 val_loss=0.4168 val_acc=86.33 lr=0.000223


Train Epoch 106: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=82.6, loss=0.497]


Epoch 106: train_loss=0.4966 train_acc=82.58 val_loss=0.4364 val_acc=86.30 lr=0.000221


Train Epoch 107: 100%|██████████| 391/391 [00:19<00:00, 19.64it/s, acc=82.5, loss=0.495]


Epoch 107: train_loss=0.4952 train_acc=82.45 val_loss=0.4450 val_acc=85.80 lr=0.000220


Train Epoch 108: 100%|██████████| 391/391 [00:20<00:00, 19.45it/s, acc=82.7, loss=0.489]


Epoch 108: train_loss=0.4890 train_acc=82.72 val_loss=0.4331 val_acc=86.36 lr=0.000218
Saved best model: 86.36


Train Epoch 109: 100%|██████████| 391/391 [00:20<00:00, 18.95it/s, acc=82.8, loss=0.486]


Epoch 109: train_loss=0.4862 train_acc=82.76 val_loss=0.4144 val_acc=86.39 lr=0.000217
Saved best model: 86.39


Train Epoch 110: 100%|██████████| 391/391 [00:20<00:00, 19.00it/s, acc=83, loss=0.48]


Epoch 110: train_loss=0.4803 train_acc=83.04 val_loss=0.4241 val_acc=86.43 lr=0.000216
Saved best model: 86.43


Train Epoch 111: 100%|██████████| 391/391 [00:20<00:00, 18.85it/s, acc=83.2, loss=0.476]


Epoch 111: train_loss=0.4759 train_acc=83.20 val_loss=0.3872 val_acc=86.95 lr=0.000214
Saved best model: 86.95


Train Epoch 112: 100%|██████████| 391/391 [00:20<00:00, 18.79it/s, acc=83, loss=0.479]


Epoch 112: train_loss=0.4794 train_acc=83.02 val_loss=0.4184 val_acc=86.58 lr=0.000213


Train Epoch 113: 100%|██████████| 391/391 [00:20<00:00, 18.87it/s, acc=83.1, loss=0.479]


Epoch 113: train_loss=0.4787 train_acc=83.09 val_loss=0.3999 val_acc=87.16 lr=0.000211
Saved best model: 87.16


Train Epoch 114: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=83.7, loss=0.467]


Epoch 114: train_loss=0.4671 train_acc=83.66 val_loss=0.4086 val_acc=86.92 lr=0.000210


Train Epoch 115: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=83.8, loss=0.465]


Epoch 115: train_loss=0.4648 train_acc=83.77 val_loss=0.4172 val_acc=86.71 lr=0.000208


Train Epoch 116: 100%|██████████| 391/391 [00:20<00:00, 18.84it/s, acc=83.7, loss=0.459]


Epoch 116: train_loss=0.4588 train_acc=83.66 val_loss=0.4142 val_acc=86.40 lr=0.000207


Train Epoch 117: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=83.8, loss=0.463]


Epoch 117: train_loss=0.4629 train_acc=83.83 val_loss=0.4336 val_acc=86.33 lr=0.000205


Train Epoch 118: 100%|██████████| 391/391 [00:20<00:00, 18.74it/s, acc=83.9, loss=0.461]


Epoch 118: train_loss=0.4606 train_acc=83.85 val_loss=0.4058 val_acc=87.28 lr=0.000204
Saved best model: 87.28


Train Epoch 119: 100%|██████████| 391/391 [00:20<00:00, 19.02it/s, acc=84.1, loss=0.452]


Epoch 119: train_loss=0.4522 train_acc=84.05 val_loss=0.4047 val_acc=87.06 lr=0.000202


Train Epoch 120: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=83.9, loss=0.453]


Epoch 120: train_loss=0.4534 train_acc=83.86 val_loss=0.4250 val_acc=86.71 lr=0.000201


Train Epoch 121: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=84.1, loss=0.452]


Epoch 121: train_loss=0.4522 train_acc=84.10 val_loss=0.4078 val_acc=86.98 lr=0.000199


Train Epoch 122: 100%|██████████| 391/391 [00:20<00:00, 18.81it/s, acc=84.4, loss=0.443]


Epoch 122: train_loss=0.4431 train_acc=84.37 val_loss=0.4043 val_acc=87.24 lr=0.000198


Train Epoch 123: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=84.4, loss=0.442]


Epoch 123: train_loss=0.4424 train_acc=84.41 val_loss=0.4164 val_acc=87.19 lr=0.000196


Train Epoch 124: 100%|██████████| 391/391 [00:20<00:00, 19.16it/s, acc=84.4, loss=0.441]


Epoch 124: train_loss=0.4414 train_acc=84.35 val_loss=0.4109 val_acc=86.99 lr=0.000195


Train Epoch 125: 100%|██████████| 391/391 [00:20<00:00, 19.29it/s, acc=84.3, loss=0.441]


Epoch 125: train_loss=0.4407 train_acc=84.33 val_loss=0.4544 val_acc=86.10 lr=0.000193


Train Epoch 126: 100%|██████████| 391/391 [00:20<00:00, 19.52it/s, acc=84.5, loss=0.434]


Epoch 126: train_loss=0.4338 train_acc=84.54 val_loss=0.4325 val_acc=86.88 lr=0.000192


Train Epoch 127: 100%|██████████| 391/391 [00:20<00:00, 19.00it/s, acc=85.1, loss=0.425]


Epoch 127: train_loss=0.4248 train_acc=85.10 val_loss=0.4008 val_acc=87.22 lr=0.000190


Train Epoch 128: 100%|██████████| 391/391 [00:20<00:00, 19.28it/s, acc=84.7, loss=0.43]


Epoch 128: train_loss=0.4298 train_acc=84.72 val_loss=0.4129 val_acc=87.15 lr=0.000189


Train Epoch 129: 100%|██████████| 391/391 [00:20<00:00, 19.48it/s, acc=85.3, loss=0.424]


Epoch 129: train_loss=0.4244 train_acc=85.30 val_loss=0.4243 val_acc=86.92 lr=0.000187


Train Epoch 130: 100%|██████████| 391/391 [00:20<00:00, 19.32it/s, acc=85.1, loss=0.423]


Epoch 130: train_loss=0.4229 train_acc=85.13 val_loss=0.4311 val_acc=86.59 lr=0.000186


Train Epoch 131: 100%|██████████| 391/391 [00:20<00:00, 18.97it/s, acc=85, loss=0.427]


Epoch 131: train_loss=0.4267 train_acc=84.99 val_loss=0.3972 val_acc=87.45 lr=0.000184
Saved best model: 87.45


Train Epoch 132: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=85, loss=0.421]


Epoch 132: train_loss=0.4214 train_acc=85.00 val_loss=0.4017 val_acc=87.66 lr=0.000182
Saved best model: 87.66


Train Epoch 133: 100%|██████████| 391/391 [00:20<00:00, 19.38it/s, acc=85.3, loss=0.414]


Epoch 133: train_loss=0.4141 train_acc=85.33 val_loss=0.4138 val_acc=87.19 lr=0.000181


Train Epoch 134: 100%|██████████| 391/391 [00:20<00:00, 18.97it/s, acc=85.4, loss=0.414]


Epoch 134: train_loss=0.4145 train_acc=85.39 val_loss=0.4195 val_acc=87.25 lr=0.000179


Train Epoch 135: 100%|██████████| 391/391 [00:20<00:00, 19.39it/s, acc=85.6, loss=0.405]


Epoch 135: train_loss=0.4046 train_acc=85.57 val_loss=0.4133 val_acc=87.26 lr=0.000178


Train Epoch 136: 100%|██████████| 391/391 [00:20<00:00, 19.28it/s, acc=85.8, loss=0.404]


Epoch 136: train_loss=0.4043 train_acc=85.80 val_loss=0.4051 val_acc=87.48 lr=0.000176


Train Epoch 137: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=85.9, loss=0.402]


Epoch 137: train_loss=0.4024 train_acc=85.87 val_loss=0.4105 val_acc=87.30 lr=0.000175


Train Epoch 138: 100%|██████████| 391/391 [00:20<00:00, 19.30it/s, acc=85.8, loss=0.407]


Epoch 138: train_loss=0.4067 train_acc=85.77 val_loss=0.4120 val_acc=87.46 lr=0.000173


Train Epoch 139: 100%|██████████| 391/391 [00:20<00:00, 19.44it/s, acc=85.9, loss=0.402]


Epoch 139: train_loss=0.4023 train_acc=85.85 val_loss=0.4031 val_acc=87.91 lr=0.000171
Saved best model: 87.91


Train Epoch 140: 100%|██████████| 391/391 [00:20<00:00, 19.32it/s, acc=85.9, loss=0.401]


Epoch 140: train_loss=0.4012 train_acc=85.92 val_loss=0.4103 val_acc=87.85 lr=0.000170


Train Epoch 141: 100%|██████████| 391/391 [00:20<00:00, 19.15it/s, acc=86, loss=0.397]


Epoch 141: train_loss=0.3974 train_acc=86.02 val_loss=0.3994 val_acc=87.89 lr=0.000168


Train Epoch 142: 100%|██████████| 391/391 [00:20<00:00, 19.06it/s, acc=86.2, loss=0.395]


Epoch 142: train_loss=0.3949 train_acc=86.15 val_loss=0.4196 val_acc=87.27 lr=0.000167


Train Epoch 143: 100%|██████████| 391/391 [00:20<00:00, 19.48it/s, acc=86.2, loss=0.393]


Epoch 143: train_loss=0.3932 train_acc=86.18 val_loss=0.4119 val_acc=87.34 lr=0.000165


Train Epoch 144: 100%|██████████| 391/391 [00:20<00:00, 18.94it/s, acc=86.2, loss=0.393]


Epoch 144: train_loss=0.3926 train_acc=86.23 val_loss=0.4122 val_acc=87.71 lr=0.000164


Train Epoch 145: 100%|██████████| 391/391 [00:20<00:00, 19.27it/s, acc=86.3, loss=0.383]


Epoch 145: train_loss=0.3835 train_acc=86.29 val_loss=0.3919 val_acc=87.77 lr=0.000162


Train Epoch 146: 100%|██████████| 391/391 [00:20<00:00, 19.00it/s, acc=86.5, loss=0.382]


Epoch 146: train_loss=0.3818 train_acc=86.52 val_loss=0.4066 val_acc=87.82 lr=0.000160


Train Epoch 147: 100%|██████████| 391/391 [00:20<00:00, 19.11it/s, acc=86.3, loss=0.385]


Epoch 147: train_loss=0.3852 train_acc=86.30 val_loss=0.3984 val_acc=87.90 lr=0.000159


Train Epoch 148: 100%|██████████| 391/391 [00:20<00:00, 19.50it/s, acc=86.9, loss=0.373]


Epoch 148: train_loss=0.3727 train_acc=86.90 val_loss=0.4035 val_acc=87.67 lr=0.000157


Train Epoch 149: 100%|██████████| 391/391 [00:20<00:00, 19.06it/s, acc=86.8, loss=0.373]


Epoch 149: train_loss=0.3729 train_acc=86.81 val_loss=0.4140 val_acc=87.76 lr=0.000156


Train Epoch 150: 100%|██████████| 391/391 [00:20<00:00, 19.35it/s, acc=86.7, loss=0.378]


Epoch 150: train_loss=0.3777 train_acc=86.74 val_loss=0.4312 val_acc=87.19 lr=0.000154


Train Epoch 151: 100%|██████████| 391/391 [00:20<00:00, 19.17it/s, acc=86.9, loss=0.371]


Epoch 151: train_loss=0.3705 train_acc=86.88 val_loss=0.4048 val_acc=87.84 lr=0.000152


Train Epoch 152: 100%|██████████| 391/391 [00:20<00:00, 19.03it/s, acc=86.9, loss=0.369]


Epoch 152: train_loss=0.3689 train_acc=86.92 val_loss=0.4240 val_acc=88.05 lr=0.000151
Saved best model: 88.05


Train Epoch 153: 100%|██████████| 391/391 [00:20<00:00, 19.00it/s, acc=87, loss=0.368]


Epoch 153: train_loss=0.3677 train_acc=86.99 val_loss=0.3863 val_acc=88.24 lr=0.000149
Saved best model: 88.24


Train Epoch 154: 100%|██████████| 391/391 [00:20<00:00, 18.95it/s, acc=86.9, loss=0.372]


Epoch 154: train_loss=0.3718 train_acc=86.90 val_loss=0.4008 val_acc=88.14 lr=0.000148


Train Epoch 155: 100%|██████████| 391/391 [00:20<00:00, 19.34it/s, acc=87.3, loss=0.364]


Epoch 155: train_loss=0.3635 train_acc=87.25 val_loss=0.4143 val_acc=87.62 lr=0.000146


Train Epoch 156: 100%|██████████| 391/391 [00:20<00:00, 18.98it/s, acc=87.2, loss=0.364]


Epoch 156: train_loss=0.3639 train_acc=87.22 val_loss=0.4077 val_acc=87.70 lr=0.000144


Train Epoch 157: 100%|██████████| 391/391 [00:20<00:00, 19.46it/s, acc=87.2, loss=0.362]


Epoch 157: train_loss=0.3624 train_acc=87.24 val_loss=0.4056 val_acc=87.98 lr=0.000143


Train Epoch 158: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=87.3, loss=0.358]


Epoch 158: train_loss=0.3578 train_acc=87.35 val_loss=0.4090 val_acc=87.94 lr=0.000141


Train Epoch 159: 100%|██████████| 391/391 [00:20<00:00, 19.18it/s, acc=87.4, loss=0.359]


Epoch 159: train_loss=0.3588 train_acc=87.38 val_loss=0.4268 val_acc=87.65 lr=0.000140


Train Epoch 160: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=87.3, loss=0.354]


Epoch 160: train_loss=0.3543 train_acc=87.27 val_loss=0.4322 val_acc=87.85 lr=0.000138


Train Epoch 161: 100%|██████████| 391/391 [00:20<00:00, 19.18it/s, acc=87.7, loss=0.349]


Epoch 161: train_loss=0.3493 train_acc=87.74 val_loss=0.3949 val_acc=88.19 lr=0.000136


Train Epoch 162: 100%|██████████| 391/391 [00:19<00:00, 19.60it/s, acc=87.6, loss=0.355]


Epoch 162: train_loss=0.3553 train_acc=87.59 val_loss=0.4032 val_acc=88.08 lr=0.000135


Train Epoch 163: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=87.8, loss=0.348]


Epoch 163: train_loss=0.3482 train_acc=87.84 val_loss=0.3934 val_acc=88.11 lr=0.000133


Train Epoch 164: 100%|██████████| 391/391 [00:20<00:00, 19.30it/s, acc=87.8, loss=0.345]


Epoch 164: train_loss=0.3452 train_acc=87.79 val_loss=0.4070 val_acc=87.98 lr=0.000132


Train Epoch 165: 100%|██████████| 391/391 [00:20<00:00, 19.41it/s, acc=87.8, loss=0.344]


Epoch 165: train_loss=0.3444 train_acc=87.79 val_loss=0.4075 val_acc=88.24 lr=0.000130


Train Epoch 166: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=87.9, loss=0.343]


Epoch 166: train_loss=0.3429 train_acc=87.92 val_loss=0.4112 val_acc=88.07 lr=0.000129


Train Epoch 167: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=88.2, loss=0.34]


Epoch 167: train_loss=0.3399 train_acc=88.19 val_loss=0.4057 val_acc=88.35 lr=0.000127
Saved best model: 88.35


Train Epoch 168: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=88.2, loss=0.335]


Epoch 168: train_loss=0.3351 train_acc=88.15 val_loss=0.4005 val_acc=88.22 lr=0.000125


Train Epoch 169: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=88.1, loss=0.339]


Epoch 169: train_loss=0.3395 train_acc=88.07 val_loss=0.3772 val_acc=89.02 lr=0.000124
Saved best model: 89.02


Train Epoch 170: 100%|██████████| 391/391 [00:20<00:00, 19.50it/s, acc=88.2, loss=0.338]


Epoch 170: train_loss=0.3378 train_acc=88.17 val_loss=0.3926 val_acc=88.50 lr=0.000122


Train Epoch 171: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=88.6, loss=0.331]


Epoch 171: train_loss=0.3313 train_acc=88.62 val_loss=0.4162 val_acc=88.27 lr=0.000121


Train Epoch 172: 100%|██████████| 391/391 [00:20<00:00, 18.88it/s, acc=88.4, loss=0.331]


Epoch 172: train_loss=0.3313 train_acc=88.40 val_loss=0.4038 val_acc=88.68 lr=0.000119


Train Epoch 173: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=88.3, loss=0.334]


Epoch 173: train_loss=0.3343 train_acc=88.28 val_loss=0.4107 val_acc=88.37 lr=0.000118


Train Epoch 174: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=88.5, loss=0.33]


Epoch 174: train_loss=0.3304 train_acc=88.50 val_loss=0.4070 val_acc=88.54 lr=0.000116


Train Epoch 175: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=88.5, loss=0.329]


Epoch 175: train_loss=0.3288 train_acc=88.47 val_loss=0.4226 val_acc=88.03 lr=0.000114


Train Epoch 176: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=88.5, loss=0.324]


Epoch 176: train_loss=0.3242 train_acc=88.53 val_loss=0.3980 val_acc=88.58 lr=0.000113


Train Epoch 177: 100%|██████████| 391/391 [00:20<00:00, 19.35it/s, acc=88.8, loss=0.321]


Epoch 177: train_loss=0.3208 train_acc=88.81 val_loss=0.4106 val_acc=88.55 lr=0.000111


Train Epoch 178: 100%|██████████| 391/391 [00:20<00:00, 19.03it/s, acc=88.6, loss=0.324]


Epoch 178: train_loss=0.3238 train_acc=88.62 val_loss=0.4300 val_acc=88.13 lr=0.000110


Train Epoch 179: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=88.8, loss=0.322]


Epoch 179: train_loss=0.3218 train_acc=88.76 val_loss=0.4043 val_acc=88.75 lr=0.000108


Train Epoch 180: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=88.8, loss=0.317]


Epoch 180: train_loss=0.3168 train_acc=88.81 val_loss=0.3989 val_acc=88.54 lr=0.000107


Train Epoch 181: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=88.9, loss=0.315]


Epoch 181: train_loss=0.3147 train_acc=88.90 val_loss=0.4315 val_acc=87.88 lr=0.000105


Train Epoch 182: 100%|██████████| 391/391 [00:20<00:00, 18.98it/s, acc=89, loss=0.315]


Epoch 182: train_loss=0.3154 train_acc=88.98 val_loss=0.3978 val_acc=88.74 lr=0.000104


Train Epoch 183: 100%|██████████| 391/391 [00:20<00:00, 19.01it/s, acc=89, loss=0.312]


Epoch 183: train_loss=0.3121 train_acc=89.00 val_loss=0.4050 val_acc=88.87 lr=0.000102


Train Epoch 184: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=88.8, loss=0.317]


Epoch 184: train_loss=0.3168 train_acc=88.84 val_loss=0.4005 val_acc=88.74 lr=0.000101


Train Epoch 185: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=89.2, loss=0.306]


Epoch 185: train_loss=0.3059 train_acc=89.22 val_loss=0.4086 val_acc=88.47 lr=0.000099


Train Epoch 186: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=89.2, loss=0.31]


Epoch 186: train_loss=0.3103 train_acc=89.19 val_loss=0.4247 val_acc=88.37 lr=0.000098


Train Epoch 187: 100%|██████████| 391/391 [00:20<00:00, 19.15it/s, acc=89.2, loss=0.308]


Epoch 187: train_loss=0.3079 train_acc=89.22 val_loss=0.4018 val_acc=88.75 lr=0.000096


Train Epoch 188: 100%|██████████| 391/391 [00:20<00:00, 19.34it/s, acc=89.3, loss=0.307]


Epoch 188: train_loss=0.3066 train_acc=89.29 val_loss=0.4006 val_acc=88.92 lr=0.000095


Train Epoch 189: 100%|██████████| 391/391 [00:20<00:00, 19.32it/s, acc=89.4, loss=0.303]


Epoch 189: train_loss=0.3026 train_acc=89.37 val_loss=0.4047 val_acc=88.88 lr=0.000093


Train Epoch 190: 100%|██████████| 391/391 [00:20<00:00, 18.90it/s, acc=89.7, loss=0.295]


Epoch 190: train_loss=0.2949 train_acc=89.67 val_loss=0.4071 val_acc=88.81 lr=0.000092


Train Epoch 191: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=89.4, loss=0.3]


Epoch 191: train_loss=0.2997 train_acc=89.43 val_loss=0.4043 val_acc=88.68 lr=0.000090


Train Epoch 192: 100%|██████████| 391/391 [00:20<00:00, 19.17it/s, acc=89.4, loss=0.304]


Epoch 192: train_loss=0.3040 train_acc=89.41 val_loss=0.4004 val_acc=88.59 lr=0.000089


Train Epoch 193: 100%|██████████| 391/391 [00:20<00:00, 18.88it/s, acc=89.7, loss=0.296]


Epoch 193: train_loss=0.2963 train_acc=89.68 val_loss=0.3854 val_acc=89.42 lr=0.000087
Saved best model: 89.42


Train Epoch 194: 100%|██████████| 391/391 [00:20<00:00, 18.91it/s, acc=89.6, loss=0.297]


Epoch 194: train_loss=0.2973 train_acc=89.61 val_loss=0.3976 val_acc=88.89 lr=0.000086


Train Epoch 195: 100%|██████████| 391/391 [00:20<00:00, 19.16it/s, acc=89.8, loss=0.292]


Epoch 195: train_loss=0.2922 train_acc=89.76 val_loss=0.3891 val_acc=89.05 lr=0.000084


Train Epoch 196: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=89.8, loss=0.29]


Epoch 196: train_loss=0.2899 train_acc=89.84 val_loss=0.4067 val_acc=88.82 lr=0.000083


Train Epoch 197: 100%|██████████| 391/391 [00:20<00:00, 19.00it/s, acc=89.6, loss=0.293]


Epoch 197: train_loss=0.2930 train_acc=89.59 val_loss=0.4119 val_acc=88.78 lr=0.000082


Train Epoch 198: 100%|██████████| 391/391 [00:20<00:00, 19.42it/s, acc=90, loss=0.288]


Epoch 198: train_loss=0.2877 train_acc=89.99 val_loss=0.4134 val_acc=88.82 lr=0.000080


Train Epoch 199: 100%|██████████| 391/391 [00:20<00:00, 19.43it/s, acc=90, loss=0.287]


Epoch 199: train_loss=0.2872 train_acc=90.05 val_loss=0.4062 val_acc=88.93 lr=0.000079


Train Epoch 200: 100%|██████████| 391/391 [00:20<00:00, 19.34it/s, acc=90, loss=0.288]


Epoch 200: train_loss=0.2875 train_acc=89.99 val_loss=0.3988 val_acc=89.16 lr=0.000077


Train Epoch 201: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=90.1, loss=0.283]


Epoch 201: train_loss=0.2834 train_acc=90.10 val_loss=0.4159 val_acc=88.81 lr=0.000076


Train Epoch 202: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=90.1, loss=0.284]


Epoch 202: train_loss=0.2837 train_acc=90.10 val_loss=0.4117 val_acc=89.04 lr=0.000075


Train Epoch 203: 100%|██████████| 391/391 [00:20<00:00, 19.08it/s, acc=90.1, loss=0.283]


Epoch 203: train_loss=0.2830 train_acc=90.12 val_loss=0.4367 val_acc=88.40 lr=0.000073


Train Epoch 204: 100%|██████████| 391/391 [00:20<00:00, 19.14it/s, acc=90.2, loss=0.283]


Epoch 204: train_loss=0.2830 train_acc=90.21 val_loss=0.4081 val_acc=89.06 lr=0.000072


Train Epoch 205: 100%|██████████| 391/391 [00:20<00:00, 19.45it/s, acc=90.2, loss=0.28]


Epoch 205: train_loss=0.2797 train_acc=90.17 val_loss=0.3925 val_acc=89.07 lr=0.000070


Train Epoch 206: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=90.2, loss=0.278]


Epoch 206: train_loss=0.2779 train_acc=90.22 val_loss=0.4033 val_acc=89.18 lr=0.000069


Train Epoch 207: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=90.3, loss=0.276]


Epoch 207: train_loss=0.2762 train_acc=90.27 val_loss=0.4057 val_acc=88.90 lr=0.000068


Train Epoch 208: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=90.4, loss=0.275]


Epoch 208: train_loss=0.2755 train_acc=90.42 val_loss=0.4084 val_acc=88.90 lr=0.000066


Train Epoch 209: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=90.4, loss=0.273]


Epoch 209: train_loss=0.2733 train_acc=90.39 val_loss=0.4096 val_acc=89.04 lr=0.000065


Train Epoch 210: 100%|██████████| 391/391 [00:20<00:00, 18.99it/s, acc=90.5, loss=0.271]


Epoch 210: train_loss=0.2708 train_acc=90.50 val_loss=0.4091 val_acc=88.69 lr=0.000064


Train Epoch 211: 100%|██████████| 391/391 [00:20<00:00, 19.02it/s, acc=90.4, loss=0.273]


Epoch 211: train_loss=0.2733 train_acc=90.41 val_loss=0.4055 val_acc=89.21 lr=0.000062


Train Epoch 212: 100%|██████████| 391/391 [00:20<00:00, 19.27it/s, acc=90.3, loss=0.276]


Epoch 212: train_loss=0.2758 train_acc=90.32 val_loss=0.3999 val_acc=89.47 lr=0.000061
Saved best model: 89.47


Train Epoch 213: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=90.5, loss=0.275]


Epoch 213: train_loss=0.2746 train_acc=90.51 val_loss=0.3968 val_acc=89.17 lr=0.000060


Train Epoch 214: 100%|██████████| 391/391 [00:20<00:00, 19.28it/s, acc=90.9, loss=0.266]


Epoch 214: train_loss=0.2659 train_acc=90.90 val_loss=0.3983 val_acc=89.45 lr=0.000059


Train Epoch 215: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=90.7, loss=0.267]


Epoch 215: train_loss=0.2674 train_acc=90.65 val_loss=0.3972 val_acc=89.20 lr=0.000057


Train Epoch 216: 100%|██████████| 391/391 [00:20<00:00, 19.26it/s, acc=90.8, loss=0.264]


Epoch 216: train_loss=0.2641 train_acc=90.82 val_loss=0.3983 val_acc=89.19 lr=0.000056


Train Epoch 217: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=90.5, loss=0.27]


Epoch 217: train_loss=0.2705 train_acc=90.54 val_loss=0.3848 val_acc=89.64 lr=0.000055
Saved best model: 89.64


Train Epoch 218: 100%|██████████| 391/391 [00:20<00:00, 19.15it/s, acc=90.5, loss=0.271]


Epoch 218: train_loss=0.2712 train_acc=90.47 val_loss=0.4042 val_acc=89.10 lr=0.000054


Train Epoch 219: 100%|██████████| 391/391 [00:20<00:00, 19.11it/s, acc=90.7, loss=0.266]


Epoch 219: train_loss=0.2656 train_acc=90.65 val_loss=0.3986 val_acc=89.27 lr=0.000052


Train Epoch 220: 100%|██████████| 391/391 [00:20<00:00, 19.12it/s, acc=90.9, loss=0.262]


Epoch 220: train_loss=0.2623 train_acc=90.88 val_loss=0.3943 val_acc=89.54 lr=0.000051


Train Epoch 221: 100%|██████████| 391/391 [00:20<00:00, 18.73it/s, acc=90.9, loss=0.262]


Epoch 221: train_loss=0.2622 train_acc=90.93 val_loss=0.3916 val_acc=90.08 lr=0.000050
Saved best model: 90.08


Train Epoch 222: 100%|██████████| 391/391 [00:20<00:00, 19.40it/s, acc=90.9, loss=0.263]


Epoch 222: train_loss=0.2625 train_acc=90.88 val_loss=0.4077 val_acc=89.38 lr=0.000049


Train Epoch 223: 100%|██████████| 391/391 [00:20<00:00, 19.18it/s, acc=91.2, loss=0.252]


Epoch 223: train_loss=0.2519 train_acc=91.17 val_loss=0.4070 val_acc=89.16 lr=0.000048


Train Epoch 224: 100%|██████████| 391/391 [00:20<00:00, 19.05it/s, acc=91, loss=0.257]


Epoch 224: train_loss=0.2565 train_acc=91.02 val_loss=0.4028 val_acc=89.39 lr=0.000047


Train Epoch 225: 100%|██████████| 391/391 [00:20<00:00, 18.97it/s, acc=91.1, loss=0.258]


Epoch 225: train_loss=0.2576 train_acc=91.09 val_loss=0.4028 val_acc=89.30 lr=0.000045


Train Epoch 226: 100%|██████████| 391/391 [00:20<00:00, 19.06it/s, acc=91, loss=0.259]


Epoch 226: train_loss=0.2586 train_acc=90.99 val_loss=0.4011 val_acc=89.57 lr=0.000044


Train Epoch 227: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=91.1, loss=0.254]


Epoch 227: train_loss=0.2542 train_acc=91.11 val_loss=0.3969 val_acc=89.42 lr=0.000043


Train Epoch 228: 100%|██████████| 391/391 [00:20<00:00, 19.28it/s, acc=91.1, loss=0.254]


Epoch 228: train_loss=0.2544 train_acc=91.13 val_loss=0.4025 val_acc=89.74 lr=0.000042


Train Epoch 229: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=91.3, loss=0.251]


Epoch 229: train_loss=0.2507 train_acc=91.35 val_loss=0.3943 val_acc=89.51 lr=0.000041


Train Epoch 230: 100%|██████████| 391/391 [00:20<00:00, 18.85it/s, acc=91.1, loss=0.254]


Epoch 230: train_loss=0.2541 train_acc=91.11 val_loss=0.4064 val_acc=89.28 lr=0.000040


Train Epoch 231: 100%|██████████| 391/391 [00:20<00:00, 18.92it/s, acc=91.4, loss=0.245]


Epoch 231: train_loss=0.2454 train_acc=91.41 val_loss=0.3964 val_acc=89.49 lr=0.000039


Train Epoch 232: 100%|██████████| 391/391 [00:20<00:00, 19.27it/s, acc=91.2, loss=0.252]


Epoch 232: train_loss=0.2519 train_acc=91.25 val_loss=0.3933 val_acc=89.65 lr=0.000038


Train Epoch 233: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=91.4, loss=0.25]


Epoch 233: train_loss=0.2502 train_acc=91.40 val_loss=0.4048 val_acc=89.61 lr=0.000037


Train Epoch 234: 100%|██████████| 391/391 [00:20<00:00, 19.12it/s, acc=91.3, loss=0.25]


Epoch 234: train_loss=0.2500 train_acc=91.30 val_loss=0.4083 val_acc=89.19 lr=0.000036


Train Epoch 235: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=91.7, loss=0.239]


Epoch 235: train_loss=0.2391 train_acc=91.69 val_loss=0.4014 val_acc=89.57 lr=0.000035


Train Epoch 236: 100%|██████████| 391/391 [00:20<00:00, 18.80it/s, acc=91.4, loss=0.247]


Epoch 236: train_loss=0.2467 train_acc=91.41 val_loss=0.3935 val_acc=89.66 lr=0.000034


Train Epoch 237: 100%|██████████| 391/391 [00:20<00:00, 19.08it/s, acc=91.4, loss=0.245]


Epoch 237: train_loss=0.2448 train_acc=91.40 val_loss=0.4107 val_acc=89.41 lr=0.000033


Train Epoch 238: 100%|██████████| 391/391 [00:20<00:00, 18.84it/s, acc=91.4, loss=0.244]


Epoch 238: train_loss=0.2440 train_acc=91.45 val_loss=0.4025 val_acc=89.52 lr=0.000032


Train Epoch 239: 100%|██████████| 391/391 [00:20<00:00, 18.90it/s, acc=91.6, loss=0.239]


Epoch 239: train_loss=0.2390 train_acc=91.58 val_loss=0.4061 val_acc=89.69 lr=0.000031


Train Epoch 240: 100%|██████████| 391/391 [00:20<00:00, 19.01it/s, acc=91.7, loss=0.239]


Epoch 240: train_loss=0.2393 train_acc=91.70 val_loss=0.3981 val_acc=89.41 lr=0.000030


Train Epoch 241: 100%|██████████| 391/391 [00:20<00:00, 19.30it/s, acc=91.6, loss=0.242]


Epoch 241: train_loss=0.2417 train_acc=91.64 val_loss=0.3978 val_acc=89.49 lr=0.000029


Train Epoch 242: 100%|██████████| 391/391 [00:20<00:00, 19.01it/s, acc=91.6, loss=0.244]


Epoch 242: train_loss=0.2438 train_acc=91.61 val_loss=0.3905 val_acc=89.76 lr=0.000028


Train Epoch 243: 100%|██████████| 391/391 [00:20<00:00, 18.78it/s, acc=91.5, loss=0.24]


Epoch 243: train_loss=0.2400 train_acc=91.52 val_loss=0.4071 val_acc=89.47 lr=0.000027


Train Epoch 244: 100%|██████████| 391/391 [00:20<00:00, 19.19it/s, acc=91.6, loss=0.24]


Epoch 244: train_loss=0.2398 train_acc=91.62 val_loss=0.3931 val_acc=89.58 lr=0.000026


Train Epoch 245: 100%|██████████| 391/391 [00:20<00:00, 18.97it/s, acc=91.7, loss=0.235]


Epoch 245: train_loss=0.2354 train_acc=91.74 val_loss=0.4071 val_acc=89.52 lr=0.000025


Train Epoch 246: 100%|██████████| 391/391 [00:20<00:00, 19.49it/s, acc=91.7, loss=0.237]


Epoch 246: train_loss=0.2374 train_acc=91.69 val_loss=0.4000 val_acc=89.83 lr=0.000024


Train Epoch 247: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=91.7, loss=0.238]


Epoch 247: train_loss=0.2383 train_acc=91.67 val_loss=0.3930 val_acc=89.81 lr=0.000023


Train Epoch 248: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=91.9, loss=0.236]


Epoch 248: train_loss=0.2359 train_acc=91.86 val_loss=0.4005 val_acc=89.72 lr=0.000022


Train Epoch 249: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=91.9, loss=0.229]


Epoch 249: train_loss=0.2294 train_acc=91.91 val_loss=0.4071 val_acc=89.73 lr=0.000022


Train Epoch 250: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=91.9, loss=0.234]


Epoch 250: train_loss=0.2338 train_acc=91.88 val_loss=0.4187 val_acc=89.41 lr=0.000021


Train Epoch 251: 100%|██████████| 391/391 [00:20<00:00, 19.17it/s, acc=92, loss=0.229]


Epoch 251: train_loss=0.2293 train_acc=91.96 val_loss=0.4083 val_acc=89.73 lr=0.000020


Train Epoch 252: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=91.8, loss=0.236]


Epoch 252: train_loss=0.2357 train_acc=91.80 val_loss=0.4069 val_acc=89.83 lr=0.000019


Train Epoch 253: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=91.8, loss=0.234]


Epoch 253: train_loss=0.2343 train_acc=91.83 val_loss=0.4007 val_acc=89.87 lr=0.000018


Train Epoch 254: 100%|██████████| 391/391 [00:20<00:00, 19.32it/s, acc=92.1, loss=0.228]


Epoch 254: train_loss=0.2277 train_acc=92.08 val_loss=0.4104 val_acc=89.81 lr=0.000018


Train Epoch 255: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=92, loss=0.227]


Epoch 255: train_loss=0.2275 train_acc=92.00 val_loss=0.4035 val_acc=89.79 lr=0.000017


Train Epoch 256: 100%|██████████| 391/391 [00:20<00:00, 19.14it/s, acc=91.9, loss=0.232]


Epoch 256: train_loss=0.2319 train_acc=91.93 val_loss=0.3967 val_acc=89.93 lr=0.000016


Train Epoch 257: 100%|██████████| 391/391 [00:20<00:00, 19.28it/s, acc=92, loss=0.227]


Epoch 257: train_loss=0.2270 train_acc=92.03 val_loss=0.4066 val_acc=89.77 lr=0.000015


Train Epoch 258: 100%|██████████| 391/391 [00:20<00:00, 19.22it/s, acc=92, loss=0.228]


Epoch 258: train_loss=0.2279 train_acc=92.04 val_loss=0.4003 val_acc=89.79 lr=0.000015


Train Epoch 259: 100%|██████████| 391/391 [00:20<00:00, 18.87it/s, acc=92.1, loss=0.228]


Epoch 259: train_loss=0.2276 train_acc=92.05 val_loss=0.4038 val_acc=89.75 lr=0.000014


Train Epoch 260: 100%|██████████| 391/391 [00:20<00:00, 19.20it/s, acc=92.1, loss=0.225]


Epoch 260: train_loss=0.2247 train_acc=92.10 val_loss=0.3966 val_acc=89.92 lr=0.000013


Train Epoch 261: 100%|██████████| 391/391 [00:20<00:00, 18.98it/s, acc=92.2, loss=0.224]


Epoch 261: train_loss=0.2239 train_acc=92.21 val_loss=0.4069 val_acc=89.57 lr=0.000013


Train Epoch 262: 100%|██████████| 391/391 [00:20<00:00, 19.24it/s, acc=92.3, loss=0.225]


Epoch 262: train_loss=0.2254 train_acc=92.26 val_loss=0.4063 val_acc=89.79 lr=0.000012


Train Epoch 263: 100%|██████████| 391/391 [00:20<00:00, 18.65it/s, acc=92, loss=0.225]


Epoch 263: train_loss=0.2251 train_acc=92.05 val_loss=0.4024 val_acc=89.85 lr=0.000011


Train Epoch 264: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=92.1, loss=0.227]


Epoch 264: train_loss=0.2274 train_acc=92.14 val_loss=0.4054 val_acc=89.86 lr=0.000011


Train Epoch 265: 100%|██████████| 391/391 [00:20<00:00, 18.93it/s, acc=92.2, loss=0.225]


Epoch 265: train_loss=0.2247 train_acc=92.20 val_loss=0.3983 val_acc=89.99 lr=0.000010


Train Epoch 266: 100%|██████████| 391/391 [00:20<00:00, 18.82it/s, acc=92.3, loss=0.221]


Epoch 266: train_loss=0.2212 train_acc=92.33 val_loss=0.3975 val_acc=89.97 lr=0.000010


Train Epoch 267: 100%|██████████| 391/391 [00:20<00:00, 18.82it/s, acc=92.1, loss=0.226]


Epoch 267: train_loss=0.2257 train_acc=92.10 val_loss=0.3967 val_acc=89.81 lr=0.000009


Train Epoch 268: 100%|██████████| 391/391 [00:20<00:00, 18.78it/s, acc=92.1, loss=0.226]


Epoch 268: train_loss=0.2263 train_acc=92.13 val_loss=0.4007 val_acc=90.09 lr=0.000009
Saved best model: 90.09


Train Epoch 269: 100%|██████████| 391/391 [00:20<00:00, 18.98it/s, acc=92.2, loss=0.225]


Epoch 269: train_loss=0.2250 train_acc=92.17 val_loss=0.3979 val_acc=89.90 lr=0.000008


Train Epoch 270: 100%|██████████| 391/391 [00:20<00:00, 19.12it/s, acc=92, loss=0.229]


Epoch 270: train_loss=0.2288 train_acc=92.00 val_loss=0.4042 val_acc=89.93 lr=0.000008


Train Epoch 271: 100%|██████████| 391/391 [00:20<00:00, 19.13it/s, acc=92.2, loss=0.223]


Epoch 271: train_loss=0.2229 train_acc=92.19 val_loss=0.4003 val_acc=89.90 lr=0.000007


Train Epoch 272: 100%|██████████| 391/391 [00:21<00:00, 18.34it/s, acc=92.1, loss=0.225]


Epoch 272: train_loss=0.2249 train_acc=92.12 val_loss=0.4025 val_acc=89.92 lr=0.000007


Train Epoch 273: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=92.3, loss=0.223]


Epoch 273: train_loss=0.2225 train_acc=92.28 val_loss=0.4014 val_acc=89.90 lr=0.000006


Train Epoch 274: 100%|██████████| 391/391 [00:20<00:00, 19.14it/s, acc=92.2, loss=0.224]


Epoch 274: train_loss=0.2238 train_acc=92.19 val_loss=0.4039 val_acc=89.83 lr=0.000006


Train Epoch 275: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=92.4, loss=0.222]


Epoch 275: train_loss=0.2220 train_acc=92.42 val_loss=0.4086 val_acc=89.84 lr=0.000005


Train Epoch 276: 100%|██████████| 391/391 [00:20<00:00, 19.40it/s, acc=92.5, loss=0.219]


Epoch 276: train_loss=0.2195 train_acc=92.45 val_loss=0.4063 val_acc=90.10 lr=0.000005
Saved best model: 90.1


Train Epoch 277: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=92.4, loss=0.221]


Epoch 277: train_loss=0.2209 train_acc=92.41 val_loss=0.3999 val_acc=90.08 lr=0.000004


Train Epoch 278: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=92.3, loss=0.22]


Epoch 278: train_loss=0.2203 train_acc=92.26 val_loss=0.4018 val_acc=89.96 lr=0.000004


Train Epoch 279: 100%|██████████| 391/391 [00:20<00:00, 19.37it/s, acc=92.5, loss=0.219]


Epoch 279: train_loss=0.2185 train_acc=92.49 val_loss=0.4020 val_acc=90.06 lr=0.000004


Train Epoch 280: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=92.4, loss=0.219]


Epoch 280: train_loss=0.2194 train_acc=92.35 val_loss=0.4048 val_acc=90.05 lr=0.000003


Train Epoch 281: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=92.3, loss=0.221]


Epoch 281: train_loss=0.2206 train_acc=92.30 val_loss=0.4063 val_acc=89.90 lr=0.000003


Train Epoch 282: 100%|██████████| 391/391 [00:20<00:00, 19.25it/s, acc=92.2, loss=0.223]


Epoch 282: train_loss=0.2233 train_acc=92.25 val_loss=0.4038 val_acc=89.88 lr=0.000003


Train Epoch 283: 100%|██████████| 391/391 [00:20<00:00, 19.51it/s, acc=92.4, loss=0.221]


Epoch 283: train_loss=0.2209 train_acc=92.38 val_loss=0.4005 val_acc=90.03 lr=0.000002


Train Epoch 284: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=92.5, loss=0.217]


Epoch 284: train_loss=0.2173 train_acc=92.46 val_loss=0.4023 val_acc=90.02 lr=0.000002


Train Epoch 285: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, acc=92.3, loss=0.224]


Epoch 285: train_loss=0.2236 train_acc=92.27 val_loss=0.4017 val_acc=89.96 lr=0.000002


Train Epoch 286: 100%|██████████| 391/391 [00:20<00:00, 19.36it/s, acc=92.3, loss=0.222]


Epoch 286: train_loss=0.2219 train_acc=92.29 val_loss=0.4008 val_acc=90.01 lr=0.000002


Train Epoch 287: 100%|██████████| 391/391 [00:20<00:00, 19.08it/s, acc=92.2, loss=0.224]


Epoch 287: train_loss=0.2240 train_acc=92.22 val_loss=0.4004 val_acc=90.02 lr=0.000001


Train Epoch 288: 100%|██████████| 391/391 [00:20<00:00, 19.27it/s, acc=92.3, loss=0.221]


Epoch 288: train_loss=0.2212 train_acc=92.33 val_loss=0.4009 val_acc=90.04 lr=0.000001


Train Epoch 289: 100%|██████████| 391/391 [00:20<00:00, 19.03it/s, acc=92.5, loss=0.215]


Epoch 289: train_loss=0.2152 train_acc=92.49 val_loss=0.4019 val_acc=90.01 lr=0.000001


Train Epoch 290: 100%|██████████| 391/391 [00:20<00:00, 19.10it/s, acc=92.5, loss=0.215]


Epoch 290: train_loss=0.2152 train_acc=92.51 val_loss=0.4030 val_acc=89.89 lr=0.000001


Train Epoch 291: 100%|██████████| 391/391 [00:20<00:00, 18.96it/s, acc=92.3, loss=0.218]


Epoch 291: train_loss=0.2179 train_acc=92.31 val_loss=0.4019 val_acc=89.99 lr=0.000001


Train Epoch 292: 100%|██████████| 391/391 [00:20<00:00, 19.31it/s, acc=92.4, loss=0.219]


Epoch 292: train_loss=0.2186 train_acc=92.42 val_loss=0.4022 val_acc=89.99 lr=0.000001


Train Epoch 293: 100%|██████████| 391/391 [00:20<00:00, 19.14it/s, acc=92.3, loss=0.22]


Epoch 293: train_loss=0.2203 train_acc=92.34 val_loss=0.4018 val_acc=89.98 lr=0.000000


Train Epoch 294: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, acc=92.4, loss=0.218]


Epoch 294: train_loss=0.2175 train_acc=92.45 val_loss=0.4018 val_acc=89.97 lr=0.000000


Train Epoch 295: 100%|██████████| 391/391 [00:20<00:00, 19.07it/s, acc=92.8, loss=0.21]


Epoch 295: train_loss=0.2097 train_acc=92.80 val_loss=0.4018 val_acc=89.95 lr=0.000000


Train Epoch 296: 100%|██████████| 391/391 [00:20<00:00, 19.12it/s, acc=92.6, loss=0.213]


Epoch 296: train_loss=0.2132 train_acc=92.63 val_loss=0.4019 val_acc=89.96 lr=0.000000


Train Epoch 297: 100%|██████████| 391/391 [00:20<00:00, 19.17it/s, acc=92.4, loss=0.216]


Epoch 297: train_loss=0.2158 train_acc=92.44 val_loss=0.4018 val_acc=89.96 lr=0.000000


Train Epoch 298: 100%|██████████| 391/391 [00:20<00:00, 19.52it/s, acc=92.4, loss=0.219]


Epoch 298: train_loss=0.2186 train_acc=92.37 val_loss=0.4018 val_acc=89.96 lr=0.000000


Train Epoch 299: 100%|██████████| 391/391 [00:20<00:00, 19.21it/s, acc=92.1, loss=0.221]


Epoch 299: train_loss=0.2211 train_acc=92.09 val_loss=0.4017 val_acc=89.96 lr=0.000000
Best test accuracy: 90.10%
