<a href="https://colab.research.google.com/github/akanshasun/cnn-vgg6-config-assignment/blob/main/vgg6_akansha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import os, math, time, random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn.init as init
import pandas as pd
import argparse
import wandb

SEED = 42
random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True  # OK for CNNs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


Device: cuda


In [2]:
from PIL import Image, ImageEnhance, ImageOps
import random
import numpy as np
import torch


class Cutout(object):
    """Randomly mask out one or more patches from an image.
    Args:
        n_holes (int): Number of patches to cut out of each image.
        length (int): The length (in pixels) of each square patch.
    """
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it.
        """
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1: y2, x1: x2] = 0.

        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)
        img = img * mask
        return img

class SubPolicy(object):
    def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
        self.p1 = p1
        self.op1=operation1
        self.magnitude_idx1=magnitude_idx1
        self.p2 = p2
        self.op2=operation2
        self.magnitude_idx2=magnitude_idx2
        self.fillcolor=fillcolor
        self.init = 0

    def gen(self, operation1, magnitude_idx1, operation2, magnitude_idx2, fillcolor):
        ranges = {
            "shearX": np.linspace(0, 0.3, 10),
            "shearY": np.linspace(0, 0.3, 10),
            "translateX": np.linspace(0, 150 / 331, 10),
            "translateY": np.linspace(0, 150 / 331, 10),
            "rotate": np.linspace(0, 30, 10),
            "color": np.linspace(0.0, 0.9, 10),
            "posterize": np.round(np.linspace(8, 4, 10), 0).astype(int),
            "solarize": np.linspace(256, 0, 10),
            "contrast": np.linspace(0.0, 0.9, 10),
            "sharpness": np.linspace(0.0, 0.9, 10),
            "brightness": np.linspace(0.0, 0.9, 10),
            "autocontrast": [0] * 10,
            "equalize": [0] * 10,
            "invert": [0] * 10
        }
        def rotate_with_fill(img, magnitude):
            rot = img.convert("RGBA").rotate(magnitude)
            return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(img.mode)

        func = {
            "shearX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, magnitude *
                                         random.choice([-1, 1]), 0, 0, 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "shearY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, magnitude *
                                         random.choice([-1, 1]), 1, 0),
                Image.BICUBIC, fillcolor=fillcolor),
            "translateX": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, magnitude *
                                         img.size[0] * random.choice([-1, 1]), 0, 1, 0),
                fillcolor=fillcolor),
            "translateY": lambda img, magnitude: img.transform(
                img.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude *
                                         img.size[1] * random.choice([-1, 1])),
                fillcolor=fillcolor),
            "rotate": lambda img, magnitude: rotate_with_fill(img, magnitude),
            # "rotate": lambda img, magnitude: img.rotate(magnitude * random.choice([-1, 1])),
            "color": lambda img, magnitude: ImageEnhance.Color(img).enhance(1 + magnitude * random.choice([-1, 1])),
            "posterize": lambda img, magnitude: ImageOps.posterize(img, magnitude),
            "solarize": lambda img, magnitude: ImageOps.solarize(img, magnitude),
            "contrast": lambda img, magnitude: ImageEnhance.Contrast(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "sharpness": lambda img, magnitude: ImageEnhance.Sharpness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "brightness": lambda img, magnitude: ImageEnhance.Brightness(img).enhance(
                1 + magnitude * random.choice([-1, 1])),
            "autocontrast": lambda img, magnitude: ImageOps.autocontrast(img),
            "equalize": lambda img, magnitude: ImageOps.equalize(img),
            "invert": lambda img, magnitude: ImageOps.invert(img)
        }

        self.operation1 = func[operation1]
        self.magnitude1 = ranges[operation1][magnitude_idx1]
        self.operation2 = func[operation2]
        self.magnitude2 = ranges[operation2][magnitude_idx2]

    def __call__(self, img):
        if self.init == 0:
            self.gen(self.op1, self.magnitude_idx1, self.op2, self.magnitude_idx2, self.fillcolor)
            self.init = 1
        if random.random() < self.p1:
            img = self.operation1(img, self.magnitude1)
        if random.random() < self.p2:
            img = self.operation2(img, self.magnitude2)
        return img

class ImageNetPolicy(object):
    """ Randomly choose one of the best 24 Sub-policies on ImageNet.
        Example:
        >>> policy = ImageNetPolicy()
        >>> transformed = policy(image)
        Example as a PyTorch Transform:
        >>> transform=transforms.Compose([
        >>>     transforms.Resize(256),
        >>>     ImageNetPolicy(),
        >>>     transforms.ToTensor()])
    """

    def __init__(self, fillcolor=(128, 128, 128)):
        self.policies = [
            SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor),
            SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor),
            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),

            SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor),
            SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor),
            SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor),
            SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor),
            SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor),

            SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor),
            SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor),
            SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor),

            SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor),
            SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor),
            SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor),
            SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor),
            SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor),

            SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
            SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
            SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
            SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor)
        ]

    def __call__(self, img):
        policy_idx = random.randint(0, len(self.policies) - 1)
        return self.policies[policy_idx](img)

    def __repr__(self):
        return "AutoAugment ImageNet Policy"

class CIFAR10Policy(object):
    """ Randomly choose one of the best 25 Sub-policies on CIFAR10.

        Example:
        >>> policy = CIFAR10Policy()
        >>> transformed = policy(image)

        Example as a PyTorch Transform:
        >>> transform=transforms.Compose([
        >>>     transforms.Resize(256),
        >>>     CIFAR10Policy(),
        >>>     transforms.ToTensor()])
    """

    def __init__(self, fillcolor=(128, 128, 128)):
        self.policies = [
            SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor),
            SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor),
            SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor),
            SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor),
            SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor),

            SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor),
            SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor),
            SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor),
            SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor),
            SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor),

            SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor),
            SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor),
            SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor),
            SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor),
            SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor),

            SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor),
            SubPolicy(0.2, "equalize", 8, 0.8, "equalize", 4, fillcolor),
            SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor),
            SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor),
            SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor),

            SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor),
            SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor),
            SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor),
            SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor),
            SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor)
        ]

    def __call__(self, img):
        policy_idx = random.randint(0, len(self.policies) - 1)
        return self.policies[policy_idx](img)

    def __repr__(self):
        return "AutoAugment CIFAR10 Policy"


In [3]:
# -----------------------------
# CIFAR-10 Normalization
# -----------------------------
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD  = (0.2023, 0.1994, 0.2010)

In [4]:
def get_transforms(strong_aug=False):
    train_list = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(p=0.5),
    ]
    if strong_aug:
        from torchvision.transforms import AutoAugment, AutoAugmentPolicy
        train_list.append(AutoAugment(AutoAugmentPolicy.CIFAR10))

    train_list += [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]
    if strong_aug:
        train_list.append(transforms.RandomErasing(p=0.25, scale=(0.02,0.1), ratio=(0.3,3.3)))

    train_tfms = transforms.Compose(train_list)
    test_tfms  = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
    ])
    return train_tfms, test_tfms

def make_loaders(batch_size=128, val_ratio=0.1, strong_aug=False):
    train_tfms, test_tfms = get_transforms(strong_aug)
    train_full = datasets.CIFAR10("./data", train=True,  transform=train_tfms, download=True)
    test_set   = datasets.CIFAR10("./data", train=False, transform=test_tfms, download=True)

    val_size = int(len(train_full)*val_ratio)
    train_size = len(train_full)-val_size
    train_set, val_set = random_split(train_full, [train_size,val_size],
                                      generator=torch.Generator().manual_seed(SEED))
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,  num_workers=2, pin_memory=True)
    val_loader   = DataLoader(val_set,   batch_size=256,      shuffle=False, num_workers=2)
    test_loader  = DataLoader(test_set,  batch_size=256,      shuffle=False, num_workers=2)
    return train_loader, val_loader, test_loader


In [5]:
def get_activation(name):
    n = name.lower()
    if n=="relu": return nn.ReLU(inplace=True)
    if n=="silu": return nn.SiLU(inplace=True)
    if n=="gelu": return nn.GELU()
    if n=="tanh": return nn.Tanh()
    if n=="sigmoid": return nn.Sigmoid()
    return nn.ReLU(inplace=True)

class VGG6(nn.Module):
    def __init__(self, num_classes=10, act="relu", use_bn=True, p_drop=0.5):
        super().__init__()
        Act=lambda: get_activation(act)
        def conv_block(i,o):
            layers=[nn.Conv2d(i,o,3,padding=1)]
            if use_bn: layers.append(nn.BatchNorm2d(o))
            layers.append(Act())
            return nn.Sequential(*layers)

        self.features=nn.Sequential(
            conv_block(3,64), conv_block(64,64), nn.MaxPool2d(2),
            conv_block(64,128), conv_block(128,128), nn.MaxPool2d(2),
            conv_block(128,256), conv_block(256,256), nn.MaxPool2d(2)
        )
        self.classifier=nn.Sequential(
            nn.Flatten(),
            nn.Linear(256*4*4,512), Act(), nn.Dropout(p_drop),
            nn.Linear(512,num_classes)
        )

    def forward(self,x):
        return self.classifier(self.features(x))


In [6]:
def accuracy(logits, y):
    return (logits.argmax(1)==y).float().mean().item()

@torch.no_grad()
def evaluate(model, loader, crit):
    model.eval(); tot_loss=tot_acc=tot=0
    for x,y in loader:
        x,y=x.to(device),y.to(device)
        out=model(x)
        loss=crit(out,y)
        tot_loss+=loss.item()*x.size(0)
        tot_acc+=(out.argmax(1)==y).sum().item()
        tot+=x.size(0)
    return tot_loss/tot, tot_acc/tot


In [7]:
def train_vgg6(config):
    wandb.init(project="cifar10-vgg6-assignment", config=config)
    cfg=wandb.config

    train_loader,val_loader,test_loader=make_loaders(cfg.batch_size,0.1,cfg.strong_aug)
    model=VGG6(act=cfg.act).to(device)
    crit=nn.CrossEntropyLoss(label_smoothing=0.1)
    opt=optim.SGD(model.parameters(), lr=cfg.lr, momentum=0.9, weight_decay=5e-4)
    sch=optim.lr_scheduler.CosineAnnealingLR(opt,T_max=cfg.epochs)

    wandb.define_metric("global_step")
    wandb.define_metric("epoch")
    wandb.define_metric("train/*", step_metric="global_step")
    wandb.define_metric("val/*", step_metric="epoch")

    global_step=0
    best_acc=0

    for epoch in range(1,cfg.epochs+1):
        model.train()
        ep_loss=ep_acc=seen=0
        for x,y in train_loader:
            x,y=x.to(device),y.to(device)
            opt.zero_grad(set_to_none=True)
            out=model(x)
            loss=crit(out,y)
            loss.backward(); opt.step()
            acc=accuracy(out,y)
            ep_loss+=loss.item()*x.size(0); ep_acc+=acc*x.size(0); seen+=x.size(0)
            global_step+=1
            wandb.log({"global_step":global_step,"train/loss":loss.item(),"train/acc":acc})
        sch.step()
        train_loss,train_acc=ep_loss/seen,ep_acc/seen
        val_loss,val_acc=evaluate(model,val_loader,crit)
        test_loss,test_acc=evaluate(model,test_loader,crit)
        wandb.log({"epoch":epoch,
                   "train/epoch_loss":train_loss,"train/epoch_acc":train_acc,
                   "val/loss":val_loss,"val/acc":val_acc,
                   "test/loss":test_loss,"test/acc":test_acc})
        if test_acc>best_acc: best_acc=test_acc
        print(f"Epoch {epoch:03d}: train_acc={train_acc*100:.2f} val_acc={val_acc*100:.2f} test_acc={test_acc*100:.2f}")
    wandb.summary["best_test_acc"]=best_acc
    wandb.finish()


In [9]:
config={
    "act":"relu",          # try: relu, silu, gelu, tanh, sigmoid
    "optimizer":"sgd",
    "lr":0.1,
    "batch_size":128,
    "epochs":5,           # increase to 60 for better acc
    "strong_aug":False
}
train_vgg6(config)


0,1
epoch,▁
global_step,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇███
test/acc,▁
test/loss,▁
train/acc,▁▁▁▁▁▄▂▃▄▄▃▅▃▃▂▃▆▅▄▂▇▅▅▄▄▃▇▆▅▃▄█▃▅▅▅▅▅▇▇
train/epoch_acc,▁
train/epoch_loss,▁
train/loss,██▇▇▇▅▇▅▆▅▆▅▄▆▄▅▄▅▄▂▃▃▂▃▁▁▃▁▅▁▂▂▃▂▃▁▃▂▁▃
val/acc,▁
val/loss,▁

0,1
epoch,1.0
global_step,523.0
test/acc,0.2129
test/loss,2.0386
train/acc,0.30469
train/epoch_acc,0.16653
train/epoch_loss,2.19586
train/loss,2.06074
val/acc,0.2076
val/loss,2.04809


Epoch 001: train_acc=17.08 val_acc=24.24 test_acc=25.44
Epoch 002: train_acc=30.03 val_acc=37.50 test_acc=39.26
Epoch 003: train_acc=45.56 val_acc=41.76 test_acc=43.85
Epoch 004: train_acc=57.98 val_acc=60.28 test_acc=61.15
Epoch 005: train_acc=66.16 val_acc=68.38 test_acc=68.97


0,1
epoch,▁▃▅▆█
global_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█████
test/acc,▁▃▄▇█
test/loss,█▆▆▂▁
train/acc,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▄▅▄▆▄▅▇▇▆█▇▇▇▇▇▇█▆
train/epoch_acc,▁▃▅▇█
train/epoch_loss,█▆▄▂▁
train/loss,█▄▄▄▄▄▄▄▃▃▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val/acc,▁▃▄▇█
val/loss,█▆▆▃▁

0,1
best_test_acc,0.6897
epoch,5
global_step,1760
test/acc,0.6897
test/loss,1.20129
train/acc,0.61111
train/epoch_acc,0.66156
train/epoch_loss,1.30194
train/loss,1.28831
val/acc,0.6838


In [10]:
import wandb

# Wrapper so W&B can call your trainer without args
def sweep_train_vgg6():
    with wandb.init(project="cifar10-vgg6-assignment") as run:
        cfg = dict(wandb.config)
        # hand off to your existing trainer that expects a dict-like config
        train_vgg6(cfg)

# Safer ranges: work across SGD-like & Adam-like optimizers
sweep_config = {
    "method": "random",  # or "bayes" / "grid"
    "metric": {"name": "val/acc", "goal": "maximize"},
    "parameters": {
        # 10 activations
        "act": {
            "values": [
                "relu", "silu", "gelu", "tanh", "sigmoid",
                "leakyrelu", "elu", "selu", "softplus", "hardtanh"
            ]
        },
        # 10 optimizers
        "optimizer": {
            "values": [
                "sgd", "nesterov-sgd", "adam", "adamw", "nadam",
                "rmsprop", "adagrad", "adamax", "asgd", "lbfgs"
            ]
        },
        "lr": {"values": [0.1, 0.05, 0.01, 0.005, 0.001]},
        "batch_size": {"values": [64, 128, 256, 512]},
        "epochs": {"values": [10, 20, 40, 60]},
        "strong_aug": {"values": [False]}
    }
}

sweep_id = wandb.sweep(sweep_config, project="cifar10-vgg6-assignment")
print("Sweep ID:", sweep_id)

# Launch N trials (each will call sweep_train_vgg6 -> train_vgg6(config))
wandb.agent(sweep_id, function=sweep_train_vgg6, count=6)


Create sweep with ID: g9ou8anb
Sweep URL: https://wandb.ai/ee23m505-iitmaana/cifar10-vgg6-assignment/sweeps/g9ou8anb
Sweep ID: g9ou8anb


[34m[1mwandb[0m: Agent Starting Run: 35fhivby with config:
[34m[1mwandb[0m: 	act: silu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=31.70 val_acc=43.18 test_acc=45.14
Epoch 002: train_acc=46.23 val_acc=51.46 test_acc=54.04
Epoch 003: train_acc=53.19 val_acc=57.02 test_acc=58.40
Epoch 004: train_acc=58.20 val_acc=61.64 test_acc=63.08
Epoch 005: train_acc=61.17 val_acc=63.40 test_acc=65.49
Epoch 006: train_acc=63.54 val_acc=65.38 test_acc=67.23
Epoch 007: train_acc=64.90 val_acc=65.58 test_acc=68.32
Epoch 008: train_acc=65.89 val_acc=66.50 test_acc=69.00
Epoch 009: train_acc=66.40 val_acc=67.14 test_acc=68.92
Epoch 010: train_acc=66.81 val_acc=67.72 test_acc=69.51


0,1
epoch,▁▂▃▃▄▅▆▆▇█
global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
test/acc,▁▄▅▆▇▇████
test/loss,█▆▄▃▂▂▁▁▁▁
train/acc,▁▁▂▂▂▃▄▄▄▅▄▅▅▅▆▅▆▆▆▇▇▆▆▆▇▇▆▇▇▇▇▇█▇▇▇▇█▇▇
train/epoch_acc,▁▄▅▆▇▇████
train/epoch_loss,█▅▄▃▂▂▁▁▁▁
train/loss,█▇▆▆▆▅▅▅▄▅▄▄▃▃▄▃▂▃▃▃▂▂▂▂▃▂▂▃▁▂▂▂▁▂▂▂▃▂▂▂
val/acc,▁▃▅▆▇▇▇███
val/loss,█▆▄▃▂▂▂▁▁▁

0,1
best_test_acc,0.6951
epoch,10
global_step,1760
test/acc,0.6951
test/loss,1.21114
train/acc,0.66
train/epoch_acc,0.66807
train/epoch_loss,1.26796
train/loss,1.2902
val/acc,0.6772


[34m[1mwandb[0m: Agent Starting Run: 96eenjrd with config:
[34m[1mwandb[0m: 	act: softplus
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 0.1
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=13.80 val_acc=15.92 test_acc=15.89
Epoch 002: train_acc=19.16 val_acc=28.72 test_acc=30.43
Epoch 003: train_acc=35.29 val_acc=48.74 test_acc=50.12
Epoch 004: train_acc=52.12 val_acc=54.66 test_acc=54.72
Epoch 005: train_acc=62.49 val_acc=63.08 test_acc=63.54
Epoch 006: train_acc=69.48 val_acc=58.98 test_acc=57.59
Epoch 007: train_acc=74.38 val_acc=70.08 test_acc=70.75
Epoch 008: train_acc=77.48 val_acc=74.72 test_acc=74.78
Epoch 009: train_acc=79.46 val_acc=75.70 test_acc=76.88
Epoch 010: train_acc=81.46 val_acc=78.28 test_acc=78.51
Epoch 011: train_acc=83.43 val_acc=80.26 test_acc=82.36
Epoch 012: train_acc=85.18 val_acc=80.82 test_acc=81.51
Epoch 013: train_acc=86.64 val_acc=83.20 test_acc=83.90
Epoch 014: train_acc=88.06 val_acc=85.78 test_acc=86.16
Epoch 015: train_acc=89.61 val_acc=85.78 test_acc=86.57
Epoch 016: train_acc=91.05 val_acc=87.14 test_acc=87.57
Epoch 017: train_acc=92.27 val_acc=88.34 test_acc=89.44
Epoch 018: train_acc=93.38 val_acc=88.70 test_ac

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇████
test/acc,▁▂▄▅▅▅▆▇▇▇▇▇▇███████
test/loss,█▇▅▅▄▅▃▃▃▂▂▂▂▂▁▁▁▁▁▁
train/acc,▁▁▁▁▂▄▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇███▇▇▇█████
train/epoch_acc,▁▁▃▄▅▆▆▇▇▇▇▇▇▇██████
train/epoch_loss,█▇▆▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
train/loss,██▇▆▅▄▄▄▃▃▃▄▃▂▃▂▂▂▂▃▂▂▂▂▂▂▁▂▂▂▂▂▁▁▁▁▁▁▁▁
val/acc,▁▂▄▅▅▅▆▇▇▇▇▇▇███████
val/loss,█▇▅▅▄▅▃▃▃▂▂▂▂▂▁▁▁▁▁▁

0,1
best_test_acc,0.9058
epoch,20
global_step,7040
test/acc,0.9058
test/loss,0.74414
train/acc,0.95833
train/epoch_acc,0.94451
train/epoch_loss,0.70264
train/loss,0.66037
val/acc,0.903


[34m[1mwandb[0m: Agent Starting Run: k5xufcgk with config:
[34m[1mwandb[0m: 	act: silu
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	optimizer: adamax
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=36.16 val_acc=48.30 test_acc=52.32
Epoch 002: train_acc=53.06 val_acc=58.70 test_acc=61.76
Epoch 003: train_acc=61.60 val_acc=64.50 test_acc=67.14
Epoch 004: train_acc=66.58 val_acc=66.82 test_acc=70.64
Epoch 005: train_acc=69.96 val_acc=71.12 test_acc=73.25
Epoch 006: train_acc=72.56 val_acc=71.62 test_acc=74.92
Epoch 007: train_acc=74.62 val_acc=73.36 test_acc=75.44
Epoch 008: train_acc=76.04 val_acc=74.80 test_acc=75.83
Epoch 009: train_acc=77.60 val_acc=76.32 test_acc=78.86
Epoch 010: train_acc=78.67 val_acc=77.80 test_acc=79.30
Epoch 011: train_acc=79.77 val_acc=77.70 test_acc=79.16
Epoch 012: train_acc=80.72 val_acc=78.74 test_acc=79.86
Epoch 013: train_acc=81.73 val_acc=78.42 test_acc=79.83
Epoch 014: train_acc=82.24 val_acc=81.64 test_acc=82.11
Epoch 015: train_acc=83.25 val_acc=80.20 test_acc=81.50
Epoch 016: train_acc=83.78 val_acc=79.82 test_acc=81.86
Epoch 017: train_acc=84.06 val_acc=80.94 test_acc=82.02
Epoch 018: train_acc=84.87 val_acc=82.18 test_ac

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
global_step,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
test/acc,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█▇████████████████████
test/loss,█▆▅▅▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/acc,▁▂▂▃▄▅▅▅▅▅▆▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇██▇██▇▇███
train/epoch_acc,▁▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██████████████████████
train/epoch_loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,█▇▆▅▄▄▅▄▄▄▄▄▄▄▃▃▃▄▃▃▃▂▃▂▂▂▂▂▁▂▁▁▂▁▂▁▁▁▁▁
val/acc,▁▃▄▅▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇███████████████████
val/loss,█▇▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_acc,0.8805
epoch,60
global_step,5280
test/acc,0.8795
test/loss,0.82365
train/acc,0.9386
train/epoch_acc,0.93451
train/epoch_loss,0.72828
train/loss,0.71256
val/acc,0.8762


[34m[1mwandb[0m: Agent Starting Run: 7ikw94x8 with config:
[34m[1mwandb[0m: 	act: selu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=44.71 val_acc=55.86 test_acc=58.81
Epoch 002: train_acc=61.82 val_acc=66.72 test_acc=69.30
Epoch 003: train_acc=68.56 val_acc=68.78 test_acc=70.68
Epoch 004: train_acc=72.72 val_acc=73.68 test_acc=74.67
Epoch 005: train_acc=75.73 val_acc=77.14 test_acc=77.87
Epoch 006: train_acc=77.82 val_acc=77.72 test_acc=79.05
Epoch 007: train_acc=79.52 val_acc=78.90 test_acc=80.76
Epoch 008: train_acc=81.20 val_acc=78.48 test_acc=79.58
Epoch 009: train_acc=82.46 val_acc=80.06 test_acc=80.16
Epoch 010: train_acc=83.59 val_acc=81.10 test_acc=81.73
Epoch 011: train_acc=84.66 val_acc=82.74 test_acc=83.98
Epoch 012: train_acc=85.89 val_acc=83.78 test_acc=84.08
Epoch 013: train_acc=86.64 val_acc=84.56 test_acc=84.76
Epoch 014: train_acc=87.50 val_acc=85.40 test_acc=86.20
Epoch 015: train_acc=88.13 val_acc=85.98 test_acc=86.45
Epoch 016: train_acc=88.78 val_acc=85.84 test_acc=86.79
Epoch 017: train_acc=89.26 val_acc=86.18 test_acc=86.45
Epoch 018: train_acc=89.54 val_acc=86.76 test_ac

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
global_step,▁▁▁▂▂▂▂▂▂▂▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
test/acc,▁▄▄▅▆▆▆▆▆▇▇▇▇███████
test/loss,█▆▅▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁
train/acc,▁▂▃▃▃▄▄▄▄▄▅▅▇▅▆▆▆▆▆▆▇▆▆▇▆▆▇▇▇▇▆▆▇▇█▆▇▇▇▇
train/epoch_acc,▁▄▅▅▆▆▆▇▇▇▇▇▇███████
train/epoch_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁
train/loss,█▇▆▅▄▃▃▃▃▂▃▃▂▃▂▂▂▃▂▃▂▁▂▁▂▂▁▂▂▂▂▂▁▁▁▁▁▁▁▁
val/acc,▁▃▄▅▆▆▆▆▆▇▇▇▇███████
val/loss,█▆▅▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁

0,1
best_test_acc,0.8771
epoch,20
global_step,7040
test/acc,0.8752
test/loss,0.82391
train/acc,0.86111
train/epoch_acc,0.9016
train/epoch_loss,0.78583
train/loss,0.84514
val/acc,0.872


[34m[1mwandb[0m: Agent Starting Run: dk2zyb22 with config:
[34m[1mwandb[0m: 	act: softplus
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	optimizer: lbfgs
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=39.55 val_acc=49.68 test_acc=52.12
Epoch 002: train_acc=55.88 val_acc=50.52 test_acc=54.60
Epoch 003: train_acc=63.65 val_acc=67.02 test_acc=69.44
Epoch 004: train_acc=68.97 val_acc=70.62 test_acc=73.01
Epoch 005: train_acc=73.18 val_acc=67.30 test_acc=69.63
Epoch 006: train_acc=75.85 val_acc=75.22 test_acc=76.69
Epoch 007: train_acc=77.90 val_acc=75.70 test_acc=76.47
Epoch 008: train_acc=79.97 val_acc=79.10 test_acc=79.67
Epoch 009: train_acc=81.84 val_acc=80.28 test_acc=80.76
Epoch 010: train_acc=83.19 val_acc=80.84 test_acc=82.51
Epoch 011: train_acc=84.69 val_acc=81.90 test_acc=82.18
Epoch 012: train_acc=85.47 val_acc=79.36 test_acc=80.18
Epoch 013: train_acc=86.62 val_acc=83.14 test_acc=84.05
Epoch 014: train_acc=87.58 val_acc=85.66 test_acc=85.90
Epoch 015: train_acc=88.54 val_acc=86.20 test_acc=85.66
Epoch 016: train_acc=89.34 val_acc=86.68 test_acc=85.72
Epoch 017: train_acc=90.11 val_acc=88.16 test_acc=87.23
Epoch 018: train_acc=90.40 val_acc=87.68 test_ac

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
global_step,▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
test/acc,▁▁▄▅▄▆▆▆▇▇▇▇▇███████
test/loss,██▅▄▅▃▃▃▃▂▂▃▂▁▁▁▁▁▁▁
train/acc,▁▂▃▃▃▄▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█▇▇█▇███████████
train/epoch_acc,▁▃▄▅▆▆▆▆▇▇▇▇▇███████
train/epoch_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁
train/loss,██▇▇▇▇▆▅▅▅▅▅▅▅▄▃▂▃▃▂▂▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val/acc,▁▁▄▅▄▆▆▆▇▇▇▆▇███████
val/loss,██▅▄▅▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁

0,1
best_test_acc,0.8782
epoch,20
global_step,1760
test/acc,0.8782
test/loss,0.80828
train/acc,0.89912
train/epoch_acc,0.91118
train/epoch_loss,0.77884
train/loss,0.78834
val/acc,0.8802


[34m[1mwandb[0m: Agent Starting Run: 2m9frs43 with config:
[34m[1mwandb[0m: 	act: gelu
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	optimizer: adamax
[34m[1mwandb[0m: 	strong_aug: False


Epoch 001: train_acc=42.67 val_acc=55.26 test_acc=57.00
Epoch 002: train_acc=60.79 val_acc=64.58 test_acc=67.52
Epoch 003: train_acc=69.26 val_acc=69.58 test_acc=70.48
Epoch 004: train_acc=73.62 val_acc=71.98 test_acc=74.09
Epoch 005: train_acc=76.92 val_acc=76.34 test_acc=77.77
Epoch 006: train_acc=79.23 val_acc=78.74 test_acc=80.64
Epoch 007: train_acc=81.33 val_acc=77.22 test_acc=77.22
Epoch 008: train_acc=83.02 val_acc=78.68 test_acc=80.86
Epoch 009: train_acc=84.45 val_acc=81.94 test_acc=83.06
Epoch 010: train_acc=85.32 val_acc=81.64 test_acc=82.01
Epoch 011: train_acc=86.44 val_acc=78.46 test_acc=78.75
Epoch 012: train_acc=87.40 val_acc=80.76 test_acc=81.44
Epoch 013: train_acc=88.22 val_acc=82.48 test_acc=84.00
Epoch 014: train_acc=89.15 val_acc=84.12 test_acc=84.54
Epoch 015: train_acc=89.87 val_acc=86.80 test_acc=87.39
Epoch 016: train_acc=90.37 val_acc=86.44 test_acc=86.73
Epoch 017: train_acc=90.98 val_acc=86.42 test_acc=86.72
Epoch 018: train_acc=91.45 val_acc=86.48 test_ac

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
global_step,▁▁▁▁▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇████
test/acc,▁▃▄▄▅▆▅▆▆▆▅▆▇▇▇▇▇▇▇▇▇▇██████████████████
test/loss,█▆▅▅▄▃▄▃▃▃▄▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/acc,▁▂▄▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇██▇▇█▇█▇███████████
train/epoch_acc,▁▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
train/epoch_loss,█▆▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,█▅▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/acc,▁▃▄▄▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█▇█▇██████████████
val/loss,█▆▅▅▄▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_test_acc,0.9127
epoch,40
global_step,3520
test/acc,0.9115
test/loss,0.72993
train/acc,0.97149
train/epoch_acc,0.98129
train/epoch_loss,0.61155
train/loss,0.61846
val/acc,0.9126
