In [None]:
# IMPORTANT: some of the Kaggle data sources used by this notebook are private.
# Run this cell first so that the data-source import in the next cell can
# access them (kagglehub.login() is presumably what supplies the credentials).
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: run this cell to import the Kaggle data sources used below; it can
# be deleted once the data is available.
# It relies on `kagglehub` having been imported by the previous cell.
# NOTE: this notebook environment differs from Kaggle's Python environment, so
# some libraries used by the notebook may be missing.

tejada24m025_inaturalist_path = kagglehub.dataset_download('tejada24m025/inaturalist')


print('Data source import complete.')


In [None]:
import os
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import torch

def prepare_data(data_dir, batch_size, augment):
    """
    Build the training, validation and test DataLoaders.

    ``data_dir`` is expected to contain a 'train' and a 'test' folder in
    ImageFolder layout. The 'train' folder is split 80/20 (stratified by
    class, fixed random_state=42) into a training and a validation subset;
    the 'test' folder is used unchanged.

    Args:
        data_dir (str): Path to dataset folder.
        batch_size (int): Batch size used by all three loaders.
        augment (bool): When True the training subset is read through a random
            crop / flip / rotation / colour-jitter pipeline; when False it is
            read through the same plain resize pipeline as validation.

    Returns:
        train_loader, val_loader, test_loader, num_classes, class_weights
        where class_weights is a float tensor of 'balanced' weights computed
        from the labels of the training subset only.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    def _plain_pipeline():
        # Deterministic resize + normalise, used wherever no augmentation is wanted.
        return transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean, std=channel_std),
        ])

    if augment:
        train_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean, std=channel_std),
        ])
    else:
        train_transform = _plain_pipeline()

    # Validation and test share one non-augmented pipeline.
    val_transform = _plain_pipeline()
    test_transform = val_transform

    # First view of the train folder: read through the (possibly augmenting) pipeline.
    train_root = os.path.join(data_dir, 'train')
    train_view = datasets.ImageFolder(train_root, transform=train_transform)
    num_classes = len(train_view.classes)
    targets = [label for _, label in train_view.samples]

    # Stratified 80/20 split over sample indices.
    every_index = list(range(len(train_view)))
    train_idx, val_idx = train_test_split(
        every_index, test_size=0.2, stratify=targets, random_state=42
    )
    train_dataset = Subset(train_view, train_idx)

    # Second view of the same folder so validation images are never augmented.
    eval_view = datasets.ImageFolder(train_root, transform=val_transform)
    val_dataset = Subset(eval_view, val_idx)

    # 'balanced' class weights derived from the training labels only.
    train_labels = [targets[i] for i in train_idx]
    balanced = compute_class_weight('balanced', classes=np.unique(targets), y=train_labels)
    class_weights = torch.tensor(balanced, dtype=torch.float)

    test_dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=test_transform)

    shared = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
    train_loader = DataLoader(train_dataset, shuffle=True, **shared)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared)

    return train_loader, val_loader, test_loader, num_classes, class_weights

In [None]:
import torch
import torch.nn as nn

class CustomCNN(nn.Module):
    """
    Five-block convolutional classifier.

    Each block is conv -> (optional BatchNorm) -> activation -> (optional skip
    connection) -> 2x2 max-pool. The feature map is then brought to a fixed
    7x7 grid, flattened and passed through a two-layer dense head.
    """

    def __init__(self, num_filters, filter_size, activation, dense_neurons, batch_norm=False, dropout=0.0, num_classes=10, use_residual=False):
        """
        Custom CNN with 5 conv blocks, optional residual connections, and a dense classifier.

        Args:
            num_filters (list): Number of filters per block; the first 5 entries are used.
            filter_size (int): Kernel size for each conv layer.
            activation (class): Activation function class (e.g. nn.ReLU).
            dense_neurons (int): Number of neurons in the dense layer.
            batch_norm (bool): Whether to use BatchNorm.
            dropout (float): Dropout rate.
            num_classes (int): Number of output classes.
            use_residual (bool): Whether to add residual (skip) connections in each block.
        """
        super(CustomCNN, self).__init__()
        self.use_residual = use_residual
        layers = []
        in_channels = 3  # RGB input
        # "Same" padding for odd kernel sizes; loop-invariant, so computed once.
        padding = (filter_size - 1) // 2

        for i in range(5):
            conv = nn.Conv2d(in_channels, num_filters[i], kernel_size=filter_size, padding=padding)
            bn = nn.BatchNorm2d(num_filters[i]) if batch_norm else nn.Identity()
            act = activation()
            # The block computes act(bn(conv(x))) and, when use_residual is set and
            # the shapes match, adds x back before the max-pool halves the resolution.
            block = ResidualBlock(conv, bn, act, use_residual)
            pool = nn.MaxPool2d(2, 2)
            layers.append(nn.Sequential(block, pool))
            in_channels = num_filters[i]

        self.features = nn.Sequential(*layers)
        # A 224x224 input with odd kernels already yields a 7x7 map (224 / 32), in
        # which case this pool averages single cells and leaves the values unchanged.
        # For other input sizes or even kernels it guarantees the 7x7 grid the
        # classifier expects. It has no parameters, so state_dict keys are unaffected.
        self.spatial_pool = nn.AdaptiveAvgPool2d((7, 7))
        # Use the channel count of the last block actually built, not
        # num_filters[-1], so a longer num_filters list cannot cause a mismatch.
        self.flatten_size = in_channels * 7 * 7
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flatten_size, dense_neurons),
            activation(),
            nn.Dropout(dropout) if dropout > 0 else nn.Identity(),
            nn.Linear(dense_neurons, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images (conv features -> 7x7 pool -> dense head)."""
        x = self.features(x)
        x = self.spatial_pool(x)
        x = self.classifier(x)
        return x

class ResidualBlock(nn.Module):
    """
    Wraps a conv / norm / activation triple and optionally adds a skip connection.

    The skip is applied after the activation, and only when ``use_residual`` is
    set and the block's output has exactly the same shape as its input.
    """

    def __init__(self, conv, bn, activation, use_residual):
        super().__init__()
        self.use_residual = use_residual
        # Sub-modules are registered in conv -> bn -> activation order.
        self.conv = conv
        self.bn = bn
        self.activation = activation

    def forward(self, x):
        """Return activation(bn(conv(x))), plus x when the skip connection applies."""
        y = self.activation(self.bn(self.conv(x)))
        if not self.use_residual or y.shape != x.shape:
            return y
        return y + x


In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
from torch.cuda.amp import autocast, GradScaler
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix


# SECURITY: a W&B API key used to be hard-coded on this line. Secrets must not be
# stored in the notebook; the key that was committed should be revoked.
# wandb reads WANDB_API_KEY from the environment by itself (set it through the
# platform's secret store or the shell). When it is not set, wandb.login() is
# called to obtain credentials another way.
if not os.environ.get("WANDB_API_KEY"):
    wandb.login()
def get_activation(name):
    """
    Map an activation name to its torch.nn class.

    Supported names are 'ReLU', 'GELU', 'SiLU' and 'Mish'. The class itself is
    returned (not an instance). Any other name raises KeyError.
    """
    supported = dict(ReLU=nn.ReLU, GELU=nn.GELU, SiLU=nn.SiLU, Mish=nn.Mish)
    activation_cls = supported[name]
    return activation_cls

def generate_filters(base_filters, organization):
    """
    Expand a base filter count into the per-block filter counts of a 5-block CNN.

    Args:
        base_filters (int): Reference number of filters.
        organization (str): One of
            'same'   - every block uses base_filters;
            'double' - base_filters, 2x, 4x, 8x, 16x;
            'half'   - base_filters halved four times, listed smallest first so
                       the last block has base_filters.
            Any other value falls back to 'same'.

    Returns:
        list[int]: Five filter counts, each at least 1.
    """
    if organization == 'double':
        return [base_filters * (2 ** i) for i in range(5)]
    if organization == 'half':
        # Integer halving reaches 0 once base_filters < 16, which would create a
        # convolution with no output channels; clamp every count to at least 1.
        halved = [max(1, base_filters // (2 ** i)) for i in range(5)]
        return halved[::-1]  # Reverse to start small
    # 'same' and any unrecognised organization.
    return [base_filters] * 5
def train():
    """
    Train and validate one CustomCNN configuration as a single W&B run.

    All hyperparameters are read from ``wandb.config`` (populated by the sweep
    agent). Each epoch runs a full pass over the training loader and the
    validation loader and logs losses, validation accuracy, the current learning
    rate and the hyperparameters. The learning rate is halved when validation
    accuracy plateaus, and training stops early after
    ``config.early_stop_patience`` epochs without improvement.

    Side effects: writes the weights of the best epoch (by validation accuracy)
    to ``best.pth`` inside ``wandb.run.dir``.
    """
    # NOTE: wandb.init() starts the run and fills wandb.config; it does not seed
    # any random number generator, so runs are not reproducible unless seeds are
    # set explicitly.
    wandb.init()
    config = wandb.config
    device = config.device
    # Mixed precision and loss scaling are CUDA features; keep them off on any
    # other device instead of relying on torch to disable them with a warning.
    use_amp = torch.device(device).type == "cuda"

    # The test loader is built by prepare_data but not used during the sweep.
    train_loader, val_loader, _, num_classes, class_weights = prepare_data(
        data_dir=config.data_dir,
        batch_size=config.batch_size,
        augment=config.data_augmentation
    )

    model = CustomCNN(
        num_filters      = generate_filters(config.num_filters, config.filter_organization),
        filter_size      = config.filter_size,
        activation       = get_activation(config.activation),
        dense_neurons    = config.dense_neurons,
        batch_norm       = config.batch_norm,
        dropout          = config.dropout,
        num_classes      = num_classes,
        use_residual     = config.use_residual
    ).to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)
    # mode='max' because the scheduler is stepped with validation accuracy.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)
    scaler    = GradScaler(enabled=use_amp)

    best_val_acc = 0.0
    no_improve   = 0

    for epoch in range(config.epochs):
        # TRAIN
        model.train()
        train_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            with autocast(enabled=use_amp):
                logits = model(x)
                loss   = criterion(logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # VALIDATE
        model.eval()
        val_loss = 0.0
        preds, targs = [], []
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                with autocast(enabled=use_amp):
                    logits = model(x)
                    loss   = criterion(logits, y)
                val_loss += loss.item()
                preds.extend(logits.argmax(1).cpu().numpy())
                targs.extend(y.cpu().numpy())
        val_loss    /= len(val_loader)
        # Accuracy as a percentage in [0, 100].
        val_accuracy = 100 * np.mean(np.array(preds) == np.array(targs))

        # LOG EVERYTHING
        wandb.log({
            # losses & metrics
            "epoch":         epoch,
            "train_loss":    train_loss,
            "val_loss":      val_loss,
            "val_accuracy":  val_accuracy,
            # optimizer state
            "lr":            optimizer.param_groups[0]['lr'],
            # hyperparameters are logged again so they show up in the run table
            "hp/num_filters":        config.num_filters,
            "hp/filter_size":        config.filter_size,
            "hp/activation":         config.activation,
            "hp/filter_organization":config.filter_organization,
            "hp/data_augmentation":  config.data_augmentation,
            "hp/batch_norm":         config.batch_norm,
            "hp/dropout":            config.dropout,
            "hp/dense_neurons":      config.dense_neurons,
            "hp/batch_size":         config.batch_size,
            "hp/lr":                 config.lr,
            "hp/use_residual":       config.use_residual,
        })

        scheduler.step(val_accuracy)

        # early-stop & checkpoint
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            no_improve   = 0
            torch.save(model.state_dict(), os.path.join(wandb.run.dir, "best.pth"))
        else:
            no_improve += 1
            if no_improve >= config.early_stop_patience:
                break


if __name__ == "__main__":
    # Use the location returned by the kagglehub download cell when it has been
    # run, so the notebook also works outside Kaggle; otherwise fall back to the
    # Kaggle input mount that was previously hard-coded.
    # NOTE(review): assumes the download root contains 'inaturalist_12K', as the
    # Kaggle mount does - confirm when running in another environment.
    dataset_root = globals().get('tejada24m025_inaturalist_path', '/kaggle/input/inaturalist')
    data_dir = os.path.join(dataset_root, 'inaturalist_12K')

    # Bayesian search maximising the 'val_accuracy' metric logged by train().
    # Entries with 'value' are fixed; entries with 'values' / 'min'-'max' are searched.
    # NOTE(review): 'lr' is sampled uniformly over [1e-3, 1e-1]; a log-uniform
    # distribution may cover the low end better - left unchanged here.
    sweep_config = {
      'method': 'bayes',
      'metric': { 'name': 'val_accuracy', 'goal': 'maximize' },
      'parameters': {
        'num_filters':        {'values': [32,64,128]},
        'filter_size':        {'values': [3,5]},
        'activation':         {'values': ['ReLU','GELU','SiLU','Mish']},
        'filter_organization':{'values': ['same','double','half']},
        'data_augmentation':  {'values': [True,False]},
        'batch_norm':         {'values': [True,False]},
        'dropout':            {'values': [0.0,0.2,0.3]},
        'dense_neurons':      {'values': [256,512]},
        'batch_size':         {'values': [32,64]},
        'lr':                 {'min': 1e-3, 'max': 1e-1},
        'epochs':             {'value': 20},
        'use_residual':       {'values':[False]},
        'early_stop_patience':{'value': 5},
        'data_dir':           {'value': data_dir},
        'device':             {'value': 'cuda' if torch.cuda.is_available() else 'cpu'}
      }
    }

    # Register the sweep, then run up to 25 trials of train() in this process.
    sweep_id = wandb.sweep(sweep_config, project="inaturalist_cnn_from_scratchj")
    wandb.agent(sweep_id, function=train, count = 25)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: tz0427hh
Sweep URL: https://wandb.ai/fgbb66579-iit-madras-foundation/inaturalist_cnn_from_scratchj/sweeps/tz0427hh


[34m[1mwandb[0m: Agent Starting Run: uqkhobg8 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.04234037859427504
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False
[34m[1mwandb[0m: Currently logged in as: [33mfgbb66579[0m ([33mfgbb66579-iit-madras-foundation[0m). Use [1m`wandb login --relogin`[0m to force relogin


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,███████████████▁▁▁▁▁
train_loss,█▇▆▆▆▅▅▅▄▄▄▄▄▃▃▂▂▁▁▁
val_accuracy,▁▃▅▅▆▆▇▆▇▇▇▆▇▇▇█████

0,1
epoch,19
hp/activation,GELU
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,256
hp/dropout,0
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.04234


[34m[1mwandb[0m: Agent Starting Run: 6mpco1ol with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.09949458598329218
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████▃▃▃▃▃▃▃▃▁▁▁▁
train_loss,█▇▇▇▇▇▇▇▄▄▄▃▃▃▃▃▂▂▂▁
val_accuracy,▁▃▄▄▃▃▃▃▅▆▅▇▆▆▇▅▇▇█▇

0,1
epoch,19
hp/activation,Mish
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.3
hp/filter_organization,half
hp/filter_size,3
hp/lr,0.09949


[34m[1mwandb[0m: Agent Starting Run: z66xdjm6 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.09121751410884428
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▃▅▆▇█
hp/batch_size,▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁
lr,██████▁
train_loss,█▁▂▁▁▁▁
val_accuracy,▁██████

0,1
epoch,6
hp/activation,ReLU
hp/batch_norm,True
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.09122


[34m[1mwandb[0m: Agent Starting Run: 7jf34xil with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.007506549152565881
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████▁
train_loss,█▇▇▇▇▇▆▆▅▄▃▂▁
val_accuracy,▁▄▆▇▇▇▇██▇▆▅▇

0,1
epoch,12
hp/activation,Mish
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,double
hp/filter_size,5
hp/lr,0.00751


[34m[1mwandb[0m: Agent Starting Run: bdmqa529 with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.05672087444122528
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████▁
train_loss,████▇▆▆▅▅▅▄▄▁
val_accuracy,▁▁▁▃▆▆▇██▇▇▆█

0,1
epoch,12
hp/activation,SiLU
hp/batch_norm,False
hp/batch_size,64
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0
hp/filter_organization,half
hp/filter_size,5
hp/lr,0.05672


[34m[1mwandb[0m: Agent Starting Run: 178e5mu1 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.06423022012320995
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▂▁▃▄▄▄▄▆▆▆▆▇▇▇▇▇█▇█▇

0,1
epoch,19
hp/activation,ReLU
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,False
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,double
hp/filter_size,5
hp/lr,0.06423


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jbawg5qy with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.08967481847601547
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▇▇▆▆▆▆▆▅▅▅▄▄▃▃▂▂▁
val_accuracy,▁▃▃▃▅▆▆▄▆▆▇▆▅▇▇▇█▇▇█

0,1
epoch,19
hp/activation,SiLU
hp/batch_norm,True
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.08967


[34m[1mwandb[0m: Agent Starting Run: iununyoa with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.07004630622194292
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▃▃▄▅▆▆▇█
hp/batch_size,▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁
lr,█████████▁
train_loss,█▇▅▄▃▂▃▃▃▁
val_accuracy,▃▁▆▆█▇▇▄▅▇

0,1
epoch,9
hp/activation,ReLU
hp/batch_norm,False
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,half
hp/filter_size,3
hp/lr,0.07005


[34m[1mwandb[0m: Agent Starting Run: ba3mcsgv with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.008915665271506452
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▂▃▃▄▄▅▅▆▆▇▇█
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,█████████████▁
train_loss,██▇▇▇▆▆▆▅▅▅▄▃▁
val_accuracy,▁▃▄▅▆▇▇████▇▆▇

0,1
epoch,13
hp/activation,ReLU
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,512
hp/dropout,0.3
hp/filter_organization,half
hp/filter_size,3
hp/lr,0.00892


[34m[1mwandb[0m: Agent Starting Run: 6h9mrzr1 with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.07101342042330809
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
val_accuracy,▁▁▁▁▁▁

0,1
epoch,5
hp/activation,Mish
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,double
hp/filter_size,3
hp/lr,0.07101


[34m[1mwandb[0m: Agent Starting Run: 7rijx8cj with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.05120537583761722
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
train_loss,▁
val_accuracy,█▁▁▁▁▁

0,1
epoch,5
hp/activation,SiLU
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0
hp/filter_organization,double
hp/filter_size,5
hp/lr,0.05121


[34m[1mwandb[0m: Agent Starting Run: uw42e0rt with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.0013445675100836891
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▃▅▆▇█
hp/batch_size,▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁
lr,██████▁
train_loss,█▇▆▅▄▃▁
val_accuracy,▁█▁██▁▁

0,1
epoch,6
hp/activation,GELU
hp/batch_norm,False
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,half
hp/filter_size,5
hp/lr,0.00134


[34m[1mwandb[0m: Agent Starting Run: g8xr6789 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.09631725728918218
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
val_accuracy,▁▁▁▁▁▁

0,1
epoch,5
hp/activation,GELU
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,half
hp/filter_size,5
hp/lr,0.09632


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jenz31lf with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.007154994697524877
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,█████████████████▁▁▁
train_loss,██▇▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁▁
val_accuracy,▁▁▃▄▆▆▆▆▇▇▇▇█▆▆▆▇███

0,1
epoch,19
hp/activation,GELU
hp/batch_norm,True
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.2
hp/filter_organization,same
hp/filter_size,5
hp/lr,0.00715


[34m[1mwandb[0m: Agent Starting Run: fi1a09l7 with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.09971920587001512
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
val_accuracy,▁▁▁▁▁▁

0,1
epoch,5
hp/activation,Mish
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,256
hp/dropout,0.2
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.09972


[34m[1mwandb[0m: Agent Starting Run: udiz7mck with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.0680958740344733
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
train_loss,▁
val_accuracy,█▁▁▁▁▁

0,1
epoch,5
hp/activation,SiLU
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,same
hp/filter_size,5
hp/lr,0.0681


[34m[1mwandb[0m: Agent Starting Run: er3bp6a5 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.07315101748496584
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,█████████▃▃▃▃▃▃▁
train_loss,██▇▇▇▇▇▇▇▇▆▅▄▄▃▁
val_accuracy,▁▅▅▅▇▆▆▆▄▇█▇▇▇▇█

0,1
epoch,15
hp/activation,GELU
hp/batch_norm,False
hp/batch_size,64
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.07315


[34m[1mwandb[0m: Agent Starting Run: 2ta2eqe8 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.026991514377150905
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▆▅▅▅▅▅▄▄▃▃▂▂▂▁▁▁
val_accuracy,▁▃▃▅▅▅▅▆▆▆▆▆▇▇█▇▇███

0,1
epoch,19
hp/activation,GELU
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.02699


[34m[1mwandb[0m: Agent Starting Run: b69l3gzr with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.08198551634780565
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,███████████████████▁
train_loss,█▇▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▂▁
val_accuracy,▁▂▃▂▄▄▅▆▆▅▇▇█▇█▇▇███

0,1
epoch,19
hp/activation,Mish
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,half
hp/filter_size,3
hp/lr,0.08199


[34m[1mwandb[0m: Agent Starting Run: ffckw0on with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.0230426988576886
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▇▆▆▆▅▄▄▄▃▃▃▂▂▂▂▁▁▁
val_accuracy,▁▂▃▃▃▄▄▄▅▄▆▆▇▆▇██▇▇█

0,1
epoch,19
hp/activation,ReLU
hp/batch_norm,True
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0
hp/filter_organization,half
hp/filter_size,5
hp/lr,0.02304


[34m[1mwandb[0m: Agent Starting Run: jpblp50w with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.04177919714387469
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▂▁▁▄▁▆▆▄▅▆▆▆▇▇▆█▇▇█

0,1
epoch,19
hp/activation,GELU
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.2
hp/filter_organization,double
hp/filter_size,3
hp/lr,0.04178


[34m[1mwandb[0m: Agent Starting Run: t9pbew1s with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.09146477552971416
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████████▁▁▁▁
train_loss,█▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▃▂▂▁
val_accuracy,▁▄▄▄▄▅▅▆▆▆▇█▇▇▇▇████

0,1
epoch,19
hp/activation,GELU
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.3
hp/filter_organization,same
hp/filter_size,5
hp/lr,0.09146


[34m[1mwandb[0m: Agent Starting Run: kojs61sg with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.044899444720785726
[34m[1mwandb[0m: 	num_filters: 128
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▁▁
val_accuracy,▁▂▂▃▄▄▅▄▆▆▆▇▆▇▇▇▆███

0,1
epoch,19
hp/activation,Mish
hp/batch_norm,True
hp/batch_size,64
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0.3
hp/filter_organization,same
hp/filter_size,5
hp/lr,0.0449


[34m[1mwandb[0m: Agent Starting Run: 1fogdur3 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	lr: 0.04378198350343739
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▂▄▅▇█
hp/batch_size,▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁
lr,█████▁
train_loss,▅▂█▆▆▁
val_accuracy,█▁▁▁▁▁

0,1
epoch,5
hp/activation,ReLU
hp/batch_norm,True
hp/batch_size,32
hp/data_augmentation,False
hp/dense_neurons,256
hp/dropout,0.2
hp/filter_organization,half
hp/filter_size,5
hp/lr,0.04378


[34m[1mwandb[0m: Agent Starting Run: uqmu15la with config:
[34m[1mwandb[0m: 	activation: Mish
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	data_dir: /kaggle/input/inaturalist/inaturalist_12K
[34m[1mwandb[0m: 	dense_neurons: 512
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	lr: 0.014141113502358017
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	use_residual: False


  scaler    = GradScaler()
  with autocast():
  with autocast():


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
hp/batch_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dense_neurons,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/dropout,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/filter_size,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
hp/num_filters,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▇▆▆▅▅▄▄▄▄▃▃▃▃▂▂▂▁▁▁
val_accuracy,▁▂▄▅▅▆▆▆▇▇▇▇▇▇▇▇█▇█▇

0,1
epoch,19
hp/activation,Mish
hp/batch_norm,False
hp/batch_size,32
hp/data_augmentation,True
hp/dense_neurons,512
hp/dropout,0
hp/filter_organization,same
hp/filter_size,3
hp/lr,0.01414
