In [16]:
import torch
import torch.nn as nn
import numpy as np

class CNN(nn.Module):
    def __init__(
        self,
        input_dimension: tuple,
        number_of_filters: int,
        filter_size: tuple,
        stride: int,
        padding: int,
        max_pooling_size: tuple,
        n_neurons: int,
        n_classes: int,
        conv_activation: nn.Module,
        dense_activation: nn.Module,
        dropout_rate: float,
        use_batchnorm: bool,
        factor: float,
        dropout_organisation: int,
    ):
        super(CNN, self).__init__()

        self.conv_blocks = nn.ModuleList()
        in_channels = input_dimension[0]

        for i in range(5):
            out_channels = int((factor ** i) * number_of_filters)
            out_channels = max(out_channels, 3)

            add_dropout = (i % dropout_organisation) > 0

            conv_block = self.create_conv_block(
                in_channels,
                out_channels,
                filter_size,
                max_pooling_size,
                stride,
                padding,
                conv_activation,
                dropout_rate,
                use_batchnorm,
                add_dropout,
            )
            self.conv_blocks.append(conv_block)
            in_channels = out_channels

        self.flatten = nn.Flatten()

        # Compute the size after conv layers
        dummy_input = torch.ones(1, *input_dimension)
        with torch.no_grad():
            x = dummy_input
            for block in self.conv_blocks:
                x = block(x)
        in_features = x.view(1, -1).shape[1]

        # Define dense layers
        self.dense_block1 = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=n_neurons),
            dense_activation,
            nn.Linear(n_neurons, n_classes),
            nn.LogSoftmax(dim=1),
        )

    def create_conv_block(
        self,
        in_c,
        out_c,
        kernel_size,
        max_pooling_size,
        stride,
        padding,
        conv_activation,
        dropout_rate,
        use_batchnorm,
        add_dropout,
    ):
        layers = [
            nn.Conv2d(in_c, out_c, kernel_size=kernel_size, stride=stride, padding=padding),
            conv_activation
        ]
        if use_batchnorm:
            layers.append(nn.BatchNorm2d(out_c))
        layers.append(nn.MaxPool2d(kernel_size=max_pooling_size))
        if add_dropout:
            layers.append(nn.Dropout(p=dropout_rate))
        return nn.Sequential(*layers)

    def forward(self, x):
        for block in self.conv_blocks:
            x = block(x)
        x = self.flatten(x)
        return self.dense_block1(x)


In [17]:
import gc
import torch
import numpy as np
from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder

import wandb
from torch.cuda.amp import GradScaler, autocast

In [18]:
# Device setup
device= torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
# Dataset path
training_data_path = "/kaggle/input/nature-12k/inaturalist_12K/train"

In [20]:
torch.backends.cudnn.benchmark = True

In [31]:
# Fixed hyperparameters
config = {
    'number_of_filters': 64,
    'filter_size': 3,
    'stride': 1,
    'padding': 1,
    'max_pooling_size': 2,
    'n_neurons': 256,
    'n_classes': 10,
    'conv_activation': 'relu',
    'dense_activation': 'relu',
    'dropout_rate': 0.3,
    'use_batchnorm': True,
    'factor': 2,
    'learning_rate': 1e-3,
    'batch_size': 16,
    'epochs': 10,
    'use_augmentation': True,
    'dropout_organisation': 3
}

In [22]:
def get_transform(use_augmentation):
    if use_augmentation:
        return transforms.Compose([
            transforms.RandomCrop(50, padding=1),
            transforms.RandomGrayscale(p=0.1),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(degrees=(0, 20)),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )
        ])
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
    ])

In [30]:
sweep_config = {
    'method': 'bayes',
    'name':'PART_A_Q2_SWEEP_1',
    'metric': {'name': "val_accuracy", 'goal': 'maximize'},
    'parameters': {
        'number_of_filters': {'values': [16, 32, 64, 128, 256]},
        'filter_size': {'value': 3},
        'stride': {'value': 1},
        'padding': {'value': 1},
        'max_pooling_size': {'value': 2},
        'n_neurons': {'values': [64, 128, 256, 512, 1024]},
        'n_classes': {'value': 10},
        'conv_activation': {'values': ['relu', 'gelu', 'silu', 'mish', 'relu6', 'tanh', 'sigmoid']},
        'dense_activation': {'values': ['relu', 'gelu', 'silu', 'mish', 'relu6', 'tanh', 'sigmoid']},
        'dropout_rate': {'values': [0.2, 0.3, 0.4, 0.5]},
        'use_batchnorm': {'values': [True, False]},
        'factor': {'values': [1, 2, 3, 0.5]},
        'learning_rate': {'values': [1e-2, 1e-3, 1e-4, 1e-5]},
        'batch_size': {'value': 16},
        'epochs': {'values': [5, 10, 15]},
        'use_augmentation': {'values': [True, False]},
        'dropout_organisation': {'values': [1, 2, 3, 4, 5]},
    },
}

In [24]:
def train_model():
    # Dataset
    dataset = ImageFolder(root=training_data_path, transform=get_transform(config['use_augmentation']))
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_set, val_set = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=config['batch_size'], shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_set, batch_size=config['batch_size'], shuffle=False, num_workers=4, pin_memory=True)

    # Activation options
    activations = {
        'relu': nn.ReLU(),
        'gelu': nn.GELU(),
        'silu': nn.SiLU(),
        'mish': nn.Mish(),
        'relu6': nn.ReLU6(),
        'tanh': nn.Tanh(),
        'sigmoid': nn.Sigmoid(),
    }

    # Model
    gc.collect()
    torch.cuda.empty_cache()
    model = CNN(
        input_dimension=(3, 224, 224),
        number_of_filters=config['number_of_filters'],
        filter_size=(config['filter_size'], config['filter_size']),
        stride=config['stride'],
        padding=config['padding'],
        max_pooling_size=(config['max_pooling_size'], config['max_pooling_size']),
        n_neurons=config['n_neurons'],
        n_classes=config['n_classes'],
        conv_activation=activations[config['conv_activation']],
        dense_activation=activations[config['dense_activation']],
        dropout_rate=config['dropout_rate'],
        use_batchnorm=config['use_batchnorm'],
        factor=config['factor'],
        dropout_organisation=config['dropout_organisation'],
    ).to(device)

    # Optimizer and loss
    optimizer = Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    # Training
    for epoch in range(config["epochs"]):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for x, y in train_loader:
            x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
            optimizer.zero_grad()
            
            # Mixed precision training
            with autocast():
                pred = model(x)
                loss = criterion(pred, y)
            
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item() * x.size(0)
            correct += (pred.argmax(1) == y).sum().item()
            total += y.size(0)
            del x, y

        train_accuracy = 100 * correct / total
        avg_train_loss = train_loss / total

        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
                pred = model(x)
                loss = criterion(pred, y)
                val_loss += loss.item() * x.size(0)
                correct += (pred.argmax(1) == y).sum().item()
                total += y.size(0)
                del x, y

        val_accuracy = 100 * correct / total
        avg_val_loss = val_loss / total

        print(f"Epoch [{epoch+1}/{config['epochs']}]")
        print(f"Train Loss: {avg_train_loss:.4f} | Train Accuracy: {train_accuracy:.2f}%")
        print(f"Val Loss: {avg_val_loss:.4f} | Val Accuracy: {val_accuracy:.2f}%")
        print("-" * 50)

    # Save the trained model
    save_path = "/kaggle/input/nature-12k/inaturalist_12K"
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")
    
    return model

In [33]:
def wandb_sweep():
    with wandb.init() as run:
        sweep_config = dict(wandb.config)
        dataset = ImageFolder(root=training_data_path, transform=get_transform(sweep_config['use_augmentation']))
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_set, val_set = random_split(dataset, [train_size, val_size])
        
        # Optimized DataLoaders
        train_loader = DataLoader(
            train_set, 
            batch_size=sweep_config['batch_size'], 
            shuffle=True,
            num_workers=4,
            pin_memory=True
        )
        val_loader = DataLoader(
            val_set, 
            batch_size=sweep_config['batch_size'], 
            shuffle=False,
            num_workers=4,
            pin_memory=True
        )

        activations = {
            'relu': nn.ReLU(),
            'gelu': nn.GELU(),
            'silu': nn.SiLU(),
            'mish': nn.Mish(),
            'relu6': nn.ReLU6(),
            'tanh': nn.Tanh(),
            'sigmoid': nn.Sigmoid(),
        }
        gc.collect()
        torch.cuda.empty_cache()
        model = CNN(
            input_dimension=(3, 224, 224),
            number_of_filters=sweep_config['number_of_filters'],
            filter_size=(sweep_config['filter_size'], sweep_config['filter_size']),
            stride=sweep_config['stride'],
            padding=sweep_config['padding'],
            max_pooling_size=(sweep_config['max_pooling_size'], sweep_config['max_pooling_size']),
            n_neurons=sweep_config['n_neurons'],
            n_classes=sweep_config['n_classes'],
            conv_activation=activations[sweep_config['conv_activation']],
            dense_activation=activations[sweep_config['dense_activation']],
            dropout_rate=sweep_config['dropout_rate'],
            use_batchnorm=sweep_config['use_batchnorm'],
            factor=sweep_config['factor'],
            dropout_organisation=sweep_config['dropout_organisation'],
        ).to(device)
        optimizer = Adam(model.parameters(), lr=sweep_config['learning_rate'])
        criterion = nn.CrossEntropyLoss()
        scaler = GradScaler()
        best_val_accuracy = 0.0
        for epoch in range(sweep_config["epochs"]):
            model.train()
            train_loss = 0
            correct = 0
            total = 0
            for x, y in train_loader:
                x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
                optimizer.zero_grad()
                
                # Mixed precision training
                with autocast():
                    pred = model(x)
                    loss = criterion(pred, y)
                
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                train_loss += loss.item() * x.size(0)
                correct += (pred.argmax(1) == y).sum().item()
                total += y.size(0)
                del x, y
            train_accuracy = 100 * correct / total
            avg_train_loss = train_loss / total
            model.eval()
            val_loss = 0
            correct = 0
            total = 0
            with torch.no_grad():
                for x, y in val_loader:
                    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
                    pred = model(x)
                    loss = criterion(pred, y)
                    val_loss += loss.item() * x.size(0)
                    correct += (pred.argmax(1) == y).sum().item()
                    total += y.size(0)
                    del x, y
            val_accuracy = 100 * correct / total
            avg_val_loss = val_loss / total
            wandb.log({
                'epoch': epoch+1,
                'train_loss': avg_train_loss,
                'train_accuracy': train_accuracy,
                'val_loss': avg_val_loss,
                'val_accuracy': val_accuracy
            })
            print(f"Epoch [{epoch+1}/{sweep_config['epochs']}]\nTrain Loss: {avg_train_loss:.4f} | Train Accuracy: {train_accuracy:.2f}%\nVal Loss: {avg_val_loss:.4f} | Val Accuracy: {val_accuracy:.2f}%\n" + "-" * 50)
            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                torch.save(model.state_dict(), "best_model_sweep.pth")
        print(f"Best model saved to best_model.pth with val_accuracy={best_val_accuracy:.2f}%")


In [34]:
if __name__ == "__main__":
    wandb.login(key='f15dba29e56f32e9c31d598bce5bc7a3c76de62e')
    sweep_id = wandb.sweep(sweep_config, project="DA6401_Assignment2")
    wandb.agent(sweep_id, function=wandb_sweep, count=20)  
    wandb.finish()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: 01tppmv2
Sweep URL: https://wandb.ai/ma23c047-indian-institute-of-technology-madras/DA6401_Assignment2/sweeps/01tppmv2


[34m[1mwandb[0m: Agent Starting Run: h5f54wn1 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: tanh
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dropout_organisation: 1
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 1024
[34m[1mwandb[0m: 	number_of_filters: 256
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: True


  scaler = GradScaler()
  with autocast():


Epoch [1/5]
Train Loss: 2.9309 | Train Accuracy: 10.70%
Val Loss: 2.3053 | Val Accuracy: 9.95%
--------------------------------------------------
Epoch [2/5]
Train Loss: 2.3241 | Train Accuracy: 10.00%
Val Loss: 2.3059 | Val Accuracy: 9.50%
--------------------------------------------------
Epoch [3/5]
Train Loss: 2.3102 | Train Accuracy: 10.29%
Val Loss: 2.3055 | Val Accuracy: 9.95%
--------------------------------------------------
Epoch [4/5]
Train Loss: 2.3133 | Train Accuracy: 10.43%
Val Loss: 2.3054 | Val Accuracy: 9.05%
--------------------------------------------------
Epoch [5/5]
Train Loss: 2.3086 | Train Accuracy: 10.05%
Val Loss: 2.3020 | Val Accuracy: 10.65%
--------------------------------------------------
Best model saved to best_model.pth with val_accuracy=10.65%


0,1
epoch,▁▃▅▆█
train_accuracy,█▁▄▅▁
train_loss,█▁▁▁▁
val_accuracy,▅▃▅▁█
val_loss,▇█▇▇▁

0,1
epoch,5.0
train_accuracy,10.05126
train_loss,2.30856
val_accuracy,10.65
val_loss,2.30201


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uicznblc with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	dense_activation: sigmoid
[34m[1mwandb[0m: 	dropout_organisation: 2
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 3
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 64
[34m[1mwandb[0m: 	number_of_filters: 64
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: True


Epoch [1/15]
Train Loss: 2.0946 | Train Accuracy: 27.22%
Val Loss: 2.1138 | Val Accuracy: 25.30%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.0024 | Train Accuracy: 32.93%
Val Loss: 2.1167 | Val Accuracy: 24.45%
--------------------------------------------------
Epoch [3/15]
Train Loss: 1.9495 | Train Accuracy: 35.78%
Val Loss: 2.1171 | Val Accuracy: 23.85%
--------------------------------------------------
Epoch [4/15]
Train Loss: 1.8986 | Train Accuracy: 38.37%
Val Loss: 2.0334 | Val Accuracy: 29.55%
--------------------------------------------------
Epoch [5/15]
Train Loss: 1.8408 | Train Accuracy: 42.26%
Val Loss: 2.0446 | Val Accuracy: 29.35%
--------------------------------------------------
Epoch [6/15]
Train Loss: 1.7701 | Train Accuracy: 46.38%
Val Loss: 1.9916 | Val Accuracy: 30.15%
--------------------------------------------------
Epoch [7/15]
Train Loss: 1.6934 | Train Accuracy: 50.57%
Val Loss: 1.9715 | Val Accuracy: 32.95%
---------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▂▂▂▂▃▃▄▅▆▇▇███
train_loss,██▇▇▇▆▆▆▅▄▃▃▂▁▁
val_accuracy,▂▁▁▃▃▄▅▅▆▆▇▆▇██
val_loss,███▆▆▅▄▄▂▂▁▂▁▁▁

0,1
epoch,15.0
train_accuracy,98.89986
train_loss,0.67842
val_accuracy,39.95
val_loss,1.86076


[34m[1mwandb[0m: Agent Starting Run: tq2ija08 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: relu
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dropout_organisation: 2
[34m[1mwandb[0m: 	dropout_rate: 0.5
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 1
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 512
[34m[1mwandb[0m: 	number_of_filters: 16
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: True


Epoch [1/15]
Train Loss: 2.6043 | Train Accuracy: 9.76%
Val Loss: 2.3023 | Val Accuracy: 9.60%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.3074 | Train Accuracy: 9.63%
Val Loss: 2.2999 | Val Accuracy: 10.15%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.3001 | Train Accuracy: 11.66%
Val Loss: 2.2994 | Val Accuracy: 12.50%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.2978 | Train Accuracy: 11.99%
Val Loss: 2.2831 | Val Accuracy: 13.05%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.2979 | Train Accuracy: 11.86%
Val Loss: 2.2915 | Val Accuracy: 11.70%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.2952 | Train Accuracy: 12.54%
Val Loss: 2.2825 | Val Accuracy: 14.35%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.2980 | Train Accuracy: 12.49%
Val Loss: 2.2922 | Val Accuracy: 12.25%
------------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▁▆▆▆▇▇▅▆▆▆▃▆█▇
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▂▆▆▄█▅▁▅▅▆▄▄▆▁
val_loss,▅▄▄▁▃▁▃▅▁▄▃▃▂█▅

0,1
epoch,15.0
train_accuracy,12.17652
train_loss,2.30514
val_accuracy,9.1
val_loss,2.30464


[34m[1mwandb[0m: Agent Starting Run: 2s3r5ivi with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: sigmoid
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dropout_organisation: 5
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 2
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 128
[34m[1mwandb[0m: 	number_of_filters: 64
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: True


Epoch [1/15]
Train Loss: 12.5653 | Train Accuracy: 10.14%
Val Loss: 2.3053 | Val Accuracy: 10.25%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.3141 | Train Accuracy: 10.09%
Val Loss: 2.3065 | Val Accuracy: 9.20%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.3088 | Train Accuracy: 9.76%
Val Loss: 2.3059 | Val Accuracy: 9.90%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.3046 | Train Accuracy: 9.33%
Val Loss: 2.3045 | Val Accuracy: 9.90%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.3046 | Train Accuracy: 9.96%
Val Loss: 2.3046 | Val Accuracy: 9.20%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.3041 | Train Accuracy: 10.24%
Val Loss: 2.3050 | Val Accuracy: 9.20%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.3051 | Train Accuracy: 9.74%
Val Loss: 2.3091 | Val Accuracy: 9.30%
------------------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▇▇▄▁▆█▄▇▁▆▄▃▂▆▄
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▅▂▄▄▂▂▂▄▁█▁▂▂▄▁
val_loss,▃▅▄▃▃▃█▅▅▂▄▂▂▁▄

0,1
epoch,15.0
train_accuracy,9.77622
train_loss,2.30492
val_accuracy,9.05
val_loss,2.30584


[34m[1mwandb[0m: Agent Starting Run: ji9f0915 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: sigmoid
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dropout_organisation: 3
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 2
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 64
[34m[1mwandb[0m: 	number_of_filters: 128
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: False


Epoch [1/15]
Train Loss: 12.7028 | Train Accuracy: 9.63%
Val Loss: 2.3040 | Val Accuracy: 9.65%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.3048 | Train Accuracy: 9.78%
Val Loss: 2.3055 | Val Accuracy: 9.65%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.3048 | Train Accuracy: 9.63%
Val Loss: 2.3054 | Val Accuracy: 9.25%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.3050 | Train Accuracy: 9.63%
Val Loss: 2.3035 | Val Accuracy: 9.65%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.3049 | Train Accuracy: 9.96%
Val Loss: 2.3028 | Val Accuracy: 10.35%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.3047 | Train Accuracy: 9.74%
Val Loss: 2.3068 | Val Accuracy: 9.20%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.3052 | Train Accuracy: 9.75%
Val Loss: 2.3059 | Val Accuracy: 9.25%
---------------------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▄▅▄▄▆▅▅▅▅▃▇█▁█▆
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▂▁▂▅▁▁▁▃▁█▂▁▅▂
val_loss,▃▄▄▂▁▆▅▆▁▄▁▄█▄▅

0,1
epoch,15.0
train_accuracy,9.85123
train_loss,2.30509
val_accuracy,9.5
val_loss,2.30604


[34m[1mwandb[0m: Agent Starting Run: tmaox4jo with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	dense_activation: tanh
[34m[1mwandb[0m: 	dropout_organisation: 2
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 3
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 256
[34m[1mwandb[0m: 	number_of_filters: 128
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: False


Epoch [1/15]
Train Loss: 2.2604 | Train Accuracy: 15.44%
Val Loss: 2.2213 | Val Accuracy: 18.30%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.2362 | Train Accuracy: 17.51%
Val Loss: 2.2272 | Val Accuracy: 17.15%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.2144 | Train Accuracy: 18.21%
Val Loss: 2.2111 | Val Accuracy: 18.80%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.2045 | Train Accuracy: 18.74%
Val Loss: 2.1948 | Val Accuracy: 18.50%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.2003 | Train Accuracy: 19.30%
Val Loss: 2.2033 | Val Accuracy: 19.60%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.1917 | Train Accuracy: 19.58%
Val Loss: 2.1810 | Val Accuracy: 21.15%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.1947 | Train Accuracy: 19.51%
Val Loss: 2.1697 | Val Accuracy: 20.80%
---------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▃▄▅▅▅▅▇▆▆▇▇▇▇█
train_loss,█▆▅▄▄▃▄▃▃▃▂▂▂▂▁
val_accuracy,▃▁▃▃▄▆▆▄▆▅▆▆▇▇█
val_loss,▇█▆▅▆▄▂▃▄▄▃▃▁▁▁

0,1
epoch,15.0
train_accuracy,21.95274
train_loss,2.15562
val_accuracy,22.4
val_loss,2.15428


[34m[1mwandb[0m: Agent Starting Run: gsx6fzp8 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	dense_activation: sigmoid
[34m[1mwandb[0m: 	dropout_organisation: 2
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 128
[34m[1mwandb[0m: 	number_of_filters: 32
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: False


Epoch [1/15]
Train Loss: 2.3104 | Train Accuracy: 10.13%
Val Loss: 2.3064 | Val Accuracy: 9.50%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.3056 | Train Accuracy: 10.13%
Val Loss: 2.3042 | Val Accuracy: 9.50%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.3036 | Train Accuracy: 10.28%
Val Loss: 2.3034 | Val Accuracy: 9.50%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.3026 | Train Accuracy: 10.21%
Val Loss: 2.3029 | Val Accuracy: 9.90%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.3016 | Train Accuracy: 10.55%
Val Loss: 2.3033 | Val Accuracy: 9.30%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.3009 | Train Accuracy: 10.83%
Val Loss: 2.3029 | Val Accuracy: 10.20%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.2990 | Train Accuracy: 11.29%
Val Loss: 2.3018 | Val Accuracy: 10.50%
--------------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▁▁▁▂▃▅▃▄▅▆▅▇██
train_loss,█▆▅▅▄▄▃▃▃▂▂▃▂▁▂
val_accuracy,▂▂▂▃▁▄▅▆█▇▇▄▇██
val_loss,█▆▅▅▅▅▄▄▃▃▂▅▄▁▁

0,1
epoch,15.0
train_accuracy,12.31404
train_loss,2.2958
val_accuracy,11.15
val_loss,2.29847


[34m[1mwandb[0m: Agent Starting Run: zdlnthc2 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: tanh
[34m[1mwandb[0m: 	dense_activation: sigmoid
[34m[1mwandb[0m: 	dropout_organisation: 3
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 2
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 64
[34m[1mwandb[0m: 	number_of_filters: 128
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: False


Epoch [1/15]
Train Loss: 2.1752 | Train Accuracy: 21.67%
Val Loss: 2.1069 | Val Accuracy: 26.90%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.0681 | Train Accuracy: 28.25%
Val Loss: 2.0766 | Val Accuracy: 26.95%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.0316 | Train Accuracy: 30.07%
Val Loss: 2.0560 | Val Accuracy: 28.00%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.0040 | Train Accuracy: 31.28%
Val Loss: 2.0282 | Val Accuracy: 30.00%
--------------------------------------------------
Epoch [5/15]
Train Loss: 1.9819 | Train Accuracy: 32.63%
Val Loss: 2.0338 | Val Accuracy: 29.00%
--------------------------------------------------
Epoch [6/15]
Train Loss: 1.9604 | Train Accuracy: 33.82%
Val Loss: 2.0162 | Val Accuracy: 29.65%
--------------------------------------------------
Epoch [7/15]
Train Loss: 1.9415 | Train Accuracy: 34.95%
Val Loss: 1.9978 | Val Accuracy: 30.45%
---------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▃▄▄▄▅▅▅▆▆▇▇▇██
train_loss,█▆▆▅▅▄▄▄▃▃▃▂▂▁▁
val_accuracy,▁▁▂▄▃▄▅▅▆▇▇█▇██
val_loss,█▇▆▅▅▄▄▄▃▃▂▂▂▁▁

0,1
epoch,15.0
train_accuracy,44.98062
train_loss,1.76707
val_accuracy,33.15
val_loss,1.93446


[34m[1mwandb[0m: Agent Starting Run: k7rgznkh with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	dense_activation: tanh
[34m[1mwandb[0m: 	dropout_organisation: 4
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 0.5
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 64
[34m[1mwandb[0m: 	number_of_filters: 256
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: True


Epoch [1/15]
Train Loss: 2.2789 | Train Accuracy: 14.68%
Val Loss: 2.2353 | Val Accuracy: 18.70%
--------------------------------------------------
Epoch [2/15]
Train Loss: 2.1980 | Train Accuracy: 19.89%
Val Loss: 2.1849 | Val Accuracy: 22.30%
--------------------------------------------------
Epoch [3/15]
Train Loss: 2.1390 | Train Accuracy: 23.15%
Val Loss: 2.1414 | Val Accuracy: 23.35%
--------------------------------------------------
Epoch [4/15]
Train Loss: 2.1012 | Train Accuracy: 24.93%
Val Loss: 2.1073 | Val Accuracy: 25.15%
--------------------------------------------------
Epoch [5/15]
Train Loss: 2.0718 | Train Accuracy: 26.62%
Val Loss: 2.0980 | Val Accuracy: 25.25%
--------------------------------------------------
Epoch [6/15]
Train Loss: 2.0437 | Train Accuracy: 27.79%
Val Loss: 2.0670 | Val Accuracy: 26.80%
--------------------------------------------------
Epoch [7/15]
Train Loss: 2.0243 | Train Accuracy: 28.12%
Val Loss: 2.0583 | Val Accuracy: 26.05%
---------------

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_accuracy,▁▃▄▅▆▆▆▇▇▇▇█▇██
train_loss,█▆▅▅▄▄▃▃▂▂▂▂▂▁▁
val_accuracy,▁▄▅▆▆█▇███▇▇▇▇▄
val_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▂

0,1
epoch,15.0
train_accuracy,33.06663
train_loss,1.91259
val_accuracy,22.0
val_loss,2.03737


[34m[1mwandb[0m: Agent Starting Run: tuuf4hu1 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	dense_activation: sigmoid
[34m[1mwandb[0m: 	dropout_organisation: 4
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	factor: 3
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 128
[34m[1mwandb[0m: 	number_of_filters: 256
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: True


Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 70, in wandb_sweep
    scaler.scale(loss).backward()
  File "/usr/local/lib/python3.11/dist-packages/torch/_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py", line 347, in backward
    _engine_run_backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 498.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 490.12 MiB is free. Process 2641 has 14.26 GiB memory in use. Of the allocated memory 13.91 GiB is allocated by PyTorch, and 208.06 MiB is reserved by PyTorch but unallocated. If 

Run tuuf4hu1 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 70, in wandb_sweep
    scaler.scale(loss).backward()
  File "/usr/local/lib/python3.11/dist-packages/torch/_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py", line 347, in backward
    _engine_run_backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 498.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 490.12 MiB is free. Process 2641 has 14.

Epoch [1/5]
Train Loss: 2.3483 | Train Accuracy: 10.71%
Val Loss: 2.3121 | Val Accuracy: 8.45%
--------------------------------------------------
Epoch [2/5]
Train Loss: 2.3314 | Train Accuracy: 10.59%
Val Loss: 2.3070 | Val Accuracy: 9.80%
--------------------------------------------------
Epoch [3/5]
Train Loss: 2.3214 | Train Accuracy: 10.79%
Val Loss: 2.3062 | Val Accuracy: 9.30%
--------------------------------------------------
Epoch [4/5]
Train Loss: 2.3178 | Train Accuracy: 11.94%
Val Loss: 2.3081 | Val Accuracy: 9.45%
--------------------------------------------------
Epoch [5/5]
Train Loss: 2.3162 | Train Accuracy: 11.54%
Val Loss: 2.3081 | Val Accuracy: 9.05%
--------------------------------------------------
Best model saved to best_model.pth with val_accuracy=9.80%


0,1
epoch,▁▃▅▆█
train_accuracy,▂▁▂█▆
train_loss,█▄▂▁▁
val_accuracy,▁█▅▆▄
val_loss,█▂▁▃▃

0,1
epoch,5.0
train_accuracy,11.53894
train_loss,2.31616
val_accuracy,9.05
val_loss,2.30805


[34m[1mwandb[0m: Agent Starting Run: oi3p97zm with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	conv_activation: relu
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dropout_organisation: 4
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	factor: 2
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	max_pooling_size: 2
[34m[1mwandb[0m: 	n_classes: 10
[34m[1mwandb[0m: 	n_neurons: 512
[34m[1mwandb[0m: 	number_of_filters: 64
[34m[1mwandb[0m: 	padding: 1
[34m[1mwandb[0m: 	stride: 1
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: False


Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 70, in wandb_sweep
    scaler.scale(loss).backward()
  File "/usr/local/lib/python3.11/dist-packages/torch/_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py", line 347, in backward
    _engine_run_backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 62.12 MiB is free. Process 2641 has 14.68 GiB memory in use. Of the allocated memory 14.50 GiB is allocated by PyTorch, and 33.85 MiB is reserved by PyTorch but unallocated. If res

Run oi3p97zm errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 70, in wandb_sweep
    scaler.scale(loss).backward()
  File "/usr/local/lib/python3.11/dist-packages/torch/_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py", line 347, in backward
    _engine_run_backward(
  File "/usr/local/lib/python3.11/dist-packages/torch/autograd/graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 62.12 MiB is free. Process 2641 has 14.68

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1326, in convert
    return t.to(
           ^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocat

Run u0gaaysk errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 67, in wandb_sweep
    pred = model(x)
           ^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31/1099850962.py", line 93, in forward
    x = block(x)
        ^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Run dl7meuqk errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 67, in wandb_sweep
    pred = model(x)
           ^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31/1099850962.py", line 93, in forward
    x = block(x)
        ^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packa

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 4.12 MiB is free. Process 2641 has 14.73 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 35.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Run kvfu7qsx errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 4.12 MiB is free. Process 2641 has 14.73 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 35.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

[34m[1mwandb[0m: [32m[41mERROR[0m Run kvfu7qsx errored:
[34m[1mwandb[0m: [32m[41mERROR[0m 

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 2641 has 14.74 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 36.49 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Run u2hr5wmc errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 2641 has 14.74 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 36.49 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

[34m[1mwandb[0m: [32m[41mERROR[0m Run u2hr5wmc errored:
[34m[1mwandb[0m: [32m[41mERROR[0m 

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 2641 has 14.74 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 34.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Run 1lcghy8k errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 62, in wandb_sweep
    x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 2.12 MiB is free. Process 2641 has 14.74 GiB memory in use. Of the allocated memory 14.56 GiB is allocated by PyTorch, and 34.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

[34m[1mwandb[0m: [32m[41mERROR[0m Run 1lcghy8k errored:
[34m[1mwandb[0m: [32m[41mERROR[0m 

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1326, in convert
    return t.to(
           ^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocat

Run 23u16tuq errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1326, in convert
    return t.to(
           ^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocat

Run zoebx0el errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/

Traceback (most recent call last):
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1326, in convert
    return t.to(
           ^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocat

Run 5aee1nm1 errored:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
    self._function()
  File "/tmp/ipykernel_31/1913325791.py", line 51, in wandb_sweep
    ).to(device)
      ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
                    ^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/