In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torchvision import transforms, datasets
import wandb
from pytorch_lightning.loggers import WandbLogger
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class FlexibleCNN(pl.LightningModule):
    def __init__(
        self,
        input_channels=3,  # RGB images
        num_classes=10,
        filters=[32, 64, 128, 256, 512],  # Number of filters for each conv layer
        filter_size=3,  # Size of convolutional filters
        conv_activation='relu',  # Activation for conv layers
        dense_activation='relu',  # Activation for dense layer
        dense_neurons=128,  # Number of neurons in the dense layer
        filter_organization='double',  # 'same', 'double', or 'half'
        batch_norm=True,
        dropout_rate=0.3,
        dropout_position='after_pool',  # 'after_conv', 'after_pool', or 'after_dense'
        learning_rate=0.001,
        optimizer='adam'
    ):
        super().__init__()
        self.save_hyperparameters()
        
        # Adjust filters based on organization strategy
        if filter_organization == 'same':
            self.filters = [filters[0]] * 5
        elif filter_organization == 'double':
            self.filters = [filters[0]]
            for i in range(4):
                self.filters.append(self.filters[-1] * 2)
        elif filter_organization == 'half':
            self.filters = [filters[0]]
            for i in range(4):
                self.filters.append(max(self.filters[-1] // 2, 8))  # Ensure at least 8 filters
        else:
            self.filters = filters[:5]  # Use the provided filters directly (up to 5)
        
        # Get the appropriate activation function
        if conv_activation.lower() == 'relu':
            self.conv_act_fn = nn.ReLU()
        elif conv_activation.lower() == 'gelu':
            self.conv_act_fn = nn.GELU()
        elif conv_activation.lower() == 'silu' or conv_activation.lower() == 'swish':
            self.conv_act_fn = nn.SiLU()
        elif conv_activation.lower() == 'mish':
            self.conv_act_fn = nn.Mish()
        else:
            self.conv_act_fn = nn.ReLU()  # Default to ReLU
            
        # Get dense layer activation function
        if dense_activation.lower() == 'relu':
            self.dense_act_fn = nn.ReLU()
        elif dense_activation.lower() == 'gelu':
            self.dense_act_fn = nn.GELU()
        elif dense_activation.lower() == 'silu' or dense_activation.lower() == 'swish':
            self.dense_act_fn = nn.SiLU()
        elif dense_activation.lower() == 'mish':
            self.dense_act_fn = nn.Mish()
        else:
            self.dense_act_fn = nn.ReLU()  # Default to ReLU
        
        # Create convolutional layers
        self.conv_layers = nn.ModuleList()
        in_channels = input_channels
        
        for i in range(5):
            # Create a block containing conv, (optional) batch norm, activation, and pooling
            block = []
            
            # Convolutional layer
            block.append(nn.Conv2d(in_channels, self.filters[i], kernel_size=filter_size, padding=filter_size//2))
            
            # Batch normalization (if enabled)
            if batch_norm:
                block.append(nn.BatchNorm2d(self.filters[i]))
            
            # Activation function
            block.append(self.conv_act_fn)
            
            # Dropout after convolution (if specified)
            if dropout_rate > 0 and dropout_position == 'after_conv':
                block.append(nn.Dropout2d(dropout_rate))
                
            # Max pooling
            block.append(nn.MaxPool2d(2))
            
            # Dropout after pooling (if specified)
            if dropout_rate > 0 and dropout_position == 'after_pool':
                block.append(nn.Dropout2d(dropout_rate))
            
            # Add the block to the module list
            self.conv_layers.append(nn.Sequential(*block))
            
            # Update input channels for next layer
            in_channels = self.filters[i]
        
        # Calculate the size of flattened output from last conv layer
        # For 224x224 input, after 5 pooling layers (each dividing by 2), we get 7x7 feature maps
        self.feature_size = (224 // (2**5)) ** 2 * self.filters[-1]
        
        # Dense layers
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.feature_size, dense_neurons),
            self.dense_act_fn
        )
        
        # Dropout after dense (if specified)
        self.dropout_dense = None
        if dropout_rate > 0 and dropout_position == 'after_dense':
            self.dropout_dense = nn.Dropout(dropout_rate)
        
        # Output layer
        self.output_layer = nn.Linear(dense_neurons, num_classes)
        
        # Store other hyperparameters
        self.learning_rate = learning_rate
        self.optimizer_name = optimizer

    def forward(self, x):
        # Pass through all conv blocks
        for conv_block in self.conv_layers:
            x = conv_block(x)
        
        # Pass through dense layers
        x = self.classifier(x)
        
        # Apply dropout if needed
        if self.dropout_dense:
            x = self.dropout_dense(x)
        
        # Output layer
        x = self.output_layer(x)
        return x
    
    def configure_optimizers(self):
        if self.optimizer_name.lower() == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        elif self.optimizer_name.lower() == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate, momentum=0.9)
        elif self.optimizer_name.lower() == 'rmsprop':
            optimizer = torch.optim.RMSprop(self.parameters(), lr=self.learning_rate)
        else:
            optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
            
        return optimizer
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        
        # Calculate accuracy
        preds = torch.argmax(y_hat, dim=1)
        acc = (preds == y).float().mean()
        self.log('train_acc', acc, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        
        # Calculate accuracy
        preds = torch.argmax(y_hat, dim=1)
        acc = (preds == y).float().mean()
        self.log('val_acc', acc, prog_bar=True)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('test_loss', loss, prog_bar=True)
        
        # Calculate accuracy
        preds = torch.argmax(y_hat, dim=1)
        acc = (preds == y).float().mean()
        self.log('test_acc', acc, prog_bar=True)
        
        return loss


In [3]:
class INaturalistDataModule(pl.LightningDataModule):
    def __init__(self, batch_size=32, num_workers=4, data_augmentation=False):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.data_augmentation = data_augmentation
        
    def setup(self, stage=None):
        # Define transformations
        if self.data_augmentation:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(10),
                transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
                transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
            
        val_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        # Load dataset
        full_dataset = datasets.ImageFolder(root="/kaggle/input/nature-12k/inaturalist_12K/train", transform=train_transform)
        # Split into train/val/test
        train_size = int(0.8 * len(full_dataset))
        val_size = len(full_dataset) - train_size
        
        self.train_dataset, self.val_dataset = random_split(
            full_dataset, [train_size, val_size]
        )
        
        # Apply different transforms to validation and test sets
        self.val_dataset.dataset = datasets.ImageFolder(
            root="/kaggle/input/nature-12k/inaturalist_12K/train", transform=val_transform
        )
        self.test_dataset = datasets.ImageFolder(
            root="/kaggle/input/nature-12k/inaturalist_12K/val", transform=val_transform
        )
    
    def train_dataloader(self):
        return DataLoader(
            self.train_dataset, 
            batch_size=self.batch_size, 
            shuffle=True, 
            num_workers=self.num_workers
        )
    
    def val_dataloader(self):
        return DataLoader(
            self.val_dataset, 
            batch_size=self.batch_size, 
            shuffle=False, 
            num_workers=self.num_workers
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.test_dataset, 
            batch_size=self.batch_size, 
            shuffle=False, 
            num_workers=self.num_workers
        )

In [4]:
def train_with_wandb(config=None):
    """Train with WandB hyperparameter tuning"""
    with wandb.init(config=config):
        # Get hyperparameters from wandb
        config = wandb.config
        
        # Create model with the hyperparameters
        model = FlexibleCNN(
            input_channels=3,
            num_classes=10,
            filters=[config.filters_initial],
            filter_size=config.filter_size,
            conv_activation=config.conv_activation,
            dense_activation=config.dense_activation,
            dense_neurons=config.dense_neurons,
            filter_organization=config.filter_organization,
            batch_norm=config.batch_norm,
            dropout_rate=config.dropout_rate,
            dropout_position=config.dropout_position,
            learning_rate=config.learning_rate,
            optimizer=config.optimizer
        )
        
        # Create data module
        data_module = INaturalistDataModule(
            batch_size=config.batch_size,
            data_augmentation=config.data_augmentation
        )
        
        # Create WandB logger
        wandb_logger = WandbLogger(project="inaturalist-cnn")
        
        # Create callbacks
        early_stop_callback = EarlyStopping(
            monitor='val_loss',
            patience=10,
            mode='min'
        )
        
        checkpoint_callback = ModelCheckpoint(
            monitor='val_acc',
            dirpath='./checkpoints/',
            filename='inaturalist-cnn-{epoch:02d}-{val_acc:.2f}',
            save_top_k=1,
            mode='max'
        )
        
        # Create trainer
        trainer = pl.Trainer(
            max_epochs=10,
            logger=wandb_logger,
            callbacks=[early_stop_callback, checkpoint_callback],
            log_every_n_steps=10
        )
        
        # Train the model
        trainer.fit(model, data_module)

In [5]:
# Stage 1: Bayesian search for initial exploration
sweep_config_stage1 = {
    'name': 'Stage1-Sweep',
    'method': 'bayes',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        'filters_initial': {'values': [16, 32, 64]},
        'filter_organization': {'values': ['same', 'double', 'half']},
        'conv_activation': {'values': ['relu', 'gelu', 'silu', 'mish']},
        'dense_activation': {'values': ['relu', 'gelu', 'silu', 'mish']},
        'learning_rate': {'values': [0.001, 0.01]},
        'batch_norm': {'values': [True, False]},
        'optimizer': {'values': ['adam', 'sgd', 'rmsprop']},
        'batch_size': {'values': [32, 64]},
        
        # Set default values for other parameters
        'filter_size': {'value': 3},
        'dense_neurons': {'value': 128},
        'dropout_rate': {'value': 0.3},
        'dropout_position': {'value': 'after_pool'},
        'data_augmentation': {'value': True}
    },
    'early_terminate': {'type': 'hyperband', 'min_iter': 5}
}

In [4]:
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()

In [5]:
wandb_api = user_secrets.get_secret("wandb_api") 

In [6]:
wandb.login(key=wandb_api)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mda24m027[0m ([33mda24m027-indian-institute-of-technology-madras[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [9]:
sweep_id = wandb.sweep(sweep_config_stage1, project="DA6401_Assignment2")

Create sweep with ID: oyruzr45
Sweep URL: https://wandb.ai/da24m027-indian-institute-of-technology-madras/DA6401_Assignment2/sweeps/oyruzr45


In [None]:
wandb.agent(sweep_id, train_with_wandb, count=30)

[34m[1mwandb[0m: Agent Starting Run: ioqeia6i with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇█████
train_acc,▂▂▅▅▁▂▄▆▅▂▂▅▅▃▄▄▂▇█▂▁▅▅▃▃▇▂▄▃▄▇▄▇▁▂▅▂▃▂▄
train_loss,▇▆▆▇▆▅▆▆█▆▅▆▃▅▅▄▅▅▄▅▆▄▃▃▂▇▅▄▄▃▃▃▁▃▃▁▄▃▅▆
trainer/global_step,▁▁▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇██
val_acc,▁▅▅▆▆▇▆███
val_loss,█▅▄▃▃▃▂▂▁▁

0,1
epoch,9.0
train_acc,0.29032
train_loss,2.10746
trainer/global_step,2499.0
val_acc,0.3225
val_loss,1.9193


[34m[1mwandb[0m: Agent Starting Run: qsutopsw with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /kaggle/working/checkpoints exists and is not empty.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇███
train_acc,▁▂▂▂▃▃▅▆▆▄▄▃▃▃▃▁▃▄▃▄▄█▇▅▆█▅▅▄▃▄▄▆▆▇▆▃▇▅▆
train_loss,█▇▇▇▇▄▄▇▄▇▃▅▅▆▇▄▅▇▂▃▅▃▃▄▃▆▅▄▄▁▄▃▂▁▄▂▂▆▅▃
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇████
val_acc,▁▂▅▆▆▇▆▇▆█
val_loss,█▆▄▃▂▂▂▁▃▁

0,1
epoch,9.0
train_acc,0.22222
train_loss,2.00562
trainer/global_step,1249.0
val_acc,0.3025
val_loss,2.00697


[34m[1mwandb[0m: Agent Starting Run: qr1pagxm with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇███
train_acc,▁▅▃▂▄▅▄▃▁▆▅▄▅▆▄▅▆▇▄▃▃▆▃▄▅▅▅▆▅▆▄▅▆▇█▆▇▇▄▄
train_loss,▇▇▇█▅▅█▇▄▅▅▂▆▄▄▃▇▄▁▃▄▄▁▃▃▅▅▂▃▂▄▇▂▂▃▃▂▂▄▇
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇██
val_acc,▁▄▆▆▆▇▆█▇▇
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,9.0
train_acc,0.34921
train_loss,1.86839
trainer/global_step,1249.0
val_acc,0.277
val_loss,1.98631


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qdwq9rit with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: relu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: mish
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: sgd




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇███████
train_acc,▂▄▂▂▄▅▂▂▃▄▅▂█▄▃▁▂▄▃▃▅▃▄▇▅▃▅▄▄▇▂▅▄▄▆▄▆▇█▆
train_loss,▇▆█▇▇▇▆▇▅█▅▅▆▇▆▄██▂▁▇▅▆▄▅▂▆▃▃▇▅▆▁▆▂▇▄▇▂▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇████
val_acc,▁▄▆▆▆▆▇█▇█
val_loss,█▇▅▅▄▃▂▂▁▁

0,1
epoch,9.0
train_acc,0.2381
train_loss,2.22151
trainer/global_step,1249.0
val_acc,0.1835
val_loss,2.20403


[34m[1mwandb[0m: Agent Starting Run: 59zy5gpy with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
train_acc,█▃▃▆▁▅▅▄▅▂▅▅▅▄▄▅▃▁▃▅▄▄▅█▄▅▂▆▅▅▃▆▄▇▆▃▅▅▅▅
train_loss,▅▂▁▂▂▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇██
val_acc,▇▇▇▄▇█▃▁▄▃
val_loss,█▁▂▂▁▁▃▁▃▁

0,1
epoch,9.0
train_acc,0.11111
train_loss,2.30602
trainer/global_step,1249.0
val_acc,0.0955
val_loss,2.30352


[34m[1mwandb[0m: Agent Starting Run: e5098dbz with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇███████
train_acc,▇██▄▅▂▅▅▃▇▄▅▄▂▃▅▇▄▇▃█▃▅▂▂▁▅▃▅▅▆▅▅▅▄▄▄▄▁▄
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇█████
val_acc,▆▄▃▁▄▁█▄▁▃
val_loss,▂▅▃█▅█▁▂▅▆

0,1
epoch,9.0
train_acc,0.07937
train_loss,2.30482
trainer/global_step,1249.0
val_acc,0.093
val_loss,2.30447


[34m[1mwandb[0m: Agent Starting Run: gfafliw5 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▇▇████
train_acc,▃█▃▃▆▃▄▄▃▁▇▃▃▄▆▃▃▂▆▄▄▆▄█▂▆▇▇▃▄▄▇▃▆▃▃▇▇▃█
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇████
val_acc,███▂▂▁▁▁▂▂
val_loss,▃▁▂▃▄▆▆█▇█

0,1
epoch,9.0
train_acc,0.19355
train_loss,2.30075
trainer/global_step,2499.0
val_acc,0.0945
val_loss,2.30312


[34m[1mwandb[0m: Agent Starting Run: 490yfwr1 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇█████
train_acc,▅▅▅▃▆▅▃▃▅▄█▅▂▃▄▂▆▁▃▂▄▅▄▄▃▅▄▅▂▂▅▅▃▅▅▆▄▃▅▅
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▁▂▁
trainer/global_step,▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▁█▅▁▂▂▂█▆
val_loss,█▁▁▁▁▂▁▁▁▁

0,1
epoch,9.0
train_acc,0.11111
train_loss,4.41998
trainer/global_step,1249.0
val_acc,0.0995
val_loss,2.30407


[34m[1mwandb[0m: Agent Starting Run: a5rjnhdf with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: silu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: sgd




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇█████
train_acc,▂▃▆▅▁▅▃▂▆▅▇▄▅█▃▆▅▇▆▆▇▆▅▃▅▄▆█▆▅▆▃▆▆▇▇▆▆▇▂
train_loss,█▇█▅▅▇▇▅▅▅▄▄▅▆▄▆▄▅▅▄▆▄▅▆▄▄▅▅▅▄▅▄▄▄▄▁▄▄▁▅
trainer/global_step,▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▃▅▆▅▇█▆██
val_loss,█▆▅▃▄▃▁▄▁▁

0,1
epoch,9.0
train_acc,0.31746
train_loss,2.04591
trainer/global_step,1249.0
val_acc,0.2985
val_loss,1.99799


[34m[1mwandb[0m: Agent Starting Run: e63ker37 with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: mish
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: sgd




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇█████
train_acc,▄▆▃▅▃▂▄▃▆▁▆▆▃█▄▅▅▅▆▃▆▅▃▇▅▅▅▃▅▅▄█▄▆▆▄▄▆▅▆
train_loss,▆▇▆▆▆▅▄██▅▅▄▅▄▄▆▅▆▅▄▇▄▂▂▄▃▁▁▃▄▅▃▃▂▅▅▃▄▄▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█
val_acc,▁▂▅▅▅▇█▆▇▇
val_loss,█▆▄▄▃▂▂▁▁▂

0,1
epoch,9.0
train_acc,0.2381
train_loss,2.11675
trainer/global_step,1249.0
val_acc,0.223
val_loss,2.14323


[34m[1mwandb[0m: Agent Starting Run: rchfdl4a with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: mish
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: sgd




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇██
train_acc,▃▁▁▂▂▁▄▄▄▅▆▃▂▄▆▅▄▅▅█▇▆▆▇▆▆▆▇█▇▆█▇▇▅▇▅▇▅▆
train_loss,████████▆█▆▆▆▅▆▄▄▇▄▆▅▃▅▃▃▂▃▁▃▃▁▂▃▃▁▃▂▂▁▃
trainer/global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▇▇▇▇▇▇▇▇▇▇████
val_acc,▁▄▅▆▆▇█▇▇█
val_loss,█▇▆▅▄▃▂▂▂▁

0,1
epoch,9.0
train_acc,0.33333
train_loss,1.89253
trainer/global_step,1249.0
val_acc,0.2875
val_loss,1.98467


[34m[1mwandb[0m: Agent Starting Run: wugyk2h3 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: gelu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
train_acc,▃▅▄▃▃▇▆▃▄▅▄▆▅▂▂▅▃▂▆▄▂▁▅▄▃▄▅█▃▃▅▅▆▄▆▃▅▄▅▆
train_loss,█▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
val_acc,█▂▁▁▃▃▁▁▁▆
val_loss,█▂▃▃▆▆▄▂▁▂

0,1
epoch,9.0
train_acc,0.03226
train_loss,2.30764
trainer/global_step,2499.0
val_acc,0.107
val_loss,2.30428


[34m[1mwandb[0m: Agent Starting Run: xwmwmi5b with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇██
train_acc,▂▅▂▁▁▂▃▂▂▄▃▃▃▄▃▄▁▅▄▄▅▅▄▃▆▅▂▅▅██▇▃▅▅▅▃▆▅▆
train_loss,█▅▆▅▇▅▅▇▅▄▄▄▃▅▅▄▃▃▄▃▅▄▅▃▅▅▅▆▄▃▄▅▄▃▅▃▁▄▃▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▁▃▆▅▆▆▇██
val_loss,█▇▆▄▄▄▃▃▁▁

0,1
epoch,9.0
train_acc,0.19048
train_loss,2.05599
trainer/global_step,1249.0
val_acc,0.269
val_loss,2.06914


[34m[1mwandb[0m: Agent Starting Run: habfsi09 with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇███
train_acc,▂▃▃▅▅▅▁▃▂▅▁▄▃▂▂▂▃▄▃▄▅▄▄▃▆▄▃█▃▂▅▅▅▄▆▆▂▅▅▄
train_loss,▆▇▅▅▃▆▇██▇▂▅▅▆▄▁▄▃▅▅▃█▃▄▅▅█▂▄▄▁▄▇▅▄▃▄▅▄▆
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▄▄▅▆▇█▇██
val_loss,█▆▅▄▃▂▁▂▁▁

0,1
epoch,9.0
train_acc,0.29032
train_loss,1.79286
trainer/global_step,2499.0
val_acc,0.293
val_loss,1.97517


[34m[1mwandb[0m: Agent Starting Run: pns910c6 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇████
train_acc,▁▅▁▃▇▅▆▃▃▆▅▁▅▅▆▁▄▃▆▇▅▆▅▇▆▃▅▇█▆▇▇▄▃█▇▆▇▆█
train_loss,█▇█▆▆▅▄▆▆▆▇▅▂▄▄▃▂█▃▆▃▆▁▆▁▄▂▅▁▅▃▁▄▅▄▅▅▄▃▂
trainer/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
val_acc,▁▁▅▅█▇▇█▇▇
val_loss,█▆▄▄▂▂▂▁▁▁

0,1
epoch,9.0
train_acc,0.31746
train_loss,1.93012
trainer/global_step,1249.0
val_acc,0.3095
val_loss,1.92403


[34m[1mwandb[0m: Agent Starting Run: wescdi40 with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇███
train_acc,▁▁▂▁▁▂▂▅▃▂▅▂▂▂▃▅▅▅▃▇▁▇▃▅▃▅▁█▆▃▅▄▄▂▄▅▅▃▇▇
train_loss,▇▇▇▆▆▅▅▄▆█▄▄▅▆▄▃▃▂▂▅▅▄▅▃▅▅▄▁▁▅▄▄▂▄▁▁▂▃▂▃
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▃▅▆▆▅▇███
val_loss,█▆▄▄▃▄▂▁▁▁

0,1
epoch,9.0
train_acc,0.30159
train_loss,1.99869
trainer/global_step,1249.0
val_acc,0.29
val_loss,1.97174


[34m[1mwandb[0m: Agent Starting Run: 7pa65e0n with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇██
train_acc,▁▃▆▂▂▃▃▅▄▄▇▃▆▄▇▅▅▆█▄▄▄▆▃▃▄▃▄▅▇▃▁▆▅█▂▅▇▆▄
train_loss,▅█▇▅▆▆▄▅▆▅▄▅▂▁▆▄▁▃▃▅▆▆▅▄▄▄▄▆▃▂▃▄▂▆▇▅▄▅▂▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█
val_acc,▁▃▄▆▅▆▅▆█▇
val_loss,█▆▆▄▄▄▃▃▁▂

0,1
epoch,9.0
train_acc,0.28571
train_loss,2.09073
trainer/global_step,1249.0
val_acc,0.287
val_loss,1.98071


[34m[1mwandb[0m: Agent Starting Run: tovwgru2 with config:
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇████
train_acc,▁▁▅▅▅▄▃█▁▃▄▅▂▇▄▅▄▅▄▃▅▅▇▆▅▃▆▃▅▇▃▅▄▅▇▅▃▅▆▆
train_loss,█▇▇▅█▆▆▆▄▆▆▇▂▄▆▄▆▄▄▆▆▃▃▄▆▄▃▃▅▆▅▂▅▁▅▂▃▅▅▅
trainer/global_step,▁▁▁▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██
val_acc,▁▂▅▃▄▃▅▅█▇
val_loss,█▅▄▄▃▃▂▁▁▁

0,1
epoch,9.0
train_acc,0.25397
train_loss,2.01875
trainer/global_step,1249.0
val_acc,0.2585
val_loss,2.04713


[34m[1mwandb[0m: Agent Starting Run: oydr63f1 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: gelu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▅▅▅▆▆▆▆▆▆▆▆▇▇▇██████
train_acc,▁▂▅▅▃▄▅▅▅▂▆▅▅▆▅▇▆▅▅▅▅▅▆▅▅▄▅▅▅▆▇▇▇▅▅▇██▇▅
train_loss,▇▇▅▅▆▆▅▂▃▄█▃▅▄▆▅▆▄▄▃▄▃▇▄▇█▃▄▃▃▇▇▂▇▂▂▄▁▄▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇█████
val_acc,▁▄▃▅▃█▅▆▆▅
val_loss,█▄▆▂▆▁▂▂▂▄

0,1
epoch,9.0
train_acc,0.32258
train_loss,1.83977
trainer/global_step,2499.0
val_acc,0.2505
val_loss,2.09835


[34m[1mwandb[0m: Agent Starting Run: 00zfcvja with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: silu
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train_acc,▂▃▂▂▅▃▅▅▃▆▆█▄▃▆▅▇▅▄▅▆▄▆▇▅▅▆▃█▄▅▁▆█▅▇▇▆▄▆
train_loss,▇█▅▄▅▅▆▆▅▆▂▃▆▃▃▃▆▃▅▄▇▆▄▂▃▅▂▄▂▄▃▃▁▁▃▃▂▂▂▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
val_acc,▁▄▆▆▆▆▇▇██
val_loss,█▅▄▃▄▃▂▂▁▁

0,1
epoch,9.0
train_acc,0.31746
train_loss,1.92848
trainer/global_step,1249.0
val_acc,0.3325
val_loss,1.90251


[34m[1mwandb[0m: Agent Starting Run: vhsr2zcl with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [8]:
api = wandb.Api()
sweep = api.sweep("da24m027-indian-institute-of-technology-madras/DA6401_Assignment2/oyruzr45")
best_run_stage1 = sweep.best_run()
best_stage1_results = best_run_stage1.config

[34m[1mwandb[0m: Sorting runs by -summary_metrics.val_acc


True


In [9]:
best_stage1_results

{'filters': [64],
 'optimizer': 'rmsprop',
 'batch_norm': False,
 'batch_size': 64,
 'filter_size': 3,
 'num_classes': 10,
 'dropout_rate': 0.3,
 'dense_neurons': 128,
 'learning_rate': 0.001,
 'input_channels': 3,
 'conv_activation': 'mish',
 'filters_initial': 64,
 'dense_activation': 'relu',
 'dropout_position': 'after_pool',
 'data_augmentation': True,
 'filter_organization': 'same'}

In [14]:
sweep_config_stage2 = {
    'name': 'Stage2-Sweep',
    'method': 'bayes',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        # Fixed parameters from Stage 1
        'filters_initial': {'value': best_stage1_results['filters_initial']},
        'filter_organization': {'value': best_stage1_results['filter_organization']},
        'conv_activation': {'value': best_stage1_results['conv_activation']},
        'dense_activation': {'value': best_stage1_results['dense_activation']},
        'learning_rate': {'value': best_stage1_results['learning_rate']},
        'batch_norm': {'value': best_stage1_results['batch_norm']},
        'optimizer': {'value': best_stage1_results['optimizer']},
        'batch_size': {'value': best_stage1_results['batch_size']},
            
        # Parameters to tune in Stage 2
        'filter_size': {'values': [3, 5]},
        'dense_neurons': {'values': [64, 128, 256]},
        'dropout_rate': {'values': [0.2, 0.3, 0.4]},
        'dropout_position': {'values': ['after_conv', 'after_pool', 'after_dense']},
        'data_augmentation': {'values': [True, False]}
    }
}

In [15]:
sweep_id_stage2 = wandb.sweep(sweep_config_stage2, project="DA6401_Assignment2")

Create sweep with ID: gk7oqzq1
Sweep URL: https://wandb.ai/da24m027-indian-institute-of-technology-madras/DA6401_Assignment2/sweeps/gk7oqzq1


In [None]:
wandb.agent(sweep_id_stage2, train_with_wandb, count=30)

[34m[1mwandb[0m: Agent Starting Run: t20oz2p5 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█████
train_acc,▁▄▂▃▂▃▂▄▃▂▃▄▅▇▄▅▄▄█▄▄▆▃▆▄▅▆▅▅▅▄▇▆▇▇▅▇▇█▆
train_loss,█▇▇▇▆▆▆▇▇▅▆▆▅▄▆▄▆▃▅▇▅▂▆▄█▄▃▄▃▃▅▄▃▃▃▄▁▄▅▄
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▃▅▆▆▆▇▇▇█
val_loss,█▆▄▄▃▃▂▂▂▁

0,1
epoch,9.0
train_acc,0.36508
train_loss,1.74197
trainer/global_step,1249.0
val_acc,0.359
val_loss,1.85864


[34m[1mwandb[0m: Agent Starting Run: z6z0gowi with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.4
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:654: Checkpoint directory /kaggle/working/checkpoints exists and is not empty.


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇██
train_acc,▃▃▄▄▂▂▄▄▄▄▃▄▂▃▂▃█▃▅▃▂▄▃▂▃▃▅▁▅▅▅▃▅▅▄▂▃▃▄▄
train_loss,▇▆▃▄▃▃▃▂▂▃▂▃▃▃▅▃▂▃▂▂▂▁▂▂▂▂▂▃▂▃▁▂▂▂▃▂▁▂▄█
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇██
val_acc,▂▃▁█▇▂▁▂▂▄
val_loss,▇██▄▁▃▄▅▁▅

0,1
epoch,9.0
train_acc,0.06349
train_loss,2.29889
trainer/global_step,1249.0
val_acc,0.1185
val_loss,2.2938


[34m[1mwandb[0m: Agent Starting Run: 9frg1az0 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇██
train_acc,▁▃▁▂▄▃▅▅▃▃▃▄▄▄▄▄▅▄▆█▄▃▇▅▄▅▃▃▄▅▅▆▇▅▆▄▁▃▄▂
train_loss,█▆▅▄▅▆▄▅▃▄▄▄▅▅▅▃▄▃▃▂▃▂▃▄▂▇▄▃▃▄▂▂▃▆▃▄▂▂▁▁
trainer/global_step,▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
val_acc,▁▂▃▄▅▆▇█▆▄
val_loss,█▆▆▅▄▂▂▁▃▅

0,1
epoch,9.0
train_acc,0.11111
train_loss,2.36805
trainer/global_step,1249.0
val_acc,0.1935
val_loss,2.22488


[34m[1mwandb[0m: Agent Starting Run: cal5lytc with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇████
train_acc,▁▂▂▃▅▄▄▃▃▅▅▃▄▃▄▅▅▃█▆▃▆▃▃▁▁▃▁▂▁▁▁▂▁▃▂▂▁▃▃
train_loss,▅▅█▅▅▅▅▅▆▄▃▆▆▄▅▂▃▄▃▁▅▁▃▄▆▆▆▆▆▆▆▆▆▆▆▆▆▆▅▆
trainer/global_step,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
val_acc,▃▅▆▇▇█▁▂▁▁
val_loss,▇▅▄▂▃▁████

0,1
epoch,9.0
train_acc,0.11111
train_loss,2.31386
trainer/global_step,1249.0
val_acc,0.1
val_loss,2.29394


[34m[1mwandb[0m: Agent Starting Run: qgvcqfum with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇███
train_acc,▅▃▅▁▂▂▅▃▆▅█▅█▄▆▅▆▄▇█▇█▇▆▇▅▆▄▂▂▅▅▆▅▅▅▆▂▅▄
train_loss,▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▂▂▁▃▂▂▂▁▂▂▂▂▂▂▂▂▂▂█
trainer/global_step,▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇████
val_acc,▃█▁██▇▇▃▃▁
val_loss,▂▁█▁▁▁▁▂▂▂

0,1
epoch,9.0
train_acc,0.11111
train_loss,2.30485
trainer/global_step,1249.0
val_acc,0.102
val_loss,2.30985


[34m[1mwandb[0m: Agent Starting Run: 1kwiy0j7 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_pool
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 5
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇███
train_acc,▂▂▅▂▂▃▄▃▃▂▅▇▃▅▆▁▄▅▃▅▃▂▆▄▆▅▂▅▄▁▆▇▆▇▇▅▆█▃▃
train_loss,▆▄▃▃▃▃▃▃▃▃▃▂▂▃▂▃▃▃▄▃▃▃▂▂▂▃▃▃▂▃▃▃▂▂▁█▂▃▂▂
trainer/global_step,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇███
val_acc,▁▆▆▄▄▇▄▆▆█
val_loss,█▆▄▄▃▂▁▂▁▁

0,1
epoch,9.0
train_acc,0.11111
train_loss,2.26431
trainer/global_step,1249.0
val_acc,0.161
val_loss,2.24914


[34m[1mwandb[0m: Agent Starting Run: ju8emwcx with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇█
train_acc,▂▁▂▂▃▄▃▄▃▅▅▅▂▅▄▇▅▅▆▃▇▅▇▅█▅▇▇▆▆▅▅▇▆▄▅█▇▆▇
train_loss,█▇▆▆▆▅▅▆▅▆▅▅▅▅▅▃▃▄▃▃▂▃▃▂▃▁▁▃▃▄▃▁▃▂▂▁▁▂▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇█
val_acc,▁▂▄▆▄▇████
val_loss,█▇▅▄▅▂▂▂▁▁

0,1
epoch,9.0
train_acc,0.33333
train_loss,1.88349
trainer/global_step,1249.0
val_acc,0.33
val_loss,1.88388


[34m[1mwandb[0m: Agent Starting Run: v4oom599 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇████
train_acc,▁▂▃▁▃▅▄▁▆▂▁▅▂▂▃▃▅▅▆▆▅▃▅█▆▄▆▇▆▆▅▆▇▆▅▇▇▆▃▄
train_loss,█▄▅▄▄▄▄▅▄▄▄▃▃▄▄▄▄▄▃▃▃▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▁▂▁▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
val_acc,▁▃▄▄▆▅▅▆▆█
val_loss,█▆▅▅▃▃▃▂▄▁

0,1
epoch,9.0
train_acc,0.25397
train_loss,1.86358
trainer/global_step,1249.0
val_acc,0.3595
val_loss,1.88943


[34m[1mwandb[0m: Agent Starting Run: 87kn0jar with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train_acc,▁▁▁▃▃▄▂▅▅▃▄▄▅▃▄▅▆▅▄▃▃▅▆▄▅▃▆▄▆▄▇▅▅▆▅▄█▇▅▆
train_loss,▇▇█▇▇▅▆▅▆▆▄▅▅▅▄▄▄▄▅▅▃▇▅▄▃▆▂▄▃▅▂▆▄▄▄▃▃▄▄▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▃▅▆▅▅▅▇▇█
val_loss,█▅▄▃▃▆▄▂▂▁

0,1
epoch,9.0
train_acc,0.38095
train_loss,1.65516
trainer/global_step,1249.0
val_acc,0.339
val_loss,1.90561


[34m[1mwandb[0m: Agent Starting Run: fr6vo2ee with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇█████
train_acc,▁▃▃▄▄▄▃▃▂▃▆▆▂▅▅▅▃▅▅▃▄▇▄▃▅▆█▅▆▅▅▇▅▆▆█▅█▆▇
train_loss,█▅▅▅▄▄▅▅▄▅▄▃▄▄▄▄▃▄▃▃▄▄▄▄▅▃▂▃▅▃▃▂▂▄▂▂▃▁▃▂
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████
val_acc,▁▂▄▂▄▇█▇█▇
val_loss,█▆▅▆▅▂▁▁▁▁

0,1
epoch,9.0
train_acc,0.36508
train_loss,1.77408
trainer/global_step,1249.0
val_acc,0.309
val_loss,1.92782


[34m[1mwandb[0m: Agent Starting Run: sbfrldrz with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇███
train_acc,▁▃▁▂▃▄▃▃▃▃▅▆▄▄▄▆▅▄▄▅▆▄▄▅▄▆▄█▄▅▅▅▅▅▇▇▆▆▇▆
train_loss,█▇██▇▇▇▇▆▆▇▅▆▆▅▅▄▆▅█▃▄▅▃▅▄▃▄▃▄▅▃▄▃▅▄▂▃▃▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇█
val_acc,▁▃▅▅█▆▇█▇▇
val_loss,█▅▅▄▂▃▂▁▃▃

0,1
epoch,9.0
train_acc,0.42857
train_loss,1.8879
trainer/global_step,1249.0
val_acc,0.298
val_loss,1.98749


[34m[1mwandb[0m: Agent Starting Run: d9vjh31o with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▅▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇████
train_acc,▂▁▂▃▃▂▂▃▄▅▅▅▄▄▃▆▃▄▅▆▄▅▆▅▆▅▄▅▆▃█▄▅▅▅▇▅▇▅▃
train_loss,█████▆▇▇▆▆█▆▅▃▆▆▆▅▄▅▅▄▆▃▅▃▆▂▄▃▁▂▃▇▃▃▃▂▃▃
trainer/global_step,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇█
val_acc,▁▅▅▆▆▇▇▇█▇
val_loss,█▆▄▄▃▂▂▂▁▂

0,1
epoch,9.0
train_acc,0.22222
train_loss,1.98763
trainer/global_step,1249.0
val_acc,0.3335
val_loss,1.88454


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g6m0t7vd with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇████
train_acc,▂▁▃▂▁▃▄▃▂▅▂▃▅▄▆▄▄▆▄▅▅▄▅▅▄▅▅▆▆▅▆▄▅▆▆▆▄▅▆█
train_loss,▆▆▅▅▅▅▅▅▄▅▅▄▅▄▄▄▅▅▅▃▄▃▄▄▄▃▂▄▃▃▄█▄▃▄▃▃▂▃▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇███
val_acc,▁▂▄▅▅▆▇▇██
val_loss,█▇▆▅▄▄▃▂▁▂

0,1
epoch,9.0
train_acc,0.25397
train_loss,1.9486
trainer/global_step,1249.0
val_acc,0.3055
val_loss,1.98416


[34m[1mwandb[0m: Agent Starting Run: qopy5ss6 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▇████
train_acc,▃▂▁▄▂▂▃▄▂▂▅▂▅▅▄▄▁▅▃▄▄▅▂▅▅▅▄▃▅▄▇▅▆▆▆▃█▅▅▆
train_loss,█▃▃▂▂▃▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▁▂▁▁▂▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
val_acc,▁▃▅▄▆▆▇█▇█
val_loss,██▆▅▄▄▄▁▃▁

0,1
epoch,9.0
train_acc,0.25397
train_loss,2.00956
trainer/global_step,1249.0
val_acc,0.2605
val_loss,2.03532


[34m[1mwandb[0m: Agent Starting Run: v0tk5isv with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇███
train_acc,▁▁▂▄▃▄▃▅▄▄▃▄▄▄▃▅▅▄▄▄▄▄▄▅▅▄▆▆▅▅▆▅▅▆▅▄▆█▆█
train_loss,▇██▇▇▆▆▆██▇▇▆▆▆▆▅▆▄▆▄▆▅▅▄▅▅▁▄▅▄▃▅▅▂▃▁▃▂▃
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▄▅▇▆▅▇███
val_loss,█▅▄▃▃▃▁▂▁▂

0,1
epoch,9.0
train_acc,0.47619
train_loss,1.59374
trainer/global_step,1249.0
val_acc,0.339
val_loss,1.90082


[34m[1mwandb[0m: Agent Starting Run: 0eagxcag with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇████
train_acc,▁▂▃▁▁▃▄▁▄▅▄▁▃▆▃▃▅▃▃▄▂▃▅▅▃▅▅▂▅▂▄▆▇▆▆▇▇▃█▇
train_loss,█▆▅▅▅▄▄▄▄▃▄▅▄▅▃▄▃▃▄▄▄▄▃▂▃▃▄▄▃▄▁▃▂▃▁▂▁▂▂▂
trainer/global_step,▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇████
val_acc,▁▄▄▆▅▇▇▇▇█
val_loss,█▅▅▃▄▃▂▂▁▁

0,1
epoch,9.0
train_acc,0.42857
train_loss,1.73134
trainer/global_step,1249.0
val_acc,0.3455
val_loss,1.87766


[34m[1mwandb[0m: Agent Starting Run: qssus4a8 with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 64
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇████
train_acc,▁▂▃▃▄▂▂▅▃▂▄▄▄▅▅▅▄▆▆▄▇▄▅▅▅▆▄▆▆▅▅▇▄▅▆▆▇▆█▆
train_loss,█▇▇▆▇▇▇▇▇▆▇▆▃▇▆▅▆█▄▇▆▄▅▄▄▄▃▅▂▅▃▃▆▅▁▃▄▅▄▆
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▃▃▆▇▇▇▆▇█
val_loss,█▆▅▄▃▂▃▂▃▁

0,1
epoch,9.0
train_acc,0.22222
train_loss,2.2365
trainer/global_step,1249.0
val_acc,0.3125
val_loss,1.95494


[34m[1mwandb[0m: Agent Starting Run: fzs75wjg with config:
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_activation: mish
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_activation: relu
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout_position: after_dense
[34m[1mwandb[0m: 	dropout_rate: 0.3
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	filter_size: 3
[34m[1mwandb[0m: 	filters_initial: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: rmsprop




Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

## Best Model Evaluation

In [16]:
def visualize_test_results(model, data_module):
    """Visualize test results in a 10x3 grid (10 classes, 3 per class)"""
    import matplotlib.pyplot as plt
    import numpy as np
    import torch
    from torchvision import transforms
    
    # Set model to evaluation mode
    model.eval()
    
    # Get class names
    class_names = data_module.test_dataset.classes if hasattr(data_module.test_dataset, 'classes') else [f"Class {i}" for i in range(10)]
    
    # Get test dataloader
    test_loader = data_module.test_dataloader()
    
    # Create dictionary to store examples for each class
    class_examples = {i: [] for i in range(10)}
    
    # Get examples for each class
    with torch.no_grad():
        for images, labels in test_loader:
            # Get predictions
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            
            # Store examples
            for i, (image, label, pred) in enumerate(zip(images, labels, preds)):
                label_idx = label.item()
                if len(class_examples[label_idx]) < 3:
                    # Convert tensor to numpy for visualization
                    img = image.cpu().numpy().transpose(1, 2, 0)
                    
                    # Denormalize if necessary
                    if hasattr(data_module, 'normalize_transform'):
                        mean = data_module.normalize_transform.mean
                        std = data_module.normalize_transform.std
                        img = img * np.array(std) + np.array(mean)
                        
                    img = np.clip(img, 0, 1)
                    
                    class_examples[label_idx].append({
                        'image': img,
                        'true': class_names[label_idx],
                        'pred': class_names[pred.item()]
                    })
            
            # Check if we have enough examples
            if all(len(examples) >= 3 for examples in class_examples.values()):
                break
    
    # Create 10x3 grid for visualization
    fig, axes = plt.subplots(10, 3, figsize=(15, 30))
    
    for class_idx in range(10):
        examples = class_examples[class_idx]
        for i in range(min(3, len(examples))):
            ax = axes[class_idx, i]
            example = examples[i]
            
            # Display image
            ax.imshow(example['image'])
            
            # Set title with true and predicted labels
            title = f"True: {example['true']}\nPred: {example['pred']}"
            color = 'green' if example['true'] == example['pred'] else 'red'
            ax.set_title(title, color=color)
            
            # Remove axis ticks
            ax.set_xticks([])
            ax.set_yticks([])
    
    plt.tight_layout()
    
    # Log figure to wandb
    wandb.log({"test_predictions": wandb.Image(fig)})
    
    # Close the figure
    plt.close(fig)

In [17]:
#Function is similar to train_with_wandb() with added evaluation and visualization code
def train_and_evaluate(config=None):
    """Train and Evaluate best model config"""
    with wandb.init(config=config, project="DA6401_Assignment2"):
        # Get hyperparameters from wandb
        config = wandb.config
        
        # Create model with the hyperparameters
        model = FlexibleCNN(
            input_channels=3,
            num_classes=10,
            filters=[config.filters_initial],
            filter_size=config.filter_size,
            conv_activation=config.conv_activation,
            dense_activation=config.dense_activation,
            dense_neurons=config.dense_neurons,
            filter_organization=config.filter_organization,
            batch_norm=config.batch_norm,
            dropout_rate=config.dropout_rate,
            dropout_position=config.dropout_position,
            learning_rate=config.learning_rate,
            optimizer=config.optimizer
        )
        
        # Create data module
        data_module = INaturalistDataModule(
            batch_size=config.batch_size,
            data_augmentation=config.data_augmentation
        )
        
        # Create WandB logger
        wandb_logger = WandbLogger(project="inaturalist-cnn")
        
        # Create callbacks
        early_stop_callback = EarlyStopping(
            monitor='val_loss',
            patience=10,
            mode='min'
        )
        
        checkpoint_callback = ModelCheckpoint(
            monitor='val_acc',
            dirpath='./checkpoints/',
            filename='inaturalist-cnn-{epoch:02d}-{val_acc:.2f}',
            save_top_k=1,
            mode='max'
        )
        
        # Create trainer
        trainer = pl.Trainer(
            max_epochs=30,
            logger=wandb_logger,
            callbacks=[early_stop_callback, checkpoint_callback],
            log_every_n_steps=10
        )
        
        # Train the model
        trainer.fit(model, data_module)
        
        # Test the model
        test_result = trainer.test(model, data_module)
        
        # Log final metrics
        wandb.log({
            "test_accuracy": test_result[0]["test_acc"],
            "test_loss": test_result[0]["test_loss"]
        })
        
        # Visualize test results in a 10x3 grid
        visualize_test_results(model, data_module)

In [9]:
api = wandb.Api()
sweep = api.sweep("da24m027-indian-institute-of-technology-madras/DA6401_Assignment2/gk7oqzq1")
best_run = sweep.best_run()
best_run_config = best_run.config

[34m[1mwandb[0m: Sorting runs by -summary_metrics.val_acc


True


In [10]:
best_run_config

{'filters': [64],
 'optimizer': 'rmsprop',
 'batch_norm': False,
 'batch_size': 64,
 'filter_size': 3,
 'num_classes': 10,
 'dropout_rate': 0.2,
 'dense_neurons': 128,
 'learning_rate': 0.001,
 'input_channels': 3,
 'conv_activation': 'mish',
 'filters_initial': 64,
 'dense_activation': 'relu',
 'dropout_position': 'after_dense',
 'data_augmentation': False,
 'filter_organization': 'same'}

In [18]:
train_and_evaluate(config=best_run_config)

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇███
test_acc,▁
test_accuracy,▁
test_loss,▁▁
train_acc,▁▁▂▃▂▂▂▂▂▂▂▄▃▂▄▃▃▃▄▄▅▆▄▅▅▆▆▆▆▆▇█▇▇██▇▇█▇
train_loss,████▇▇██▇▇▇▇▇▆▅▆▅▄▅▅▅▅▄▃▃▄▄▃▃▃▂▃▃▂▁▁▂▃▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
val_acc,▁▃▃▅▇▅▇▇▇█▇█▇▇█▇▆
val_loss,▃▃▂▂▁▂▁▁▁▁▂▂▃▄▅▆█

0,1
epoch,17.0
test_acc,0.319
test_accuracy,0.319
test_loss,2.86039
train_acc,0.78125
train_loss,0.7383
trainer/global_step,2125.0
val_acc,0.329
val_loss,2.83573
