In [1]:
import os
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader


In [2]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [3]:
class MultiMNISTDataset(Dataset):
    def __init__(self, data_dir, transform=None, is_multi=False):
        self.images, self.labels = self.load_mnist_data(data_dir, transform, is_multi)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        return self.images[index], self.labels[index]
    


    
    @staticmethod
    def get_multi_labels(label_str, max_length):
        output_vector = np.zeros(max_length * 10)
        if label_str == '0':
            return output_vector
        for i, digit in enumerate(label_str):
            output_vector[i * 10 + int(digit)] = 1
        return output_vector
    
    @staticmethod
    def load_mnist_data(data_dir, transform=None, is_multi=False):
        max_length = max(len(label_str) for label_str in os.listdir(data_dir))
        images, labels = [], []
        for label_str in os.listdir(data_dir):
            folder_path = os.path.join(data_dir, label_str)
            label = (MultiMNISTDataset.get_multi_labels(label_str, max_length)
                     if is_multi else (len(label_str) if label_str != '0' else 0))
            for image_file in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_file)
                image = Image.open(image_path).convert('L')
                if transform:
                    image = transform(image)
                images.append(image)
                labels.append(label)
        return torch.stack(images), torch.tensor(labels)

    @staticmethod
    def get_dataloader(data_dir, batch_size=32, shuffle=True, transform=None, is_multi=False):
        dataset = MultiMNISTDataset(data_dir, transform, is_multi)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    @staticmethod
    def get_dimensions(loader):
        input_channels, input_size = None, None
        for image, label in loader:
            if input_size is None:
                input_channels = image.shape[1]
                input_size=(image.shape[2], image.shape[3])
        return input_channels, input_size
    
    @staticmethod
    def get_max_classes(data_dir):
        return max(len(label_str) for label_str in os.listdir(data_dir))

        
train_loader = MultiMNISTDataset.get_dataloader('./double_mnist/train', batch_size=32, shuffle=True, transform=transform, is_multi=True)
val_loader = MultiMNISTDataset.get_dataloader('./double_mnist/val', batch_size=32, shuffle=True,transform=transform, is_multi=True)
test_loader = MultiMNISTDataset.get_dataloader('./double_mnist/test', batch_size=32, shuffle=True,transform=transform, is_multi=True)

max_classes = MultiMNISTDataset.get_max_classes('./double_mnist/train')

print("Number of Batches Train loader:", len(train_loader))
print("Number of Batches Validation loader:", len(val_loader))
print("Number of Batches Test loader:", len(test_loader))

  return torch.stack(images), torch.tensor(labels)


Number of Batches Train loader: 394
Number of Batches Validation loader: 94
Number of Batches Test loader: 144


In [4]:
print(max_classes)

3


In [None]:
class MultiLabelCNN(nn.Module):
    def __init__(self, layer_config=[], input_channels=1, input_size=(128,128), max_classes=3):
        super().__init__()

        self.layers = nn.ModuleList()
        self.activations = []  

        layer_in = input_channels
        current_size = input_size

        for config in layer_config:
            if config['type'] == 'conv2d':
                layer_out = config['out_channels']
                self.layers.append(nn.Conv2d(layer_in, layer_out, kernel_size=config['kernel_size'], stride=config['stride'], padding=config['padding']))
                self.activations.append(self.get_activation(config['activation']))

                current_size = self.calculate_conv_output_size(current_size, config['kernel_size'], config['stride'], config['padding'])
                layer_in = layer_out  

            elif config['type'] == 'pool':
                if config['pool_type'] == 'max':
                    self.layers.append(nn.MaxPool2d(kernel_size=config['kernel_size'], stride=2))
                elif config['pool_type'] == 'avg':
                    self.layers.append(nn.AvgPool2d(kernel_size=config['kernel_size'], stride=2))
                else:
                    raise ValueError('Invalid pool type')

                current_size = self.calculate_pool_output_size(current_size, config['kernel_size'], stride=2)

        flattened_size = layer_in * current_size[0] * current_size[1]

        self.fc1 = nn.Linear(flattened_size, 256)  
        self.fc2 = nn.Linear(256, max_classes*10)
        self.softmax = nn.Softmax(dim=-1)

    def get_activation(self, activation):
        if activation == 'sigmoid':
            return nn.Sigmoid()
        if activation == 'relu':
            return nn.ReLU()
        if activation == 'tanh':
            return nn.Tanh()
        else:
            raise ValueError('Invalid activation function')

    def calculate_conv_output_size(self, input_size, kernel_size, stride, padding):
        h_in, w_in = input_size
        w_out = ((w_in - kernel_size + 2 * padding) // stride) + 1
        h_out = ((h_in - kernel_size + 2 * padding) // stride) + 1
        return (h_out, w_out)

    def calculate_pool_output_size(self, input_size, kernel_size, stride):
        h_in, w_in = input_size
        w_out = ((w_in - kernel_size) // stride) + 1
        h_out = ((h_in - kernel_size) // stride) + 1
        return (h_out, w_out)

    def forward(self, x):
      
        for layer, activation in zip(self.layers, self.activations + [None] * (len(self.layers) - len(self.activations))):
          x = layer(x)
          if activation:
            x = activation(x)


        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x
    
   
    
    @staticmethod     
    def calculate_accuracy(outputs, labels, max_digits):
        batch_size = outputs.size(0)
        outputs = outputs.view(batch_size, max_digits, 10)
        labels = labels.view(batch_size, max_digits, 10)
        
        predicted_digits = torch.argmax(outputs, dim=2)
        true_digits = torch.argmax(labels, dim=2)
        
       

        correct_predictions = (predicted_digits == true_digits).sum().item()
        total_positions = batch_size * max_digits

        accuracy = 100 * correct_predictions / total_positions
        return accuracy

    def train_model(self, train_loader, val_loader, optimizer, loss_function, epochs=5):
        for epoch in range(epochs):
            self.train()
            total_loss = 0.0
            print(f"Epoch {epoch + 1}/{epochs}:")

            # Training Loop
            for batch_idx, (inputs, labels) in enumerate(train_loader):
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            # Validation Phase
            self.eval()
            total_accuracy = 0.0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    outputs = self(inputs)
                    accuracy = MultiLabelCNN.calculate_accuracy(outputs, labels, self.fc2.out_features // 10)
                    total_accuracy += accuracy

            avg_loss = total_loss / len(train_loader)
            avg_accuracy = total_accuracy / len(val_loader)
            print(f"Training Loss: {avg_loss:.4f}, Validation Accuracy: {avg_accuracy:.2f}%")

    def test_model(self, test_loader):
        self.eval()
        total_accuracy = 0.0
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = self(inputs)
                accuracy = MultiLabelCNN.calculate_accuracy(outputs, labels, self.fc2.out_features // 10)
                total_accuracy += accuracy


        avg_accuracy = total_accuracy / len(test_loader)
        print(f"Test Accuracy: {avg_accuracy:.2f}%")


 

    def save(self, to_path='./best.pth'):
        torch.save(self.state_dict(), to_path)
        print(f"Model Saved Successfully to {to_path}")
        
    def load(self, from_path='./best.pth'):
        self.load_state_dict(torch.load(from_path))
        print(f"Model Loaded Successfully from {from_path}")    

In [6]:
input_channels, input_size = MultiMNISTDataset.get_dimensions(train_loader)

layer_config = [
    {'type': 'conv2d', 'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
    {'type': 'conv2d', 'out_channels': 64, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
    {'type': 'conv2d', 'out_channels': 128, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
]



model = MultiLabelCNN(layer_config=layer_config, input_channels=input_channels, input_size=input_size, max_classes=max_classes)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

loss_function = nn.CrossEntropyLoss()


model.train_model(train_loader, val_loader, optimizer, loss_function, epochs=5)

Epoch 1/5:
Training Loss: 6.4997, Validation Accuracy: 6.32%
Epoch 2/5:
Training Loss: 6.4722, Validation Accuracy: 3.32%
Epoch 3/5:
Training Loss: 6.4645, Validation Accuracy: 2.58%
Epoch 4/5:
Training Loss: 6.4363, Validation Accuracy: 6.01%
Epoch 5/5:
Training Loss: 6.2451, Validation Accuracy: 7.61%


In [7]:
model.save('first_reg.pth')
net = MultiLabelCNN(layer_config=layer_config, input_channels=input_channels, input_size=input_size, max_classes=max_classes)
# net.load('first_reg.pth')
model.test_model(train_loader)

Model Saved Successfully to first_reg.pth
Test Accuracy: 10.89%


# Full code

In [None]:
import os
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])



class MultiMNISTDataset(Dataset):
    def __init__(self, data_dir, transform=None, is_multi=False):
        self.images, self.labels = self.load_mnist_data(data_dir, transform, is_multi)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        return self.images[index], self.labels[index]
    


    
    @staticmethod
    def get_multi_labels(label_str, max_length):
        output_vector = np.zeros(max_length * 10)
        if label_str == '0':
            return output_vector
        for i, digit in enumerate(label_str):
            output_vector[i * 10 + int(digit)] = 1
        return output_vector
    
    @staticmethod
    def load_mnist_data(data_dir, transform=None, is_multi=False):
        max_length = max(len(label_str) for label_str in os.listdir(data_dir))
        images, labels = [], []
        for label_str in os.listdir(data_dir):
            folder_path = os.path.join(data_dir, label_str)
            label = (MultiMNISTDataset.get_multi_labels(label_str, max_length)
                     if is_multi else (len(label_str) if label_str != '0' else 0))
            for image_file in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_file)
                image = Image.open(image_path).convert('L')
                if transform:
                    image = transform(image)
                images.append(image)
                labels.append(label)
        return torch.stack(images), torch.tensor(labels)

    @staticmethod
    def get_dataloader(data_dir, batch_size=32, shuffle=True, transform=None, is_multi=False):
        dataset = MultiMNISTDataset(data_dir, transform, is_multi)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    @staticmethod
    def get_dimensions(loader):
        input_channels, input_size = None, None
        for image, label in loader:
            if input_size is None:
                input_channels = image.shape[1]
                input_size=(image.shape[2], image.shape[3])
        return input_channels, input_size
    
    @staticmethod
    def get_max_classes(data_dir):
        return max(len(label_str) for label_str in os.listdir(data_dir))

        
train_loader = MultiMNISTDataset.get_dataloader('./double_mnist/train', batch_size=32, shuffle=True, transform=transform, is_multi=True)
val_loader = MultiMNISTDataset.get_dataloader('./double_mnist/val', batch_size=32, shuffle=True,transform=transform, is_multi=True)
test_loader = MultiMNISTDataset.get_dataloader('./double_mnist/test', batch_size=32, shuffle=True,transform=transform, is_multi=True)

max_classes = MultiMNISTDataset.get_max_classes('./double_mnist/train')

print("Number of Batches Train loader:", len(train_loader))
print("Number of Batches Validation loader:", len(val_loader))
print("Number of Batches Test loader:", len(test_loader))



print(max_classes)



class MultiLabelCNN(nn.Module):
    def __init__(self, layer_config=[], input_channels=1, input_size=(128,128), max_classes=3):
        super().__init__()

        self.layers = nn.ModuleList()
        self.activations = []  

        layer_in = input_channels
        current_size = input_size

        for config in layer_config:
            if config['type'] == 'conv2d':
                layer_out = config['out_channels']
                self.layers.append(nn.Conv2d(layer_in, layer_out, kernel_size=config['kernel_size'], stride=config['stride'], padding=config['padding']))
                self.activations.append(self.get_activation(config['activation']))

                current_size = self.calculate_conv_output_size(current_size, config['kernel_size'], config['stride'], config['padding'])
                layer_in = layer_out  

            elif config['type'] == 'pool':
                if config['pool_type'] == 'max':
                    self.layers.append(nn.MaxPool2d(kernel_size=config['kernel_size'], stride=2))
                elif config['pool_type'] == 'avg':
                    self.layers.append(nn.AvgPool2d(kernel_size=config['kernel_size'], stride=2))
                else:
                    raise ValueError('Invalid pool type')

                current_size = self.calculate_pool_output_size(current_size, config['kernel_size'], stride=2)

        flattened_size = layer_in * current_size[0] * current_size[1]

        self.fc1 = nn.Linear(flattened_size, 256)  
        self.fc2 = nn.Linear(256, max_classes*10)
        self.softmax = nn.Softmax(dim=-1)

    def get_activation(self, activation):
        if activation == 'sigmoid':
            return nn.Sigmoid()
        if activation == 'relu':
            return nn.ReLU()
        if activation == 'tanh':
            return nn.Tanh()
        else:
            raise ValueError('Invalid activation function')

    def calculate_conv_output_size(self, input_size, kernel_size, stride, padding):
        h_in, w_in = input_size
        w_out = ((w_in - kernel_size + 2 * padding) // stride) + 1
        h_out = ((h_in - kernel_size + 2 * padding) // stride) + 1
        return (h_out, w_out)

    def calculate_pool_output_size(self, input_size, kernel_size, stride):
        h_in, w_in = input_size
        w_out = ((w_in - kernel_size) // stride) + 1
        h_out = ((h_in - kernel_size) // stride) + 1
        return (h_out, w_out)

    def forward(self, x):
        # for i, layer in enumerate(self.layers):
        #     x = layer(x)
        #     if i < len(self.activations):
        #         x = self.activations[i](x)

        for layer, activation in zip(self.layers, self.activations + [None] * (len(self.layers) - len(self.activations))):
          x = layer(x)
          if activation:
            x = activation(x)


        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x
    
   
    
    @staticmethod     
    def calculate_accuracy(outputs, labels, max_digits):
        batch_size = outputs.size(0)
        outputs = outputs.view(batch_size, max_digits, 10)
        labels = labels.view(batch_size, max_digits, 10)
        
        predicted_digits = torch.argmax(outputs, dim=2)
        true_digits = torch.argmax(labels, dim=2)
        
       

        correct_predictions = (predicted_digits == true_digits).sum().item()
        total_positions = batch_size * max_digits

        accuracy = 100 * correct_predictions / total_positions
        return accuracy

    def train_model(self, train_loader, val_loader, optimizer, loss_function, epochs=5):
        for epoch in range(epochs):
            self.train()
            total_loss = 0.0
            print(f"Epoch {epoch + 1}/{epochs}:")

            # Training Loop
            for batch_idx, (inputs, labels) in enumerate(train_loader):
                optimizer.zero_grad()
                outputs = self(inputs)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            # Validation Phase
            self.eval()
            total_accuracy = 0.0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    outputs = self(inputs)
                    accuracy = MultiLabelCNN.calculate_accuracy(outputs, labels, self.fc2.out_features // 10)
                    total_accuracy += accuracy

            avg_loss = total_loss / len(train_loader)
            avg_accuracy = total_accuracy / len(val_loader)
            print(f"Training Loss: {avg_loss:.4f}, Validation Accuracy: {avg_accuracy:.2f}%")

    def test_model(self, test_loader):
        self.eval()
        total_accuracy = 0.0
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = self(inputs)
                accuracy = MultiLabelCNN.calculate_accuracy(outputs, labels, self.fc2.out_features // 10)
                total_accuracy += accuracy


        avg_accuracy = total_accuracy / len(test_loader)
        print(f"Test Accuracy: {avg_accuracy:.2f}%")




    def save(self, to_path='./best.pth'):
        torch.save(self.state_dict(), to_path)
        print(f"Model Saved Successfully to {to_path}")
        
    def load(self, from_path='./best.pth'):
        self.load_state_dict(torch.load(from_path))
        print(f"Model Loaded Successfully from {from_path}")    



input_channels, input_size = MultiMNISTDataset.get_dimensions(train_loader)

layer_config = [
    {'type': 'conv2d', 'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
    {'type': 'conv2d', 'out_channels': 64, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
    {'type': 'conv2d', 'out_channels': 128, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
    {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
]



model = MultiLabelCNN(layer_config=layer_config, input_channels=input_channels, input_size=input_size, max_classes=max_classes)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

loss_function = nn.CrossEntropyLoss()


model.train_model(train_loader, val_loader, optimizer, loss_function, epochs=5)
        

model.save('first_reg.pth')
net = MultiLabelCNN(layer_config=layer_config, input_channels=input_channels, input_size=input_size, max_classes=max_classes)
# net.load('first_reg.pth')
model.test_model(train_loader)

In [None]:
# import wandb
# from torch.optim import SGD, Adam

# # Initialize wandb
# wandb.init(project="multilabel_cnn_hyperparameter_tuning")

# # Define hyperparameter search space
# hyperparameter_space = {
#     'learning_rate': [0.001, 0.01, 0.1],
#     'epochs': [5, 10, 15],
#     'dropout_rate': [0.2, 0.3, 0.5],
#     'conv_layers': [
#         [
#             {'type': 'conv2d', 'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
#             {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
#             {'type': 'conv2d', 'out_channels': 64, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
#             {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
#         ],
#         [
#             {'type': 'conv2d', 'out_channels': 32, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
#             {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
#             {'type': 'conv2d', 'out_channels': 64, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
#             {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
#             {'type': 'conv2d', 'out_channels': 128, 'kernel_size': 3, 'stride': 1, 'padding': 1, 'activation': 'relu'},
#             {'type': 'pool', 'kernel_size': 2, 'pool_type': 'max'},
#         ],
#     ],
#     'optimizer': ['SGD', 'Adam']
# }

# # Define Hamming accuracy calculation function
# def calculate_hamming_accuracy(predictions, labels):
#     """Calculates Hamming accuracy for multilabel predictions."""
#     predictions = (predictions > 0.5).float()  # Threshold at 0.5 for multilabel
#     return (predictions == labels).float().mean().item()

# # Define model training and evaluation with logging to WandB
# def train_and_evaluate(hyperparams):
#     # Initialize the model with the current hyperparameters
#     model = MultiLabelCNN(
#         layer_config=hyperparams['conv_layers'],
#         input_channels=input_channels,
#         input_size=input_size,
#         max_classes=max_classes
#     )
    
#     # Optimizer choice
#     optimizer = SGD(model.parameters(), lr=hyperparams['learning_rate'], momentum=0.9) if hyperparams['optimizer'] == 'SGD' else Adam(model.parameters(), lr=hyperparams['learning_rate'])
    
#     # Loss function
#     loss_function = nn.CrossEntropyLoss()
    
#     # Training and validation
#     for epoch in range(hyperparams['epochs']):
#         model.train()
#         train_loss = 0.0
#         for inputs, labels in train_loader:
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = loss_function(outputs, labels)
#             loss.backward()
#             optimizer.step()
#             train_loss += loss.item()

#         # Validation phase
#         model.eval()
#         val_loss, total_accuracy, total_hamming_accuracy = 0.0, 0.0, 0.0
#         with torch.no_grad():
#             for inputs, labels in val_loader:
#                 outputs = model(inputs)
#                 loss = loss_function(outputs, labels)
#                 val_loss += loss.item()
#                 accuracy = MultiLabelCNN.calculate_accuracy(outputs, labels, model.fc2.out_features // 10)
#                 hamming_accuracy = calculate_hamming_accuracy(outputs, labels)
#                 total_accuracy += accuracy
#                 total_hamming_accuracy += hamming_accuracy

#         avg_train_loss = train_loss / len(train_loader)
#         avg_val_loss = val_loss / len(val_loader)
#         avg_val_accuracy = total_accuracy / len(val_loader)
#         avg_val_hamming_accuracy = total_hamming_accuracy / len(val_loader)

#         # Log metrics to WandB
#         wandb.log({
#             'epoch': epoch + 1,
#             'train_loss': avg_train_loss,
#             'val_loss': avg_val_loss,
#             'val_accuracy': avg_val_accuracy,
#             'val_hamming_accuracy': avg_val_hamming_accuracy,
#         })
    
#     # Return final validation accuracy for this configuration
#     return avg_val_accuracy

# # Run experiments
# best_accuracy = 0
# best_params = None

# for lr in hyperparameter_space['learning_rate']:
#     for epoch in hyperparameter_space['epochs']:
#         for dropout in hyperparameter_space['dropout_rate']:
#             for layers in hyperparameter_space['conv_layers']:
#                 for opt in hyperparameter_space['optimizer']:
#                     # Define current set of hyperparameters
#                     current_params = {
#                         'learning_rate': lr,
#                         'epochs': epoch,
#                         'dropout_rate': dropout,
#                         'conv_layers': layers,
#                         'optimizer': opt
#                     }
                    
#                     # Log this specific run in WandB
#                     with wandb.init(config=current_params, reinit=True):
#                         accuracy = train_and_evaluate(current_params)
                        
#                         if accuracy > best_accuracy:
#                             best_accuracy = accuracy
#                             best_params = current_params

# # Display best hyperparameters and results
# print("Best Hyperparameters:", best_params)
# print("Best Validation Accuracy:", best_accuracy)

# # Load best model, evaluate on test data, and log exact match and hamming accuracy
# model.load_state_dict(torch.load('best_model.pth'))
# wandb.finish()


0,1
epoch,▁▃▅▆█
train_loss,█▇▅▂▁
val_accuracy,▂▁▁▅█
val_hamming_accuracy,█▇▂▁▁
val_loss,██▃▂▁

0,1
epoch,5.0
train_loss,5.959
val_accuracy,10.57181
val_hamming_accuracy,0.55967
val_loss,6.43646


0,1
epoch,▁▃▅▆█
train_loss,█▅▃▂▁
val_accuracy,▁▆▇██
val_hamming_accuracy,▁▄▆▇█
val_loss,█▃▂▁▃

0,1
epoch,5.0
train_loss,2.58847
val_accuracy,34.09427
val_hamming_accuracy,0.79827
val_loss,5.46635


0,1
epoch,▁▃▅▆█
train_loss,██▇▄▁
val_accuracy,▁▂▄██
val_hamming_accuracy,██▇▂▁
val_loss,██▇▁▂

0,1
epoch,5.0
train_loss,5.99593
val_accuracy,9.05733
val_hamming_accuracy,0.5796
val_loss,6.48785


0,1
epoch,▁▃▅▆█
train_loss,█▄▂▁▁
val_accuracy,▁▆▇██
val_hamming_accuracy,▁▆▇██
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_loss,1.99306
val_accuracy,56.87057
val_hamming_accuracy,0.88582
val_loss,3.50497


0,1
epoch,▁▃▅▆█
train_loss,█▇▄▂▁
val_accuracy,▁▄▅▆█
val_hamming_accuracy,█▇▁▂▁
val_loss,█▆▃▁▂

0,1
epoch,5.0
train_loss,5.93982
val_accuracy,11.0557
val_hamming_accuracy,0.55132
val_loss,6.42917


0,1
epoch,▁▃▅▆█
train_loss,█▅▃▂▁
val_accuracy,▁▅▆▇█
val_hamming_accuracy,▁▆▇██
val_loss,█▁▁▁▄

0,1
epoch,5.0
train_loss,2.58911
val_accuracy,35.94489
val_hamming_accuracy,0.80552
val_loss,5.74415


0,1
epoch,▁▃▅▆█
train_loss,█▇▇▄▁
val_accuracy,▃▁▄▅█
val_hamming_accuracy,▇▇█▂▁
val_loss,▇█▆▃▁

0,1
epoch,5.0
train_loss,5.97988
val_accuracy,11.1185
val_hamming_accuracy,0.55863
val_loss,6.34463


0,1
epoch,▁▃▅▆█
train_loss,█▄▂▁▁
val_accuracy,▁▆▇▇█
val_hamming_accuracy,▁▆▇▇█
val_loss,█▃▁▁▁

0,1
epoch,5.0
train_loss,2.04557
val_accuracy,55.57772
val_hamming_accuracy,0.87108
val_loss,3.62182


0,1
epoch,▁▃▅▆█
train_loss,█▇▅▃▁
val_accuracy,▄▁▃██
val_hamming_accuracy,█▅▂▁▁
val_loss,█▆▃▁▂

0,1
epoch,5.0
train_loss,5.95124
val_accuracy,10.17287
val_hamming_accuracy,0.55277
val_loss,6.44346


0,1
epoch,▁▃▅▆█
train_loss,█▆▄▂▁
val_accuracy,▁▄▆██
val_hamming_accuracy,▁▂▅██
val_loss,█▅▁▁▁

0,1
epoch,5.0
train_loss,2.81898
val_accuracy,32.66475
val_hamming_accuracy,0.77327
val_loss,5.54202


0,1
epoch,▁▃▅▆█
train_loss,█▇▆▆▁
val_accuracy,█▆▇▁█
val_hamming_accuracy,▆█▂▆▁
val_loss,█▇█▇▁

0,1
epoch,5.0
train_loss,6.36336
val_accuracy,6.59722
val_hamming_accuracy,0.68467
val_loss,6.58253


Traceback (most recent call last):
  File "C:\Users\devan\AppData\Local\Temp\ipykernel_25320\3268955498.py", line 115, in <module>
    accuracy = train_and_evaluate(current_params)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\devan\AppData\Local\Temp\ipykernel_25320\3268955498.py", line 61, in train_and_evaluate
    loss.backward()
  File "c:\Users\devan\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\_tensor.py", line 581, in backward
    torch.autograd.backward(
  File "c:\Users\devan\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\autograd\__init__.py", line 347, in backward
    _engine_run_backward(
  File "c:\Users\devan\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\autograd\graph.py", line 825, in _engine_run_backward
    return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

0,1
epoch,▁█
train_loss,█▁
val_accuracy,▁█
val_hamming_accuracy,▁█
val_loss,█▁

0,1
epoch,2.0
train_loss,3.50239
val_accuracy,50.08126
val_hamming_accuracy,0.85467
val_loss,3.88089


KeyboardInterrupt: 

wandb plots link
https://api.wandb.ai/links/devansh-kantesaria-iiit-hyderabad/fwi1rjq8

Best model

LR:0.001
dropout_rate:0.2
optimizer: Adam
epoch:5
train loss:1.993
val_acc:56.870
val_hamming_acc:56.870
val_loss:3.5
