# Hyperparameter Tuning

These are the hyperparameters we would be tuning for getting the best results from the model:

1) Learning Rate \\
2) Batch Size' \\
3) L1 lambda \\
4) optimizer \\
6) Weight Decay \\
7) Epochs

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
import os
from PIL import Image
from tqdm import tqdm
from torchvision.utils import save_image
# local module imports
import config
from utils import save_checkpoint, load_checkpoint, save_some_examples
from UvU_Net_Generator import OuterUNet as generator
from Pix_Discriminator import Discriminator

torch.backends.cudnn.benchmark = True

  check_for_updates()


torch.Size([1, 3, 512, 512])


In [2]:
def train_fn(
    disc, gen, loader, opt_disc, opt_gen, l1_loss, bce, g_scaler, d_scaler,
):
    loop = tqdm(loader, leave=True)

    for idx, (x, y) in enumerate(loop):
        x = x.to(config.DEVICE)
        y = y.to(config.DEVICE)

        # Train Discriminator
        with torch.cuda.amp.autocast():
            y_fake = gen(x)
            D_real = disc(x, y)
            D_real_loss = bce(D_real, torch.ones_like(D_real))
            D_fake = disc(x, y_fake.detach())
            D_fake_loss = bce(D_fake, torch.zeros_like(D_fake))
            D_loss = (D_real_loss + D_fake_loss) / 2

        disc.zero_grad()
        d_scaler.scale(D_loss).backward()
        d_scaler.step(opt_disc)
        d_scaler.update()

        # Train generator
        with torch.cuda.amp.autocast():
            D_fake = disc(x, y_fake)
            G_fake_loss = bce(D_fake, torch.ones_like(D_fake))
            L1 = l1_loss(y_fake, y) * config.L1_LAMBDA
            G_loss = G_fake_loss + L1

        opt_gen.zero_grad()
        g_scaler.scale(G_loss).backward()
        g_scaler.step(opt_gen)
        g_scaler.update()

        if idx % 10 == 0:
            loop.set_postfix(
                D_real=torch.sigmoid(D_real).mean().item(),
                D_fake=torch.sigmoid(D_fake).mean().item(),
            )

In [3]:
!unzip -q Sample_dataset.zip

In [4]:
class PairedImageDataset(Dataset):
    def __init__(self, input_dir, target_dir, transform=None):
        self.input_dir = input_dir
        self.target_dir = target_dir
        self.input_images = sorted(os.listdir(input_dir))
        self.target_images = sorted(os.listdir(target_dir))
        self.transform = transform

        assert len(self.input_images) == len(self.target_images), "Mismatch between input and target images!"

    def __len__(self):
        return len(self.input_images)

    def __getitem__(self, idx):
        input_image_path = os.path.join(self.input_dir, self.input_images[idx])
        target_image_path = os.path.join(self.target_dir, self.target_images[idx])

        input_image = Image.open(input_image_path).convert("RGB")
        target_image = Image.open(target_image_path).convert("RGB")

        if self.transform:
            input_image = self.transform(input_image)
            target_image = self.transform(target_image)

        return input_image, target_image


input_dir = '/content/Sample_dataset/sobel_images'
target_dir = '/content/Sample_dataset/input_images'

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

dataset = PairedImageDataset(input_dir=input_dir, target_dir=target_dir, transform=transform)

# Split dataset into training and validation (80-20 split)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoader for training and validation
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Example to iterate over the data loader
for batch_idx, (input_images, target_images) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1}")
    print(f"Input batch size: {input_images.shape}")
    print(f"Target batch size: {target_images.shape}")


Batch 1
Input batch size: torch.Size([16, 3, 256, 256])
Target batch size: torch.Size([16, 3, 256, 256])
Batch 2
Input batch size: torch.Size([4, 3, 256, 256])
Target batch size: torch.Size([4, 3, 256, 256])


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
save_dir = '/content/drive/MyDrive/model_checkpoints/'  # Change this to your desired directory
os.makedirs(save_dir, exist_ok=True)
directory = '/content/evaluation/'
if not os.path.exists(directory):
    os.makedirs(directory)  # Create the directory if it doesn't exist

def main():
    disc = Discriminator(in_channels=3).to(config.DEVICE)
    gen = generator(in_channels=3, features=64).to(config.DEVICE)
    opt_disc = optim.Adam(disc.parameters(), lr=config.LEARNING_RATE, betas=(0.5, 0.999))
    opt_gen = optim.Adam(gen.parameters(), lr=config.LEARNING_RATE, betas=(0.5, 0.999))
    BCE = nn.BCEWithLogitsLoss()
    L1_LOSS = nn.L1Loss()

    if config.LOAD_MODEL:
        load_checkpoint(
            config.CHECKPOINT_GEN, gen, opt_gen, config.LEARNING_RATE,
        )
        load_checkpoint(
            config.CHECKPOINT_DISC, disc, opt_disc, config.LEARNING_RATE,
        )

    g_scaler = torch.cuda.amp.GradScaler()
    d_scaler = torch.cuda.amp.GradScaler()

    for epoch in range(config.NUM_EPOCHS):
        train_fn(
            disc, gen, train_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler,
        )

        # Save models every 50 epochs to Google Drive
        if epoch % 50 == 49:
            print(f"Saving model at epoch {epoch}")
            torch.save({
                'epoch': epoch,
                'generator_state_dict': gen.state_dict(),
                'optimizer_gen_state_dict': opt_gen.state_dict(),
            }, f"{save_dir}generator_epoch_{epoch}.pth")

            torch.save({
                'epoch': epoch,
                'discriminator_state_dict': disc.state_dict(),
                'optimizer_disc_state_dict': opt_disc.state_dict(),
            }, f"{save_dir}discriminator_epoch_{epoch}.pth")

            save_some_examples(gen, val_loader, epoch, folder="evaluation")


if __name__ == "__main__":
    main()


  g_scaler = torch.cuda.amp.GradScaler()
  d_scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
100%|██████████| 2/2 [00:08<00:00,  4.03s/it, D_fake=0.524, D_real=0.412]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.507, D_real=0.574]
100%|██████████| 2/2 [00:00<00:00,  6.08it/s, D_fake=0.47, D_real=0.535]
100%|██████████| 2/2 [00:00<00:00,  6.17it/s, D_fake=0.462, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  6.08it/s, D_fake=0.468, D_real=0.547]
100%|██████████| 2/2 [00:00<00:00,  6.31it/s, D_fake=0.427, D_real=0.558]
100%|██████████| 2/2 [00:00<00:00,  6.15it/s, D_fake=0.412, D_real=0.569]
100%|██████████| 2/2 [00:00<00:00,  6.21it/s, D_fake=0.422, D_real=0.595]
100%|██████████| 2/2 [00:00<00:00,  6.23it/s, D_fake=0.42, D_real=0.584]
100%|██████████| 2/2 [00:00<00:00,  6.20it/s, D_fake=0.387, D_real=0.619]
100%|██████████| 2/2 [00:00<00:00,  6.15it/s, D_fake=0.377, D_real=0.615]
100%|██████████| 2/2 [00:00<00:00,  6.

Saving model at epoch 49


100%|██████████| 2/2 [00:00<00:00,  3.97it/s, D_fake=0.186, D_real=0.812]
100%|██████████| 2/2 [00:00<00:00,  4.26it/s, D_fake=0.145, D_real=0.783]
100%|██████████| 2/2 [00:00<00:00,  4.76it/s, D_fake=0.179, D_real=0.805]
100%|██████████| 2/2 [00:00<00:00,  4.46it/s, D_fake=0.182, D_real=0.808]
100%|██████████| 2/2 [00:00<00:00,  4.47it/s, D_fake=0.14, D_real=0.82]
100%|██████████| 2/2 [00:00<00:00,  3.62it/s, D_fake=0.154, D_real=0.852]
100%|██████████| 2/2 [00:00<00:00,  5.26it/s, D_fake=0.171, D_real=0.823]
100%|██████████| 2/2 [00:00<00:00,  5.44it/s, D_fake=0.232, D_real=0.848]
100%|██████████| 2/2 [00:00<00:00,  5.63it/s, D_fake=0.13, D_real=0.822]
100%|██████████| 2/2 [00:00<00:00,  5.74it/s, D_fake=0.117, D_real=0.834]
100%|██████████| 2/2 [00:00<00:00,  6.08it/s, D_fake=0.22, D_real=0.797]
100%|██████████| 2/2 [00:00<00:00,  6.15it/s, D_fake=0.126, D_real=0.83]
100%|██████████| 2/2 [00:00<00:00,  6.16it/s, D_fake=0.117, D_real=0.795]
100%|██████████| 2/2 [00:00<00:00,  5.89it/

Saving model at epoch 99


100%|██████████| 2/2 [00:00<00:00,  4.81it/s, D_fake=0.0652, D_real=0.944]
100%|██████████| 2/2 [00:00<00:00,  6.00it/s, D_fake=0.0673, D_real=0.951]
100%|██████████| 2/2 [00:00<00:00,  6.13it/s, D_fake=0.0695, D_real=0.915]
100%|██████████| 2/2 [00:00<00:00,  6.06it/s, D_fake=0.0559, D_real=0.944]
100%|██████████| 2/2 [00:00<00:00,  6.11it/s, D_fake=0.0464, D_real=0.959]
100%|██████████| 2/2 [00:00<00:00,  6.10it/s, D_fake=0.0559, D_real=0.942]
100%|██████████| 2/2 [00:00<00:00,  5.97it/s, D_fake=0.0824, D_real=0.911]
100%|██████████| 2/2 [00:00<00:00,  6.14it/s, D_fake=0.0514, D_real=0.938]
100%|██████████| 2/2 [00:00<00:00,  6.04it/s, D_fake=0.0447, D_real=0.955]
100%|██████████| 2/2 [00:00<00:00,  5.98it/s, D_fake=0.0725, D_real=0.849]
100%|██████████| 2/2 [00:00<00:00,  6.13it/s, D_fake=0.0818, D_real=0.876]
100%|██████████| 2/2 [00:00<00:00,  6.15it/s, D_fake=0.052, D_real=0.946]
100%|██████████| 2/2 [00:00<00:00,  5.92it/s, D_fake=0.0484, D_real=0.965]
100%|██████████| 2/2 [00:0

Saving model at epoch 149


100%|██████████| 2/2 [00:00<00:00,  4.74it/s, D_fake=0.0471, D_real=0.964]
100%|██████████| 2/2 [00:00<00:00,  5.44it/s, D_fake=0.0318, D_real=0.968]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.0309, D_real=0.972]
100%|██████████| 2/2 [00:00<00:00,  5.47it/s, D_fake=0.0404, D_real=0.959]
100%|██████████| 2/2 [00:00<00:00,  5.55it/s, D_fake=0.0403, D_real=0.941]
100%|██████████| 2/2 [00:00<00:00,  5.53it/s, D_fake=0.0599, D_real=0.822]
100%|██████████| 2/2 [00:00<00:00,  5.18it/s, D_fake=0.0719, D_real=0.947]
100%|██████████| 2/2 [00:00<00:00,  5.55it/s, D_fake=0.0632, D_real=0.905]
100%|██████████| 2/2 [00:00<00:00,  5.82it/s, D_fake=0.0576, D_real=0.903]
100%|██████████| 2/2 [00:00<00:00,  6.09it/s, D_fake=0.0568, D_real=0.948]
100%|██████████| 2/2 [00:00<00:00,  6.11it/s, D_fake=0.0574, D_real=0.956]
100%|██████████| 2/2 [00:00<00:00,  5.99it/s, D_fake=0.0565, D_real=0.975]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0562, D_real=0.965]
100%|██████████| 2/2 [00:

Saving model at epoch 199


100%|██████████| 2/2 [00:00<00:00,  5.02it/s, D_fake=0.0177, D_real=0.985]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0283, D_real=0.969]
100%|██████████| 2/2 [00:00<00:00,  6.06it/s, D_fake=0.0352, D_real=0.947]
100%|██████████| 2/2 [00:00<00:00,  6.05it/s, D_fake=0.0241, D_real=0.965]
100%|██████████| 2/2 [00:00<00:00,  6.00it/s, D_fake=0.0243, D_real=0.981]
100%|██████████| 2/2 [00:00<00:00,  6.01it/s, D_fake=0.0265, D_real=0.985]
100%|██████████| 2/2 [00:00<00:00,  6.08it/s, D_fake=0.0315, D_real=0.983]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0191, D_real=0.979]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0174, D_real=0.987]
100%|██████████| 2/2 [00:00<00:00,  6.09it/s, D_fake=0.0196, D_real=0.989]
100%|██████████| 2/2 [00:00<00:00,  6.09it/s, D_fake=0.023, D_real=0.988]
100%|██████████| 2/2 [00:00<00:00,  5.23it/s, D_fake=0.0173, D_real=0.987]
100%|██████████| 2/2 [00:00<00:00,  5.65it/s, D_fake=0.0265, D_real=0.967]
100%|██████████| 2/2 [00:0

Saving model at epoch 249


100%|██████████| 2/2 [00:00<00:00,  5.03it/s, D_fake=0.0196, D_real=0.988]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0198, D_real=0.991]
100%|██████████| 2/2 [00:00<00:00,  5.98it/s, D_fake=0.0165, D_real=0.99]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0146, D_real=0.989]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.0152, D_real=0.99]
100%|██████████| 2/2 [00:00<00:00,  6.01it/s, D_fake=0.016, D_real=0.986]
100%|██████████| 2/2 [00:00<00:00,  5.91it/s, D_fake=0.0217, D_real=0.982]
100%|██████████| 2/2 [00:00<00:00,  5.82it/s, D_fake=0.0166, D_real=0.988]
100%|██████████| 2/2 [00:00<00:00,  6.05it/s, D_fake=0.0155, D_real=0.988]
100%|██████████| 2/2 [00:00<00:00,  6.07it/s, D_fake=0.0139, D_real=0.987]
100%|██████████| 2/2 [00:00<00:00,  5.90it/s, D_fake=0.0107, D_real=0.991]
100%|██████████| 2/2 [00:00<00:00,  6.00it/s, D_fake=0.0142, D_real=0.987]
100%|██████████| 2/2 [00:00<00:00,  5.97it/s, D_fake=0.0143, D_real=0.987]
100%|██████████| 2/2 [00:00<

Saving model at epoch 299


100%|██████████| 2/2 [00:00<00:00,  4.89it/s, D_fake=0.011, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.90it/s, D_fake=0.0099, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.92it/s, D_fake=0.00959, D_real=0.995]
100%|██████████| 2/2 [00:00<00:00,  5.97it/s, D_fake=0.0088, D_real=0.995]
100%|██████████| 2/2 [00:00<00:00,  5.69it/s, D_fake=0.00995, D_real=0.994]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.00845, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.60it/s, D_fake=0.00961, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s, D_fake=0.00821, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.56it/s, D_fake=0.0101, D_real=0.995]
100%|██████████| 2/2 [00:00<00:00,  5.66it/s, D_fake=0.00909, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.46it/s, D_fake=0.00584, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.29it/s, D_fake=0.00993, D_real=0.994]
100%|██████████| 2/2 [00:00<00:00,  5.54it/s, D_fake=0.00864, D_real=0.995]
100%|██████████| 

Saving model at epoch 349


100%|██████████| 2/2 [00:00<00:00,  4.99it/s, D_fake=0.0097, D_real=0.991]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.013, D_real=0.99]
100%|██████████| 2/2 [00:00<00:00,  6.01it/s, D_fake=0.0115, D_real=0.993]
100%|██████████| 2/2 [00:00<00:00,  5.91it/s, D_fake=0.0118, D_real=0.993]
100%|██████████| 2/2 [00:00<00:00,  6.01it/s, D_fake=0.0118, D_real=0.993]
100%|██████████| 2/2 [00:00<00:00,  5.99it/s, D_fake=0.00913, D_real=0.993]
100%|██████████| 2/2 [00:00<00:00,  5.95it/s, D_fake=0.00798, D_real=0.994]
100%|██████████| 2/2 [00:00<00:00,  5.88it/s, D_fake=0.0107, D_real=0.991]
100%|██████████| 2/2 [00:00<00:00,  5.96it/s, D_fake=0.0113, D_real=0.989]
100%|██████████| 2/2 [00:00<00:00,  5.99it/s, D_fake=0.00951, D_real=0.995]
100%|██████████| 2/2 [00:00<00:00,  5.93it/s, D_fake=0.00984, D_real=0.994]
100%|██████████| 2/2 [00:00<00:00,  5.97it/s, D_fake=0.00929, D_real=0.991]
100%|██████████| 2/2 [00:00<00:00,  5.96it/s, D_fake=0.0153, D_real=0.989]
100%|██████████| 2/2 [

Saving model at epoch 399


100%|██████████| 2/2 [00:00<00:00,  4.72it/s, D_fake=0.00876, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.67it/s, D_fake=0.00811, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s, D_fake=0.00663, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.60it/s, D_fake=0.0062, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.00443, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  5.40it/s, D_fake=0.00722, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.52it/s, D_fake=0.00528, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.46it/s, D_fake=0.00513, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  5.34it/s, D_fake=0.00653, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s, D_fake=0.00506, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s, D_fake=0.00528, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.73it/s, D_fake=0.00618, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  5.90it/s, D_fake=0.00484, D_real=0.998]
100%|████████

Saving model at epoch 449


100%|██████████| 2/2 [00:00<00:00,  5.10it/s, D_fake=0.0039, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.78it/s, D_fake=0.00568, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.00498, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.00542, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.60it/s, D_fake=0.00348, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.60it/s, D_fake=0.00557, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.54it/s, D_fake=0.00366, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  5.39it/s, D_fake=0.00404, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  5.22it/s, D_fake=0.00383, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  5.11it/s, D_fake=0.0046, D_real=0.996]
100%|██████████| 2/2 [00:00<00:00,  4.94it/s, D_fake=0.00435, D_real=0.997]
100%|██████████| 2/2 [00:00<00:00,  3.14it/s, D_fake=0.00501, D_real=0.998]
100%|██████████| 2/2 [00:00<00:00,  4.21it/s, D_fake=0.00442, D_real=0.997]
100%|█████████

Saving model at epoch 499


In [7]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.3-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.2/233.2 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [8]:
import numpy as np
import random
import itertools
import torch

def calculate_validation_loss(disc, gen, val_loader, L1_LOSS, BCE, device):
    gen.eval()
    total_val_loss = 0
    total_batches = len(val_loader)

    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)

            y_fake = gen(x)

            D_fake = disc(x, y_fake)
            G_fake_loss = BCE(D_fake, torch.ones_like(D_fake))

            L1_loss = L1_LOSS(y_fake, y) * config.L1_LAMBDA

            val_loss = G_fake_loss + L1_loss

            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss / total_batches
    return avg_val_loss

def train_and_validate(disc, gen, train_loader, val_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler, num_epochs):
    for epoch in range(num_epochs):
        gen.train()
        disc.train()
        train_fn(disc, gen, train_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler)

    val_loss = calculate_validation_loss(disc, gen, val_loader, L1_LOSS, BCE, config.DEVICE)

    return val_loss

def randomized_search(param_dist, n_trials=10):
    best_params = None
    best_val_loss = float('inf')

    for trial in range(n_trials):
        params = {key: random.choice(value) for key, value in param_dist.items()}
        print(f"Trial {trial + 1} with parameters: {params}")

        learning_rate = params['learning_rate']
        batch_size = params['batch_size']
        L1_LAMBDA = params['L1_LAMBDA']
        LAMBDA_GP = params['LAMBDA_GP']
        optimizer_type = params['optimizer']
        betas = params['betas']
        weight_decay = params['weight_decay']
        num_epochs = params['num_epochs']

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        disc = Discriminator(in_channels=3).to(config.DEVICE)
        gen = generator(in_channels=3, features=64).to(config.DEVICE)

        if optimizer_type == 'Adam':
            opt_disc = optim.Adam(disc.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
            opt_gen = optim.Adam(gen.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
        elif optimizer_type == 'RMSprop':
            opt_disc = optim.RMSprop(disc.parameters(), lr=learning_rate, weight_decay=weight_decay)
            opt_gen = optim.RMSprop(gen.parameters(), lr=learning_rate, weight_decay=weight_decay)

        BCE = nn.BCEWithLogitsLoss()
        L1_LOSS = nn.L1Loss()
        g_scaler = torch.cuda.amp.GradScaler()
        d_scaler = torch.cuda.amp.GradScaler()

        val_loss = train_and_validate(disc, gen, train_loader, val_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler, num_epochs)

        print(f"Validation Loss for Trial {trial + 1}: {val_loss}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = params

    return best_params, best_val_loss

if __name__ == "__main__":
    param_dist = {
        'learning_rate': [1e-5, 1e-4, 2e-4, 1e-3],
        'batch_size': [8, 16, 32],
        'L1_LAMBDA': [50, 100, 150],
        'LAMBDA_GP': [5, 10, 15],
        'optimizer': ['Adam', 'RMSprop'],
        'betas': [(0.5, 0.999), (0.9, 0.999)],
        'weight_decay': [0, 1e-5, 1e-4],
        'num_epochs': [300, 500]
    }

    best_params, best_val_loss = randomized_search(param_dist, n_trials=20)
    print(f"Best parameters: {best_params}, Best validation loss: {best_val_loss}")


Trial 1 with parameters: {'learning_rate': 0.0002, 'batch_size': 16, 'L1_LAMBDA': 150, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 300}


  g_scaler = torch.cuda.amp.GradScaler()
  d_scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
100%|██████████| 2/2 [00:00<00:00,  5.17it/s, D_fake=0.569, D_real=0.48]
100%|██████████| 2/2 [00:00<00:00,  6.10it/s, D_fake=0.456, D_real=0.559]
100%|██████████| 2/2 [00:00<00:00,  6.05it/s, D_fake=0.425, D_real=0.495]
100%|██████████| 2/2 [00:00<00:00,  6.05it/s, D_fake=0.434, D_real=0.504]
100%|██████████| 2/2 [00:00<00:00,  6.06it/s, D_fake=0.454, D_real=0.556]
100%|██████████| 2/2 [00:00<00:00,  6.01it/s, D_fake=0.44, D_real=0.593]
100%|██████████| 2/2 [00:00<00:00,  6.03it/s, D_fake=0.415, D_real=0.599]
100%|██████████| 2/2 [00:00<00:00,  5.87it/s, D_fake=0.364, D_real=0.603]
100%|██████████| 2/2 [00:00<00:00,  6.03it/s, D_fake=0.338, D_real=0.587]
100%|██████████| 2/2 [00:00<00:00,  6.04it/s, D_fake=0.332, D_real=0.627]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.338, D_real=0.645]
100%|██████████| 2/2 [00:00<00:00,  6.

Validation Loss for Trial 1: 39.95629119873047
Trial 2 with parameters: {'learning_rate': 0.0002, 'batch_size': 16, 'L1_LAMBDA': 50, 'LAMBDA_GP': 10, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.23, D_real=0.534]
100%|██████████| 2/2 [00:00<00:00,  5.93it/s, D_fake=0.697, D_real=0.577]
100%|██████████| 2/2 [00:00<00:00,  5.73it/s, D_fake=0.63, D_real=0.329]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.555, D_real=0.376]
100%|██████████| 2/2 [00:00<00:00,  6.14it/s, D_fake=0.503, D_real=0.406]
100%|██████████| 2/2 [00:00<00:00,  5.86it/s, D_fake=0.461, D_real=0.494]
100%|██████████| 2/2 [00:00<00:00,  5.79it/s, D_fake=0.466, D_real=0.522]
100%|██████████| 2/2 [00:00<00:00,  5.96it/s, D_fake=0.448, D_real=0.525]
100%|██████████| 2/2 [00:00<00:00,  5.89it/s, D_fake=0.436, D_real=0.538]
100%|██████████| 2/2 [00:00<00:00,  6.02it/s, D_fake=0.436, D_real=0.532]
100%|██████████| 2/2 [00:00<00:00,  5.51it/s, D_fake=0.429, D_real=0.568]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.446, D_real=0.538]
100%|██████████| 2/2 [00:00<00:00,  5.88it/s, D_fake=0.442, D_real=0.528]
100%|██████████| 2/2 [00:00<00:00,  5.87

Validation Loss for Trial 2: 33.384403228759766
Trial 3 with parameters: {'learning_rate': 0.001, 'batch_size': 16, 'L1_LAMBDA': 50, 'LAMBDA_GP': 10, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.07it/s, D_fake=1.55e-6, D_real=0.605]
100%|██████████| 2/2 [00:00<00:00,  5.80it/s, D_fake=0.317, D_real=0.564]
100%|██████████| 2/2 [00:00<00:00,  5.54it/s, D_fake=0.532, D_real=0.606]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s, D_fake=0.608, D_real=0.548]
100%|██████████| 2/2 [00:00<00:00,  5.47it/s, D_fake=0.511, D_real=0.516]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s, D_fake=0.519, D_real=0.504]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s, D_fake=0.552, D_real=0.463]
100%|██████████| 2/2 [00:00<00:00,  5.71it/s, D_fake=0.524, D_real=0.44]
100%|██████████| 2/2 [00:00<00:00,  5.57it/s, D_fake=0.477, D_real=0.581]
100%|██████████| 2/2 [00:00<00:00,  5.45it/s, D_fake=0.396, D_real=0.557]
100%|██████████| 2/2 [00:00<00:00,  5.52it/s, D_fake=0.488, D_real=0.503]
100%|██████████| 2/2 [00:00<00:00,  5.45it/s, D_fake=0.419, D_real=0.558]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s, D_fake=0.521, D_real=0.515]
100%|██████████| 2/2 [00:00<00:00,  5

Validation Loss for Trial 3: 27.439434051513672
Trial 4 with parameters: {'learning_rate': 0.0001, 'batch_size': 16, 'L1_LAMBDA': 50, 'LAMBDA_GP': 15, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.68it/s, D_fake=0.555, D_real=0.495]
100%|██████████| 2/2 [00:00<00:00,  5.82it/s, D_fake=0.465, D_real=0.489]
100%|██████████| 2/2 [00:00<00:00,  4.99it/s, D_fake=0.464, D_real=0.534]
100%|██████████| 2/2 [00:00<00:00,  5.42it/s, D_fake=0.457, D_real=0.528]
100%|██████████| 2/2 [00:00<00:00,  5.14it/s, D_fake=0.441, D_real=0.537]
100%|██████████| 2/2 [00:00<00:00,  4.75it/s, D_fake=0.449, D_real=0.527]
100%|██████████| 2/2 [00:00<00:00,  5.19it/s, D_fake=0.453, D_real=0.522]
100%|██████████| 2/2 [00:00<00:00,  5.14it/s, D_fake=0.424, D_real=0.589]
100%|██████████| 2/2 [00:00<00:00,  4.80it/s, D_fake=0.411, D_real=0.589]
100%|██████████| 2/2 [00:00<00:00,  4.86it/s, D_fake=0.384, D_real=0.58]
100%|██████████| 2/2 [00:00<00:00,  4.71it/s, D_fake=0.409, D_real=0.587]
100%|██████████| 2/2 [00:00<00:00,  5.21it/s, D_fake=0.417, D_real=0.599]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s, D_fake=0.434, D_real=0.572]
100%|██████████| 2/2 [00:00<00:00,  5.3

Validation Loss for Trial 4: 37.174381256103516
Trial 5 with parameters: {'learning_rate': 0.0002, 'batch_size': 32, 'L1_LAMBDA': 100, 'LAMBDA_GP': 15, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 1e-05, 'num_epochs': 500}


100%|██████████| 1/1 [00:04<00:00,  4.88s/it, D_fake=0.227, D_real=0.525]
100%|██████████| 1/1 [00:00<00:00,  2.77it/s, D_fake=0.453, D_real=0.23]
100%|██████████| 1/1 [00:00<00:00,  2.93it/s, D_fake=0.606, D_real=0.433]
100%|██████████| 1/1 [00:00<00:00,  2.79it/s, D_fake=0.565, D_real=0.588]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s, D_fake=0.41, D_real=0.56]
100%|██████████| 1/1 [00:00<00:00,  2.78it/s, D_fake=0.524, D_real=0.472]
100%|██████████| 1/1 [00:00<00:00,  2.65it/s, D_fake=0.453, D_real=0.542]
100%|██████████| 1/1 [00:00<00:00,  2.74it/s, D_fake=0.512, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.83it/s, D_fake=0.45, D_real=0.522]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s, D_fake=0.484, D_real=0.517]
100%|██████████| 1/1 [00:00<00:00,  2.80it/s, D_fake=0.46, D_real=0.485]
100%|██████████| 1/1 [00:00<00:00,  2.85it/s, D_fake=0.485, D_real=0.526]
100%|██████████| 1/1 [00:00<00:00,  2.83it/s, D_fake=0.476, D_real=0.527]
100%|██████████| 1/1 [00:00<00:00,  2.87it/

Validation Loss for Trial 5: 43.1692008972168
Trial 6 with parameters: {'learning_rate': 1e-05, 'batch_size': 8, 'L1_LAMBDA': 150, 'LAMBDA_GP': 5, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 3/3 [00:03<00:00,  1.01s/it, D_fake=0.496, D_real=0.496]
100%|██████████| 3/3 [00:00<00:00,  7.61it/s, D_fake=0.495, D_real=0.504]
100%|██████████| 3/3 [00:00<00:00,  7.29it/s, D_fake=0.493, D_real=0.514]
100%|██████████| 3/3 [00:00<00:00,  7.40it/s, D_fake=0.491, D_real=0.523]
100%|██████████| 3/3 [00:00<00:00,  7.41it/s, D_fake=0.488, D_real=0.522]
100%|██████████| 3/3 [00:00<00:00,  7.48it/s, D_fake=0.482, D_real=0.529]
100%|██████████| 3/3 [00:00<00:00,  7.46it/s, D_fake=0.476, D_real=0.525]
100%|██████████| 3/3 [00:00<00:00,  7.20it/s, D_fake=0.474, D_real=0.534]
100%|██████████| 3/3 [00:00<00:00,  7.91it/s, D_fake=0.471, D_real=0.54]
100%|██████████| 3/3 [00:00<00:00,  7.67it/s, D_fake=0.468, D_real=0.536]
100%|██████████| 3/3 [00:00<00:00,  7.92it/s, D_fake=0.465, D_real=0.552]
100%|██████████| 3/3 [00:00<00:00,  7.88it/s, D_fake=0.467, D_real=0.552]
100%|██████████| 3/3 [00:00<00:00,  7.87it/s, D_fake=0.463, D_real=0.55]
100%|██████████| 3/3 [00:00<00:00,  7.92

Validation Loss for Trial 6: 37.234703063964844
Trial 7 with parameters: {'learning_rate': 1e-05, 'batch_size': 8, 'L1_LAMBDA': 50, 'LAMBDA_GP': 5, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 3/3 [00:00<00:00,  7.82it/s, D_fake=0.538, D_real=0.546]
100%|██████████| 3/3 [00:00<00:00,  7.87it/s, D_fake=0.519, D_real=0.536]
100%|██████████| 3/3 [00:00<00:00,  7.60it/s, D_fake=0.5, D_real=0.53]
100%|██████████| 3/3 [00:00<00:00,  7.80it/s, D_fake=0.488, D_real=0.518]
100%|██████████| 3/3 [00:00<00:00,  7.66it/s, D_fake=0.479, D_real=0.517]
100%|██████████| 3/3 [00:00<00:00,  7.41it/s, D_fake=0.473, D_real=0.507]
100%|██████████| 3/3 [00:00<00:00,  7.65it/s, D_fake=0.469, D_real=0.516]
100%|██████████| 3/3 [00:00<00:00,  7.41it/s, D_fake=0.469, D_real=0.525]
100%|██████████| 3/3 [00:00<00:00,  7.24it/s, D_fake=0.469, D_real=0.536]
100%|██████████| 3/3 [00:00<00:00,  7.60it/s, D_fake=0.468, D_real=0.535]
100%|██████████| 3/3 [00:00<00:00,  7.36it/s, D_fake=0.472, D_real=0.531]
100%|██████████| 3/3 [00:00<00:00,  7.24it/s, D_fake=0.467, D_real=0.55]
100%|██████████| 3/3 [00:00<00:00,  7.46it/s, D_fake=0.466, D_real=0.547]
100%|██████████| 3/3 [00:00<00:00,  7.46it

Validation Loss for Trial 7: 37.22087860107422
Trial 8 with parameters: {'learning_rate': 0.001, 'batch_size': 16, 'L1_LAMBDA': 150, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.677, D_real=0.437]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.619, D_real=0.587]
100%|██████████| 2/2 [00:00<00:00,  5.53it/s, D_fake=0.377, D_real=0.379]
100%|██████████| 2/2 [00:00<00:00,  5.68it/s, D_fake=0.514, D_real=0.54]
100%|██████████| 2/2 [00:00<00:00,  5.69it/s, D_fake=0.448, D_real=0.554]
100%|██████████| 2/2 [00:00<00:00,  5.69it/s, D_fake=0.489, D_real=0.51]
100%|██████████| 2/2 [00:00<00:00,  5.68it/s, D_fake=0.482, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.49, D_real=0.529]
100%|██████████| 2/2 [00:00<00:00,  5.71it/s, D_fake=0.398, D_real=0.575]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.449, D_real=0.557]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.425, D_real=0.564]
100%|██████████| 2/2 [00:00<00:00,  5.66it/s, D_fake=0.557, D_real=0.543]
100%|██████████| 2/2 [00:00<00:00,  5.54it/s, D_fake=0.561, D_real=0.562]
100%|██████████| 2/2 [00:00<00:00,  5.37i

Validation Loss for Trial 8: 26.840585708618164
Trial 9 with parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'L1_LAMBDA': 100, 'LAMBDA_GP': 5, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.11it/s, D_fake=0.477, D_real=0.471]
100%|██████████| 2/2 [00:00<00:00,  5.33it/s, D_fake=0.483, D_real=0.487]
100%|██████████| 2/2 [00:00<00:00,  5.01it/s, D_fake=0.49, D_real=0.501]
100%|██████████| 2/2 [00:00<00:00,  5.39it/s, D_fake=0.494, D_real=0.511]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.494, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  5.56it/s, D_fake=0.495, D_real=0.527]
100%|██████████| 2/2 [00:00<00:00,  5.58it/s, D_fake=0.493, D_real=0.528]
100%|██████████| 2/2 [00:00<00:00,  5.51it/s, D_fake=0.489, D_real=0.534]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s, D_fake=0.487, D_real=0.53]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.481, D_real=0.531]
100%|██████████| 2/2 [00:00<00:00,  5.51it/s, D_fake=0.478, D_real=0.534]
100%|██████████| 2/2 [00:00<00:00,  5.57it/s, D_fake=0.476, D_real=0.534]
100%|██████████| 2/2 [00:00<00:00,  5.56it/s, D_fake=0.473, D_real=0.536]
100%|██████████| 2/2 [00:00<00:00,  5.44

Validation Loss for Trial 9: 38.24538040161133
Trial 10 with parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'L1_LAMBDA': 150, 'LAMBDA_GP': 5, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.5, D_real=0.448]
100%|██████████| 2/2 [00:00<00:00,  5.74it/s, D_fake=0.489, D_real=0.539]
100%|██████████| 2/2 [00:00<00:00,  5.59it/s, D_fake=0.483, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.48, D_real=0.527]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.475, D_real=0.53]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.47, D_real=0.535]
100%|██████████| 2/2 [00:00<00:00,  5.62it/s, D_fake=0.466, D_real=0.54]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.462, D_real=0.542]
100%|██████████| 2/2 [00:00<00:00,  5.71it/s, D_fake=0.459, D_real=0.545]
100%|██████████| 2/2 [00:00<00:00,  5.59it/s, D_fake=0.452, D_real=0.55]
100%|██████████| 2/2 [00:00<00:00,  5.63it/s, D_fake=0.449, D_real=0.556]
100%|██████████| 2/2 [00:00<00:00,  5.72it/s, D_fake=0.45, D_real=0.555]
100%|██████████| 2/2 [00:00<00:00,  5.64it/s, D_fake=0.444, D_real=0.559]
100%|██████████| 2/2 [00:00<00:00,  5.67it/s, 

Validation Loss for Trial 10: 39.735015869140625
Trial 11 with parameters: {'learning_rate': 0.001, 'batch_size': 8, 'L1_LAMBDA': 50, 'LAMBDA_GP': 10, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 300}


100%|██████████| 3/3 [00:00<00:00,  8.08it/s, D_fake=4.17e-7, D_real=0.537]
100%|██████████| 3/3 [00:00<00:00,  8.12it/s, D_fake=0.572, D_real=0.386]
100%|██████████| 3/3 [00:00<00:00,  8.14it/s, D_fake=0.419, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  8.15it/s, D_fake=0.565, D_real=0.549]
100%|██████████| 3/3 [00:00<00:00,  8.13it/s, D_fake=0.488, D_real=0.483]
100%|██████████| 3/3 [00:00<00:00,  8.25it/s, D_fake=0.624, D_real=0.393]
100%|██████████| 3/3 [00:00<00:00,  8.09it/s, D_fake=0.375, D_real=0.664]
100%|██████████| 3/3 [00:00<00:00,  8.15it/s, D_fake=0.608, D_real=0.445]
100%|██████████| 3/3 [00:00<00:00,  8.15it/s, D_fake=0.386, D_real=0.596]
100%|██████████| 3/3 [00:00<00:00,  8.20it/s, D_fake=0.473, D_real=0.508]
100%|██████████| 3/3 [00:00<00:00,  8.16it/s, D_fake=0.536, D_real=0.464]
100%|██████████| 3/3 [00:00<00:00,  8.12it/s, D_fake=0.347, D_real=0.623]
100%|██████████| 3/3 [00:00<00:00,  7.95it/s, D_fake=0.708, D_real=0.313]
100%|██████████| 3/3 [00:00<00:00,  

Validation Loss for Trial 11: 37.796566009521484
Trial 12 with parameters: {'learning_rate': 0.0002, 'batch_size': 8, 'L1_LAMBDA': 100, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 3/3 [00:00<00:00,  7.68it/s, D_fake=0.551, D_real=0.486]
100%|██████████| 3/3 [00:00<00:00,  7.47it/s, D_fake=0.458, D_real=0.526]
100%|██████████| 3/3 [00:00<00:00,  7.17it/s, D_fake=0.45, D_real=0.506]
100%|██████████| 3/3 [00:00<00:00,  7.52it/s, D_fake=0.471, D_real=0.565]
100%|██████████| 3/3 [00:00<00:00,  7.62it/s, D_fake=0.422, D_real=0.577]
100%|██████████| 3/3 [00:00<00:00,  7.40it/s, D_fake=0.383, D_real=0.59]
100%|██████████| 3/3 [00:00<00:00,  7.34it/s, D_fake=0.388, D_real=0.608]
100%|██████████| 3/3 [00:00<00:00,  7.33it/s, D_fake=0.34, D_real=0.639]
100%|██████████| 3/3 [00:00<00:00,  7.51it/s, D_fake=0.341, D_real=0.635]
100%|██████████| 3/3 [00:00<00:00,  7.75it/s, D_fake=0.333, D_real=0.652]
100%|██████████| 3/3 [00:00<00:00,  7.86it/s, D_fake=0.3, D_real=0.684]
100%|██████████| 3/3 [00:00<00:00,  7.92it/s, D_fake=0.27, D_real=0.714]
100%|██████████| 3/3 [00:00<00:00,  7.83it/s, D_fake=0.273, D_real=0.741]
100%|██████████| 3/3 [00:00<00:00,  8.00it/s

Validation Loss for Trial 12: 40.04781723022461
Trial 13 with parameters: {'learning_rate': 0.0001, 'batch_size': 16, 'L1_LAMBDA': 150, 'LAMBDA_GP': 5, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.18it/s, D_fake=0.489, D_real=0.562]
100%|██████████| 2/2 [00:00<00:00,  5.08it/s, D_fake=0.454, D_real=0.498]
100%|██████████| 2/2 [00:00<00:00,  5.24it/s, D_fake=0.456, D_real=0.503]
100%|██████████| 2/2 [00:00<00:00,  5.02it/s, D_fake=0.472, D_real=0.531]
100%|██████████| 2/2 [00:00<00:00,  5.31it/s, D_fake=0.485, D_real=0.568]
100%|██████████| 2/2 [00:00<00:00,  5.69it/s, D_fake=0.472, D_real=0.589]
100%|██████████| 2/2 [00:00<00:00,  5.58it/s, D_fake=0.447, D_real=0.579]
100%|██████████| 2/2 [00:00<00:00,  5.59it/s, D_fake=0.422, D_real=0.567]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.397, D_real=0.575]
100%|██████████| 2/2 [00:00<00:00,  5.67it/s, D_fake=0.396, D_real=0.579]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.395, D_real=0.607]
100%|██████████| 2/2 [00:00<00:00,  5.57it/s, D_fake=0.384, D_real=0.627]
100%|██████████| 2/2 [00:00<00:00,  5.50it/s, D_fake=0.376, D_real=0.633]
100%|██████████| 2/2 [00:00<00:00,  5.

Validation Loss for Trial 13: 39.347869873046875
Trial 14 with parameters: {'learning_rate': 1e-05, 'batch_size': 8, 'L1_LAMBDA': 50, 'LAMBDA_GP': 5, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 3/3 [00:00<00:00,  7.21it/s, D_fake=0.508, D_real=0.464]
100%|██████████| 3/3 [00:00<00:00,  7.19it/s, D_fake=0.488, D_real=0.526]
100%|██████████| 3/3 [00:00<00:00,  7.23it/s, D_fake=0.475, D_real=0.532]
100%|██████████| 3/3 [00:00<00:00,  7.13it/s, D_fake=0.465, D_real=0.536]
100%|██████████| 3/3 [00:00<00:00,  7.23it/s, D_fake=0.464, D_real=0.533]
100%|██████████| 3/3 [00:00<00:00,  8.02it/s, D_fake=0.454, D_real=0.552]
100%|██████████| 3/3 [00:00<00:00,  8.12it/s, D_fake=0.451, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  8.08it/s, D_fake=0.45, D_real=0.548]
100%|██████████| 3/3 [00:00<00:00,  8.01it/s, D_fake=0.44, D_real=0.557]
100%|██████████| 3/3 [00:00<00:00,  7.98it/s, D_fake=0.433, D_real=0.567]
100%|██████████| 3/3 [00:00<00:00,  7.98it/s, D_fake=0.424, D_real=0.573]
100%|██████████| 3/3 [00:00<00:00,  8.07it/s, D_fake=0.415, D_real=0.58]
100%|██████████| 3/3 [00:00<00:00,  8.16it/s, D_fake=0.41, D_real=0.572]
100%|██████████| 3/3 [00:00<00:00,  8.02it

Validation Loss for Trial 14: 36.54172134399414
Trial 15 with parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'L1_LAMBDA': 100, 'LAMBDA_GP': 10, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 1e-05, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.16it/s, D_fake=0.509, D_real=0.492]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s, D_fake=0.486, D_real=0.509]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s, D_fake=0.484, D_real=0.53]
100%|██████████| 2/2 [00:00<00:00,  5.01it/s, D_fake=0.47, D_real=0.541]
100%|██████████| 2/2 [00:00<00:00,  5.11it/s, D_fake=0.467, D_real=0.542]
100%|██████████| 2/2 [00:00<00:00,  5.23it/s, D_fake=0.466, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  5.34it/s, D_fake=0.464, D_real=0.55]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.458, D_real=0.553]
100%|██████████| 2/2 [00:00<00:00,  5.46it/s, D_fake=0.455, D_real=0.557]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.453, D_real=0.562]
100%|██████████| 2/2 [00:00<00:00,  5.61it/s, D_fake=0.448, D_real=0.558]
100%|██████████| 2/2 [00:00<00:00,  5.62it/s, D_fake=0.446, D_real=0.561]
100%|██████████| 2/2 [00:00<00:00,  5.65it/s, D_fake=0.438, D_real=0.564]
100%|██████████| 2/2 [00:00<00:00,  5.64i

Validation Loss for Trial 15: 36.733707427978516
Trial 16 with parameters: {'learning_rate': 0.0001, 'batch_size': 32, 'L1_LAMBDA': 100, 'LAMBDA_GP': 5, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 1/1 [00:00<00:00,  2.91it/s, D_fake=0.484, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.69it/s, D_fake=0.489, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s, D_fake=0.502, D_real=0.527]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s, D_fake=0.5, D_real=0.542]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s, D_fake=0.49, D_real=0.548]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s, D_fake=0.479, D_real=0.543]
100%|██████████| 1/1 [00:00<00:00,  2.83it/s, D_fake=0.469, D_real=0.542]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s, D_fake=0.464, D_real=0.544]
100%|██████████| 1/1 [00:00<00:00,  2.85it/s, D_fake=0.461, D_real=0.548]
100%|██████████| 1/1 [00:00<00:00,  2.83it/s, D_fake=0.458, D_real=0.552]
100%|██████████| 1/1 [00:00<00:00,  2.85it/s, D_fake=0.456, D_real=0.556]
100%|██████████| 1/1 [00:00<00:00,  2.72it/s, D_fake=0.453, D_real=0.559]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s, D_fake=0.449, D_real=0.563]
100%|██████████| 1/1 [00:00<00:00,  2.88i

Validation Loss for Trial 16: 39.73525619506836
Trial 17 with parameters: {'learning_rate': 0.0002, 'batch_size': 32, 'L1_LAMBDA': 150, 'LAMBDA_GP': 10, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 1/1 [00:00<00:00,  2.84it/s, D_fake=0.0747, D_real=0.557]
100%|██████████| 1/1 [00:00<00:00,  2.95it/s, D_fake=0.481, D_real=0.109]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s, D_fake=0.49, D_real=0.546]
100%|██████████| 1/1 [00:00<00:00,  2.75it/s, D_fake=0.504, D_real=0.497]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s, D_fake=0.447, D_real=0.539]
100%|██████████| 1/1 [00:00<00:00,  2.67it/s, D_fake=0.416, D_real=0.448]
100%|██████████| 1/1 [00:00<00:00,  2.76it/s, D_fake=0.54, D_real=0.476]
100%|██████████| 1/1 [00:00<00:00,  2.87it/s, D_fake=0.397, D_real=0.512]
100%|██████████| 1/1 [00:00<00:00,  2.82it/s, D_fake=0.529, D_real=0.449]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s, D_fake=0.377, D_real=0.553]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s, D_fake=0.519, D_real=0.417]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s, D_fake=0.426, D_real=0.548]
100%|██████████| 1/1 [00:00<00:00,  2.80it/s, D_fake=0.48, D_real=0.467]
100%|██████████| 1/1 [00:00<00:00,  2.65

Validation Loss for Trial 17: 41.18632507324219
Trial 18 with parameters: {'learning_rate': 0.0002, 'batch_size': 8, 'L1_LAMBDA': 50, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 3/3 [00:00<00:00,  7.77it/s, D_fake=0.448, D_real=0.546]
100%|██████████| 3/3 [00:00<00:00,  7.70it/s, D_fake=0.472, D_real=0.489]
100%|██████████| 3/3 [00:00<00:00,  7.72it/s, D_fake=0.535, D_real=0.578]
100%|██████████| 3/3 [00:00<00:00,  7.70it/s, D_fake=0.45, D_real=0.581]
100%|██████████| 3/3 [00:00<00:00,  7.89it/s, D_fake=0.38, D_real=0.519]
100%|██████████| 3/3 [00:00<00:00,  7.87it/s, D_fake=0.388, D_real=0.584]
100%|██████████| 3/3 [00:00<00:00,  7.62it/s, D_fake=0.392, D_real=0.64]
100%|██████████| 3/3 [00:00<00:00,  7.92it/s, D_fake=0.332, D_real=0.667]
100%|██████████| 3/3 [00:00<00:00,  7.81it/s, D_fake=0.325, D_real=0.646]
100%|██████████| 3/3 [00:00<00:00,  7.28it/s, D_fake=0.283, D_real=0.688]
100%|██████████| 3/3 [00:00<00:00,  7.45it/s, D_fake=0.331, D_real=0.696]
100%|██████████| 3/3 [00:00<00:00,  7.73it/s, D_fake=0.247, D_real=0.681]
100%|██████████| 3/3 [00:00<00:00,  7.26it/s, D_fake=0.265, D_real=0.728]
100%|██████████| 3/3 [00:00<00:00,  7.66i

Validation Loss for Trial 18: 38.9878044128418
Trial 19 with parameters: {'learning_rate': 0.0002, 'batch_size': 16, 'L1_LAMBDA': 100, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  5.44it/s, D_fake=0.465, D_real=0.594]
100%|██████████| 2/2 [00:00<00:00,  5.45it/s, D_fake=0.409, D_real=0.458]
100%|██████████| 2/2 [00:00<00:00,  5.52it/s, D_fake=0.447, D_real=0.464]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.515, D_real=0.532]
100%|██████████| 2/2 [00:00<00:00,  5.45it/s, D_fake=0.524, D_real=0.598]
100%|██████████| 2/2 [00:00<00:00,  5.49it/s, D_fake=0.477, D_real=0.597]
100%|██████████| 2/2 [00:00<00:00,  5.38it/s, D_fake=0.432, D_real=0.567]
100%|██████████| 2/2 [00:00<00:00,  5.23it/s, D_fake=0.395, D_real=0.541]
100%|██████████| 2/2 [00:00<00:00,  5.28it/s, D_fake=0.389, D_real=0.546]
100%|██████████| 2/2 [00:00<00:00,  5.09it/s, D_fake=0.402, D_real=0.569]
100%|██████████| 2/2 [00:00<00:00,  5.25it/s, D_fake=0.406, D_real=0.601]
100%|██████████| 2/2 [00:00<00:00,  5.26it/s, D_fake=0.39, D_real=0.629]
100%|██████████| 2/2 [00:00<00:00,  4.99it/s, D_fake=0.367, D_real=0.629]
100%|██████████| 2/2 [00:00<00:00,  5.1

Validation Loss for Trial 19: 34.73692321777344
Trial 20 with parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'L1_LAMBDA': 50, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 2/2 [00:00<00:00,  5.53it/s, D_fake=0.513, D_real=0.518]
100%|██████████| 2/2 [00:00<00:00,  5.41it/s, D_fake=0.504, D_real=0.517]
100%|██████████| 2/2 [00:00<00:00,  5.36it/s, D_fake=0.497, D_real=0.516]
100%|██████████| 2/2 [00:00<00:00,  5.16it/s, D_fake=0.493, D_real=0.518]
100%|██████████| 2/2 [00:00<00:00,  5.15it/s, D_fake=0.491, D_real=0.521]
100%|██████████| 2/2 [00:00<00:00,  4.88it/s, D_fake=0.489, D_real=0.525]
100%|██████████| 2/2 [00:00<00:00,  5.03it/s, D_fake=0.487, D_real=0.528]
100%|██████████| 2/2 [00:00<00:00,  5.16it/s, D_fake=0.485, D_real=0.532]
100%|██████████| 2/2 [00:00<00:00,  5.37it/s, D_fake=0.483, D_real=0.53]
100%|██████████| 2/2 [00:00<00:00,  5.18it/s, D_fake=0.478, D_real=0.531]
100%|██████████| 2/2 [00:00<00:00,  5.35it/s, D_fake=0.476, D_real=0.535]
100%|██████████| 2/2 [00:00<00:00,  5.57it/s, D_fake=0.473, D_real=0.539]
100%|██████████| 2/2 [00:00<00:00,  5.56it/s, D_fake=0.471, D_real=0.546]
100%|██████████| 2/2 [00:00<00:00,  5.5

Validation Loss for Trial 20: 29.038705825805664
Best parameters: {'learning_rate': 0.001, 'batch_size': 16, 'L1_LAMBDA': 150, 'LAMBDA_GP': 10, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}, Best validation loss: 26.840585708618164





Best parameters:

 {'learning_rate': 0.001, \\
  'batch_size': 16, \\
  'L1_LAMBDA': 150, \\
  'LAMBDA_GP': 10, \\
  'optimizer': 'Adam', \\
  'betas': (0.5, 0.999), \\
  'weight_decay': 0, \\
  'num_epochs': 500}

Best validation loss: 26.840585708618164
