# Hyperparameter Tuning

These are the hyperparameters we tune to get the best results from the model:

1) Learning Rate \\
2) Batch Size \\
3) L1 lambda \\
4) Optimizer \\
5) Weight Decay \\
6) Epochs

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from torchvision.datasets import ImageFolder
import os
from PIL import Image
from tqdm import tqdm
from torchvision.utils import save_image
# local module imports
import config
from utils import save_checkpoint, load_checkpoint, save_some_examples
from UvU_Net_Generator import OuterUNet as generator
from UvU_Discriminator import Discriminator

torch.backends.cudnn.benchmark = True

  check_for_updates()


torch.Size([1, 3, 512, 512])


In [None]:
def compute_gradient_penalty(disc, real_samples, fake_samples, device, lambda_gp):
    """Return the (unscaled) gradient penalty of ``disc`` on mixed samples.

    A random per-sample convex combination of ``real_samples`` and
    ``fake_samples`` is built, the discriminator is evaluated on it, and the
    penalty ``mean((||grad||_2 - 1) ** 2)`` is computed from the gradient of
    the scores with respect to the mixed samples.

    Args:
        disc: Callable taking two tensors and returning scores. The mixed
            samples are passed for BOTH arguments, as in the original code.
        real_samples: Tensor whose first dimension is the batch dimension.
        fake_samples: Tensor broadcast-compatible with ``real_samples``.
        device: Device on which the mixing coefficients are created; it must
            match the device of the sample tensors.
        lambda_gp: Accepted for interface compatibility only. It is NOT
            applied here -- the caller is expected to multiply the returned
            value by its own penalty weight.

    Returns:
        Scalar tensor holding the unscaled gradient penalty.
    """
    batch_size = real_samples.shape[0]

    # One mixing coefficient per sample, broadcast over every remaining
    # dimension (works for any rank, not just 4-D image batches).
    epsilon = torch.rand(
        (batch_size,) + (1,) * (real_samples.dim() - 1), device=device
    )

    # Interpolate between real and fake samples. The gradient is taken with
    # respect to this tensor, so it (not epsilon) is what must require grad.
    interpolated = epsilon * real_samples + (1 - epsilon) * fake_samples
    interpolated.requires_grad_(True)

    # Discriminator scores on the mixed samples (passed as both inputs).
    mixed_scores = disc(interpolated, interpolated)

    # d(scores)/d(interpolated); create_graph=True keeps the result
    # differentiable so the penalty can be back-propagated through.
    gradients = torch.autograd.grad(
        inputs=interpolated,
        outputs=mixed_scores,
        grad_outputs=torch.ones_like(mixed_scores),
        create_graph=True,
        retain_graph=True,
    )[0]

    # Flatten per sample; reshape (unlike view) also handles gradients that
    # come back non-contiguous.
    gradients = gradients.reshape(batch_size, -1)

    # Per-sample L2 norm of the gradient.
    gradient_norm = gradients.norm(2, dim=1)

    # Penalise deviation of the norm from 1, averaged over the batch.
    return torch.mean((gradient_norm - 1) ** 2)

def train_fn(
    disc, gen, loader, opt_disc, opt_gen, l1_loss, bce, g_scaler, d_scaler, L1_LAMBDA=config.L1_LAMBDA, lambda_gp=10
):
    """Run one pass over ``loader``, alternating discriminator/generator steps.

    Args:
        disc: Discriminator, called as ``disc(x, y)``.
        gen: Generator, called as ``gen(x)``.
        loader: Iterable yielding ``(x, y)`` batches.
        opt_disc: Optimizer stepping the discriminator.
        opt_gen: Optimizer stepping the generator.
        l1_loss: Reconstruction loss applied to ``(y_fake, y)``.
        bce: Adversarial loss applied to discriminator outputs; the outputs
            are also passed through ``sigmoid`` for logging, i.e. they are
            treated as logits.
        g_scaler: Gradient scaler used for the generator step.
        d_scaler: Gradient scaler used for the discriminator step.
        L1_LAMBDA: Weight of the L1 term in the generator loss.
        lambda_gp: Weight of the gradient penalty in the discriminator loss.

    Returns:
        None. The models and optimizers are updated in place.
    """
    loop = tqdm(loader, leave=True)

    for idx, (x, y) in enumerate(loop):
        x = x.to(config.DEVICE)
        y = y.to(config.DEVICE)

        # Train Discriminator
        # torch.autocast(device_type="cuda") is the non-deprecated spelling of
        # torch.cuda.amp.autocast() and behaves the same.
        with torch.autocast(device_type="cuda"):
            y_fake = gen(x)
            D_real = disc(x, y)
            D_real_loss = bce(D_real, torch.ones_like(D_real))
            # detach(): the discriminator step must not back-propagate into gen.
            D_fake = disc(x, y_fake.detach())
            D_fake_loss = bce(D_fake, torch.zeros_like(D_fake))
            # The helper returns the unscaled penalty; it is weighted here.
            gradient_penalty = compute_gradient_penalty(disc, y, y_fake.detach(), config.DEVICE, lambda_gp)
            D_loss = ((D_real_loss + D_fake_loss) / 2) + (lambda_gp * gradient_penalty)

        # Also clears discriminator gradients accumulated by the previous
        # generator step, which back-propagates through disc.
        disc.zero_grad()
        d_scaler.scale(D_loss).backward()
        d_scaler.step(opt_disc)
        d_scaler.update()

        # Train generator
        with torch.autocast(device_type="cuda"):
            # Re-score the fake batch with the freshly updated discriminator,
            # this time keeping the graph back to the generator.
            D_fake = disc(x, y_fake)
            G_fake_loss = bce(D_fake, torch.ones_like(D_fake))
            L1 = l1_loss(y_fake, y) * L1_LAMBDA
            G_loss = G_fake_loss + L1

        opt_gen.zero_grad()
        g_scaler.scale(G_loss).backward()
        g_scaler.step(opt_gen)
        g_scaler.update()

        # Refresh the progress-bar statistics every 10th batch. D_real comes
        # from the discriminator step, D_fake from the generator step.
        if idx % 10 == 0:
            loop.set_postfix(
                D_real=torch.sigmoid(D_real).mean().item(),
                D_fake=torch.sigmoid(D_fake).mean().item(),
            )

In [None]:
!unzip -q Sample_dataset.zip

In [None]:
class PairedImageDataset(Dataset):
    """Dataset of (input, target) image pairs stored in two directories.

    Pairs are formed by position after sorting the entries of each directory,
    so the two directories are expected to contain matching file names
    (only the entry counts are checked, not the names themselves).

    Args:
        input_dir: Directory holding the input images.
        target_dir: Directory holding the target images.
        transform: Optional callable applied separately to the input image
            and to the target image.
    """

    def __init__(self, input_dir, target_dir, transform=None):
        self.input_dir = input_dir
        self.target_dir = target_dir
        # Sorted listings so that index i refers to the same pair on every run.
        self.input_images = sorted(os.listdir(input_dir))
        self.target_images = sorted(os.listdir(target_dir))
        self.transform = transform

        assert len(self.input_images) == len(self.target_images), "Mismatch between input and target images!"

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.input_images)

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return an RGB copy, closing the file promptly."""
        # The context manager releases the file handle instead of leaving it
        # to the garbage collector; convert() returns an independent image.
        with Image.open(path) as image:
            return image.convert("RGB")

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(input_image, target_image)`` pair."""
        input_image_path = os.path.join(self.input_dir, self.input_images[idx])
        target_image_path = os.path.join(self.target_dir, self.target_images[idx])

        input_image = self._load_rgb(input_image_path)
        target_image = self._load_rgb(target_image_path)

        if self.transform:
            # NOTE: the transform runs once per image, so a randomised
            # transform would not be synchronised between input and target.
            input_image = self.transform(input_image)
            target_image = self.transform(target_image)

        return input_image, target_image


# Locations of the paired sample data (judging by the folder names, the
# Sobel-filtered images are the model inputs and the original images the
# targets).
input_dir = '/content/Sample_dataset/sobel_images'
target_dir = '/content/Sample_dataset/input_images'

# Resize every image to 256x256 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

dataset = PairedImageDataset(input_dir=input_dir, target_dir=target_dir, transform=transform)

# Split dataset into training and validation (80-20 split)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoader for training and validation
batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation data is not shuffled: the order is irrelevant for evaluation and
# a fixed order keeps periodically saved samples comparable across epochs.
# This also matches the validation loader built during the hyperparameter
# search.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Example to iterate over the data loader
for batch_idx, (input_images, target_images) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1}")
    print(f"Input batch size: {input_images.shape}")
    print(f"Target batch size: {target_images.shape}")


Batch 1
Input batch size: torch.Size([16, 3, 256, 256])
Target batch size: torch.Size([16, 3, 256, 256])
Batch 2
Input batch size: torch.Size([4, 3, 256, 256])
Target batch size: torch.Size([4, 3, 256, 256])


In [None]:
# Mount Google Drive under /content/drive (Colab only) so that files written
# below that path are stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Directory on the mounted Drive that receives the model checkpoints.
save_dir = '/content/drive/MyDrive/model_checkpoints/'  # Change this to your desired directory
os.makedirs(save_dir, exist_ok=True)

# Local directory for evaluation samples. exist_ok=True creates it only when
# missing, without the check-then-create race of a separate exists() test.
directory = '/content/evaluation/'
os.makedirs(directory, exist_ok=True)

def main():
    """Train the generator/discriminator pair and checkpoint periodically.

    Builds both models and their Adam optimizers from ``config``, optionally
    restores them from ``config.CHECKPOINT_GEN`` / ``config.CHECKPOINT_DISC``,
    then trains for ``config.NUM_EPOCHS`` epochs on the module-level
    ``train_loader``. After every 50th epoch the model and optimizer states
    are written to ``save_dir`` and sample outputs for ``val_loader`` are
    written to ``directory``.
    """
    disc = Discriminator(in_channels=3).to(config.DEVICE)
    gen = generator(in_channels=3, features=64).to(config.DEVICE)
    opt_disc = optim.Adam(disc.parameters(), lr=config.LEARNING_RATE, betas=(0.5, 0.999))
    opt_gen = optim.Adam(gen.parameters(), lr=config.LEARNING_RATE, betas=(0.5, 0.999))
    BCE = nn.BCEWithLogitsLoss()
    L1_LOSS = nn.L1Loss()

    if config.LOAD_MODEL:
        load_checkpoint(
            config.CHECKPOINT_GEN, gen, opt_gen, config.LEARNING_RATE,
        )
        load_checkpoint(
            config.CHECKPOINT_DISC, disc, opt_disc, config.LEARNING_RATE,
        )

    # torch.amp.GradScaler("cuda") replaces the deprecated
    # torch.cuda.amp.GradScaler() constructor.
    g_scaler = torch.amp.GradScaler("cuda")
    d_scaler = torch.amp.GradScaler("cuda")

    for epoch in range(config.NUM_EPOCHS):
        train_fn(
            disc, gen, train_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler,
        )

        # Save models every 50 epochs to Google Drive
        if epoch % 50 == 49:
            print(f"Saving model at epoch {epoch}")
            # os.path.join keeps the paths valid whether or not save_dir ends
            # with a path separator.
            torch.save({
                'epoch': epoch,
                'generator_state_dict': gen.state_dict(),
                'optimizer_gen_state_dict': opt_gen.state_dict(),
            }, os.path.join(save_dir, f"generator_epoch_{epoch}.pth"))

            torch.save({
                'epoch': epoch,
                'discriminator_state_dict': disc.state_dict(),
                'optimizer_disc_state_dict': opt_disc.state_dict(),
            }, os.path.join(save_dir, f"discriminator_epoch_{epoch}.pth"))

            # Write samples into the evaluation directory created at module
            # level rather than a relative "evaluation" folder, which only
            # resolves to the same place when the working directory is
            # /content.
            save_some_examples(gen, val_loader, epoch, folder=directory)


if __name__ == "__main__":
    main()


  g_scaler = torch.cuda.amp.GradScaler()
  d_scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
100%|██████████| 2/2 [00:07<00:00,  3.72s/it, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.17it/s, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.28it/s, D_fake=0.522, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.26it/s, D_fake=0.525, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.30it/s, D_fake=0.526, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.21it/s, D_fake=0.529, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.27it/s, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.35it/s, D_fake=0.529, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.33it/s, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.31it/s, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.35it/s, D_fake=0.53, D_real=0.526]
100%|██████████| 2/2 [00:00<00:00,  4.32it

Saving model at epoch 49


100%|██████████| 2/2 [00:00<00:00,  4.00it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.18it/s, D_fake=0.512, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.27it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.39it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.21it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.10it/s, D_fake=0.511, D_real=0.511]
100%|██████████| 2/2 [00:00<00:00,  4.25it/s, D_fake=0.511, D_real=0.511]
100%|██████████| 2/2 [00:00<00:00,  4.38it/s, D_fake=0.512, D_real=0.511]
100%|██████████| 2/2 [00:00<00:00,  4.24it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.15it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.33it/s, D_fake=0.511, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.29it/s, D_fake=0.511, D_real=0.511]
100%|██████████| 2/2 [00:00<00:00,  4.28it/s, D_fake=0.512, D_real=0.512]
100%|██████████| 2/2 [00:00<00:00,  4.

KeyboardInterrupt: 

In [None]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.3-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.2/233.2 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [None]:
import numpy as np
import random
import itertools
import torch

def calculate_validation_loss(disc, gen, val_loader, L1_LOSS, BCE, device, L1_LAMBDA):
    """Return the mean generator loss over every sample in ``val_loader``.

    For each batch the loss is ``BCE(disc(x, gen(x)), ones) +
    L1_LAMBDA * L1_LOSS(gen(x), y)``. Batch losses are weighted by batch size,
    so a shorter final batch does not skew the result and values stay
    comparable between runs that use different batch sizes (this assumes both
    loss functions use mean reduction -- confirm if custom losses are passed).

    Both networks are put in eval mode for the duration of the call and are
    returned to whatever mode they were in before, even if an error occurs.

    Args:
        disc: Discriminator module, called as ``disc(x, y_fake)``.
        gen: Generator module, called as ``gen(x)``.
        val_loader: Iterable yielding ``(x, y)`` tensor batches.
        L1_LOSS: Reconstruction loss applied to ``(y_fake, y)``.
        BCE: Adversarial loss applied to the discriminator output.
        device: Device the batches are moved to.
        L1_LAMBDA: Weight of the L1 term.

    Returns:
        The sample-weighted average loss as a Python float.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    gen_was_training = gen.training
    disc_was_training = disc.training
    # Evaluate both networks in eval mode; leaving the discriminator in train
    # mode would let mode-dependent layers behave as during training.
    gen.eval()
    disc.eval()

    total_val_loss = 0.0
    total_samples = 0

    try:
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)

                y_fake = gen(x)

                D_fake = disc(x, y_fake)
                G_fake_loss = BCE(D_fake, torch.ones_like(D_fake))

                L1_loss = L1_LOSS(y_fake, y) * L1_LAMBDA

                val_loss = G_fake_loss + L1_loss

                # Weight each batch by its size to get a per-sample mean.
                n_samples = x.size(0)
                total_val_loss += val_loss.item() * n_samples
                total_samples += n_samples
    finally:
        gen.train(gen_was_training)
        disc.train(disc_was_training)

    if total_samples == 0:
        raise ValueError("val_loader yielded no samples; cannot compute a validation loss")

    return total_val_loss / total_samples

def train_and_validate(disc, gen, train_loader, val_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler, num_epochs, L1_LAMBDA, lambda_gp):
    """Train for ``num_epochs`` epochs, then return the validation loss.

    Before every epoch the generator and then the discriminator are switched
    to train mode and one full pass of ``train_fn`` is run over
    ``train_loader``. Validation happens once, after the last epoch, on
    ``config.DEVICE``.
    """
    for _ in range(num_epochs):
        # Generator first, then discriminator, mirroring the call order the
        # training pass itself uses.
        for network in (gen, disc):
            network.train()
        train_fn(
            disc, gen, train_loader, opt_disc, opt_gen,
            L1_LOSS, BCE, g_scaler, d_scaler, L1_LAMBDA, lambda_gp,
        )

    return calculate_validation_loss(
        disc, gen, val_loader, L1_LOSS, BCE, config.DEVICE, L1_LAMBDA
    )

def randomized_search(param_dist, n_trials=10):
    """Randomly sample hyperparameters and keep the best-scoring set.

    Each trial draws one value per key of ``param_dist`` with
    ``random.choice``, builds fresh models, optimizers and data loaders, trains
    with ``train_and_validate`` and records the returned validation loss.

    Args:
        param_dist: Mapping from hyperparameter name to a sequence of candidate
            values. Must provide the keys ``learning_rate``, ``batch_size``,
            ``L1_LAMBDA``, ``lambda_gp``, ``optimizer`` (``'Adam'`` or
            ``'RMSprop'``), ``betas``, ``weight_decay`` and ``num_epochs``.
            ``betas`` is only used by Adam.
        n_trials: Number of random configurations to evaluate.

    Returns:
        ``(best_params, best_val_loss)``. ``best_params`` is ``None`` and
        ``best_val_loss`` is ``inf`` if no trial produced a finite-comparable
        loss (e.g. ``n_trials == 0`` or every loss was NaN).

    Raises:
        ValueError: If a sampled optimizer name is not supported.
    """
    supported_optimizers = ('Adam', 'RMSprop')
    best_params = None
    best_val_loss = float('inf')

    for trial in range(n_trials):
        params = {key: random.choice(value) for key, value in param_dist.items()}
        print(f"Trial {trial + 1} with parameters: {params}")

        learning_rate = params['learning_rate']
        batch_size = params['batch_size']
        L1_LAMBDA = params['L1_LAMBDA']
        lambda_gp = params['lambda_gp']
        optimizer_type = params['optimizer']
        betas = params['betas']
        weight_decay = params['weight_decay']
        num_epochs = params['num_epochs']

        # Fail early with a clear message; otherwise an unknown name would
        # leave the optimizers undefined and surface later as a NameError.
        if optimizer_type not in supported_optimizers:
            raise ValueError(
                f"Unsupported optimizer {optimizer_type!r}; expected one of {supported_optimizers}"
            )

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Fresh models for every trial so that trials do not share weights.
        disc = Discriminator(in_channels=3).to(config.DEVICE)
        gen = generator(in_channels=3, features=64).to(config.DEVICE)

        if optimizer_type == 'Adam':
            opt_disc = optim.Adam(disc.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
            opt_gen = optim.Adam(gen.parameters(), lr=learning_rate, betas=betas, weight_decay=weight_decay)
        else:  # 'RMSprop' -- the sampled betas are not used by this optimizer
            opt_disc = optim.RMSprop(disc.parameters(), lr=learning_rate, weight_decay=weight_decay)
            opt_gen = optim.RMSprop(gen.parameters(), lr=learning_rate, weight_decay=weight_decay)

        BCE = nn.BCEWithLogitsLoss()
        L1_LOSS = nn.L1Loss()
        # torch.amp.GradScaler("cuda") replaces the deprecated
        # torch.cuda.amp.GradScaler() constructor.
        g_scaler = torch.amp.GradScaler("cuda")
        d_scaler = torch.amp.GradScaler("cuda")

        val_loss = train_and_validate(disc, gen, train_loader, val_loader, opt_disc, opt_gen, L1_LOSS, BCE, g_scaler, d_scaler, num_epochs, L1_LAMBDA, lambda_gp)

        print(f"Validation Loss for Trial {trial + 1}: {val_loss}")

        # A NaN loss compares False here, so a diverged trial never wins.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = params

    return best_params, best_val_loss

# Entry point for the hyperparameter search: define the candidate values for
# every tuned hyperparameter and run 20 random trials.
if __name__ == "__main__":
    # Each key maps to the list of values one trial may draw from.
    param_dist = {
        'learning_rate': [1e-5, 1e-4, 2e-4, 1e-3],
        'batch_size': [8, 16, 32],
        'L1_LAMBDA': [50, 100, 150],
        'lambda_gp': [5, 10, 15],
        'optimizer': ['Adam', 'RMSprop'],
        # Only passed to the optimizer when 'Adam' is drawn.
        'betas': [(0.5, 0.999), (0.9, 0.999)],
        'weight_decay': [0, 1e-5, 1e-4],
        'num_epochs': [300, 500]
    }

    best_params, best_val_loss = randomized_search(param_dist, n_trials=20)
    print(f"Best parameters: {best_params}, Best validation loss: {best_val_loss}")


Trial 1 with parameters: {'learning_rate': 0.001, 'batch_size': 8, 'L1_LAMBDA': 150, 'lambda_gp': 5, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0.0001, 'num_epochs': 500}


  g_scaler = torch.cuda.amp.GradScaler()
  d_scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
  with torch.cuda.amp.autocast():
100%|██████████| 3/3 [00:00<00:00,  5.60it/s, D_fake=0.544, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  6.40it/s, D_fake=0.548, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  6.09it/s, D_fake=0.542, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  6.08it/s, D_fake=0.542, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  6.17it/s, D_fake=0.543, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  6.12it/s, D_fake=0.543, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  6.14it/s, D_fake=0.542, D_real=0.541]
100%|██████████| 3/3 [00:00<00:00,  6.18it/s, D_fake=0.541, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  6.28it/s, D_fake=0.54, D_real=0.543]
100%|██████████| 3/3 [00:00<00:00,  6.48it/s, D_fake=0.542, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  6.21it/s, D_fake=0.542, D_real=0.542]
100%|██████████| 3/3 [00:00<00:00,  5

Validation Loss for Trial 1: 38.213714599609375
Trial 2 with parameters: {'learning_rate': 0.0002, 'batch_size': 8, 'L1_LAMBDA': 100, 'lambda_gp': 15, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 1e-05, 'num_epochs': 300}


100%|██████████| 3/3 [00:00<00:00,  6.03it/s, D_fake=0.593, D_real=0.595]
100%|██████████| 3/3 [00:00<00:00,  5.91it/s, D_fake=0.592, D_real=0.593]
100%|██████████| 3/3 [00:00<00:00,  5.72it/s, D_fake=0.592, D_real=0.593]
100%|██████████| 3/3 [00:00<00:00,  5.84it/s, D_fake=0.592, D_real=0.594]
100%|██████████| 3/3 [00:00<00:00,  5.82it/s, D_fake=0.59, D_real=0.593]
100%|██████████| 3/3 [00:00<00:00,  5.82it/s, D_fake=0.593, D_real=0.592]
100%|██████████| 3/3 [00:00<00:00,  5.61it/s, D_fake=0.594, D_real=0.592]
100%|██████████| 3/3 [00:00<00:00,  5.80it/s, D_fake=0.593, D_real=0.592]
100%|██████████| 3/3 [00:00<00:00,  6.08it/s, D_fake=0.595, D_real=0.593]
100%|██████████| 3/3 [00:00<00:00,  6.17it/s, D_fake=0.595, D_real=0.592]
100%|██████████| 3/3 [00:00<00:00,  6.20it/s, D_fake=0.595, D_real=0.593]
100%|██████████| 3/3 [00:00<00:00,  6.22it/s, D_fake=0.599, D_real=0.594]
100%|██████████| 3/3 [00:00<00:00,  6.16it/s, D_fake=0.598, D_real=0.594]
100%|██████████| 3/3 [00:00<00:00,  6.2

Validation Loss for Trial 2: 28.092844009399414
Trial 3 with parameters: {'learning_rate': 0.0002, 'batch_size': 8, 'L1_LAMBDA': 50, 'lambda_gp': 10, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 0.0001, 'num_epochs': 300}


100%|██████████| 3/3 [00:00<00:00,  5.96it/s, D_fake=0.469, D_real=0.472]
100%|██████████| 3/3 [00:00<00:00,  5.84it/s, D_fake=0.473, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  5.73it/s, D_fake=0.475, D_real=0.47]
100%|██████████| 3/3 [00:00<00:00,  5.67it/s, D_fake=0.478, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  5.66it/s, D_fake=0.478, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  5.73it/s, D_fake=0.477, D_real=0.47]
100%|██████████| 3/3 [00:00<00:00,  5.77it/s, D_fake=0.478, D_real=0.469]
100%|██████████| 3/3 [00:00<00:00,  5.78it/s, D_fake=0.478, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  6.07it/s, D_fake=0.479, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  6.02it/s, D_fake=0.475, D_real=0.47]
100%|██████████| 3/3 [00:00<00:00,  6.06it/s, D_fake=0.477, D_real=0.469]
100%|██████████| 3/3 [00:00<00:00,  6.04it/s, D_fake=0.479, D_real=0.47]
100%|██████████| 3/3 [00:00<00:00,  6.10it/s, D_fake=0.478, D_real=0.471]
100%|██████████| 3/3 [00:00<00:00,  6.07it

Validation Loss for Trial 3: nan
Trial 4 with parameters: {'learning_rate': 0.001, 'batch_size': 32, 'L1_LAMBDA': 150, 'lambda_gp': 15, 'optimizer': 'RMSprop', 'betas': (0.5, 0.999), 'weight_decay': 1e-05, 'num_epochs': 500}


100%|██████████| 1/1 [00:00<00:00,  2.25it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s, D_fake=0.466, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.16it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.21it/s, D_fake=0.461, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.23it/s, D_fake=0.457, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s, D_fake=0.457, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.24it/s, D_fake=0.457, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.23it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.19it/s, D_fake=0.457, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.23it/s, D_fake=0.459, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s, D_fake=0.458, D_real=0.459]
100%|██████████| 1/1 [00:00<00:00,  2.

Validation Loss for Trial 4: 34.68075942993164
Trial 5 with parameters: {'learning_rate': 0.001, 'batch_size': 32, 'L1_LAMBDA': 50, 'lambda_gp': 15, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 1e-05, 'num_epochs': 500}


100%|██████████| 1/1 [00:00<00:00,  2.11it/s, D_fake=0.507, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.15it/s, D_fake=0.509, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s, D_fake=0.509, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s, D_fake=0.506, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s, D_fake=0.506, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s, D_fake=0.506, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s, D_fake=0.507, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s, D_fake=0.507, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s, D_fake=0.508, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s, D_fake=0.508, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.19it/s, D_fake=0.509, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.19it/s, D_fake=0.509, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.05it/s, D_fake=0.51, D_real=0.506]
100%|██████████| 1/1 [00:00<00:00,  2.0

Validation Loss for Trial 5: 13.27087116241455
Trial 6 with parameters: {'learning_rate': 0.0002, 'batch_size': 16, 'L1_LAMBDA': 150, 'lambda_gp': 5, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 300}


100%|██████████| 2/2 [00:00<00:00,  4.42it/s, D_fake=0.516, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.33it/s, D_fake=0.516, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  4.31it/s, D_fake=0.518, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  4.30it/s, D_fake=0.514, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.34it/s, D_fake=0.518, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.32it/s, D_fake=0.514, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.26it/s, D_fake=0.515, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.31it/s, D_fake=0.513, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.30it/s, D_fake=0.515, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  4.14it/s, D_fake=0.516, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.09it/s, D_fake=0.519, D_real=0.519]
100%|██████████| 2/2 [00:00<00:00,  3.95it/s, D_fake=0.519, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.05it/s, D_fake=0.516, D_real=0.52]
100%|██████████| 2/2 [00:00<00:00,  4.07it/s, D

Validation Loss for Trial 6: 41.745662689208984
Trial 7 with parameters: {'learning_rate': 1e-05, 'batch_size': 16, 'L1_LAMBDA': 100, 'lambda_gp': 15, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 1e-05, 'num_epochs': 500}


100%|██████████| 2/2 [00:00<00:00,  4.06it/s, D_fake=0.547, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.01it/s, D_fake=0.547, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.13it/s, D_fake=0.546, D_real=0.548]
100%|██████████| 2/2 [00:00<00:00,  4.14it/s, D_fake=0.546, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.27it/s, D_fake=0.548, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.30it/s, D_fake=0.549, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.32it/s, D_fake=0.547, D_real=0.548]
100%|██████████| 2/2 [00:00<00:00,  4.29it/s, D_fake=0.551, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.30it/s, D_fake=0.55, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.27it/s, D_fake=0.549, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.23it/s, D_fake=0.551, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.29it/s, D_fake=0.551, D_real=0.548]
100%|██████████| 2/2 [00:00<00:00,  4.28it/s, D_fake=0.551, D_real=0.549]
100%|██████████| 2/2 [00:00<00:00,  4.3

Validation Loss for Trial 7: 30.396015167236328
Trial 8 with parameters: {'learning_rate': 0.0001, 'batch_size': 16, 'L1_LAMBDA': 150, 'lambda_gp': 5, 'optimizer': 'Adam', 'betas': (0.5, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 2/2 [00:00<00:00,  4.21it/s, D_fake=0.514, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  4.20it/s, D_fake=0.512, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  4.21it/s, D_fake=0.512, D_real=0.515]
100%|██████████| 2/2 [00:00<00:00,  4.25it/s, D_fake=0.513, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  4.27it/s, D_fake=0.516, D_real=0.515]
100%|██████████| 2/2 [00:00<00:00,  4.11it/s, D_fake=0.516, D_real=0.515]
100%|██████████| 2/2 [00:00<00:00,  4.25it/s, D_fake=0.516, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  4.19it/s, D_fake=0.517, D_real=0.513]
100%|██████████| 2/2 [00:00<00:00,  4.03it/s, D_fake=0.517, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  4.04it/s, D_fake=0.516, D_real=0.515]
100%|██████████| 2/2 [00:00<00:00,  3.86it/s, D_fake=0.517, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  3.95it/s, D_fake=0.517, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  3.95it/s, D_fake=0.516, D_real=0.514]
100%|██████████| 2/2 [00:00<00:00,  3.

Validation Loss for Trial 8: 44.480838775634766
Trial 9 with parameters: {'learning_rate': 1e-05, 'batch_size': 8, 'L1_LAMBDA': 100, 'lambda_gp': 5, 'optimizer': 'RMSprop', 'betas': (0.9, 0.999), 'weight_decay': 0, 'num_epochs': 500}


100%|██████████| 3/3 [00:00<00:00,  6.23it/s, D_fake=0.521, D_real=0.518]
100%|██████████| 3/3 [00:00<00:00,  6.09it/s, D_fake=0.521, D_real=0.517]
100%|██████████| 3/3 [00:00<00:00,  6.02it/s, D_fake=0.521, D_real=0.519]
100%|██████████| 3/3 [00:00<00:00,  6.02it/s, D_fake=0.521, D_real=0.519]
100%|██████████| 3/3 [00:00<00:00,  6.01it/s, D_fake=0.523, D_real=0.518]
100%|██████████| 3/3 [00:00<00:00,  6.04it/s, D_fake=0.526, D_real=0.52]
100%|██████████| 3/3 [00:00<00:00,  6.12it/s, D_fake=0.525, D_real=0.519]
100%|██████████| 3/3 [00:00<00:00,  6.10it/s, D_fake=0.524, D_real=0.518]
100%|██████████| 3/3 [00:00<00:00,  6.19it/s, D_fake=0.518, D_real=0.515]
100%|██████████| 3/3 [00:00<00:00,  6.06it/s, D_fake=0.514, D_real=0.515]
100%|██████████| 3/3 [00:00<00:00,  6.16it/s, D_fake=0.513, D_real=0.511]
100%|██████████| 3/3 [00:00<00:00,  6.19it/s, D_fake=0.51, D_real=0.509]
100%|██████████| 3/3 [00:00<00:00,  6.03it/s, D_fake=0.508, D_real=0.509]
100%|██████████| 3/3 [00:00<00:00,  6.21

Best parameters:

 {'learning_rate': 0.001, \\
  'batch_size': 8, \\
  'L1_LAMBDA': 150, \\
  'optimizer': 'Adam', \\
  'lambda_gp': 10, \\
  'betas': (0.5, 0.999), \\
  'weight_decay': 1e-05, \\
  'num_epochs': 500}

Best validation loss: 13.156318664550781
