In [1]:
import os
import shutil
from google.colab import drive

drive.mount('/content/drive')

PROJECT_PATH = "/content/drive/MyDrive/Painter_Assignment"
SRC_PATH = os.path.join(PROJECT_PATH, "src")
CHECKPOINT_PATH = os.path.join(PROJECT_PATH, "checkpoints")
KAGGLE_JSON_PATH = os.path.join(PROJECT_PATH, "kaggle.json")

os.makedirs(SRC_PATH, exist_ok=True)
os.makedirs(CHECKPOINT_PATH, exist_ok=True)

Mounted at /content/drive


In [3]:
if os.path.exists(KAGGLE_JSON_PATH):
    shutil.copy(KAGGLE_JSON_PATH, "/content/kaggle.json")
    os.chmod("/content/kaggle.json", 0o600)
    os.environ['KAGGLE_CONFIG_DIR'] = "/content"
else:
    print(f"ERROR: kaggle.json not found in {PROJECT_PATH}")

if not os.path.exists("/content/dataset"):
    print("Downloading dataset from Kaggle...")
    !kaggle competitions download -c gan-getting-started
    !unzip -q gan-getting-started.zip -d /content/dataset
    print("Dataset downloaded and extracted successfully!")
else:
    print("Dataset already exists on local disk.")



Downloading dataset from Kaggle...
Downloading gan-getting-started.zip to /content
 83% 305M/367M [00:06<00:01, 41.5MB/s]
100% 367M/367M [00:06<00:00, 58.0MB/s]
Dataset downloaded and extracted successfully!


In [4]:
!pip install wandb -q
import wandb
from google.colab import userdata

try:
    wandb.login(key=userdata.get('wandb_api_key'))
except:
    wandb.login()

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 2


[34m[1mwandb[0m: You chose 'Use an existing W&B account'
[34m[1mwandb[0m: Logging into https://api.wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: Find your API key here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑¬∑


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33melene-gabeskiria2004[0m ([33melene-gabeskiria2004-free-univiersity-of-tbilisi[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
import sys
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import wandb

In [7]:
sys.path.append("/content/drive/MyDrive/Painter_Assignment/src")

from config import Config
from dataset import UnpairedDataset
from models import GeneratorResNet, Discriminator
from utils import weights_init_normal, ReplayBuffer

In [8]:
wandb.init(
    project=Config.PROJECT_NAME,
    config={k:v for k,v in Config.__dict__.items() if not k.startswith('__')},
    resume="allow"
)

In [None]:
G_AB = GeneratorResNet().to(Config.DEVICE) # Photo -> Monet
G_BA = GeneratorResNet().to(Config.DEVICE) # Monet -> Photo
D_A = Discriminator().to(Config.DEVICE)    # Discriminator for Photos
D_B = Discriminator().to(Config.DEVICE)    # Discriminator for Monets

# Initialize Optimizers
optimizer_G = torch.optim.Adam(
    list(G_AB.parameters()) + list(G_BA.parameters()),
    lr=Config.LR_G, betas=(Config.B1, Config.B2)
)
optimizer_D_A = torch.optim.Adam(D_A.parameters(), lr=Config.LR_D, betas=(Config.B1, Config.B2))
optimizer_D_B = torch.optim.Adam(D_B.parameters(), lr=Config.LR_D, betas=(Config.B1, Config.B2))

# Define Loss Functions
criterion_GAN = nn.MSELoss()
criterion_cycle = nn.L1Loss()
criterion_identity = nn.L1Loss()

In [None]:
print(Config.LOAD_MODEL)
print(Config.SAVE_EPOCH_FREQ)

True
1


In [None]:
import importlib
import sys

if 'config' in sys.modules:
    importlib.reload(sys.modules['config'])
    importlib.reload(sys.modules['models'])

    print("Config module reloaded successfully.")
else:
    print("Config module was not previously imported.")

from config import Config

Config module was not previously imported.


ModuleNotFoundError: No module named 'config'

In [None]:
start_epoch = 0

if Config.LOAD_MODEL:
    load_path = f"{Config.CHECKPOINT_DIR}/epoch_{Config.START_EPOCH}.pth"
    print(f"üîÑ Resuming training from: {load_path}")

    checkpoint = torch.load(load_path, map_location=Config.DEVICE)
    G_AB.load_state_dict(checkpoint['G_AB'])
    G_BA.load_state_dict(checkpoint['G_BA'])
    D_A.load_state_dict(checkpoint['D_A'])
    D_B.load_state_dict(checkpoint['D_B'])
    optimizer_G.load_state_dict(checkpoint['optimizer_G'])
    optimizer_D_A.load_state_dict(checkpoint['optimizer_D_A'])
    optimizer_D_B.load_state_dict(checkpoint['optimizer_D_B'])

    start_epoch = checkpoint['epoch'] + 1
else:
    print("Starting Fresh Training...")
    G_AB.apply(weights_init_normal)
    G_BA.apply(weights_init_normal)
    D_A.apply(weights_init_normal)
    D_B.apply(weights_init_normal)

dataset = UnpairedDataset(Config.TRAIN_MONET, Config.TRAIN_PHOTO)
dataloader = DataLoader(
    dataset,
    batch_size=Config.BATCH_SIZE,
    shuffle=True,
    num_workers=Config.NUM_WORKERS
)

# Image buffers to stabilize the discriminator
fake_A_buffer = ReplayBuffer()
fake_B_buffer = ReplayBuffer()

print(f"üì∏ Data Loaded. Monet: {len(dataset.monet_files)}, Photo: {len(dataset.photo_files)}")

üîÑ Resuming training from: /content/drive/MyDrive/Painter_Assignment/checkpoints/epoch_5.pth
üì∏ Data Loaded. Monet: 300, Photo: 7038


In [None]:
from torch.cuda.amp import GradScaler, autocast

scaler = GradScaler()

print("Training Started with Mixed Precision (AMP), Soft Labels, and Instance Noise...")

for epoch in range(start_epoch, Config.EPOCHS):
    for i, batch in enumerate(dataloader):

        real_A = batch["photo"].to(Config.DEVICE)
        real_B = batch["monet"].to(Config.DEVICE)

        valid = torch.ones((real_A.size(0), 1, 16, 16), requires_grad=False).to(Config.DEVICE)
        fake = torch.zeros((real_A.size(0), 1, 16, 16), requires_grad=False).to(Config.DEVICE)
        valid_smooth = torch.full((real_A.size(0), 1, 16, 16), Config.REAL_LABEL_SMOOTH, requires_grad=False).to(Config.DEVICE)

        optimizer_G.zero_grad()

        with autocast():
            loss_id_A = criterion_identity(G_BA(real_A), real_A)
            loss_id_B = criterion_identity(G_AB(real_B), real_B)
            loss_identity = (loss_id_A + loss_id_B) / 2 * Config.LAMBDA_ID

            # GAN loss
            fake_B = G_AB(real_A)
            loss_GAN_AB = criterion_GAN(D_B(fake_B), valid)
            fake_A = G_BA(real_B)
            loss_GAN_BA = criterion_GAN(D_A(fake_A), valid)
            loss_GAN = (loss_GAN_AB + loss_GAN_BA) / 2

            # Cycle loss
            recov_A = G_BA(fake_B)
            loss_cycle_A = criterion_cycle(recov_A, real_A)
            recov_B = G_AB(fake_A)
            loss_cycle_B = criterion_cycle(recov_B, real_B)
            loss_cycle = (loss_cycle_A + loss_cycle_B) / 2 * Config.LAMBDA_CYCLE

            # Total loss
            loss_G = loss_GAN + loss_cycle + loss_identity

        scaler.scale(loss_G).backward()
        scaler.step(optimizer_G)
        scaler.update()

        optimizer_D_A.zero_grad()

        with autocast():
            # get the buffered image FIRST
            fake_A_ = fake_A_buffer.push_and_pop(fake_A)

            # generate noise
            noise_real = torch.randn_like(real_A) * 0.05
            noise_fake = torch.randn_like(fake_A_) * 0.05

            # add noise
            real_A_noisy = real_A + noise_real
            fake_A_noisy = fake_A_.detach() + noise_fake

            # calculate Loss
            loss_real = criterion_GAN(D_A(real_A_noisy), valid_smooth)
            loss_fake = criterion_GAN(D_A(fake_A_noisy), fake)
            loss_D_A = (loss_real + loss_fake) / 2

        scaler.scale(loss_D_A).backward()
        scaler.step(optimizer_D_A)
        scaler.update()

        optimizer_D_B.zero_grad()

        with autocast():
            # get the buffered image FIRST
            fake_B_ = fake_B_buffer.push_and_pop(fake_B)

            # generate noise
            noise_real = torch.randn_like(real_B) * 0.05
            noise_fake = torch.randn_like(fake_B_) * 0.05

            # add noise
            real_B_noisy = real_B + noise_real
            fake_B_noisy = fake_B_.detach() + noise_fake

            # calculate Loss
            loss_real = criterion_GAN(D_B(real_B_noisy), valid_smooth)
            loss_fake = criterion_GAN(D_B(fake_B_noisy), fake)
            loss_D_B = (loss_real + loss_fake) / 2

        scaler.scale(loss_D_B).backward()
        scaler.step(optimizer_D_B)
        scaler.update()

        if i % 100 == 0:
            wandb.log({
                "Loss/Generator": loss_G.item(),
                "Loss/Discriminator": (loss_D_A.item() + loss_D_B.item()),
                "Epoch": epoch
            })
            print(f"[Epoch {epoch}] [Batch {i}] [G loss: {loss_G.item():.4f}] [D loss: {(loss_D_A.item() + loss_D_B.item()):.4f}]")

    img_real_A = real_A[0].detach().cpu() * 0.5 + 0.5
    img_fake_B = fake_B[0].detach().cpu() * 0.5 + 0.5
    img_real_B = real_B[0].detach().cpu() * 0.5 + 0.5
    img_fake_A = fake_A[0].detach().cpu() * 0.5 + 0.5

    wandb.log({
        "Visual/Real Photo": wandb.Image(img_real_A, caption=f"Real Photo (Epoch {epoch})"),
        "Visual/Generated Monet": wandb.Image(img_fake_B, caption=f"Generated Monet (Epoch {epoch})"),
        "Visual/Real Monet": wandb.Image(img_real_B, caption=f"Real Monet (Epoch {epoch})"),
        "Visual/Generated Photo": wandb.Image(img_fake_A, caption=f"Reconstructed Photo (Epoch {epoch})")
    })

    if epoch % Config.SAVE_EPOCH_FREQ == 0:
        save_path = f"{Config.CHECKPOINT_DIR}/epoch_{epoch}.pth"
        torch.save({
            'epoch': epoch,
            'G_AB': G_AB.state_dict(),
            'G_BA': G_BA.state_dict(),
            'D_A': D_A.state_dict(),
            'D_B': D_B.state_dict(),
            'optimizer_G': optimizer_G.state_dict(),
            'optimizer_D_A': optimizer_D_A.state_dict(),
            'optimizer_D_B': optimizer_D_B.state_dict()
        }, save_path)
        print(f"Checkpoint saved: {save_path}")

‚è≥ Training Started with Mixed Precision (AMP), Soft Labels, and Instance Noise...


  scaler = GradScaler()
  with autocast():
  with autocast():
  with autocast():


[Epoch 6] [Batch 0] [G loss: 1.6232] [D loss: 0.3251]
[Epoch 6] [Batch 100] [G loss: 2.4958] [D loss: 0.0797]
[Epoch 6] [Batch 200] [G loss: 2.1762] [D loss: 0.1746]
[Epoch 6] [Batch 300] [G loss: 1.8289] [D loss: 0.2608]
[Epoch 6] [Batch 400] [G loss: 2.2896] [D loss: 0.4124]
[Epoch 6] [Batch 500] [G loss: 2.5710] [D loss: 0.1300]
[Epoch 6] [Batch 600] [G loss: 2.8669] [D loss: 0.0977]
[Epoch 6] [Batch 700] [G loss: 2.7652] [D loss: 0.1246]
[Epoch 6] [Batch 800] [G loss: 2.3175] [D loss: 0.1347]
[Epoch 6] [Batch 900] [G loss: 2.5670] [D loss: 0.0603]
[Epoch 6] [Batch 1000] [G loss: 2.1580] [D loss: 0.1303]
[Epoch 6] [Batch 1100] [G loss: 1.4449] [D loss: 0.0772]
[Epoch 6] [Batch 1200] [G loss: 2.6945] [D loss: 0.1572]
[Epoch 6] [Batch 1300] [G loss: 2.6889] [D loss: 0.2389]
[Epoch 6] [Batch 1400] [G loss: 1.4748] [D loss: 0.0861]
[Epoch 6] [Batch 1500] [G loss: 2.0488] [D loss: 0.2625]
[Epoch 6] [Batch 1600] [G loss: 3.0297] [D loss: 0.1093]
[Epoch 6] [Batch 1700] [G loss: 2.2209] [D 

In [None]:
wandb.finish()

0,1
Epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ
Loss/Discriminator,‚ñà‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
Loss/Generator,‚ñà‚ñÜ‚ñÖ‚ñÖ‚ñà‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÉ‚ñÉ‚ñÑ‚ñÉ‚ñÉ‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÑ‚ñÅ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÅ‚ñÇ‚ñÇ‚ñÉ

0,1
Epoch,9.0
Loss/Discriminator,0.08697
Loss/Generator,1.55063


U-net experiment

In [10]:
import sys
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.amp import GradScaler, autocast # Mixed Precision
import wandb

In [11]:
sys.path.append("/content/drive/MyDrive/University/Painter_Assignment/src")

In [12]:
from config_unet import ConfigUNet
from dataset import UnpairedDataset
from models import GeneratorUNet, Discriminator
from utils import weights_init_normal, ReplayBuffer

In [None]:
wandb.init(
    project=ConfigUNet.PROJECT_NAME,
    config={k:v for k,v in ConfigUNet.__dict__.items() if not k.startswith('__')},
    reinit=True
)

In [None]:
print(f"Starting EXPERIMENT 2: U-Net Architecture on {ConfigUNet.DEVICE}...")

G_AB = GeneratorUNet().to(ConfigUNet.DEVICE) # Photo -> Monet
G_BA = GeneratorUNet().to(ConfigUNet.DEVICE) # Monet -> Photo
D_A = Discriminator().to(ConfigUNet.DEVICE)
D_B = Discriminator().to(ConfigUNet.DEVICE)

# Optimizers & Loss
optimizer_G = torch.optim.Adam(
    list(G_AB.parameters()) + list(G_BA.parameters()),
    lr=ConfigUNet.LR, betas=(ConfigUNet.B1, ConfigUNet.B2)
)
optimizer_D_A = torch.optim.Adam(D_A.parameters(), lr=ConfigUNet.LR, betas=(ConfigUNet.B1, ConfigUNet.B2))
optimizer_D_B = torch.optim.Adam(D_B.parameters(), lr=ConfigUNet.LR, betas=(ConfigUNet.B1, ConfigUNet.B2))

criterion_GAN = nn.MSELoss()
criterion_cycle = nn.L1Loss()
criterion_identity = nn.L1Loss()

# initialize Weights 
G_AB.apply(weights_init_normal)
G_BA.apply(weights_init_normal)
D_A.apply(weights_init_normal)
D_B.apply(weights_init_normal)

dataset = UnpairedDataset(ConfigUNet.TRAIN_MONET, ConfigUNet.TRAIN_PHOTO)
dataloader = DataLoader(
    dataset,
    batch_size=ConfigUNet.BATCH_SIZE,
    shuffle=True,
    num_workers=ConfigUNet.NUM_WORKERS
)

# Buffers & Scaler
fake_A_buffer = ReplayBuffer()
fake_B_buffer = ReplayBuffer()
scaler = GradScaler('cuda')


Starting EXPERIMENT 2: U-Net Architecture on cuda...


In [None]:
for epoch in range(ConfigUNet.START_EPOCH, ConfigUNet.EPOCHS):
    for i, batch in enumerate(dataloader):

        real_A = batch["photo"].to(ConfigUNet.DEVICE)
        real_B = batch["monet"].to(ConfigUNet.DEVICE)

        valid = torch.ones((real_A.size(0), 1, 16, 16), requires_grad=False).to(ConfigUNet.DEVICE)
        fake = torch.zeros((real_A.size(0), 1, 16, 16), requires_grad=False).to(ConfigUNet.DEVICE)

        optimizer_G.zero_grad()

        with autocast('cuda'):
            # Identity
            loss_id_A = criterion_identity(G_BA(real_A), real_A)
            loss_id_B = criterion_identity(G_AB(real_B), real_B)
            loss_identity = (loss_id_A + loss_id_B) / 2 * ConfigUNet.LAMBDA_ID

            # GAN
            fake_B = G_AB(real_A)
            loss_GAN_AB = criterion_GAN(D_B(fake_B), valid)
            fake_A = G_BA(real_B)
            loss_GAN_BA = criterion_GAN(D_A(fake_A), valid)
            loss_GAN = (loss_GAN_AB + loss_GAN_BA) / 2

            # Cycle
            recov_A = G_BA(fake_B)
            loss_cycle_A = criterion_cycle(recov_A, real_A)
            recov_B = G_AB(fake_A)
            loss_cycle_B = criterion_cycle(recov_B, real_B)
            loss_cycle = (loss_cycle_A + loss_cycle_B) / 2 * ConfigUNet.LAMBDA_CYCLE

            loss_G = loss_GAN + loss_cycle + loss_identity

        scaler.scale(loss_G).backward()
        scaler.step(optimizer_G)
        scaler.update()

        optimizer_D_A.zero_grad()
        with autocast('cuda'):
            loss_real = criterion_GAN(D_A(real_A), valid)
            fake_A_ = fake_A_buffer.push_and_pop(fake_A)
            loss_fake = criterion_GAN(D_A(fake_A_.detach()), fake)
            loss_D_A = (loss_real + loss_fake) / 2
        scaler.scale(loss_D_A).backward()
        scaler.step(optimizer_D_A)
        scaler.update()

        optimizer_D_B.zero_grad()
        with autocast('cuda'):
            loss_real = criterion_GAN(D_B(real_B), valid)
            fake_B_ = fake_B_buffer.push_and_pop(fake_B)
            loss_fake = criterion_GAN(D_B(fake_B_.detach()), fake)
            loss_D_B = (loss_real + loss_fake) / 2
        scaler.scale(loss_D_B).backward()
        scaler.step(optimizer_D_B)
        scaler.update()

        if i % 100 == 0:
            wandb.log({"Loss/G": loss_G.item(), "Loss/D": loss_D_A.item()+loss_D_B.item(), "Epoch": epoch})
            print(f"[Epoch {epoch}] [Batch {i}] [G loss: {loss_G.item():.4f}]")


    img_real_A = real_A[0].detach().cpu() * 0.5 + 0.5
    img_fake_B = fake_B[0].detach().cpu() * 0.5 + 0.5
    img_real_B = real_B[0].detach().cpu() * 0.5 + 0.5
    img_fake_A = fake_A[0].detach().cpu() * 0.5 + 0.5

    wandb.log({
        "Visual/Real Photo": wandb.Image(img_real_A, caption=f"Real Photo (Epoch {epoch})"),
        "Visual/Generated Monet": wandb.Image(img_fake_B, caption=f"Generated Monet (Epoch {epoch})"),
        "Visual/Real Monet": wandb.Image(img_real_B, caption=f"Real Monet (Epoch {epoch})"),
        "Visual/Generated Photo": wandb.Image(img_fake_A, caption=f"Reconstructed Photo (Epoch {epoch})")
    })

    if epoch % ConfigUNet.SAVE_EPOCH_FREQ == 0:
        save_path = f"{ConfigUNet.CHECKPOINT_DIR}/epoch_{epoch}.pth"
        torch.save({
            'epoch': epoch,
            'G_AB': G_AB.state_dict(),
            'G_BA': G_BA.state_dict(),
            'D_A': D_A.state_dict(),
            'D_B': D_B.state_dict(),
            'optimizer_G': optimizer_G.state_dict(),
        }, save_path)
        print(f"üíæ UNet Checkpoint saved: {save_path}")

wandb.finish()

[Epoch 0] [Batch 0] [G loss: 11.0310]
[Epoch 0] [Batch 100] [G loss: 2.5375]
[Epoch 0] [Batch 200] [G loss: 3.7616]
[Epoch 0] [Batch 300] [G loss: 1.6436]
[Epoch 0] [Batch 400] [G loss: 1.9185]
[Epoch 0] [Batch 500] [G loss: 2.2433]
[Epoch 0] [Batch 600] [G loss: 1.5672]
[Epoch 0] [Batch 700] [G loss: 1.4593]
[Epoch 0] [Batch 800] [G loss: 1.5946]
[Epoch 0] [Batch 900] [G loss: 1.4376]
[Epoch 0] [Batch 1000] [G loss: 1.2024]
[Epoch 0] [Batch 1100] [G loss: 1.5474]
[Epoch 0] [Batch 1200] [G loss: 1.2385]
[Epoch 0] [Batch 1300] [G loss: 1.6008]
[Epoch 0] [Batch 1400] [G loss: 1.9840]
[Epoch 0] [Batch 1500] [G loss: 1.4590]
[Epoch 0] [Batch 1600] [G loss: 1.5041]
[Epoch 0] [Batch 1700] [G loss: 1.0736]
[Epoch 0] [Batch 1800] [G loss: 1.1419]
[Epoch 0] [Batch 1900] [G loss: 1.1036]
[Epoch 0] [Batch 2000] [G loss: 1.1801]
[Epoch 0] [Batch 2100] [G loss: 1.5649]
[Epoch 0] [Batch 2200] [G loss: 1.1619]
[Epoch 0] [Batch 2300] [G loss: 1.3058]
[Epoch 0] [Batch 2400] [G loss: 1.1057]
[Epoch 0] [