## Imports

In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import random
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import matplotlib.pyplot as  plt
from IPython.display import clear_output
import torchvision.utils as vutils
import numpy as np
from torch.utils.data import DataLoader
from pytorch_image_generation_metrics import get_fid , get_inception_score_and_fid_from_directory,get_inception_score_from_directory
from pytorch_image_generation_metrics.fid_ref import calc_fid_ref



In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Utils

In [16]:



def pil_to_tensor(img, size):
    img = img.resize((size, size), Image.BICUBIC)
    tensor = torch.tensor(torch.ByteTensor(bytearray(img.tobytes()))
                            ).view(img.size[1], img.size[0], 3)
    tensor = tensor.permute(2,0,1).float() / 255.0
    return tensor


def pixelify(img_tensor, pixel_size=8):
    """
    img_tensor: torch.Tensor of shape (1,3,H,W)
    pixel_size: size of the "pixels" (larger = more blocky)
    """
    _, c, h, w = img_tensor.shape

    # downscale
    small = F.interpolate(img_tensor, size=(h//pixel_size, w//pixel_size), mode='nearest')
    
    # upscale back to original resolution
    pixelated = F.interpolate(small, size=(h, w), mode='nearest')
    
    return pixelated


def show_two_images(tensor1, tensor2, nrow=1, titles=("Image1", "Image2")):
    """
    Show two batches of images side by side.
    tensor1, tensor2: [B,C,H,W] PyTorch tensors
    """
    def denormalize(tensor):
        return (tensor * 0.5) + 0.5  # from [-1,1] to [0,1]

    # make grids from each tensor
    grid1 = vutils.make_grid(denormalize(tensor1).cpu(), nrow=nrow)
    grid2 = vutils.make_grid(denormalize(tensor2).cpu(), nrow=nrow)

    npimg1 = grid1.permute(1, 2, 0).detach().numpy()
    npimg2 = grid2.permute(1, 2, 0).detach().numpy()

    # plot side by side
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.imshow(npimg1)
    plt.axis("off")
    if titles and len(titles) > 0:
        plt.title(titles[0])

    plt.subplot(1, 2, 2)
    plt.imshow(npimg2)
    plt.axis("off")
    if titles and len(titles) > 1:
        plt.title(titles[1])

    plt.tight_layout()
    plt.show()


def identity(x):
    return x

## Resnet Generator


In [17]:
# --- Basic building blocks ---
class ResidualBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.block = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, 3),
            nn.InstanceNorm2d(dim),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(dim, dim, 3),
            nn.InstanceNorm2d(dim)
        )

    def forward(self, x):
        return x + self.block(x)

# --- Generator ---
class Generator(nn.Module):
    def __init__(self, in_channels=3, out_channels=3, n_residual_blocks=9):
        super().__init__()
        # Initial convolution
        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(in_channels, 64, 7),
            nn.InstanceNorm2d(64),
            nn.ReLU(inplace=True)
        ]

        # Downsampling
        in_features = 64
        out_features = in_features * 2
        for _ in range(2):
            model += [
                nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features * 2

        # Residual blocks
        for _ in range(n_residual_blocks):
            model += [ResidualBlock(in_features)]

        # Upsampling
        out_features = in_features // 2
        for _ in range(2):
            model += [
                nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features // 2

        # Output layer
        model += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(64, out_channels, 7),
            nn.Tanh()
        ]

        self.model = nn.Sequential(*model)

    def forward(self, x):
        return self.model(x)


## PixelShuffle Generator

In [18]:
import torch
import torch.nn as nn

# --- PixelShuffle Residual Block ---
class PSResBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.block = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, kernel_size=3),
            nn.InstanceNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, kernel_size=3),
            nn.InstanceNorm2d(channels)
        )

    def forward(self, x):
        return x + self.block(x)

# --- PixelShuffle Upsample Block ---
class PixelShuffleUpsample(nn.Module):
    def __init__(self, in_channels, out_channels, upscale_factor=2):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels * (upscale_factor ** 2),
            kernel_size=3,
            stride=1,
            padding=1
        )
        self.ps = nn.PixelShuffle(upscale_factor)
        self.norm = nn.InstanceNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.ps(x)
        x = self.norm(x)
        x = self.relu(x)
        return x

# --- Generator with PixelShuffle + PSResBlock ---
class PixelShuffleResGenerator(nn.Module):
    def __init__(self, in_channels=3, out_channels=3, n_residual_blocks=9, base_channels=64):
        super().__init__()

        # Initial convolution
        layers = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(in_channels, base_channels, kernel_size=7),
            nn.InstanceNorm2d(base_channels),
            nn.ReLU(inplace=True)
        ]

        # Downsampling
        in_features = base_channels
        for _ in range(2):
            out_features = in_features * 2
            layers += [
                nn.Conv2d(in_features, out_features, kernel_size=3, stride=2, padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features

        # PSResBlocks
        for _ in range(n_residual_blocks):
            layers += [PSResBlock(in_features)]

        # Upsampling with PixelShuffle
        for _ in range(2):
            out_features = in_features // 2
            layers += [PixelShuffleUpsample(in_features, out_features, upscale_factor=2)]
            in_features = out_features

        # Output layer
        layers += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(base_channels, out_channels, kernel_size=7),
            nn.Tanh()
        ]

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


## UNET Generator

In [19]:
import torch
import torch.nn as nn

class UNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, down=True, use_dropout=False):
        super().__init__()
        if down:
            self.block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 4, stride=2, padding=1, bias=False),
                nn.InstanceNorm2d(out_channels),
                nn.LeakyReLU(0.2, inplace=True)
            )
        else:
            self.block = nn.Sequential(
                nn.ConvTranspose2d(in_channels, out_channels, 4, stride=2, padding=1, bias=False),
                nn.InstanceNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            if use_dropout:
                self.block.add_module("dropout", nn.Dropout(0.5))

    def forward(self, x):
        return self.block(x)

class UNetGenerator(nn.Module):
    def __init__(self, in_channels=3, out_channels=3, base_filters=64):
        super().__init__()

        # Encoder
        self.down1 = UNetBlock(in_channels, base_filters, down=True)
        self.down2 = UNetBlock(base_filters, base_filters*2, down=True)
        self.down3 = UNetBlock(base_filters*2, base_filters*4, down=True)
        self.down4 = UNetBlock(base_filters*4, base_filters*8, down=True)
        self.down5 = UNetBlock(base_filters*8, base_filters*8, down=True)

        # Bottleneck
        self.bottleneck = nn.Sequential(
            nn.Conv2d(base_filters*8, base_filters*8, 4, stride=2, padding=1),
            nn.ReLU(inplace=True)
        )

        # Decoder
        self.up1 = UNetBlock(base_filters*8, base_filters*8, down=False, use_dropout=True)
        self.up2 = UNetBlock(base_filters*16, base_filters*8, down=False, use_dropout=True)
        self.up3 = UNetBlock(base_filters*16, base_filters*4, down=False)
        self.up4 = UNetBlock(base_filters*8, base_filters*2, down=False)
        self.up5 = UNetBlock(base_filters*4, base_filters, down=False)

        self.final = nn.Sequential(
            nn.ConvTranspose2d(base_filters*2, out_channels, 4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        # downsample
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        bottleneck = self.bottleneck(d5)

        # upsample with skip connections
        u1 = self.up1(bottleneck)
        u2 = self.up2(torch.cat([u1, d5], dim=1))
        u3 = self.up3(torch.cat([u2, d4], dim=1))
        u4 = self.up4(torch.cat([u3, d3], dim=1))
        u5 = self.up5(torch.cat([u4, d2], dim=1))
        return self.final(torch.cat([u5, d1], dim=1))


## PixelArt UNet

In [20]:


class PixelArtBlock(nn.Module):
    def __init__(self, in_channels, out_channels, down=True):
        super().__init__()
        if down:
            self.block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1),  # no strided conv
                nn.InstanceNorm2d(out_channels),
                nn.ReLU(inplace=True),

            )
        else:
            # Nearest-neighbor upsampling + conv
            self.block = nn.Sequential(
                # nn.Upsample(scale_factor=2, mode='nearest'),
                nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1),
                nn.InstanceNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )

    def forward(self, x):
        return self.block(x)

class PixelArtUNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=3, base_channels=64):
        super().__init__()

        # Encoder (no stride, just convs)
        self.enc1 = PixelArtBlock(in_channels, base_channels, down=True)
        self.enc2 = PixelArtBlock(base_channels, base_channels*2, down=True)

        # Bottleneck
        self.bottleneck = nn.Sequential(
            nn.Conv2d(base_channels*2, base_channels*2, 3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Decoder
        self.dec1 = PixelArtBlock(base_channels*2, base_channels, down=False)

        # Final conv
        self.final = nn.Sequential(
            nn.Conv2d(base_channels + base_channels, out_channels, 3, stride=1, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        e1 = self.enc1(x)
        e2 = self.enc2(e1)
        b = self.bottleneck(e2)
        d1 = self.dec1(b)
        # skip connection
        out = self.final(torch.cat([d1, e1], dim=1))
        return out


## High Capacity Gen

In [21]:
class AdaIN(nn.Module):
    """Adaptive Instance Normalization"""
    def __init__(self, channels, w_dim):
        super().__init__()
        self.instance_norm = nn.InstanceNorm2d(channels, affine=False)
        self.style_scale = nn.Linear(w_dim, channels)
        self.style_bias = nn.Linear(w_dim, channels)

    def forward(self, x, w):
        normalized = self.instance_norm(x)
        style_scale = self.style_scale(w).unsqueeze(2).unsqueeze(3)
        style_bias = self.style_bias(w).unsqueeze(2).unsqueeze(3)
        return style_scale * normalized + style_bias

class NoiseInjection(nn.Module):
    """Adds channel-wise noise with learnable scaling"""
    def __init__(self, channels):
        super().__init__()
        self.weight = nn.Parameter(torch.zeros(1, channels, 1, 1))

    def forward(self, x):
        noise = torch.randn(x.size(0), 1, x.size(2), x.size(3), device=x.device)
        return x + self.weight * noise

class GeneratorBlock(nn.Module):
    """
    Generator block with convolution, AdaIN, and noise injection
    (no upsampling so resolution stays the same)
    """
    def __init__(self, in_channels, out_channels, w_dim):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.adain = AdaIN(out_channels, w_dim)
        self.noise = NoiseInjection(out_channels)
        self.activation = nn.LeakyReLU(0.2)

    def forward(self, x, w):
        x = self.conv(x)
        x = self.adain(x, w)
        x = self.noise(x)
        return self.activation(x)

class HighCapGenerator(nn.Module):
    """
    High-capacity generator that takes an RGB image and outputs
    an RGB image of the same resolution, using a learnable style vector.
    """
    def __init__(self, input_channels=3, w_dim=512,
                 base_channels=512, resolution=256, num_blocks=4):
        super().__init__()

        # Learnable style vector w
        self.w = nn.Parameter(torch.randn(1, w_dim))

        self.blocks = nn.ModuleList()
        self.to_rgb = nn.Conv2d(base_channels, 3, kernel_size=1)

        in_ch = input_channels
        out_ch = base_channels
        for _ in range(num_blocks):
            self.blocks.append(GeneratorBlock(in_ch, out_ch, w_dim))
            in_ch = out_ch  # subsequent blocks use base_channels in/out

        self.final_activation = nn.Tanh()

    def forward(self, x_img):
        """
        x_img: [B, 3, H, W] input image
        """
        # broadcast w to batch size
        w = self.w.expand(x_img.size(0), -1)  # [B, w_dim]

        x = x_img
        for block in self.blocks:
            x = block(x, w)
        x = self.to_rgb(x)
        return self.final_activation(x)


## Inference

In [22]:
import os
from PIL import Image
import torch

# ---- user parameters ----
resolution = 256                        # your desired resolution
input_folder = "landscape_dataset/testA"                   # folder of input images
real_folder = "landscape_dataset/trainB"            # folder of real images for comparison
model_name = "pixelart_unet"                    # one of your model names
epoch_num = 60                                  # for path
# -------------------------


In [23]:
pixelart = str.join('/',os.getcwd().split("\\")) +'/'+ real_folder
fid_file = pixelart + f"/real_trainB_stats_{resolution}.npz"

# calc_fid_ref(pixelart, fid_file,img_size=resolution)

In [24]:
# "resnet" ,"pixelart_unet",
# "interpolation" ,  "shuffle_residual", "highcap" ,
for model_name in [   "highcap"   ]:

    if model_name != "interpolation":
        if model_name == "highcap":
            
            epoch_num = 27
            model = HighCapGenerator().to(device)
        elif model_name == "normal_unet":
            
            epoch_num = 100
            model = UNetGenerator().to(device)
        elif model_name == "pixelart_unet":
            
            epoch_num = 100
            model = PixelArtUNet().to(device)
        elif model_name == "shuffle_residual":
            
            epoch_num = 99
            model = PixelShuffleResGenerator().to(device)
        elif model_name == "resnet":
            
            epoch_num = 50
            model = Generator().to(device)


        model_path = f"models/landscape_dataset_{model_name}/epoch_{epoch_num}/G_XtoY.pth"
        save_folder = f"landscape_dataset/generated_images_{model_name}_{resolution}"  
        os.makedirs(save_folder, exist_ok=True) 

        state = torch.load(model_path, map_location=device)
        model.load_state_dict(state)
        model.eval()
    else:
        model = identity
        save_folder = f"landscape_dataset/generated_images_{model_name}_{resolution}"  
        os.makedirs(save_folder, exist_ok=True) 




    for fname in os.listdir(input_folder):
        if fname.lower().endswith((".png", ".jpg", ".jpeg")):
            pil_img = Image.open(os.path.join(input_folder, fname)).convert("RGB")
            img_tensor = pil_to_tensor(pil_img, resolution).unsqueeze(0).to(device)

            with torch.no_grad():
                fake = model(img_tensor)

            fake = (fake.cpu()+1.0)/2.0
            fake = pixelify(fake, pixel_size=3)  # change 8 to control block size

            # save generated image
            out_pil = Image.fromarray(
                (fake.squeeze(0).permute(1,2,0).numpy() * 255).astype("uint8")
            )
            out_pil.save(os.path.join(save_folder, f"gen_{fname}"))



for model_name in [   "highcap"  ]:


    save_folder = f"landscape_dataset/generated_images_{model_name}_{resolution}"  
    os.makedirs(save_folder, exist_ok=True) 


    generated = str.join('/',os.getcwd().split("\\")) +'/'+ save_folder


    ((im,iv),fid) = get_inception_score_and_fid_from_directory(generated,fid_file)
    print(f"Inception mean: {im}, Inception Var: {iv}, FID: {fid}, model: {model_name}")


  state = torch.load(model_path, map_location=device)
  tensor = torch.tensor(torch.ByteTensor(bytearray(img.tobytes()))


Inception mean: 3.9572978314313616, Inception Var: 0.25296477131024947, FID: 145.25289658036303, model: highcap


In [25]:
# pixelart = str.join('/',os.getcwd().split("\\")) +'/'+ real_folder
# generated = str.join('/',os.getcwd().split("\\")) +'/'+ save_folder

# fid_file = pixelart + "/real_trainB_stats.npz"

# ((im,iv),fid) = get_inception_score_and_fid_from_directory(generated,fid_file)
# print(f"Inception mean: {im}, Inception Var: {iv}, FID: {fid}, model: {model_name}")

In [26]:
# pixelart = str.join('/',os.getcwd().split("\\")) +'/'+ real_folder
# fid_file = pixelart + "/real_trainB_stats.npz"

# calc_fid_ref(pixelart, fid_file)