In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

In [3]:
# Location of the character-font .npz archive. The default looks like a Colab
# Google Drive mount path (NOTE(review): confirm); set the CHARACTER_FONT_NPZ
# environment variable to point at another copy without editing the notebook.
npz_file = os.environ.get('CHARACTER_FONT_NPZ', '/content/gdrive/My Drive/character_font.npz')

class NPZDataset(Dataset):
    """Map-style dataset over the ``'images'`` and ``'labels'`` arrays of an ``.npz`` archive.

    Indexing returns ``(image, label)``: ``image`` is a ``float32`` tensor with a
    new leading (channel) axis inserted in front of the stored sample, and
    ``label`` is a ``long`` tensor.

    Args:
        npz_file: Path (or file object) accepted by ``np.load``; the archive
            must contain the keys ``'images'`` and ``'labels'``.
        transform: Optional callable applied to the channel-first sample before
            it is converted to a tensor. It may return an array-like or a tensor.
        filter_label: If not ``None``, keep only samples whose label equals it.
        num_samples: If not ``None``, keep only the first ``num_samples``
            samples (counted after label filtering, when both are given).
    """

    def __init__(self, npz_file, transform=None, filter_label=None, num_samples=None):
        # np.load returns a lazy NpzFile that holds the archive open; read both
        # arrays inside the context manager so the handle is closed afterwards.
        with np.load(npz_file) as data:
            images = data['images']
            labels = data['labels']
        self.transform = transform

        # Keep only the requested class, if any.
        if filter_label is not None:
            label_indices = np.where(labels == filter_label)[0]
            images = images[label_indices]
            labels = labels[label_indices]

        # Cap the dataset size. This is applied whether or not a label filter
        # is active, so num_samples is never silently ignored.
        if num_samples is not None:
            images = images[:num_samples]
            labels = labels[:num_samples]

        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples kept after filtering/truncation."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` tensor pair stored at position ``idx``."""
        # Insert the channel axis in front of the stored sample.
        image = self.images[idx][np.newaxis, ...]
        label = self.labels[idx]

        # Explicit None check: a callable that defines __len__ (e.g. an empty
        # nn.Sequential) is falsy and would otherwise be skipped.
        if self.transform is not None:
            image = self.transform(image)

        if isinstance(image, torch.Tensor):
            # torch.tensor(tensor) warns; clone().detach() is the documented
            # equivalent and still hands back an independent copy.
            image = image.clone().detach().to(dtype=torch.float32)
        else:
            image = torch.tensor(image, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.long)

        return image, label

In [4]:
class Generator(nn.Module):
    """Generator: latent vector -> single-channel image in [0, 1].

    A linear layer projects ``z`` to a 512-channel ``init_size x init_size``
    feature map; four stride-2 transposed convolutions then double the spatial
    size each time (2 -> 4 -> 8 -> 16 -> 32) while reducing the channels
    512 -> 256 -> 128 -> 64 -> 1. A sigmoid squashes the final output.

    Args:
        latent_dim: Size of the last dimension of the latent input ``z``.
    """

    def __init__(self, latent_dim):
        super().__init__()

        # Side length of the first feature map fed to the deconvolution stack.
        self.init_size = 2

        # The projection from z to the first feature map is done with a plain
        # linear layer (no activation): 512 * 2 * 2 = 2048 output features.
        self.fc = nn.Linear(latent_dim, 512 * self.init_size * self.init_size)

        # Channel schedule of the fractionally strided convolutions; every
        # step uses kernel 4 / stride 2 / padding 1, which doubles H and W.
        widths = (512, 256, 128, 64, 1)
        stack = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=2, padding=1))
            # Hidden blocks use ReLU; the image-producing block uses Sigmoid.
            stack.append(nn.Sigmoid() if c_out == widths[-1] else nn.ReLU())
        self.deconv_blocks = nn.Sequential(*stack)

    def forward(self, z):
        """Map latent vectors ``z`` to images of shape (batch, 1, 32, 32)."""
        projected = self.fc(z)

        # (batch, 2048) -> (batch, 512, init_size, init_size)
        batch_size = projected.size(0)
        feature_map = projected.view(batch_size, 512, self.init_size, self.init_size)

        return self.deconv_blocks(feature_map)


In [None]:
class Discriminator(nn.Module):
    """Discriminator: single-channel 32x32 image -> one probability per image.

    Four stride-2 convolutions halve the spatial size each time
    (32 -> 16 -> 8 -> 4 -> 2) while widening the channels 1 -> 64 -> 128 -> 256
    -> 512. The resulting 512x2x2 feature map is flattened and reduced to a
    single sigmoid-activated score by a linear layer.

    The linear head is sized for 32x32 inputs; other spatial sizes raise a
    shape error in ``forward``.
    """

    def __init__(self):
        super().__init__()

        # Ordinary strided convolutions (the attribute keeps its historical
        # name ``deconv_blocks`` so existing references keep working).
        self.deconv_blocks = nn.Sequential(
            # Block 1: Input (32x32x1) -> Output (16x16x64)
            nn.Conv2d(1, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),

            # Block 2: Input (16x16x64) -> Output (8x8x128)
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),

            # Block 3: Input (8x8x128) -> Output (4x4x256)
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),

            # Block 4: Input (4x4x256) -> Output (2x2x512)
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        )

        # Collapse the 2x2x512 feature map into ONE score per image. Applying
        # the sigmoid to the feature map and reshaping with view(-1, 1) would
        # instead yield batch * 2048 "predictions".
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 2 * 2, 1),
            nn.Sigmoid(),
        )

    def forward(self, img):
        """Return a (batch, 1) tensor of probabilities in [0, 1], one per image."""
        # Extract convolutional features, then score each image.
        features = self.deconv_blocks(img)
        return self.classifier(features)
