In [1]:
import torch 
import torch.nn as nn
import numpy as np 
import os
from PIL import Image
from torchvision.transforms import v2
import torchvision.transforms as t
from sklearn.model_selection import train_test_split

#a list of resized images from the dataset in the form of tensors
images = [] 

transform = t.Compose([
    t.ToTensor(),
    v2.Resize(size=(500, 500)),
    v2.Grayscale()])

#transforming each image in the directory, appending the images to the images list
for root, dirs, files in os.walk("/kaggle/input/iam-handwriting-top50/data_subset/data_subset", topdown=False):
    for name in files:
        im = Image.open(os.path.join(root, name))
        im = transform(im)
        images.append(im)



In [2]:
#making sure the values are between 0 and 1
print(images[1])

tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]])


In [3]:
#splitting the images list into train, test sets
X_train, X_test = train_test_split(images[:3000], test_size=0.3)
print(X_train[1].size())

torch.Size([1, 784, 784])


In [4]:
!nvidia-smi

Wed Jan 10 19:00:50 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0              25W / 250W |      0MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [79]:
class Generator(nn.Module):
    def __init__(self, z_dim: int, out_size=784, lin1_channels: int=264, output_channels: int=1, device: str='cuda'):
        """
        The constructor of the Generator part of the network.

        z_dim: the dimension of the input noise vector
        lin1_hw: the height, width of the feature map produced by the first, dense layer. defaults to 4
        lin1_channels: the number of channels in the output of the first, dense layer. defaults to 128
        output_channels: the number of channels in the output of the network. defaults to 1 
        device: the device to send the calculations to. defaults to 'cuda' 
        """
        super(Generator, self).__init__()  

        self.z_dim = z_dim
        self.lin1_channels = lin1_channels
        self.output_channels = output_channels
        self.device = device

        self.G = nn.Sequential(
            nn.Linear(z_dim, lin1_channels, True, self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(lin1_channels, lin1_channels * 2, True, self.device),
            nn.LeakyReLU(0.1),
            
            nn.Dropout(0.5),
            
            nn.Linear(lin1_channels * 2, lin1_channels * 3, True, self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(lin1_channels * 3, out_size, True, self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(out_size, out_size, True, self.device),
            nn.Tanh()
        )

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        """
        The forward pass of the network.

        returns a tensor (an image).
        """
    
        return self.G(z)

In [80]:
class Discriminator(nn.Module):
    def __init__(self, input_size:int=784, conv1_channels: int=64, device: str='cuda'):
        """
        The constructor of the Discriminator part of the network.
        
        input_channels: the number of channels in the input. defaults to 1
        conv1_channels: the number of channels in the output of the first conv layer. defaults to 64
        device: the device to perform the calculations on. defaults to 'cuda'
        """ 
        super(Discriminator, self).__init__()

        self.input_size = input_size
        self.conv1_channels = conv1_channels
        self.device = device

        self.D = nn.Sequential(
            nn.Linear(input_size, 128, device=self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(128, 100, device=self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(100, 64, device=self.device),
            nn.LeakyReLU(0.1),
            
            nn.Linear(64, 1, device=self.device),
            nn.Sigmoid()
        )

    
    def forward(self, x):
        return self.D(x)

In [81]:
noise_dim = 784

#generating a random noise matrix (a vector simply doesn't work)
z = torch.randn(noise_dim, noise_dim, device='cuda')
print(z.shape)

torch.Size([784, 784])


In [82]:
torch.cuda.empty_cache()

In [83]:
from torch.optim.adam import Adam

optimizerG = Adam(G.parameters(), lr=1e-5, betas=[0.9, 0.9])
optimizerD = Adam(D.parameters(), lr=1e-5, betas=[0.9, 0.9])

loss = nn.BCELoss()

In [101]:
train_iters = 1000

torch.autograd.set_detect_anomaly(True)

for epoch in range(train_iters):
    ### TRAINING D ON A REAL IMAGE ###
    
    z = torch.randn(noise_dim, noise_dim, device='cuda')
    
    idx = np.random.randint(len(X_train))
    
    D.zero_grad()
    
    real_image = X_train[idx].to('cuda')
        
    output_real = D.forward(real_image)
    print(output_real.size())
    
    label_real = torch.ones_like(output_real)
    
    loss_D_real = loss(output_real, label_real)
    
    loss_D_real.backward()
    
    
    ### TRAINING D ON A FAKE IMAGE ###
    fake = G.forward(z)
    
    output_fake = D.forward(fake)
    
    D_inp_fake = torch.zeros_like(output_fake)

    loss_D_fake = loss(output_fake, D_inp_fake)

    loss_D = loss_D_real + loss_D_fake

    optimizerD.step()
    
    
    ### TRAINING G ###
    
    G.zero_grad()
    
    output_D_for_G = D.forward(fake)

    label_fake = torch.zeros_like(output)
    
    loss_G = loss(output_D_for_G, label_fake)
    
    loss_G.backward()
    
    optimizerG.step()

torch.Size([1, 784, 1])


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [784, 528]], which is output 0 of LeakyReluBackward1, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!