In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, utils

## 0. Convert all image data to JPEG format

In [None]:
import os
from PIL import Image

def convert_images_to_jpeg(input_directory, output_directory, target_size=(256*2, 256*2)):
    """Resize every image in a directory and re-encode it as an RGB JPEG.

    Files are selected by extension (``.jpg``, ``.jpeg``, ``.png``, matched
    case-insensitively). Each output is written under the input's original
    file name, so a ``.png`` input keeps its ``.png`` name even though its
    contents are JPEG-encoded.

    Args:
        input_directory: The path to the directory containing the images to resize.
        output_directory: The path to the directory to save the resized images.
            It is created if it does not exist.
        target_size: The desired (width, height) of the output images.
    """

    os.makedirs(output_directory, exist_ok=True)

    for file in os.listdir(input_directory):
        if not file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        input_image_path = os.path.join(input_directory, file)
        output_image_path = os.path.join(output_directory, file)

        # The context manager closes the source file handle as soon as the
        # pixels have been read, instead of leaving it to garbage collection.
        with Image.open(input_image_path) as image:
            # Convert before resizing: JPEG cannot store alpha or palette
            # modes, and Pillow resamples palette images with nearest-neighbour
            # regardless of the requested filter.
            if image.mode != 'RGB':
                image = image.convert('RGB')
            resized_image = image.resize(target_size, Image.LANCZOS)

        resized_image.save(output_image_path, format='JPEG')


In [None]:
# Resize the raw images and store them as RGB JPEG data in "resized_dataset".
convert_images_to_jpeg(
    input_directory="mountain_dataset",
    output_directory="resized_dataset",
)

In [None]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"{device} is set")

In [None]:
def load_dataset(data_path, transform, batch_size=32, num_workers=0, shuffle=True):
    """Build a DataLoader over an image directory organised as one sub-folder per class.

    Args:
        data_path: Root directory; each sub-directory is treated as a class
            by ``DatasetFolder``.
        transform: Callable applied to every loaded image.
        batch_size: Number of samples per batch (defaults to the previously
            hard-coded 32).
        num_workers: Number of loader worker processes (0 loads in the main
            process).
        shuffle: Whether to reshuffle the samples every epoch.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(image, class_index)``
        batches.
    """
    train_dataset = torchvision.datasets.DatasetFolder(
        root=data_path,
        loader=torchvision.datasets.folder.default_loader,
        # DatasetFolder raises ValueError unless exactly one of `extensions`
        # and `is_valid_file` is supplied, so restrict to known image types.
        extensions=torchvision.datasets.folder.IMG_EXTENSIONS,
        transform=transform,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
    )
    return train_loader
    

# Model trained on 4K usual resolution images

## 1. Neural Networks

### 1.0 Discriminator

In [None]:
class Discriminator(nn.Module):
    """Fully connected discriminator that scores flattened images.

    Args:
        img_dim: Length of one flattened input image vector.
    """

    def __init__(self, img_dim):
        super().__init__()
        # Feature widths from the input down to the last hidden layer; every
        # hidden Linear layer is followed by LeakyReLU(0.2).
        widths = [img_dim, 1024, 512, 256, 128, 64]
        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.LeakyReLU(0.2))
        # Single sigmoid unit: one value in (0, 1) per input row.
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())
        self.disc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the discriminator output for a batch of flattened images."""
        scores = self.disc(x)
        return scores
        


### 1.1 Generator

In [None]:
class Generator(nn.Module):
    """Fully connected generator mapping latent vectors to flattened images.

    Args:
        z_dim: Length of the latent noise vector.
        img_dim: Length of the flattened image vector to produce.
    """

    def __init__(self, z_dim, img_dim):
        super().__init__()
        blocks = []
        in_features = z_dim
        # Two hidden layers, each followed by LeakyReLU(0.1).
        for width in (512, 1024):
            blocks += [nn.Linear(in_features, width), nn.LeakyReLU(0.1)]
            in_features = width
        # Tanh bounds every output value to [-1, 1].
        blocks += [nn.Linear(in_features, img_dim), nn.Tanh()]
        self.gen = nn.Sequential(*blocks)

    def forward(self, x):
        """Return generated flattened images for a batch of latent vectors."""
        generated = self.gen(x)
        return generated


In [None]:
# Training hyperparameters for the first model.
lr = 1e-3  # alternative value noted in the original cell: 1e-4
z_dim = 64  # length of the latent noise vector
image_dim = 3 * 256 * 256  # flattened 256x256 RGB image
batch_size = 32
num_epochs = 80


In [None]:
# Build both networks and move them to the selected device.
disc = Discriminator(img_dim=image_dim).to(device)
gen = Generator(z_dim=z_dim, img_dim=image_dim).to(device)
# One batch of latent vectors drawn once, so it can be reused later.
fixed_noise = torch.randn(batch_size, z_dim).to(device)


### 2. Load Custom Data

In [None]:
class CustomDataset(Dataset):
    """Unlabelled image dataset over the image files directly inside one directory.

    Args:
        data_path: Directory to scan (not recursive) for image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    # File extensions treated as images, compared case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to make
        # index -> file stable across runs; lower-casing keeps files such as
        # "IMG_001.JPG" from being silently skipped.
        self.image_files = sorted(
            f for f in os.listdir(data_path)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if one was given."""
        img_name = os.path.join(self.data_path, self.image_files[idx])
        image = Image.open(img_name).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

# Directory holding the pre-resized training images.
data_path = "resized_dataset"

# Resize, convert to a tensor, then shift the values from [0, 1] to [-1, 1].
dataset_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5,), (0.5,)),
    ]
)
dataset = CustomDataset(data_path, transform=dataset_transform)


### 2.1 Generate loader, Discriminator optimizer, Generator optimizer and Loss function

In [None]:

# Shuffled mini-batches of training images.
loader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)
# One Adam optimiser per network, both using the shared learning rate.
opt_disc = optim.Adam(params=disc.parameters(), lr=lr)
opt_gen = optim.Adam(params=gen.parameters(), lr=lr)
# Binary cross-entropy on the discriminator's sigmoid outputs.
criterion = nn.BCELoss()


## 3. Training

In [None]:
import os
import random
import tqdm

from torchvision.utils import save_image

# Counts how many sample images have been written so far.
step = 0

# save_image does not create missing directories, so make sure the sample
# folder exists before the first epoch tries to write into it.
os.makedirs("generated_images", exist_ok=True)

for epoch in range(num_epochs):
    for batch_idx, real in enumerate(loader):
        # Flatten every image to a vector of length image_dim.
        real = real.view(-1, image_dim).to(device)
        # Kept in a separate name so the global `batch_size` hyperparameter
        # is not overwritten by the size of the last (possibly short) batch.
        cur_batch_size = real.shape[0]

        ### Train Discriminator: max log(D(real)) + log(1 - D(G(z)))
        noise = torch.randn(cur_batch_size, z_dim).to(device)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))

        # Detach so the discriminator step does not backpropagate into the
        # generator. The generator gradients from this loss were zeroed
        # before use anyway, so the updates are unchanged and retain_graph
        # is no longer needed.
        disc_fake = disc(fake.detach()).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2

        disc.zero_grad()
        lossD.backward()
        opt_disc.step()

        ### Train Generator maximize log(D(G(z)))
        # Fresh forward pass through the just-updated discriminator, this
        # time keeping the graph back to the generator.
        output = disc(fake).view(-1)
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()

        # Once per epoch: log the losses and save one generated sample.
        if batch_idx == 0:
            print(
                f"Epoch: [{epoch+1}/{num_epochs}], Loss D: {lossD:.4f}, Loss G: {lossG:.4f}"
            )

            with torch.no_grad():
                # Bound the index by the rows of fixed_noise itself so it
                # stays valid whatever batch size the loader uses.
                index = random.randrange(fixed_noise.shape[0])
                sample = gen(fixed_noise[index]).reshape(3, 256, 256)
                save_image(sample, f"generated_images/epoch{epoch}.png", normalize=True)
                step += 1
                
                

In [None]:
# Total number of parameter elements in each network.
disc_param_count = sum(p.numel() for p in disc.parameters())
gen_param_count = sum(p.numel() for p in gen.parameters())
print(
    f"Discriminator Parameters:\t{disc_param_count}\n"
    f"Generator Parameters:\t\t{gen_param_count}"
)

## 4. Generate example images

In [None]:
import torch
from torchvision.utils import save_image
from PIL import Image

# Assuming you have 'gen' (generator) already defined and trained
# Assuming you have 'gen' (generator) already defined and trained
generator = gen

# Set the device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Generate images using random noise
num_images = 10  # Number of images to generate
z_dim = 64      # Size of the generator's input noise vector

# Generate random noise
noise = torch.randn(num_images, z_dim).to(device)

# Generate fake images (inference only, so skip autograd bookkeeping)
with torch.no_grad():
    fake_images = generator(noise)

# Create output directory if it doesn't exist
output_dir = 'generated_images'
os.makedirs(output_dir, exist_ok=True)

# Save generated images
from torchvision import utils

def convert_tensor_to_image(tensor):
    """Converts a flat generator output to a 256x256 RGB PIL Image.

    The generator ends in Tanh, so its values lie in [-1, 1]. ToPILImage
    scales float tensors by 255, so the values are first mapped to [0, 1].
    """
    image_tensor = (tensor.detach().cpu().reshape(3, 256, 256) + 1) / 2
    image = transforms.ToPILImage()(image_tensor.clamp(0, 1))
    return image

# Convert the fake images to PIL Images
image_names = [f'generated_image_{i+1}.png' for i in range(num_images)]

pil_images = [convert_tensor_to_image(image) for image in fake_images]

# Turn the PIL images back into [0, 1] tensors for save_image. The training
# Normalize step is deliberately not re-applied here, because save_image
# expects values in [0, 1].
preprocessed_images = [transforms.ToTensor()(image) for image in pil_images]

# Save the preprocessed images
for image, name in zip(preprocessed_images, image_names):
    utils.save_image(image, os.path.join(output_dir, name))

print(f'{num_images} images generated and saved in {output_dir}')


## 5. Save & Load Model

In [None]:
model_dir = 'saved_model/'
# torch.save does not create parent directories, so create the folder first.
# exist_ok keeps the cell re-runnable.
os.makedirs(model_dir, exist_ok=True)
# Store only the weights (state_dict) of each network.
torch.save(gen.state_dict(), os.path.join(model_dir, 'final_generator.pth'))
torch.save(disc.state_dict(), os.path.join(model_dir, 'final_discriminator.pth'))

# Model trained on 13GB of high resolution images

## More complex Discriminator and Generator

* Training on 768x768 high resolution images
* More complex Discriminator
* Longer training
* z_dim = 100
* Different Loss Function, batch size, learning rate

In [40]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"{device} is set")

cuda is set


In [63]:
class Discriminator(nn.Module):
    """Fully connected discriminator for the second experiment.

    Args:
        img_dim: Length of one flattened input image vector.
    """

    def __init__(self, img_dim):
        super().__init__()
        # Feature widths from the input to the last hidden layer; every
        # hidden Linear layer is followed by LeakyReLU(0.2).
        widths = [img_dim, 512, 1024, 256, 128]
        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.LeakyReLU(0.2))
        # NOTE(review): the head emits 2 sigmoid values per image, so after
        # the `.view(-1)` in the training loop each image contributes two
        # BCE terms. A single output unit was presumably intended; confirm
        # before changing, since that would alter the checkpoint shapes.
        layers.append(nn.Linear(widths[-1], 2))
        layers.append(nn.Sigmoid())
        self.disc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the discriminator output for a batch of flattened images."""
        scores = self.disc(x)
        return scores

class Generator(nn.Module):
    """Deeper fully connected generator for the second experiment.

    Layer widths: z_dim -> 512 -> 1024 -> 2048 -> 1536 -> 1792 -> 1024 -> 512
    -> img_dim. Every hidden layer uses LeakyReLU(0.2); the output uses Tanh.

    Args:
        z_dim: Length of the latent noise vector.
        img_dim: Length of the flattened image vector to produce.
    """

    def __init__(self, z_dim, img_dim):
        super().__init__()
        hidden_widths = (512, 1024, 2048, 1024 + 512, 1024 + 512 + 256, 1024, 512)
        blocks = []
        in_features = z_dim
        for width in hidden_widths:
            blocks += [nn.Linear(in_features, width), nn.LeakyReLU(0.2)]
            in_features = width
        # Tanh bounds every output value to [-1, 1].
        blocks += [nn.Linear(in_features, img_dim), nn.Tanh()]
        self.gen = nn.Sequential(*blocks)

    def forward(self, x):
        """Return generated flattened images for a batch of latent vectors."""
        generated = self.gen(x)
        return generated


In [None]:
import os
from PIL import Image

def convert_images_to_jpeg(input_directory, output_directory, target_size=(512, 512)):
    """Resize every image in a directory and re-encode it as an RGB JPEG.

    Files are selected by extension (``.jpg``, ``.jpeg``, ``.png``, matched
    case-insensitively). Each output is written under the input's original
    file name, so a ``.png`` input keeps its ``.png`` name even though its
    contents are JPEG-encoded.

    Args:
        input_directory: The path to the directory containing the images to resize.
        output_directory: The path to the directory to save the resized images.
            It is created if it does not exist.
        target_size: The desired (width, height) of the output images.
    """

    os.makedirs(output_directory, exist_ok=True)

    for file in os.listdir(input_directory):
        if not file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        input_image_path = os.path.join(input_directory, file)
        output_image_path = os.path.join(output_directory, file)

        # The context manager closes the source file handle as soon as the
        # pixels have been read, instead of leaving it to garbage collection.
        with Image.open(input_image_path) as image:
            # JPEG cannot store alpha or palette modes, so saving an RGBA or
            # P-mode PNG without this conversion raises an error. Converting
            # before the resize also avoids nearest-neighbour resampling of
            # palette images.
            if image.mode != 'RGB':
                image = image.convert('RGB')
            resized_image = image.resize(target_size, Image.LANCZOS)

        resized_image.save(output_image_path, format='JPEG')
# Build the input path with os.path.join. The original "mountain_dataset\dataset"
# literal relied on "\d" being an unrecognised escape sequence, which Python
# warns about, and it only formed a valid path on Windows.
convert_images_to_jpeg(os.path.join("mountain_dataset", "dataset"), "resized_dataset")

In [64]:
# Training hyperparameters for the second model.
lr = 2e-5
z_dim = 100  # length of the latent noise vector
image_dim = 3 * 256 * 256  # flattened 256x256 RGB image
batch_size = 32
num_epochs = 100

In [65]:
# Build both networks and move them to the selected device.
disc = Discriminator(img_dim=image_dim).to(device)
gen = Generator(z_dim=z_dim, img_dim=image_dim).to(device)
# One batch of latent vectors drawn once, so it can be reused later.
fixed_noise = torch.randn(batch_size, z_dim).to(device)

In [66]:
import os
class CustomDataset(Dataset):
    """Unlabelled image dataset over the image files directly inside one directory.

    Args:
        data_path: Directory to scan (not recursive) for image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    # File extensions treated as images, compared case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to make
        # index -> file stable across runs; lower-casing keeps files such as
        # "IMG_001.JPG" from being silently skipped.
        self.image_files = sorted(
            f for f in os.listdir(data_path)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if one was given."""
        img_name = os.path.join(self.data_path, self.image_files[idx])
        image = Image.open(img_name).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

# Directory holding the pre-resized training images.
data_path = "resized_dataset"

# Resize, convert to a tensor, then shift the values from [0, 1] to [-1, 1].
dataset_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.5,), (0.5,)),
    ]
)
dataset = CustomDataset(data_path, transform=dataset_transform)

In [67]:
# Same preprocessing steps as the training dataset, kept as a standalone
# pipeline: resize, convert to a tensor, then map [0, 1] to [-1, 1].
transform_steps = [
    torchvision.transforms.Resize(256),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
]
transform = torchvision.transforms.Compose(transform_steps)

In [68]:
# Shuffled mini-batches of training images.
loader = DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=True,
    pin_memory=True,
)
# One Adam optimiser per network, both using the shared learning rate.
opt_disc = optim.Adam(params=disc.parameters(), lr=lr)
opt_gen = optim.Adam(params=gen.parameters(), lr=lr)
# Binary cross-entropy on the discriminator's sigmoid outputs.
criterion = nn.BCELoss()

In [69]:
# Total number of parameter elements in each network.
disc_param_count = sum(p.numel() for p in disc.parameters())
gen_param_count = sum(p.numel() for p in gen.parameters())
print(
    f"Discriminator Parameters:\t{disc_param_count}\n"
    f"Generator Parameters:\t\t{gen_param_count}"
)

Discriminator Parameters:	101484674
Generator Parameters:		111798528


In [70]:
import torch

# Check if CUDA (GPU support) is available
# Report the CUDA devices visible to PyTorch, or say that none are available.
if not torch.cuda.is_available():
    print("No GPUs available. Using CPU.")
else:
    # Number of GPUs PyTorch can see.
    num_gpus = torch.cuda.device_count()

    # One line per device: its index and marketing name.
    for gpu_index in range(num_gpus):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")


GPU 0: NVIDIA GeForce RTX 3080 Laptop GPU


In [71]:
import os
import random
import tqdm
from PIL import Image
from torchvision.utils import save_image

step = 0

for epoch in range(num_epochs):
    for batch_idx, real in enumerate(loader):
        # Flatten every image to a vector of length image_dim.
        real = real.view(-1, image_dim).to(device)
        # Kept in a separate name so the global `batch_size` hyperparameter
        # is not overwritten by the size of the last (possibly short) batch.
        cur_batch_size = real.shape[0]

        ### Train Discriminator: max log(D(real)) + log(1 - D(G(z)))
        noise = torch.randn(cur_batch_size, z_dim).to(device)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))

        # Detach so the discriminator step does not backpropagate into the
        # generator. The generator gradients from this loss were zeroed
        # before use anyway, so the updates are unchanged and retain_graph
        # is no longer needed.
        disc_fake = disc(fake.detach()).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2

        disc.zero_grad()
        lossD.backward()
        opt_disc.step()

        ### Train Generator maximize log(D(G(z)))
        # Fresh forward pass through the just-updated discriminator, this
        # time keeping the graph back to the generator.
        output = disc(fake).view(-1)
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()

        # Log the losses of the first batch of every epoch.
        if batch_idx == 0:
            print(
                f"Epoch: [{epoch+1}/{num_epochs}], Loss D: {lossD:.4f}, Loss G: {lossG:.4f}"
            )

Epoch: [1/100], Loss D: 0.6938, Loss G: 0.7250
Epoch: [2/100], Loss D: 0.0884, Loss G: 1.9636
Epoch: [3/100], Loss D: 0.0241, Loss G: 3.6243
Epoch: [4/100], Loss D: 0.0000, Loss G: 100.0000
Epoch: [5/100], Loss D: 0.0000, Loss G: 100.0000
Epoch: [6/100], Loss D: 0.0000, Loss G: 100.0000


KeyboardInterrupt: 

In [72]:
import torch
from torchvision.utils import save_image
from PIL import Image

# Assuming you have 'gen' (generator) already defined and trained
# Assuming you have 'gen' (generator) already defined and trained
generator = gen

# Generate images using random noise
num_images = 10  # Number of images to generate
z_dim = 100      # Size of the generator's input noise vector

# Generate random noise
noise = torch.randn(num_images, z_dim).to(device)

# Generate fake images (inference only, so skip autograd bookkeeping)
with torch.no_grad():
    fake_images = generator(noise)

# Create output directory if it doesn't exist
output_dir = 'generated_images'
os.makedirs(output_dir, exist_ok=True)

# Save generated images
from torchvision import utils

def convert_tensor_to_image(tensor):
    """Converts a flat generator output to a 256x256 RGB PIL Image.

    The generator ends in Tanh, so its values lie in [-1, 1]. ToPILImage
    scales float tensors by 255, so the values are first mapped to [0, 1].
    """
    image_tensor = (tensor.detach().cpu().reshape(3, 256, 256) + 1) / 2
    image = transforms.ToPILImage()(image_tensor.clamp(0, 1))
    return image

# Convert the fake images to PIL Images
image_names = [f'generated_image_{i+1}.png' for i in range(num_images)]

pil_images = [convert_tensor_to_image(image) for image in fake_images]

# Turn the PIL images back into [0, 1] tensors for save_image. The training
# Normalize step is deliberately not re-applied here, because save_image
# expects values in [0, 1].
preprocessed_images = [transforms.ToTensor()(image) for image in pil_images]

# Save the preprocessed images
for image, name in zip(preprocessed_images, image_names):
    utils.save_image(image, os.path.join(output_dir, name))

print(f'{num_images} images generated and saved in {output_dir}')


10 images generated and saved in generated_images


In [73]:
model_dir = 'saved_model/'
# exist_ok=True keeps the cell re-runnable: a bare os.makedirs raises
# FileExistsError once the folder has been created.
os.makedirs(model_dir, exist_ok=True)
# Store only the weights (state_dict) of each network.
torch.save(gen.state_dict(), os.path.join(model_dir, 'final_generator_90k.pth'))
torch.save(disc.state_dict(), os.path.join(model_dir, 'final_discriminator_90k.pth'))