In [1]:
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vgg19

In [2]:
!nvidia-smi


Wed Jun 19 05:03:39 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
import zipfile
zip_path = '/content/sample_data/humans.zip'
extract_path = '/content/sample_data/human'

# Unzip the file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [9]:
from PIL import Image
import os

# Path to your HR images directory
hr_images_dir = '/content/sample_data/human/images'

# Path to where you want to save the LR images
lr_images_dir = '/content/sample_data/human/low/images'

# Define the scaling factor for downsampling
scaling_factor = 4  # Example: HR image size / 4

# Ensure the LR images directory exists
os.makedirs(lr_images_dir, exist_ok=True)

# Iterate through HR images
for filename in os.listdir(hr_images_dir):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        hr_image_path = os.path.join(hr_images_dir, filename)
        hr_image = Image.open(hr_image_path)

        # Resize HR image to obtain LR image using LANCZOS interpolation
        lr_image = hr_image.resize((hr_image.width // scaling_factor, hr_image.height // scaling_factor))

        # Save LR image
        lr_image.save(os.path.join(lr_images_dir, filename.replace('.jpg', '.jpg').replace('.png', '.png')))

In [4]:
import torch
import torch.nn as nn

class DenseBlock(nn.Module):
    def __init__(self, in_channels, growth_channels):
        super(DenseBlock, self).__init__()
        self.c1 = nn.Conv2d(in_channels, growth_channels, 3, 1, 1)
        self.c2 = nn.Conv2d(in_channels + growth_channels, growth_channels, 3, 1, 1)
        self.c3 = nn.Conv2d(in_channels + 2 * growth_channels, growth_channels, 3, 1, 1)
        self.c4 = nn.Conv2d(in_channels + 3 * growth_channels, growth_channels, 3, 1, 1)
        self.c5 = nn.Conv2d(in_channels + 4 * growth_channels, in_channels, 3, 1, 1)
        self.leaky_relu = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x):
        out1 = self.leaky_relu(self.c1(x))
        out = torch.cat([x, out1], dim=1)
        out2 = self.leaky_relu(self.c2(out))
        out = torch.cat([x, out1, out2], dim=1)
        out3 = self.leaky_relu(self.c3(out))
        out = torch.cat([x, out1, out2, out3], dim=1)
        out4 = self.leaky_relu(self.c4(out))
        out = torch.cat([x, out1, out2, out3, out4], dim=1)
        out5 = self.leaky_relu(self.c5(out))
        return x + out5

class RRDB(nn.Module):
    def __init__(self, in_channels=3, growth_channels=16, beta=0.2):
        super(RRDB, self).__init__()
        self.dense1 = DenseBlock(in_channels, growth_channels)
        self.dense2 = DenseBlock(in_channels, growth_channels)
        self.dense3 = DenseBlock(in_channels, growth_channels)
        self.beta = beta

    def forward(self, x):
        residual = x
        out1 = x + self.dense1(x) * self.beta
        out2 = x + self.dense2(out1) * self.beta
        out3 = x + self.dense3(out2) * self.beta
        return out3 + residual

class Generator(nn.Module):
    def __init__(self, in_channels=3, num_blocks=8, growth_channels=16):
        super(Generator, self).__init__()
        self.initial_conv = nn.Conv2d(in_channels, growth_channels, 3, 1, 1)
        self.blocks = nn.Sequential(*[RRDB(growth_channels, growth_channels) for _ in range(num_blocks)])
        self.conv = nn.Conv2d(growth_channels, growth_channels, 3, 1, 1)
        self.upsample = nn.Sequential(
            nn.Conv2d(growth_channels, growth_channels * 4, 3, 1, 1),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(growth_channels, growth_channels * 4, 3, 1, 1),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.final_conv = nn.Conv2d(growth_channels, in_channels, 3, 1, 1)

    def forward(self, x):
        x = self.initial_conv(x)
        residual = x
        x = self.blocks(x)
        x = self.conv(x) + residual
        x = self.upsample(x)
        return self.final_conv(x)




In [5]:
class Disc_Conv(nn.Module):
    def __init__(self, in_channels=3, out_channels=64//4):
        super(Disc_Conv, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        return x


class Discriminator(nn.Module):
    def __init__(self, in_channels=3, input_size=64//4):
        super(Discriminator, self).__init__()
        self.conv = nn.Conv2d(in_channels, 64//4, 3, 1, 1)
        self.leaky_relu = nn.LeakyReLU(0.2, inplace=True)
        self.block1 = Disc_Conv(in_channels=64//4, out_channels=64//4)
        self.block2 = Disc_Conv(in_channels=64//4, out_channels=128//4)
        self.block3 = Disc_Conv(in_channels=128//4, out_channels=128//4)
        self.block4 = Disc_Conv(in_channels=128//4, out_channels=256//4)
        self.block5 = Disc_Conv(in_channels=256//4, out_channels=256//4)
        self.block6 = Disc_Conv(in_channels=256//4, out_channels=512//4)
        self.block7 = Disc_Conv(in_channels=512//4, out_channels=512//4)

        self.feature_size = 512 * (input_size // 8) * (input_size // 8)

        self.dense1 = nn.Linear(self.feature_size, 1024//4)
        self.dense2 = nn.Linear(1024//4, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.leaky_relu(self.conv(x))
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.block6(x)
        x = self.block7(x)
        x = x.view(-1, self.feature_size)  # Flatten the tensor
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.sigmoid(x)
        return x

In [6]:
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Loss functions and optimizers
l1_loss = nn.L1Loss().to(device)
vgg = vgg19(pretrained=True).features[:35].eval().to(device)
for param in vgg.parameters():
    param.requires_grad = False

adversarial_loss = nn.BCEWithLogitsLoss().to(device)
gen_optimizer = optim.Adam(generator.parameters(), lr=1e-4)
disc_optimizer = optim.Adam(discriminator.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:06<00:00, 88.8MB/s]


In [7]:
def train_step(real_high_res, real_low_res):
    real_high_res = real_high_res.to(device)
    real_low_res = real_low_res.to(device)

    # Training Discriminator
    disc_optimizer.zero_grad()
    fake_high_res = generator(real_low_res)
    real_output = discriminator(real_high_res)
    fake_output = discriminator(fake_high_res.detach())
    real_loss = adversarial_loss(real_output, torch.ones_like(real_output))
    fake_loss = adversarial_loss(fake_output, torch.zeros_like(fake_output))
    disc_loss = (real_loss + fake_loss) / 2
    disc_loss.backward()
    disc_optimizer.step()

    # Training Generator
    gen_optimizer.zero_grad()
    fake_output = discriminator(fake_high_res)
    adv_loss = adversarial_loss(fake_output, torch.ones_like(fake_output))
    perc_loss = l1_loss(vgg(fake_high_res), vgg(real_high_res))
    pixel_loss = l1_loss(fake_high_res, real_high_res)
    gen_loss = pixel_loss + 0.001 * adv_loss + 0.006 * perc_loss
    gen_loss.backward()
    gen_optimizer.step()

    return disc_loss.item(), gen_loss.item()


In [10]:
from PIL import Image
import os
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Define transforms for loading images
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts PIL Image to tensor
])

# Assuming you have defined your generator, discriminator, optimizers, loss functions, and other necessary components

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    for filename in os.listdir(hr_images_dir):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            hr_image_path = os.path.join(hr_images_dir, filename)
            lr_image_path = os.path.join(lr_images_dir, filename)

            # Load high-resolution and low-resolution images
            real_high_res = Image.open(hr_image_path).convert('RGB')
            real_low_res = Image.open(lr_image_path).convert('RGB')

            # Apply transforms
            real_high_res = transform(real_high_res)
            real_low_res = transform(real_low_res)

            # Add batch dimension (if necessary)
            real_high_res = real_high_res.unsqueeze(0)
            real_low_res = real_low_res.unsqueeze(0)

            # Move to device
            real_high_res = real_high_res.to(device)
            real_low_res = real_low_res.to(device)

            # Call train_step function
            disc_loss, gen_loss = train_step(real_high_res, real_low_res)

            print(f"Epoch [{epoch}/{num_epochs}], Discriminator Loss: {disc_loss}, Generator Loss: {gen_loss}")

# Save trained models
torch.save(generator.state_dict(), 'generator_final.pt')
torch.save(discriminator.state_dict(), 'discriminator_final.pt')


Epoch [0/20], Discriminator Loss: 0.7218595147132874, Generator Loss: 9.721864700317383
Epoch [0/20], Discriminator Loss: 0.7069230079650879, Generator Loss: 6.63599157333374
Epoch [0/20], Discriminator Loss: 0.6956802606582642, Generator Loss: 7.213558197021484
Epoch [0/20], Discriminator Loss: 0.6889640688896179, Generator Loss: 6.548274993896484
Epoch [0/20], Discriminator Loss: 0.6838451623916626, Generator Loss: 7.5840067863464355
Epoch [0/20], Discriminator Loss: 0.682355523109436, Generator Loss: 5.958262920379639
Epoch [0/20], Discriminator Loss: 0.6843824982643127, Generator Loss: 5.4914326667785645
Epoch [0/20], Discriminator Loss: 0.6764742732048035, Generator Loss: 5.216032028198242
Epoch [0/20], Discriminator Loss: 0.6808547973632812, Generator Loss: 4.4762349128723145
Epoch [0/20], Discriminator Loss: 0.6813269853591919, Generator Loss: 4.0645751953125
Epoch [0/20], Discriminator Loss: 0.665312647819519, Generator Loss: 4.329738616943359
Epoch [0/20], Discriminator Loss: 

In [11]:
generator.eval()

Generator(
  (initial_conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (blocks): Sequential(
    (0): RRDB(
      (dense1): DenseBlock(
        (c1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c3): Conv2d(48, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c4): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c5): Conv2d(80, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (leaky_relu): LeakyReLU(negative_slope=0.2, inplace=True)
      )
      (dense2): DenseBlock(
        (c1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c2): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c3): Conv2d(48, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c4): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (c5): Con

In [12]:
import zipfile
zip_path = '/content/sample_data/valid.zip'
extract_path = '/content/sample_data/valid'

# Unzip the file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [13]:
import numpy as np
import skimage.metrics
from PIL import Image

def calculate_psnr(img1, img2):
    img1 = np.array(img1)
    img2 = np.array(img2)
    return skimage.metrics.peak_signal_noise_ratio(img1, img2)

def calculate_ssim(img1, img2):
    img1 = np.array(img1)
    img2 = np.array(img2)
    return skimage.metrics.structural_similarity(img1, img2, multichannel=True)

# Evaluate images
psnr_values = []
ssim_values = []

In [15]:
from torchvision.transforms import ToTensor, ToPILImage
# Define paths
hr_images_dir = '/content/sample_data/valid/valid/images'
lr_images_dir = '/content/sample_data/valid/valid/low/images'
sr_images_dir = '/content/sample_data/valid/valid/sr/images'

# Ensure the SR images directory exists
os.makedirs(sr_images_dir, exist_ok=True)

# Load the generator model
# generator = ...  # Your generator model
# generator.load_state_dict(torch.load('generator_final.pt'))
generator.eval()  # Set to evaluation mode

# Define transform
transform = ToTensor()
to_pil_image = ToPILImage()

# Evaluate images
psnr_values = []
ssim_values = []

for filename in os.listdir(lr_images_dir):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        hr_image_path = os.path.join(hr_images_dir, filename)
        lr_image_path = os.path.join(lr_images_dir, filename)

        # Generate super-resolution image
        with torch.no_grad():
            lr_image = Image.open(lr_image_path).convert('RGB')
            lr_tensor = transform(lr_image).unsqueeze(0).to(device)

            sr_tensor = generator(lr_tensor).squeeze(0).cpu()
            sr_image = to_pil_image(sr_tensor)
            sr_image_path = os.path.join(sr_images_dir, filename.replace('.jpg', '.jpg').replace('.png', '.png'))
            sr_image.save(sr_image_path)

            # Evaluate PSNR and SSIM
            hr_image = Image.open(hr_image_path).convert('RGB')

            psnr = calculate_psnr(hr_image, sr_image)
            ssim = calculate_ssim(hr_image, sr_image)

            psnr_values.append(psnr)
            ssim_values.append(ssim)
            print(f"Image: {filename}, PSNR: {psnr}, SSIM: {ssim}")

avg_psnr = np.mean(psnr_values)
avg_ssim = np.mean(ssim_values)
print(f"Average PSNR: {avg_psnr}, Average SSIM: {avg_ssim}")

  return skimage.metrics.structural_similarity(img1, img2, multichannel=True)


Image: UN0682820-JPG_webp.rf.20c6bee6dd398a988376e99e039c6380.jpg, PSNR: 18.034058721498205, SSIM: 0.555694868242597
Image: download_jpg.rf.428a85a7993641189f88d75818111ac7.jpg, PSNR: 15.740658907522448, SSIM: 0.5517235977321402
Image: PF_22-03-02_indiaGender_featured_webp.rf.01fcc4223e680d7c9fb7a0b196201225.jpg, PSNR: 15.393739353133618, SSIM: 0.4683911275210642
Image: Christian-family-in-Orissa_jpg.rf.8d884dfe61bef2e31e42b3c24b8efce5.jpg, PSNR: 16.90600840287153, SSIM: 0.44903585106467947
Image: p1-29_jpg.rf.254f918dcd65ec4bf17e365194fdfaa8.jpg, PSNR: 18.06007249936845, SSIM: 0.5094387264877781
Image: family-four-smile-camera-260nw-559949731_webp.rf.4f6401b7fd5939e43528fa14258e4942.jpg, PSNR: 15.844587619957055, SSIM: 0.5732783588997378
Image: com-kRY-621x414-LiveMint_webp.rf.49e666e4a2b61358a91ad5319175c259.jpg, PSNR: 16.26274092598369, SSIM: 0.4744612084735739
Image: images-4-_jpg.rf.747a36c69c02e514cbb80c666d567431.jpg, PSNR: 15.582027632774318, SSIM: 0.6109427379726219
Image: ima