In [1]:
#@title Imports and Boilerplate

import urllib.request

import torch

import torch.nn as nn
import tqdm

import numpy as np

import cv2
import torch
import numpy as np
from tqdm import tqdm
from torch import nn
import matplotlib.pyplot as plt

def tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray:
    """Convert a channel-first image tensor into a channel-last ``uint8`` array.

    The values are multiplied by 256, clipped to ``[0, 255]`` and cast to
    ``uint8``. The axes are then reordered with ``permute(1, 2, 0)``, so a
    3-D tensor is expected. The caller's tensor is left untouched because the
    scaling step already produces a new tensor.
    """
    scaled = tensor * 256
    # Clip in place on the scaled copy: negatives to 0, overshoots to 255.
    scaled[scaled < 0] = 0
    scaled[scaled > 255] = 255

    as_bytes = scaled.type(torch.uint8)
    channel_last = as_bytes.permute(1, 2, 0)
    return channel_last.cpu().numpy()

In [2]:
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [3]:
from torch.utils.data import Dataset, DataLoader
import glob
from PIL import Image
import torchvision.transforms as transforms

class DIV2K_valid_HR_dataset(Dataset):
    """Dataset over the PNG files found directly inside one folder.

    Every item is loaded with PIL, forced to RGB, resized to ``size`` and
    converted to a float tensor by ``torchvision.transforms.ToTensor``.

    Args:
        path: Directory that is searched (non-recursively) for ``*.png`` files.
        max_images: Keep only the first ``max_images`` entries of the sorted
            file list; ``None`` keeps all of them. Defaults to 32.
        size: ``(width, height)`` handed to ``PIL.Image.Image.resize``.
            Defaults to ``(512, 512)``.
    """

    def __init__(self, path, max_images=32, size=(512, 512)):
        self.path = path
        # Sorting keeps the index -> file mapping stable between runs.
        self.img_list = sorted(glob.glob(self.path + "/*.png"))[:max_images]
        self.size = tuple(size)
        self.tf = transforms.ToTensor()

    def __len__(self):
        """Return the number of image files kept from the folder."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it as a 3-channel tensor."""
        img_path = self.img_list[idx]
        # The context manager releases the file handle as soon as the pixels
        # have been read; ``convert`` returns an independent in-memory image.
        with Image.open(img_path) as img:
            # Force three channels so grayscale, palette or RGBA PNGs cannot
            # produce a tensor whose channel count differs from the others.
            img = img.convert("RGB").resize(self.size)
        img_t = self.tf(img)
        return img_t

In [4]:
# Serve the images of the local "DIV2K_valid_HR" folder one at a time, without shuffling.
dataset = DIV2K_valid_HR_dataset("DIV2K_valid_HR")
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [5]:
class GaussianFourierFeatureTransform(torch.nn.Module):
    """
    An implementation of Gaussian Fourier feature mapping.

    "Fourier Features Let Networks Learn High Frequency Functions in Low Dimensional Domains":
       https://arxiv.org/abs/2006.10739
       https://people.eecs.berkeley.edu/~bmild/fourfeat/index.html

    Given an input of size [batches, num_input_channels, width, height],
     returns a tensor of size [batches, mapping_size*2, width, height].

    The projection matrix is sampled once in the constructor as
    ``randn(num_input_channels, mapping_size) * scale`` and registered as the
    buffer ``_B``. As a buffer it is part of ``state_dict()`` and follows the
    module through ``.to(...)``, so a saved model can be reloaded with the
    exact same feature mapping.

    Args:
        num_input_channels: Number of channels the input must have.
        mapping_size: Number of random projections; the output has twice as
            many channels (sine and cosine of each projection).
        scale: Multiplier applied to the standard-normal projection matrix.
    """

    def __init__(self, num_input_channels, mapping_size=256, scale=10):
        super().__init__()

        self._num_input_channels = num_input_channels
        self._mapping_size = mapping_size
        # A buffer (not a Parameter): fixed, never trained, but saved and moved
        # together with the module.
        self.register_buffer(
            "_B", torch.randn((num_input_channels, mapping_size)) * scale)

    def forward(self, x):
        """Map ``x`` to ``[sin(2*pi*x@B), cos(2*pi*x@B)]`` along the channel axis."""
        assert x.dim() == 4, 'Expected 4D input (got {}D input)'.format(x.dim())

        batches, channels, width, height = x.shape

        assert channels == self._num_input_channels,\
            "Expected input to have {} channels (got {} channels)".format(self._num_input_channels, channels)

        # Make shape compatible for matmul with _B.
        # From [B, C, W, H] to [(B*W*H), C].
        x = x.permute(0, 2, 3, 1).reshape(batches * width * height, channels)

        # The module may live on a different device than the input, so the
        # matrix is moved on demand. It is also cast to the input's floating
        # dtype so float64 / half inputs do not fail with a dtype mismatch.
        projection = self._B.to(x.device)
        if x.is_floating_point():
            projection = projection.to(x.dtype)
        x = x @ projection

        # From [(B*W*H), C] to [B, W, H, C]
        x = x.view(batches, width, height, self._mapping_size)
        # From [B, W, H, C] to [B, C, W, H]
        x = x.permute(0, 3, 1, 2)

        x = 2 * np.pi * x
        return torch.cat([torch.sin(x), torch.cos(x)], dim=1)

In [6]:
# https://cvnote.ddlee.cc/2019/09/12/psnr-ssim-python

import math
def calculate_psnr(img1, img2):
    """Return the peak signal-to-noise ratio in dB for a peak value of 255.

    Both inputs are converted to ``float64`` before subtracting, so unsigned
    integer images do not wrap around. Identical images yield ``inf``.
    """
    # img1 and img2 have range [0, 255]
    difference = img1.astype(np.float64) - img2.astype(np.float64)
    mse = np.mean(difference**2)
    if mse != 0:
        return 20 * math.log10(255.0 / math.sqrt(mse))
    return float('inf')

# https://cvnote.ddlee.cc/2019/09/12/psnr-ssim-python
import math
import numpy as np
import cv2

def ssim(img1, img2):
    """Return the mean SSIM between two images with values in [0, 255].

    Local statistics are computed with an 11x11 Gaussian window (sigma 1.5).
    Five pixels are cropped from each border of the first two axes after
    filtering, so only positions fully covered by the window contribute.
    """
    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)

    # Stabilising constants for a dynamic range of 255.
    c1 = (0.01 * 255)**2
    c2 = (0.03 * 255)**2

    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(gauss_1d, gauss_1d.transpose())

    def windowed_mean(image):
        # Filter, then drop the 5-pixel border ("valid" region).
        return cv2.filter2D(image, -1, window)[5:-5, 5:-5]

    mu1 = windowed_mean(img1)
    mu2 = windowed_mean(img2)
    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # Local variances / covariance: E[x*y] - E[x]*E[y].
    sigma1_sq = windowed_mean(img1**2) - mu1_sq
    sigma2_sq = windowed_mean(img2**2) - mu2_sq
    sigma12 = windowed_mean(img1 * img2) - mu1_mu2

    numerator = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
    denominator = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
    ssim_map = numerator / denominator
    return ssim_map.mean()


def calculate_ssim(img1, img2):
    '''Calculate SSIM between two images with values in [0, 255].

    Accepts 2-D grayscale images, or 3-D images whose last axis holds 1 or 3
    channels. For 3-channel images SSIM is computed per channel and the three
    values are averaged.

    Returns:
        The mean SSIM as a float.

    Raises:
        ValueError: if the shapes differ, or if the input is neither 2-D nor
            a 3-D array with 1 or 3 channels in the last axis.
    '''
    if not img1.shape == img2.shape:
        raise ValueError('Input images must have the same dimensions.')
    if img1.ndim == 2:
        return ssim(img1, img2)
    if img1.ndim == 3:
        if img1.shape[2] == 3:
            # Index the channel explicitly; passing the whole image on every
            # iteration would just repeat the same computation three times.
            per_channel = [ssim(img1[..., i], img2[..., i]) for i in range(3)]
            return np.array(per_channel).mean()
        if img1.shape[2] == 1:
            # Drop only the channel axis (a blanket squeeze would also drop
            # a height or width of 1).
            return ssim(img1[..., 0], img2[..., 0])
    # Reached for any other rank or channel count, instead of returning None.
    raise ValueError('Wrong input image dimensions.')

In [7]:
def create_new_model():
    """Build a freshly initialised per-pixel network and move it to ``device``.

    The network consists only of 1x1 convolutions: three
    ``Conv2d(256, 256) -> ReLU -> BatchNorm2d(256)`` stages followed by
    ``Conv2d(256, 3) -> Sigmoid``.
    """
    layers = []
    # Three identical hidden stages, created in the same order as they run.
    for _ in range(3):
        layers.extend([
            nn.Conv2d(256, 256, kernel_size=1, padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(256),
        ])
    # Output head: three channels squashed into (0, 1) by the sigmoid.
    layers.extend([
        nn.Conv2d(256, 3, kernel_size=1, padding=0),
        nn.Sigmoid(),
    ])
    return nn.Sequential(*layers).to(device)

psnr_dict = {}
ssim_dict = {}
import lpips

# Build the LPIPS loss (VGG trunk) once: constructing it loads the pretrained
# weights, which does not have to be repeated for every image.
# The historical variable name is kept even though the backbone is VGG.
loss_fn_alex = lpips.LPIPS(net='vgg').to(device) # best forward scores

# NOTE(review): no random seed is set here, so the Fourier projection matrix
# and the model initialisation differ from run to run.
for ind, img in enumerate(tqdm(dataloader)):
    target = img.to(device)

    # Create input pixel coordinates in the unit square. This will be the input to the model.
    # Rows and columns get their own axis so non-square images also work.
    _, _, n_rows, n_cols = target.shape
    row_coords = np.linspace(0, 1, n_rows, endpoint=False)
    col_coords = np.linspace(0, 1, n_cols, endpoint=False)
    xy_grid = np.stack(np.meshgrid(col_coords, row_coords), -1)
    xy_grid = torch.tensor(xy_grid).unsqueeze(0).permute(0, 3, 1, 2).float().contiguous().to(device)

    # The Fourier features are computed once per image, outside of the
    # optimisation loop, because they do not change during training.
    x = GaussianFourierFeatureTransform(2, 128, 10)(xy_grid)

    # A fresh model and optimizer are fitted to every image.
    model = create_new_model()
    optimizer = torch.optim.Adam(list(model.parameters()), lr=1e-4)

    for epoch in range(2000):
        optimizer.zero_grad()

        generated = model(x)

        # Both tensors are in [0, 1] (ToTensor output / sigmoid output), while
        # LPIPS expects [-1, 1]; normalize=True performs that rescaling.
        loss = loss_fn_alex(target, generated, normalize=True).mean()

        loss.backward()
        optimizer.step()

    # Metrics use the output of the last forward pass (taken before the final
    # optimizer step). Convert each image once and reuse the arrays.
    target_np = tensor_to_numpy(target[0])
    generated_np = tensor_to_numpy(generated[0].detach())
    psnr_dict[ind] = calculate_psnr(target_np, generated_np)
    ssim_dict[ind] = calculate_ssim(target_np, generated_np)
    print(psnr_dict[ind], ssim_dict[ind])

  0%|          | 0/32 [00:00<?, ?it/s]

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Loading model from: /home/intern/anaconda3/envs/fourier/lib/python3.7/site-packages/lpips/weights/v0.1/vgg.pth


  0%|          | 0/32 [06:44<?, ?it/s]


KeyboardInterrupt: 

In [None]:
import torchsummary
# Layer-by-layer summary of the most recently trained model for a
# 256-channel, 512x512 input (the batch dimension is not part of input_size).
torchsummary.summary(model, input_size=(256, 512, 512))

In [None]:
# Show the PSNR recorded for each image, keyed by its index in the dataloader.
psnr_dict

In [None]:
# Show the SSIM recorded for each image, keyed by its index in the dataloader.
ssim_dict

In [None]:
# Mean PSNR over all images that finished training.
# If the training cell was interrupted before any image completed, the dict
# is empty; report NaN instead of failing with ZeroDivisionError.
psnr_total = sum(psnr_dict.values())
psnr_average = psnr_total / len(psnr_dict) if psnr_dict else float("nan")
psnr_average

In [None]:
# Mean SSIM over all images that finished training.
# If the training cell was interrupted before any image completed, the dict
# is empty; report NaN instead of failing with ZeroDivisionError.
ssim_total = sum(ssim_dict.values())

ssim_average = ssim_total / len(ssim_dict) if ssim_dict else float("nan")
ssim_average

In [None]:
import json

# Persist both metric dictionaries as JSON files in the working directory.
# json.dump writes the integer image indices as string keys.
for out_name, metrics in (
    ('baseline_lpips_vgg_psnr_dict.json', psnr_dict),
    ('baseline_lpips_vgg_ssim_dict.json', ssim_dict),
):
    with open(out_name, 'w') as fp:
        json.dump(metrics, fp)