In [1]:
#example loads the CAR ISR and attempts to run inference
#!pip install torch

In [10]:
# %load CAR/modules.py
import functools
import numpy as np
import os
import torch
import torch.nn as nn
import requests
from PIL import Image, ImageOps
import numpy as np


LEAKY_FACTOR = 0.2
MULT_FACTOR = 1


# TEST PASSED
class PixelUnShuffle(nn.Module):
    """
    Inverse process of pytorch pixel shuffle module
    """
    def __init__(self, down_scale):
        """
        :param down_scale: int, down scale factor
        """
        super(PixelUnShuffle, self).__init__()

        if not isinstance(down_scale, int):
            raise ValueError('Down scale factor must be a integer number')
        self.down_scale = down_scale

    def forward(self, input):
        """
        :param input: tensor of shape (batch size, channels, height, width)
        :return: tensor of shape(batch size, channels * down_scale * down_scale, height / down_scale, width / down_scale)
        """
        b, c, h, w = input.size()
        assert h % self.down_scale == 0
        assert w % self.down_scale == 0

        oc = c * self.down_scale ** 2
        oh = int(h / self.down_scale)
        ow = int(w / self.down_scale)

        output_reshaped = input.reshape(b, c, oh, self.down_scale, ow, self.down_scale)
        output = output_reshaped.permute(0, 1, 3, 5, 2, 4).reshape(b, oc, oh, ow)

        return output


class DownsampleBlock(nn.Module):
    def __init__(self, scale, input_channels, output_channels, ksize=1):
        super(DownsampleBlock, self).__init__()
        self.downsample = nn.Sequential(
            PixelUnShuffle(scale),
            nn.Conv2d(input_channels * (scale ** 2), output_channels, kernel_size=ksize, stride=1, padding=ksize//2)
        )

    def forward(self, input):
        return self.downsample(input)


class UpsampleBlock(nn.Module):
    def __init__(self, scale, input_channels, output_channels, ksize=1):
        super(UpsampleBlock, self).__init__()
        self.upsample = nn.Sequential(
            nn.Conv2d(input_channels, output_channels * (scale ** 2), kernel_size=1, stride=1, padding=ksize//2),
            nn.PixelShuffle(scale)
        )

    def forward(self, input):
        return self.upsample(input)


class ResidualBlock(nn.Module):
    def __init__(self, input_channels, channels, ksize=3,
                 use_instance_norm=False, affine=False):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        self.ksize = ksize
        padding = self.ksize // 2
        if use_instance_norm:
            self.transform = nn.Sequential(
                nn.ReflectionPad2d(padding),
                nn.Conv2d(input_channels, channels, kernel_size=self.ksize, stride=1),
                nn.InstanceNorm2d(channels, affine=affine),
                nn.LeakyReLU(0.2),
                nn.ReflectionPad2d(padding),
                nn.Conv2d(channels, channels, kernel_size=self.ksize, stride=1),
                nn.InstanceNorm2d(channels)
            )
        else:
            self.transform = nn.Sequential(
                nn.ReflectionPad2d(padding),
                nn.Conv2d(input_channels, channels, kernel_size=self.ksize, stride=1),
                nn.LeakyReLU(0.2),
                nn.ReflectionPad2d(padding),
                nn.Conv2d(channels, channels, kernel_size=self.ksize, stride=1),
            )

    def forward(self, input):
        return input + self.transform(input) * MULT_FACTOR


class NormalizeBySum(nn.Module):
    def forward(self, x):
        return x / torch.sum(x, dim=1, keepdim=True).clamp(min=1e-7)


class MeanShift(nn.Conv2d):
    def __init__(self, rgb_range, rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):
        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = torch.Tensor(rgb_std)
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std
        for p in self.parameters():
            p.requires_grad = False


class DSN(nn.Module):
    def __init__(self, k_size, input_channels=3, scale=4):
        super(DSN, self).__init__()

        self.k_size = k_size

        self.sub_mean = MeanShift(1)

        self.ds_1 = nn.Sequential(
            nn.ReflectionPad2d(2),
            nn.Conv2d(input_channels, 64, 5),
            nn.LeakyReLU(LEAKY_FACTOR)
        )

        self.ds_2 = DownsampleBlock(2, 64, 128, ksize=1)
        self.ds_4 = DownsampleBlock(2, 128, 128, ksize=1)

        res_4 = list()
        for idx in range(5):
            res_4 += [ResidualBlock(128, 128)]
        self.res_4 = nn.Sequential(*res_4)

        self.ds_8 = DownsampleBlock(2, 128, 256)

        self.kernels_trunk = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            UpsampleBlock(8 // scale, 256, 256, ksize=1),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU()
        )

        self.kernels_weight = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, k_size ** 2, 3)
        )

        self.offsets_trunk = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            UpsampleBlock(8 // scale, 256, 256, ksize=1),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU()
        )

        self.offsets_h_generation = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, k_size ** 2, 3),
            nn.Tanh()
        )

        self.offsets_v_generation = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, 256, 3),
            nn.ReLU(),
            nn.ReflectionPad2d(1),
            nn.Conv2d(256, k_size ** 2, 3),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.sub_mean(x)

        x = self.ds_1(x)
        x = self.ds_2(x)
        x = self.ds_4(x)
        x = x + self.res_4(x)
        x = self.ds_8(x)

        kt = self.kernels_trunk(x)
        k_weight = torch.clamp(self.kernels_weight(kt), min=1e-6, max=1)
        kernels = k_weight / torch.sum(k_weight, dim=1, keepdim=True).clamp(min=1e-6)

        ot = self.offsets_trunk(x)
        offsets_h = self.offsets_h_generation(ot)
        offsets_v = self.offsets_v_generation(ot)

        return kernels, offsets_h, offsets_v


In [11]:
# %load CAR/EDSR/common.py
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


def default_conv(in_channels, out_channels, kernel_size, bias=True):
    return nn.Conv2d(
        in_channels, out_channels, kernel_size,
        padding=(kernel_size // 2), bias=bias)


class MeanShift(nn.Conv2d):
    def __init__(self, rgb_range, rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):
        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = torch.Tensor(rgb_std)
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std
        for p in self.parameters():
            p.requires_grad = False


class BasicBlock(nn.Sequential):
    def __init__(
            self, conv, in_channels, out_channels, kernel_size, stride=1, bias=False,
            bn=True, act=nn.ReLU(True)):

        m = [conv(in_channels, out_channels, kernel_size, bias=bias)]
        if bn:
            m.append(nn.BatchNorm2d(out_channels))
        if act is not None:
            m.append(act)

        super(BasicBlock, self).__init__(*m)


class ResBlock(nn.Module):
    def __init__(
            self, conv, n_feats, kernel_size,
            bias=True, bn=False, act=nn.ReLU(True), res_scale=1):

        super(ResBlock, self).__init__()
        m = []
        for i in range(2):
            m.append(conv(n_feats, n_feats, kernel_size, bias=bias))
            if bn:
                m.append(nn.BatchNorm2d(n_feats))
            if i == 0:
                m.append(act)

        self.body = nn.Sequential(*m)
        self.res_scale = res_scale

    def forward(self, x):
        res = self.body(x).mul(self.res_scale)
        res += x

        return res


class Upsampler(nn.Sequential):
    def __init__(self, conv, scale, n_feats, bn=False, act=False, bias=True):

        m = []
        if (scale & (scale - 1)) == 0:  # Is scale = 2^n?
            for _ in range(int(math.log(scale, 2))):
                m.append(conv(n_feats, 4 * n_feats, 3, bias))
                m.append(nn.PixelShuffle(2))
                if bn:
                    m.append(nn.BatchNorm2d(n_feats))
                if act == 'relu':
                    m.append(nn.ReLU(True))
                elif act == 'prelu':
                    m.append(nn.PReLU(n_feats))

        elif scale == 3:
            m.append(conv(n_feats, 9 * n_feats, 3, bias))
            m.append(nn.PixelShuffle(3))
            if bn:
                m.append(nn.BatchNorm2d(n_feats))
            if act == 'relu':
                m.append(nn.ReLU(True))
            elif act == 'prelu':
                m.append(nn.PReLU(n_feats))
        else:
            raise NotImplementedError

        super(Upsampler, self).__init__(*m)


In [12]:
# %load CAR/utils.py
import numpy as np
import torch
from scipy import signal
from PIL import Image


def matlab_style_gauss2D(shape=(3, 3), sigma=0.5):
    """
    2D gaussian mask - should give the same result as MATLAB's fspecial('gaussian',[shape],[sigma])
    Acknowledgement : https://stackoverflow.com/questions/17190649/how-to-obtain-a-gaussian-filter-in-python (Author@ali_m)
    """
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    h = np.exp(-(x * x + y * y) / (2. * sigma * sigma))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    sumh = h.sum()
    if sumh != 0:
        h /= sumh
    return h


def calc_ssim(X, Y, sigma=1.5, K1=0.01, K2=0.03, R=255):
    '''
    X : y channel (i.e., luminance) of transformed YCbCr space of X
    Y : y channel (i.e., luminance) of transformed YCbCr space of Y
    Please follow the setting of psnr_ssim.m in EDSR (Enhanced Deep Residual Networks for Single Image Super-Resolution CVPRW2017).
    Official Link : https://github.com/LimBee/NTIRE2017/tree/db34606c2844e89317aac8728a2de562ef1f8aba
    The authors of EDSR use MATLAB's ssim as the evaluation tool,
    thus this function is the same as ssim.m in MATLAB with C(3) == C(2)/2.
    '''
    gaussian_filter = matlab_style_gauss2D((11, 11), sigma)

    X = X.astype(np.float64)
    Y = Y.astype(np.float64)

    window = gaussian_filter

    ux = signal.convolve2d(X, window, mode='same', boundary='symm')
    uy = signal.convolve2d(Y, window, mode='same', boundary='symm')

    uxx = signal.convolve2d(X * X, window, mode='same', boundary='symm')
    uyy = signal.convolve2d(Y * Y, window, mode='same', boundary='symm')
    uxy = signal.convolve2d(X * Y, window, mode='same', boundary='symm')

    vx = uxx - ux * ux
    vy = uyy - uy * uy
    vxy = uxy - ux * uy

    C1 = (K1 * R) ** 2
    C2 = (K2 * R) ** 2

    A1, A2, B1, B2 = ((2 * ux * uy + C1, 2 * vxy + C2, ux ** 2 + uy ** 2 + C1, vx + vy + C2))
    D = B1 * B2
    S = (A1 * A2) / D
    mssim = S.mean()

    return mssim


def cal_psnr(img_1, img_2, benchmark=False):
    assert img_1.shape[0] == img_2.shape[0] and img_1.shape[1] == img_2.shape[1]
    img_1 = np.float64(img_1)
    img_2 = np.float64(img_2)

    diff = (img_1 - img_2) / 255.0
    if benchmark:
        gray_coeff = np.array([65.738, 129.057, 25.064]).reshape(1, 1, 3) / 255.0
        diff = diff * gray_coeff
        diff = diff[:, :, 0] + diff[:, :, 1] + diff[:, :, 2]

    mse = np.mean(diff ** 2)
    psnr = -10.0 * np.log10(mse)

    return psnr


def load_img(img_file):
    img = Image.open(img_file).convert('RGB')
    img = np.array(img)
    h, w, _ = img.shape
    img = img[:h // 8 * 8, :w // 8 * 8, :]
    img = np.array(img) / 255.
    img = img.transpose((2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    return img


In [13]:
# %load CAR/EDSR/edsr.py
#from EDSR import common

import torch
import torch.nn as nn

url = {
    'r16f64x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x2-1bc95232.pt',
    'r16f64x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x3-abf2a44e.pt',
    'r16f64x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_baseline_x4-6b446fab.pt',
    'r32f256x2': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x2-0edfb8a3.pt',
    'r32f256x3': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x3-ea3ef2c6.pt',
    'r32f256x4': 'https://cv.snu.ac.kr/research/EDSR/models/edsr_x4-4f62e9ef.pt'
}


class EDSR(nn.Module):
    def __init__(self, n_resblocks=16, n_feats=64, scale=4, conv=default_conv):
        super(EDSR, self).__init__()

        # n_resblocks = 16 * 2
        # n_feats = 64 * 4
        kernel_size = 3
        act = nn.ReLU(True)
        self.url = url['r{}f{}x{}'.format(n_resblocks, n_feats, scale)]
        self.sub_mean = MeanShift(1)
        self.add_mean = MeanShift(1, sign=1)

        # define head module
        m_head = [conv(3, n_feats, kernel_size)]

        # define body module
        m_body = [
            ResBlock(
                conv, n_feats, kernel_size, act=act, res_scale=0.1
            ) for _ in range(n_resblocks)
        ]
        m_body.append(conv(n_feats, n_feats, kernel_size))

        # define tail module
        m_tail = [
            Upsampler(conv, scale, n_feats, act=False),
            conv(n_feats, 3, kernel_size)
        ]

        self.head = nn.Sequential(*m_head)
        self.body = nn.Sequential(*m_body)
        self.tail = nn.Sequential(*m_tail)

    def forward(self, x):
        x = self.sub_mean(x)
        x = self.head(x)

        res = self.body(x)
        res += x

        x = self.tail(res)
        x = self.add_mean(x)

        return x

    def load_state_dict(self, state_dict, strict=True):
        own_state = self.state_dict()
        for name, param in state_dict.items():
            if name in own_state:
                if isinstance(param, nn.Parameter):
                    param = param.data
                try:
                    own_state[name].copy_(param)
                except Exception:
                    if name.find('tail') == -1:
                        raise RuntimeError('While copying the parameter named {}, '
                                           'whose dimensions in the model are {} and '
                                           'whose dimensions in the checkpoint are {}.'
                                           .format(name, own_state[name].size(), param.size()))
            elif strict:
                if name.find('tail') == -1:
                    raise KeyError('unexpected key "{}" in state_dict'
                                   .format(name))


In [15]:
SCALE = 4
upscale_net = EDSR(32, 256, scale=4)
upscale_net = nn.DataParallel(upscale_net, [0])
upscale_net.load_state_dict(torch.load(os.path.join("./models", '{0}x'.format(SCALE), 'usn.pth')))


<All keys matched successfully>

In [None]:
# def download_image(url):
#     return Image.open(requests.get(url, stream=True).raw)

# dog_url = "https://mbtimetraveler.files.wordpress.com/2016/01/sad-cute-dog-high-resolution-wallpaper-for-desktop-background-download-dog-photos-free.jpg?w=1800"
# im = download_image(dog_url)

In [16]:
#im

In [17]:
# def downsample(img, r=0.1):
#     """ downsample an image by a percentage
#         :param img, a PIL Image format
#         :r ratio, the percentage new image size
#     """
#     width, height = img.size
#     w = int(width*r)
#     h = int(height*r)
#     if 'P' in img.mode: # check if image is a palette type
#         img = img.convert("RGB") # convert it to RGB
#         img = img.resize((w,h),Image.ANTIALIAS) # resize it
#         img = img.convert("P",dither=Image.NONE, palette=Image.ADAPTIVE) 
#            #convert back to palette
#     else:
#         img = img.resize((w,h),Image.ANTIALIAS) # regular resize
#     return img

# im_lr = downsample(im)
# im_lr

In [18]:
img = load_img("data/Holopix50k/train/left/-Lq1T4_X3mJnj2rY41uw_left.jpg")

In [19]:
reconstructed_img = upscale_net(img / 255.0)

RuntimeError: CUDA out of memory. Tried to allocate 900.00 MiB (GPU 0; 11.17 GiB total capacity; 9.85 GiB already allocated; 851.44 MiB free; 9.90 GiB reserved in total by PyTorch)