The Paper
https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Shi_Real-Time_Single_Image_CVPR_2016_paper.pdf

https://arxiv.org/abs/1707.05425

https://arxiv.org/abs/1609.05158

# Implementing ESPCN for efficient SISR

The Concept

SISR
Single Image Super Resolution

LR - Image (Low Resolution) and
HR - Image (High Resolution) Pairs
are mapped to each other (SR - Super Resolution) in order to be able to improve the Resolution on novel Images.

Problem: Previous SISR Techniques are too complex, thereby too slow.

Solution: 
Reorder steps to improve performance (first employ image enhancement, then Super Resolution)
Instead of using handcrafted bilinear filters, let CNN learn upscaling filters itself.
Getting rid of gradual stepwise upscaling, only perform SR at the End

Result: Speeds fast enough to do real-time upscaling with modern full hd-videos



#The Data


In [0]:
import numpy as np
#%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import matplotlib.pyplot as plt
!pip install tfds-nightly
import tensorflow_datasets as tfds
# Load the TensorBoard notebook extension
%load_ext tensorboard
from datetime import datetime
import cv2

ModuleNotFoundError: ignored

In [0]:
train = tfds.load("div2k/bicubic_x3", split = 'train')
#val = tfds.load("div2k/bicubic_x3", split = 'validation')

# Batch
#BATCHSIZE = 1
#train = train.batch(BATCHSIZE)
#val = val.batch(BATCHSIZE)

SampleNr = 1
train_hrList = []
train_lrList = []
#val_hrList = []
#val_lrList = []

for features in train.take(SampleNr):
  hr, lr = features["hr"], features["lr"]
  train_hrList.append(hr)
  train_lrList.append(lr)
# for features in val.take(int(SampleNr/10)):
#   hr, lr = features["hr"], features["lr"]
#   val_hrList.append(hr)
#   val_lrList.append(lr)


[1mDownloading and preparing dataset div2k/bicubic_x3/2.0.0 (download: 4.16 GiB, generated: Unknown size, total: 4.16 GiB) to /root/tensorflow_datasets/div2k/bicubic_x3/2.0.0...[0m
EXTRACTING {'train_lr_url': 'https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_train_LR_bicubic_X3.zip', 'valid_lr_url': 'https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_valid_LR_bicubic_X3.zip', 'train_hr_url': 'https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_train_HR.zip', 'valid_hr_url': 'https://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_valid_HR.zip'}


HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Completed...', max=1, style=ProgressStyl…

HBox(children=(IntProgress(value=1, bar_style='info', description='Dl Size...', max=1, style=ProgressStyle(des…

HBox(children=(IntProgress(value=1, bar_style='info', description='Extraction completed...', max=1, style=Prog…











HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/div2k/bicubic_x3/2.0.0.incompleteY2OBS4/div2k-train.tfrecord


HBox(children=(IntProgress(value=0, max=800), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/div2k/bicubic_x3/2.0.0.incompleteY2OBS4/div2k-validation.tfrecord


HBox(children=(IntProgress(value=0), HTML(value='')))

[1mDataset div2k downloaded and prepared to /root/tensorflow_datasets/div2k/bicubic_x3/2.0.0. Subsequent calls will reuse this data.[0m


In [0]:
from PIL import Image
img = train_hrList[0]

img = img.numpy()
img = img.astype(np.uint8)
img.shape
Image.fromarray(np.asarray(img)).show()

#Preprocessing



Image Normalization

For better performance, we want to keep only the luminance channel of the YUV Color space, since humans are most perceptive to luminance, and a simple greyscale conversion does not preserve the luminance.

In [0]:
#Normalize
img = tf.image.per_image_standardization(train_hrList[0])

# Convert to YUV color space
yimg = tf.image.rgb_to_yuv(img)
yimg = yimg.numpy()
# only keep luminance channel
yimg = yimg[:,:,[0]]
# remove channel dimension
yimg = tf.squeeze(yimg)
yimg.shape
plt.imshow(yimg)

TypeError: ignored

In [0]:
#Bring image into correct format for patching
yimg = tf.convert_to_tensor(yimg)
yimg = tf.expand_dims(yimg, 0)
yimg = tf.expand_dims(yimg, 3)
yimg.shape

Here, we create patches of size 17r * 17r

In [0]:
R = 3
patchscale = 17
hr_patchsize = [1, patchscale * R,patchscale * R, 1]
print("Patchsize: " , hr_patchsize)
#lr_patchsize = [1, patchscale, patchscale, 1]
#we want no gaps in between the patches
hr_strides = hr_patchsize
train_hrList_patched = []
#train_lrList_patched = []
#val_hrList_patched = []
#val_lrList_patched = []

train_hrList_patched = tf.image.extract_patches(
    images = yimg, 
    sizes = hr_patchsize, 
    #patches are consecutive, overlapping patches should be considered
    strides = hr_strides,
    #only uniform patches, borderpatches are neglected
    padding = "VALID",
    # i dont understand what it does, but its gotta be 1
    rates=[1, 1, 1, 1])
train_hrList_patched.shape

In [0]:
# Clear out any prior log data.
!rm -rf logs

# Sets up a timestamped log directory.
logdir = "logs/train_data/" + datetime.now().strftime("%Y%m%d-%H%M%S")
# Creates a file writer for the log directory.
file_writer = tf.summary.create_file_writer(logdir)

# Using the file writer, loPe("Training data", hr, step=0)

# Phase Shift Implementation
https://github.com/atriumlts/subpixel
https://github.com/atriumlts/subpixel/blob/master/keras_subpixel.py

TODO 
Describe what Phase shift is doing

#Subpixel Layer



In [0]:
class SubPixel(tf.keras.layers.Conv2D):
    def __init__(self,
                 filters,
                 kernel_size,
                 r,
                 padding='valid',
                 data_format=None,
                 strides=(1,1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(Subpixel, self).__init__(
            filters=r*r*filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs)
        self.r = r

    def _phase_shift(I, r):
        # Helper function with main phase shift operation
        bsize, a, b, c = I.get_shape().as_list()
        X = tf.reshape(I, (bsize, a, b, r, r))
        X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1
        X = tf.split(1, a, X)  # a, [bsize, b, r, r]
        X = tf.concat(2, [tf.squeeze(x) for x in X])  # bsize, b, a*r, r
        X = tf.split(1, b, X)  # b, [bsize, a*r, r]
        X = tf.concat(2, [tf.squeeze(x) for x in X])  #
        bsize, a*r, b*r
        return tf.reshape(X, (bsize, a*r, b*r, 1))
    # R is the upscaling factor
    def PS(X, r = 3, color=False):
      # Main OP that you can arbitrarily use in you tensorflow code
      if color:
        Xc = tf.split(3, 3, X)
        X = tf.concat(3, [_phase_shift(x, r) for x in Xc])
      else:
        X = _phase_shift(X, r)
      return X

    def call(self, inputs):
        return self.PS(super(Subpixel, self).call(inputs))

    def compute_output_shape(self, input_shape):
        unshifted = super(Subpixel, self).compute_output_shape(input_shape)
        return (unshifted[0], self.r*unshifted[1], self.r*unshifted[2], unshifted[3]/(self.r*self.r))

    def get_config(self):
        config = super(Conv2D, self).get_config()
        config.pop('rank')
        config.pop('dilation_rate')
        config['filters']/=self.r*self.r
        config['r'] = self.r
        return config


#The Model

TODO 
Layer Configuration


In [0]:
# MetaParameter
#Define Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, 
                                     beta_1=0.9, 
                                     beta_2=0.999, 
                                     epsilon=1e-07, 
                                     amsgrad=False)
# Use Mean squared error on pixel Base as loss
loss = tf.keras.losses.MSE()
#Define Padding
pad = 'same'
# Define Activation Function
act =  tf.keras.activations.tanh

#Define Number of initial Filters
filternr = 64
#Define Image Shape
img_shape = train_hrList_patched[0].shape

In [0]:
class ESPCN(tf.keras.Model):
    def __init__(self, upscale_factor):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters = filternr,
                                         kernel_size = 5, 
                                         padding= pad, 
                                         activation= act, 
                                         input_shape=img_shape
                                        )
        self.conv2 = tf.keras.layers.Conv2D(filters = filternr/2,
                                        kernel_size = 5, 
                                        padding= pad, 
                                        activation= act 
                                       )
        self.conv3 = tf.keras.layers.Conv2D(filters = filternr/2,
                                         kernel_size = 5, 
                                         padding= pad, 
                                         activation= act 
                                        )

        self.subpixel = SubPixel(filters = filternr/2,
                                kernel_size = 5, 
                                padding = pad, 
                                activation = act )


    def call(self, x):

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.subpixel(x)
        return x

In [0]:

keras.backend.clear_session()
model = ESPCN(upscale_factor=3)
model.build(input_shape=())
model.summary()

#Training

#Visualization

#Resources

