In [None]:
%matplotlib inline

import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
from scipy.signal import convolve2d
from scipy.stats import norm
from skimage import data, io, color
from skimage.transform import resize

plt.rcParams['figure.figsize'] = [10, 10]

def print_mem_info(numpy_array, label):
    """Print the memory footprint of an array in a human-readable unit.

    The footprint is ``size * itemsize`` and is reported in bytes, kilobytes,
    megabytes or gigabytes, using decimal (power-of-1000) thresholds.

    Parameters
    ----------
    numpy_array : object exposing ``size`` and ``itemsize`` (e.g. ``np.ndarray``)
    label : text identifying the array in the printed message
    """
    footprint = numpy_array.size * numpy_array.itemsize

    if footprint < 10**3:
        # small enough to report as a plain byte count
        print(f"Size of {label}: {footprint} bytes")
        return

    # (exclusive upper bound, divisor, unit name), smallest unit first
    scaled_units = (
        (10**6, 10**3, "kilobytes"),
        (10**9, 10**6, "megabytes"),
    )
    divisor, unit = 10**9, "gigabytes"  # used when no bound above matches
    for upper_bound, unit_divisor, unit_name in scaled_units:
        if footprint < upper_bound:
            divisor, unit = unit_divisor, unit_name
            break

    print(f"Size of {label}: {footprint / divisor} {unit} (raw bytes: {footprint})")

def show_image(image, title, flip_x_axis=False):
    """Display ``image`` in greyscale with a title and a colour bar.

    Parameters
    ----------
    image : 2-D array to render
    title : text placed above the plot
    flip_x_axis : when True, the image is mirrored left-to-right before plotting
    """
    pixels = np.fliplr(image) if flip_x_axis else image
    grey = plt.get_cmap("gray")
    plt.imshow(pixels, cmap=grey)
    plt.title(title)
    plt.colorbar()
    plt.show()
    
def normalise(data):
    """Linearly rescale ``data`` so its minimum maps to 0 and its maximum to 1.

    Note: when every element is equal the range is zero and the division
    yields non-finite values.
    """
    lowest = np.min(data)
    value_range = np.max(data) - lowest
    return (data - lowest) / value_range

def rrmse(observed, ideal, decimal=6):
    """Return the relative root-mean-square error, in percent, as a string.

    Computes ``sqrt((1 / N) * sum((observed - ideal)**2) / sum(ideal**2)) * 100``
    where ``N`` is ``observed.shape[0]**2`` (i.e. the pixel count of a square
    image), and formats the result with ``decimal`` digits after the point.

    Parameters
    ----------
    observed : array being assessed
    ideal : reference array of the same shape
    decimal : number of decimal places in the returned string
    """
    pixel_count = observed.shape[0] ** 2
    squared_error = np.sum((observed - ideal) ** 2)
    reference_energy = np.sum(ideal ** 2)
    percentage = np.sqrt((1 / pixel_count) * squared_error / reference_energy) * 100.0
    return f"{percentage:.{decimal}f}"

def laplacian_of_gaussian(x, y, sigma):
    """Evaluate the Laplacian-of-Gaussian kernel at offset ``(x, y)``.

    LoG(x, y) = -1 / (pi * sigma**4) * (1 - p) * exp(-p),
    with p = (x**2 + y**2) / (2 * sigma**2).

    Parameters
    ----------
    x, y : scalar or array offsets from the kernel centre
    sigma : standard deviation of the Gaussian (non-zero)

    Returns
    -------
    The kernel value(s), with the same shape as the broadcast of ``x`` and ``y``.
    """
    # The whole of 2*sigma^2 is the denominator, so it must be parenthesised:
    # `/ 2.0 * sigma**2.0` would divide by 2 and then multiply by sigma^2.
    p = (x**2.0 + y**2.0) / (2.0 * sigma**2.0)
    return -(1.0 / (np.pi * sigma**4.0)) * (1.0 - p) * np.exp(-p)

def decimation_matrix(l, m):
    """Build the matrix that block-sums a flattened l x l image down to m x m.

    With ``d = l // m``, row ``r * m + c`` of the result has ones at the
    flattened (row-major) indices of the ``d x d`` block of high-res pixels
    whose top-left corner is ``(r * d, c * d)``. Multiplying the matrix by a
    flattened ``l x l`` image therefore yields the flattened ``m x m`` image
    of block sums. For ``l == 2 * m`` each low-res pixel is the sum of its
    2 x 2 group of high-res neighbours.

    Parameters
    ----------
    l : side length of the high-res image
    m : side length of the low-res image; must divide ``l`` exactly

    Returns
    -------
    ``np.ndarray`` of shape ``(m**2, l**2)`` and dtype ``float32``.

    Raises
    ------
    ValueError
        If ``m`` is not positive or ``l`` is not an integer multiple of ``m``.
    """
    if m < 1 or l % m != 0:
        raise ValueError(f"l ({l}) must be a positive integer multiple of m ({m})")

    d = l // m
    d_matrix = np.zeros((m**2, l**2), dtype=np.float32)

    # Flattened offsets of a d x d block relative to its top-left pixel:
    # moving down one high-res row adds l, moving right one column adds 1.
    block_offsets = (np.arange(d)[:, None] * l + np.arange(d)[None, :]).ravel()

    for low_row in range(m):
        for low_col in range(m):
            top_left = (low_row * d) * l + low_col * d
            d_matrix[low_row * m + low_col, top_left + block_offsets] = 1.0
    return d_matrix

def convolution_matrix(l, kernel):
    """Build the (l**2, l**2) matrix that applies ``kernel`` to a flattened l x l image.

    Row ``r * l + c`` holds the kernel values centred on pixel ``(r, c)``,
    written at the flattened (row-major) indices of the neighbouring pixels.
    Kernel taps that would land outside the image are dropped, which is
    equivalent to zero padding. The kernel is laid down as given (it is not
    flipped), so for a non-symmetric kernel the product with a flattened
    image is a cross-correlation.

    Parameters
    ----------
    l : side length of the image
    kernel : two-dimensional square ``np.ndarray`` of size n x n. An odd ``n``
        is assumed: the centre tap is taken at index ``(n - 1) // 2`` and,
        for even ``n``, the last kernel row is never used.

    Returns
    -------
    ``np.ndarray`` of shape ``(l**2, l**2)`` and dtype ``float32``.
    """
    conv = np.zeros((l**2, l**2), dtype=np.float32)
    full_supp = kernel.shape[0]  # assumed square
    half_supp = (full_supp - 1) // 2

    for conv_row in range(l**2):
        row, col = divmod(conv_row, l)

        # The column window depends only on `col`, so work it out once per
        # output pixel rather than once per kernel row.
        linear_col = col - half_supp                      # may be negative
        mapped_col_start = max(linear_col, 0)             # clip at the left edge
        mapped_col_end = min(linear_col + full_supp, l)   # clip at the right edge
        # number of kernel columns hanging over each edge of the image
        left = mapped_col_start - linear_col
        right = (linear_col + full_supp) - mapped_col_end

        for k_row in range(-half_supp, half_supp + 1):
            # map "kernel row" to an image row; skip rows outside the image
            mapped_row = row + k_row
            if not 0 <= mapped_row < l:
                continue
            base = mapped_row * l
            # The kernel slice ends at full_supp - right regardless of `left`,
            # so a kernel wider than the image (overhanging on both sides)
            # is trimmed to exactly the destination width.
            conv[conv_row, base + mapped_col_start : base + mapped_col_end] = \
                kernel[k_row + half_supp][left : full_supp - right]
    return conv

#### Configuration and data set up...

In [None]:
%%time

# --- Problem dimensions ---
timesteps = 30 # total timesteps
timesteps_per_y = 5 # timesteps batched into each low-res image
l = 100 # side length of the high-res image (l x l)
m = 50 # side length of each low-res image (m x m)
n = timesteps // timesteps_per_y # number of low-res images
w = np.ones(n) # one weight w_i per low-res image (all equal here)
w_gpu = cp.asarray(w)

# β multiplies the S^T S term when the matrix A is assembled later on
β = 1.631635943

# --- High-res reference image ---
# Image covering all time steps: read as a flat float32 buffer, viewed as
# 800 x 800, resampled to l x l (order=1 interpolation, no anti-aliasing)
# and rescaled to [0, 1].
filename = "../data/direct_image_ts_0_29_800x800.bin"
x_true = np.fromfile(filename, dtype=np.float32)
x_true = resize(x_true.reshape(800, 800), (l, l), anti_aliasing=False, order=1)
x_true = normalise(x_true)

# --- Point spread function ---
# Drop the first row/column (800 -> 799 per side), resample to (l-1) x (l-1),
# then put a zero row/column back at index 0 so the array is l x l again.
filename = "../data/direct_psf_ts_0_29_800x800.bin"
x_psf = np.fromfile(filename, dtype=np.float32).reshape(800, 800)[1:, 1:]
x_psf = resize(x_psf, (l-1, l-1), anti_aliasing=False, order=1)
x_psf = np.pad(x_psf, ((1, 0), (1, 0))) # pad with new 0th row/col to ensure trimming from centre

# Keep only a (2 * trim_half_len - 1)-wide square window centred on index l//2
# (3 x 3 for trim_half_len = 2) and scale it so its entries sum to 1.
trim_half_len = 2
psf_min = l//2 - (trim_half_len - 1)
psf_max = l//2 + trim_half_len
# The full-size x_psf is left untouched; the trimmed window is taken from a copy.
x_psf_trimmed = x_psf.copy()[psf_min:psf_max, psf_min:psf_max]
x_psf_trimmed /= np.sum(x_psf_trimmed)

# --- Low-res observations ---
# Storing all low-res images as flattened rows
y = np.zeros((n, m**2))

# One file per batch of timesteps_per_y consecutive time steps; the file name
# carries the first and last time step of the batch. Each file must hold
# m**2 float32 values to fit a row of y. Every image is rescaled to [0, 1].
for i in np.arange(n):
    filename = f"../data/direct_image_ts_{i * timesteps_per_y}_{i * timesteps_per_y + timesteps_per_y - 1}.bin"
    y[i] = np.fromfile(filename, dtype=np.float32)
    y[i] = normalise(y[i])
    
y_gpu = cp.asarray(y)

# --- Operators (built on the host, then copied to the GPU) ---
# Decimation matrix D, shape (m**2, l**2)
d = decimation_matrix(l, m) # each low-res pixel is the sum of a 2 x 2 group of high-res pixels
d_gpu = cp.asarray(d)

# Blur matrix H (psf), shape (l**2, l**2), built from the trimmed, unit-sum PSF
h = convolution_matrix(l, x_psf_trimmed)
h_gpu = cp.asarray(h)

# Sharpening matrix S (laplacian), shape (l**2, l**2), built from the
# 3 x 3 four-neighbour Laplacian kernel
laplacian = np.array([[0, -1,  0], [-1,  4, -1], [0, -1,  0]], dtype=np.float32)
s = convolution_matrix(l, laplacian)
s_gpu = cp.asarray(s)

#### Setting up the right hand side of the equation $AX = B$, where $B = \sum\limits_{i=1}^N (w_iH^TD^TY_i)$

In [None]:
# B = sum_i w_i * H^T D^T Y_i, accumulated as a vector of length l**2
b_gpu = cp.zeros(l**2, dtype=np.float32)

for i in range(n):
    # `*` and `@` share one precedence level and associate left-to-right, so
    # an unparenthesised `w * H.T @ D.T @ y` scales the whole H^T matrix and
    # then forms the dense (l**2 x m**2) product H^T D^T on every iteration.
    # Grouping from the right keeps every product a cheap matrix @ vector.
    b_gpu += h_gpu.T @ (d_gpu.T @ (w_gpu[i] * y_gpu[i]))

#### Setting up the left hand side of the equation $AX=B$, where $A = \begin{bmatrix}
  \beta S^T S + (\sum\limits_{i=1}^N w_i) H^TD^TDH
\end{bmatrix}$

In [None]:
# A = β S^T S + (sum_i w_i) H^T D^T D H.
# H^T D^T D H equals (D H)^T (D H), so form the (m**2 x l**2) product D H once
# and multiply it by its own transpose; this avoids the extra
# (l**2 x l**2) @ (l**2 x l**2) product of a left-to-right evaluation.
dh_gpu = d_gpu @ h_gpu
a_gpu = (β * s_gpu.T @ s_gpu) + ((dh_gpu.T @ dh_gpu) * cp.sum(w_gpu))
del dh_gpu  # intermediate only; drop the reference so its GPU memory can be reused

#### Now solve for $X$...

In [None]:
# Solve the dense system A X = B on the GPU, copy the solution to the host and
# restore the 2-D image layout. The shape follows `l` from the configuration
# cell so that changing the resolution there does not break this step.
x = cp.asnumpy(cp.linalg.solve(a_gpu, b_gpu)).reshape(l, l)

# Release the free (unused) blocks held by CuPy's default memory pool.
# Arrays that are still referenced (e.g. a_gpu, h_gpu) keep their memory.
mempool = cp.get_default_memory_pool()
mempool.free_all_blocks()

In [None]:
# Compare the reconstruction against the reference on a common [0, 1] scale,
# then plot both images mirrored left-to-right.
x_solved_norm = normalise(x)
x_true_norm = normalise(x_true)

print(f"RRMSE: Solved X and True X -> {rrmse(x_solved_norm, x_true_norm)}")
for picture, caption in ((x_solved_norm, "Solved X"), (x_true_norm, "True X")):
    show_image(picture, caption, flip_x_axis=True)