In [1]:
# Select matplotlib's `notebook` backend for the figures created below.
%matplotlib notebook

In [3]:
from pathlib import Path
from matplotlib import pyplot as plt
from matplotlib import colors
import h5py
import hdf5plugin
import numba
import numpy as np
from scipy import ndimage
import skimage.transform.integral

ModuleNotFoundError: No module named 'hdf5plugin'

In [None]:
# Open the .nxs (HDF5) file read-only. The handle is deliberately left open:
# the following cell reads the image data from it.
f = h5py.File(
    name="/dls/science/groups/scisoft/DIALS/dials_data/vmxi_thaumatin/image_15799.nxs",
    mode="r",
)

In [None]:
# Read the first entry along the leading axis of the "/entry/data/data" dataset
# (presumably the first detector frame — confirm against the file layout).
image = f["/entry/data/data"][0]
# Print the array for a quick look at its values.
print(image)

In [None]:
# Pixel values above this threshold are treated as invalid and masked out.
max_valid = 65534
# 1 where the pixel is valid, 0 otherwise; kept in the image's own dtype so
# that `image * mask` stays in that dtype.
mask = np.less_equal(image, max_valid).astype(image.dtype)

In [None]:
# Show the unmasked image on a symmetric-log colour scale (linear within +/-1).
fig = plt.figure()
plt.imshow(image, norm=colors.SymLogNorm(linthresh=1))

In [None]:
# Side length of the square window used for the local statistics.
kernel_size = 7
# Box filter: every element of the window carries unit weight.
kernel = np.ones((kernel_size,) * 2)
kernel

Naive approach using `scipy.ndimage.convolve`:

In [None]:
#%%timeit
# Zero the invalid pixels and switch to floating point so the squares and the
# windowed sums below cannot overflow an integer dtype.
masked_image = image * mask
# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24. `np.float64` is the same dtype and works on
# every NumPy version.
im = masked_image.astype(np.float64)
im2 = im**2
# Windowed sums of the values, of their squares and of the valid-pixel count.
# Pixels outside the image contribute 0 (mode="constant", cval=0).
sum_image = ndimage.convolve(im, kernel, mode="constant", cval=0)
sum_sq = ndimage.convolve(im2, kernel, mode="constant", cval=0)
n = ndimage.convolve(mask.astype(int), kernel, mode="constant", cval=0)
# mean = sum / n, left at 0 where the window holds no valid pixel.
mean_image = np.zeros(im.shape)
np.divide(sum_image, n, where=(n > 0), out=mean_image)
# 1 / n, left at 0 where n == 0 so the variance is 0 there as well.
inv_count = np.zeros(im.shape)
np.divide(1, n, where=(n > 0), out=inv_count)
# variance = (sum(x^2) - sum(x)^2 / n) / n
variance_image = (sum_sq - inv_count * sum_image ** 2) * inv_count
# Index of dispersion = variance / mean; defaults to 1 where the mean is not positive.
dispersion_index = np.ones(mean_image.shape)
np.divide(variance_image, mean_image, where=(mean_image > 0), out=dispersion_index)

In [None]:
# Local mean on a symmetric-log colour scale (linear within +/-1).
fig = plt.figure()
plt.imshow(mean_image, norm=colors.SymLogNorm(linthresh=1))

In [None]:
# Local variance on a symmetric-log colour scale (linear within +/-1).
fig = plt.figure()
plt.imshow(variance_image, norm=colors.SymLogNorm(linthresh=1))

In [None]:
# Index of dispersion (variance / mean) on a symmetric-log colour scale.
fig = plt.figure()
plt.imshow(dispersion_index, norm=colors.SymLogNorm(linthresh=1))

Using `np.cumsum` to calculate the summed area tables:

In [None]:
def summed_area_table(image):
    """Return the summed-area table (integral image) of a 2-D array.

    Element ``[i, j]`` of the result is the sum of ``image[:i + 1, :j + 1]``.
    The table has the same shape and dtype as ``image``, so with a narrow
    integer dtype large sums wrap around silently.
    """
    sat = np.empty(image.shape, dtype=image.dtype)
    # Accumulate down the rows first, then across the columns, in place.
    np.cumsum(image, axis=0, out=sat)
    np.cumsum(sat, axis=1, out=sat)
    return sat


def kernel_sum(image, kernel_size):
    """Sum ``image`` over a ``kernel_size`` x ``kernel_size`` window around each pixel.

    The image is zero-padded so the result has the same shape as ``image``;
    pixels outside the image contribute 0. For odd ``kernel_size`` the window
    is centred on the pixel. For even ``kernel_size`` it extends one element
    further towards higher indices than towards lower ones.

    Parameters
    ----------
    image : 2-D array
    kernel_size : int
        Side length of the square window; must be >= 1.

    Returns
    -------
    2-D array with the shape and dtype of ``image``.

    Raises
    ------
    ValueError
        If ``kernel_size`` is smaller than 1.
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be a positive integer")
    # One extra leading row/column supplies the "zero" edge of the table.
    # The trailing pad makes the total padding exactly kernel_size, so the
    # sliced result matches the input shape for even sizes too.
    before = (kernel_size - 1) // 2 + 1
    after = kernel_size - before
    padded = np.pad(image, (before, after))
    sat = summed_area_table(padded)
    # Alternative: sat = skimage.transform.integral.integral_image(padded)
    return (
        sat[:-kernel_size, :-kernel_size]    # top left
        + sat[kernel_size:, kernel_size:]    # bottom right
        - sat[kernel_size:, :-kernel_size]   # bottom left
        - sat[:-kernel_size, kernel_size:]   # top right
    )

In [None]:
%%timeit

masked_image = image * mask
im = masked_image
im2 = im**2

sum_image = kernel_sum(masked_image, kernel_size)
sum_sq = kernel_sum(im2, kernel_size)
n = kernel_sum(mask, kernel_size)
mean_image = np.zeros(im.shape)
np.divide(sum_image, n, where=(n > 0), out=mean_image)
inv_count = np.zeros(im.shape)
np.divide(1, n, where=(n > 0), out=inv_count)
variance_image = (sum_sq - inv_count * np.square(sum_image)) * inv_count
dispersion_index = np.ones(mean_image.shape)
np.divide(variance_image, mean_image, where=(mean_image > 0), out=dispersion_index)

Using `numba` to calculate the summed area tables:

In [None]:
@numba.njit(
    [
        numba.int32[:, ::1](numba.int32[:, ::1]),
        numba.int64[:, ::1](numba.int64[:, ::1]),
    ]
)
def summed_area_table(image):
    """Return the summed-area table (integral image) of a C-contiguous 2-D array.

    Element ``[i, j]`` of the result is the sum of ``image[:i + 1, :j + 1]``,
    stored in the dtype of ``image``. Compiled eagerly for int32 and int64
    input; pass int64 when sums may exceed the 32-bit range.

    The first row and column are handled explicitly. Indexing ``sat[i, j - 1]``
    or ``sat[i - 1, j]`` at ``j == 0`` / ``i == 0`` would wrap around to the
    last column / row and read values that do not belong to the recurrence.
    """
    n_rows, n_cols = image.shape
    sat = np.zeros(image.shape, dtype=image.dtype)
    for i in range(n_rows):
        # Running sum of the current row up to and including column j.
        row_total = 0
        for j in range(n_cols):
            row_total += image[i, j]
            if i > 0:
                sat[i, j] = row_total + sat[i - 1, j]
            else:
                sat[i, j] = row_total
    return sat


def kernel_sum(image, kernel_size):
    """Sum ``image`` over a ``kernel_size`` x ``kernel_size`` window around each pixel.

    The image is zero-padded so the result has the same shape as ``image``;
    pixels outside the image contribute 0. For odd ``kernel_size`` the window
    is centred on the pixel. For even ``kernel_size`` it extends one element
    further towards higher indices than towards lower ones.

    ``image`` must have a dtype accepted by ``summed_area_table``.

    Raises
    ------
    ValueError
        If ``kernel_size`` is smaller than 1.
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be a positive integer")
    # One extra leading row/column supplies the "zero" edge of the table.
    # The trailing pad makes the total padding exactly kernel_size, so the
    # sliced result matches the input shape for even sizes too.
    before = (kernel_size - 1) // 2 + 1
    after = kernel_size - before
    padded = np.pad(image, (before, after))
    sat = summed_area_table(padded)
    return (
        sat[:-kernel_size, :-kernel_size]    # top left
        + sat[kernel_size:, kernel_size:]    # bottom right
        - sat[kernel_size:, :-kernel_size]   # bottom left
        - sat[:-kernel_size, kernel_size:]   # top right
    )

Moving the variance computation into a separate numba-compiled function made an appreciable difference compared with using `np.divide`. Note how numba handles division by zero (see `error_model` in https://numba.pydata.org/numba-doc/dev/reference/jit-compilation.html?highlight=error_model).

In [None]:
@numba.njit(error_model="python")
def compute_variance(sum_image, sum_image_sq, n):
    """Return ``(sum_image_sq - sum_image**2 / n) / n``.

    With ``sum_image`` and ``sum_image_sq`` holding windowed sums of the values
    and of their squares, and ``n`` the number of contributing pixels, this is
    the population variance ``E[x^2] - E[x]^2`` of each window.

    NOTE(review): compiled with ``error_model="python"``, which per the numba
    documentation makes division by zero raise instead of yielding inf/nan —
    confirm this is the intended behaviour for windows where ``n == 0``.
    NOTE(review): ``np.square(sum_image)`` may be evaluated in the dtype of
    ``sum_image``; if that is int32 the square can overflow — verify.
    """
    return (sum_image_sq - np.square(sum_image) / n) / n

In [None]:
%%timeit
# Same pipeline as the cumsum cell, except that the variance comes from
# compute_variance instead of the inv_count / np.divide formulation.
# NOTE(review): names assigned under %%timeit are local to the timing run, so
# the mean/variance/dispersion arrays built here are presumably not visible to
# later cells — confirm before plotting them.
# NOTE(review): `im**2` and the windowed sums stay in the image's integer
# dtype; with counts up to `max_valid` the squares can exceed 32-bit range and
# wrap around — confirm counts stay small or widen the dtype end to end.
masked_image = image * mask
im = masked_image
im2 = im**2

# Windowed sums of the values, of their squares and of the valid-pixel count.
sum_image = kernel_sum(masked_image, kernel_size)
sum_sq = kernel_sum(im2, kernel_size)
n = kernel_sum(mask, kernel_size)
# mean = sum / n, left at 0 where the window holds no valid pixel.
mean_image = np.zeros(im.shape)
np.divide(sum_image, n, where=(n > 0), out=mean_image)
variance_image = compute_variance(sum_image, sum_sq, n)
# Index of dispersion = variance / mean; defaults to 1 where the mean is not positive.
dispersion_index = np.ones(mean_image.shape)
np.divide(variance_image, mean_image, where=(mean_image > 0), out=dispersion_index)

In [None]:
# Index of dispersion on a symmetric-log colour scale.
# NOTE(review): the preceding pipeline cells run under %%timeit, which
# presumably does not export their variables; `dispersion_index` here would
# then still be the array from the last cell that ran without %%timeit — verify.
fig = plt.figure()
plt.imshow(dispersion_index, norm=colors.SymLogNorm(linthresh=1))

We also tried calculating the three summed-area tables simultaneously (as in the DIALS source code), but this did not make any measurable difference.