In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
from scipy import sparse
from scipy import ndimage
from scipy.ndimage import gaussian_filter
from skimage import data
from skimage import img_as_float
from skimage import morphology, measure
from skimage.color import label2rgb

%matplotlib inline

In [None]:
# Load the plain training image and its dotted counterpart as floats.
# Floats save us headaches in the arithmetic further down.
raw_image = mpimg.imread('../input/Train/0.jpg').astype(float)
dot_image = mpimg.imread('../input/TrainDotted/0.jpg').astype(float)

# Scale each image by its own maximum so the brightest value becomes 1.0.
raw_image /= raw_image.max()
dot_image /= dot_image.max()

# Spotcheck the data: one grayscale panel per colour channel of the raw image.
plt.figure(figsize=(20,20))
for channel in range(3):
    plt.subplot(1, 3, channel + 1)
    imgplot = plt.imshow(raw_image[:, :, channel], cmap=plt.cm.gray)
plt.show()

def img_to_float(m):
    """Convert an image to a float array scaled so its largest value is 1.0.

    Parameters
    ----------
    m : array_like
        Image data of any numeric dtype.

    Returns
    -------
    numpy.ndarray
        A new float array; the input is never modified. Empty input and
        input whose maximum is 0 (e.g. an all-black image) are returned
        converted to float but unscaled, instead of producing NaN/inf from
        a division by zero.
    """
    m = np.array(m, dtype=float)  # always a fresh copy, also accepts lists
    if m.size == 0:
        return m
    peak = m.max()
    if peak == 0:
        # Nothing sensible to scale by; dividing would yield NaN/inf.
        return m
    return m / peak

def float_to_img(m):
    """Scale a float image by 255 and truncate the result to integers.

    ``m`` must be a NumPy array (or NumPy scalar); the result has NumPy's
    default integer dtype.
    """
    scaled = m * 255.
    return scaled.astype(int)

def spot_check(img, show_pil=False):
    """Show ``img`` with matplotlib and, optionally, in PIL's image viewer.

    Parameters
    ----------
    img : numpy.ndarray
        Image to display. For the PIL path it is multiplied by 255 and cast
        to ``uint8``, so values are presumably expected in [0, 1].
    show_pil : bool
        When true, additionally open the image through ``PIL.Image.show``,
        which is useful for checking things at a higher resolution.
    """
    plt.imshow(img)
    plt.show()

    if not show_pil:
        return

    as_bytes = (img*255.0).astype('uint8')
    Image.fromarray(as_bytes).show()

In [None]:
# Dot images have some black artifacts. Let's mask those out: a pixel is
# flagged when the norm of its colour vector is at most 2% of the spread
# between the largest and smallest norm in the image.
dot_norm = np.linalg.norm(dot_image, axis=2)
threshold = 0.02 * (dot_norm.max() - dot_norm.min())
initial_mask_1d = np.less_equal(dot_norm, threshold)

# Repeat the per-pixel flag across the channel axis so the mask has the same
# shape as the image and can index it directly.
initial_mask = np.broadcast_arrays(dot_image, initial_mask_1d[..., np.newaxis])[1]

plt.imshow(initial_mask.astype(float))
plt.show()

In [None]:
# Remove the background by differencing the dotted image against the
# original, ignoring the pixels flagged as black artifacts.
dot_diff = np.absolute(dot_image - raw_image)
dot_diff[initial_mask] = 0

# Collapse the colour axis, keeping the largest per-channel difference.
# (Alternative that was considered: np.linalg.norm(dot_diff, axis=2).)
dot_diff = dot_diff.max(axis=2)

# Remove jpeg artifact noise.
# (Other options that were considered: ndimage.binary_opening / binary_closing.)
dot_diff[dot_diff < 0.3] = 0

# Left-hand panel of a two-panel figure; the right-hand panel is drawn later
# in this cell.
plt.figure(figsize=(20,20))
plt.subplot(1, 2, 1)
imgplot = plt.imshow(dot_diff)

def mask_image(m, mask, c):
    """Overwrite, in place, every pixel of ``m`` whose ``mask`` value is < 0.01.

    Parameters
    ----------
    m : numpy.ndarray
        Image to modify; indexed as (rows, cols, channels) by the callers in
        this notebook.
    mask : numpy.ndarray
        Per-pixel array with one fewer dimension than ``m``. Entries below
        0.01 (including boolean ``False``) select the pixels to overwrite.
    c : scalar
        Value written to every channel of the selected pixels.

    Returns
    -------
    None
        ``m`` is mutated directly.
    """
    background = (mask < 0.01)[..., np.newaxis]
    # Stretch the per-pixel flag across the channel axis to match m's shape.
    pixel_mask = np.broadcast_arrays(m, background)[1]
    m[pixel_mask] = c

# Work on a copy so dot_image itself stays untouched, and black out every
# pixel that dot_diff does not mark as a dot.
dots = dot_image.copy()
mask_image(dots, dot_diff, 0)
# Second panel of the 1x2 figure opened before mask_image was defined.
plt.subplot(1, 2, 2)
imgplot = plt.imshow(dots)
plt.show()

def channel_histogram(m, idx, color):
    """Draw a 256-bin histogram of one channel of ``m`` on the current axes.

    The channel ``m[:, :, idx]`` is scaled by 255 and cast to ``uint8``;
    zero-valued pixels are left out of the histogram. The bin edges returned
    by ``plt.hist`` are then drawn as a line on the same axes.

    Parameters
    ----------
    m : numpy.ndarray
        Image indexed as (rows, cols, channels).
    idx : int
        Channel index to plot.
    color : str
        Face colour of the histogram bars.
    """
    levels = (m[:, :, idx] * 255).astype('uint8')
    nonzero_levels = levels[levels > 0]  # boolean indexing already yields 1-D data
    _, bins, _ = plt.hist(nonzero_levels, bins=256, facecolor=color)
    plt.plot(bins)

def print_histograms(m):
    """Plot side-by-side histograms of the three colour channels of ``m``.

    Parameters
    ----------
    m : numpy.ndarray
        Image indexed as (rows, cols, channels) with channels in R, G, B
        order, as returned by ``matplotlib.image.imread``.

    Each panel is drawn in the colour of the channel it shows: index 0 in
    red, index 1 in green and index 2 in blue.
    """
    plt.figure(figsize=(20,5))
    channel_colors = ((0, 'red'), (1, 'green'), (2, 'blue'))
    for position, (idx, color) in enumerate(channel_colors, start=1):
        plt.subplot(1, 3, position)
        channel_histogram(m, idx, color)
    plt.show()

# Per-channel histograms of the pixels that survived the dot mask.
print_histograms(dots)


In [None]:
# Clean up the dot mask with one pass of binary erosion (dot_diff is treated
# as a boolean image: non-zero means "dot"). A second erosion pass was tried
# and left disabled.
dot_diff_eroded = ndimage.binary_erosion(dot_diff)

# Re-extract the dots using the eroded mask and inspect the result.
dots = dot_image.copy()
mask_image(dots, dot_diff_eroded, 0)

spot_check(dots, show_pil=False)

print_histograms(dots)

In [None]:
# Label connected regions of the eroded dot mask; connectivity=2 also joins
# pixels that only touch diagonally.
labeled_dots, label_count = morphology.label(
    dot_diff_eroded,
    connectivity=2,
    return_num=True,
)

# Paint each label as a colour overlay on top of the raw image.
rgb_labels = label2rgb(labeled_dots, image=raw_image)
imgplot = plt.imshow(rgb_labels)
print("Num of labels = {0}".format(label_count))

In [None]:
# Mark the centroid of every labelled region on top of the dotted image.
# `measure` comes from the notebook's import cell, so it is not re-imported here.
plt.figure(figsize=(20,20))
plt.imshow(dot_image)
axis = plt.gcf().gca()
props = measure.regionprops(labeled_dots)
print(len(props))
for region in props:
    # regionprops reports centroids as (row, col); Circle expects (x, y).
    center = region.centroid
    # 30 px radius, half-transparent yellow: large enough to spot on a 20x20 inch figure.
    c = plt.Circle((center[1], center[0]), 30, color='y', fill=True, clip_on=False, alpha=0.5)
    axis.add_artist(c)
plt.show()

In [None]:
def anisotropic_diffusion(img, niter=1, kappa=50, gamma=0.1, voxelspacing=None, option=1):
    r"""
    Edge-preserving, XD Anisotropic diffusion.

    Parameters
    ----------
    img : array_like
        Input image (will be copied and cast to numpy.float32; the input
        itself is never modified).
    niter : integer
        Number of iterations.
    kappa : integer
        Conduction coefficient, e.g. 20-100. ``kappa`` controls conduction
        as a function of the gradient. If ``kappa`` is low small intensity
        gradients are able to block conduction and hence diffusion across
        steep edges. A large value reduces the influence of intensity gradients
        on conduction.
    gamma : float
        Controls the speed of diffusion. Pick a value :math:`<= .25` for stability.
    voxelspacing : tuple of floats
        The distance between adjacent pixels in all img.ndim directions.
        Defaults to 1.0 along every axis.
    option : {1, 2}
        Whether to use the Perona Malik diffusion equation No. 1 or No. 2.
        Equation 1 favours high contrast edges over low contrast ones, while
        equation 2 favours wide regions over smaller ones. See [1]_ for details.

    Returns
    -------
    anisotropic_diffusion : ndarray
        Diffused image (dtype float32, same shape as ``img``).

    Raises
    ------
    ValueError
        If ``option`` is neither 1 nor 2.

    Notes
    -----
    Original MATLAB code by Peter Kovesi,
    School of Computer Science & Software Engineering,
    The University of Western Australia,
    pk @ csse uwa edu au,
    <http://www.csse.uwa.edu.au>

    Translated to Python and optimised by Alistair Muldal,
    Department of Pharmacology,
    University of Oxford,
    <alistair.muldal@pharm.ox.ac.uk>

    Adapted to arbitrary dimensionality and added to the MedPy library Oskar Maier,
    Institute for Medical Informatics,
    Universitaet Luebeck,
    <oskar.maier@googlemail.com>

    June 2000  original version. -
    March 2002 corrected diffusion eqn No 2. -
    July 2012 translated to Python -
    August 2013 incorporated into MedPy, arbitrary dimensionality -

    References
    ----------
    .. [1] P. Perona and J. Malik.
       Scale-space and edge detection using anisotropic diffusion.
       IEEE Transactions on Pattern Analysis and Machine Intelligence,
       12(7):629-639, July 1990.
    """
    # Imported after the docstring so the string above really is __doc__.
    import numpy

    # define conduction gradients functions
    if option == 1:
        def condgradient(delta, spacing):
            return numpy.exp(-(delta/kappa)**2.)/float(spacing)
    elif option == 2:
        def condgradient(delta, spacing):
            return 1./(1.+(delta/kappa)**2.)/float(spacing)
    else:
        # Fail early instead of hitting a NameError inside the iteration loop.
        raise ValueError("option must be 1 or 2, got {0!r}".format(option))

    # initialize output array
    out = numpy.array(img, dtype=numpy.float32, copy=True)

    # set default voxel spacing if not supplied; `is None` keeps the check
    # unambiguous when an array is passed, and out.ndim works for any
    # array_like input (lists have no .ndim).
    if voxelspacing is None:
        voxelspacing = tuple([1.] * out.ndim)

    # initialize some internal variables
    deltas = [numpy.zeros_like(out) for _ in range(out.ndim)]

    for _ in range(niter):

        # calculate the forward differences along every axis; the last entry
        # along each axis keeps its initial value of 0
        for i in range(out.ndim):
            # Multidimensional indices must be tuples; NumPy no longer
            # accepts a list of slices here.
            slicer = tuple(slice(None, -1) if j == i else slice(None) for j in range(out.ndim))
            deltas[i][slicer] = numpy.diff(out, axis=i)

        # update matrices: conduction-weighted gradients (fluxes) per axis
        matrices = [condgradient(delta, spacing) * delta for delta, spacing in zip(deltas, voxelspacing)]

        # subtract a copy that has been shifted ('Up/North/West' in 3D case)
        # by one pixel, turning each flux into its discrete divergence; the
        # right-hand side is fully evaluated before the assignment
        for i in range(out.ndim):
            slicer = tuple(slice(1, None) if j == i else slice(None) for j in range(out.ndim))
            matrices[i][slicer] = numpy.diff(matrices[i], axis=i)

        # update the image
        out += gamma * (numpy.sum(matrices, axis=0))

    return out

In [None]:
# Absolute difference between channel 0 and channel 2 of the raw image
# (red and blue for an RGB image), then smoothed with anisotropic diffusion.
img = raw_image.astype(float)
dc = np.absolute(img[:, :, 0] - img[:, :, 2])
aniso = anisotropic_diffusion(dc, niter=10, kappa=200, gamma=.01)
mod = aniso

# Show, in order: the raw difference, the diffused difference, and the
# diffused difference thresholded at 0.3.
for panel in (dc, aniso, aniso > .3):
    plt.figure(figsize=(10,10))
    imgplot = plt.imshow(panel, cmap=plt.cm.gray)
    plt.show()

## Thoughts
On the first image, this seems like a reasonable approach. On the second image, however, such a simple calculation is nowhere near as useful. I still plan to go back to the first image and see whether I can get some kind of prediction out of it, but for now I'll move on and start brainstorming more complex solutions.