In [None]:
# import modules
import numpy as np
import matplotlib.pyplot as plt
import hdbscan
import scarlet

In [None]:
# load in data; the array's shape is unpacked as (channels, pixels)
data_ = np.load("Data/CapitolHill.npy")
channels, n = data_.shape
# image side length, treating the n pixels as a square grid
nx = ny = int(np.sqrt(n))
# rearrange to one row per pixel and one column per channel
data = np.moveaxis(data_, 0, -1).reshape(-1, channels)

# normalize the data: subtract the per-channel background, then divide each
# pixel by its summed signal, floored at the background level
background = np.sqrt(np.mean(data, axis=0))
norm_data = data.astype('float') - background
pixel_totals = norm_data.sum(axis=1)[:, None]
norm_data /= np.maximum(background, pixel_totals)

# put data in form for HDBSCAN: scaled pixel coordinates come first,
# followed by one feature column per channel
x, y = np.meshgrid(0.01*np.arange(nx), 0.01*np.arange(ny))
arrays = [x.flatten(), y.flatten()]
arrays.extend(norm_data[:, col] for col in range(channels))
norm_data = np.stack(arrays, axis=1)

In [None]:
# convert image to rgb colour coordinates
def to_rgb(data, channels=3, standardize=True, norm=None):
    """Collapse a multi-band image cube into a few bands and render it as RGB.

    Parameters
    ----------
    data : array-like
        Image cube whose first axis indexes the spectral bands; the remaining
        two axes are the image plane.
    channels : int or sequence, default 3
        Forwarded to ``np.array_split`` as the number of band groups (or the
        split indices). Each group is summed into one output band.
    standardize : bool, default True
        When true, every output band is shifted to zero mean and scaled to
        unit standard deviation. A band with zero spread is left at zero
        instead of being divided by zero.
    norm : optional
        Normalisation handed to ``scarlet.img_to_rgb``. When ``None``, a
        ``scarlet.AsinhPercentileNorm`` is built from the collapsed bands.

    Returns
    -------
    The value returned by ``scarlet.img_to_rgb`` for the collapsed bands.
    """
    # sum each group of neighbouring bands into a single output band
    bands = np.stack(
        [group.sum(axis=0) for group in np.array_split(data, channels)],
        axis=0,
    )
    # make uniform across channels
    if standardize:
        # in-place float arithmetic on an integer array raises a casting
        # error, so promote integer cubes first (float inputs keep their dtype)
        if not np.issubdtype(bands.dtype, np.floating):
            bands = bands.astype(float)
        bands -= bands.mean(axis=(1, 2), keepdims=True)
        spread = bands.std(axis=(1, 2), keepdims=True)
        # a constant band has zero spread; dividing by 1 keeps it at zero
        spread[spread == 0] = 1
        bands /= spread
    if norm is None:
        norm = scarlet.AsinhPercentileNorm(bands)
    return scarlet.img_to_rgb(bands, norm=norm)

# plot rgb image of Capitol Hill: fold the flat pixel axis back into an
# (nx, ny) grid per channel before converting to colour
rgb = to_rgb(data_.reshape(channels, nx, ny))
fig, ax = plt.subplots()
ax.imshow(rgb, origin='lower')

# label plot; the axes frame is hidden so only the image is shown
ax.set_title("Hyperspectral Image of Capitol Hill")
ax.axis('off')

In [None]:
# get clustering result using HDBSCAN with eom selection
# NOTE(review): the leaf run in this notebook uses min_cluster_size=15 while
# this one uses 11 — confirm the difference is intentional.
eom_clusterer = hdbscan.HDBSCAN(min_cluster_size=11).fit(norm_data)
# one label per pixel, folded back into the image grid
eom_labels = eom_clusterer.labels_.reshape(nx, ny)
eom_clusters = np.unique(eom_labels)
# HDBSCAN assigns -1 to noise points, so only non-negative labels are real
# clusters; report a plain integer instead of the label array's shape tuple
eom_k = int(np.count_nonzero(eom_clusters >= 0))
print("Number of clusters: " + str(eom_k))

# use mask to plot noise labels as black: True where a pixel belongs to a
# cluster, False where it was labelled noise
eom_mask = eom_labels >= 0
eom_labels_ma = np.ma.array(eom_labels, mask=~eom_mask)

# plot results: the gray layer draws noise pixels black, and the masked label
# layer on top leaves those pixels uncovered
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(eom_mask, cmap='gray', origin='lower')
ax.imshow(eom_labels_ma, cmap='jet', origin='lower')

# label plot
ax.set_title("'EOM' Clustering Result")
ax.axis('off')

In [None]:
# get clustering result using HDBSCAN with leaf selection
leaf_clusterer = hdbscan.HDBSCAN(min_cluster_size=15, cluster_selection_method='leaf').fit(norm_data)
# one label per pixel, folded back into the image grid
leaf_labels = leaf_clusterer.labels_.reshape(nx, ny)
leaf_clusters = np.unique(leaf_labels)
# HDBSCAN assigns -1 to noise points, so only non-negative labels are real
# clusters; report a plain integer instead of the label array's shape tuple
leaf_k = int(np.count_nonzero(leaf_clusters >= 0))
print("Number of clusters: " + str(leaf_k))

# use mask to plot noise labels as black: True where a pixel belongs to a
# cluster, False where it was labelled noise
leaf_mask = leaf_labels >= 0
leaf_labels_ma = np.ma.array(leaf_labels, mask=~leaf_mask)

# plot results: the gray layer draws noise pixels black, and the masked label
# layer on top leaves those pixels uncovered
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(leaf_mask, cmap='gray', origin='lower')
ax.imshow(leaf_labels_ma, cmap='jet', origin='lower')

# label plot
ax.set_title("'Leaf' Clustering Result")
ax.axis('off')

In [None]:
# plot all three images next to each other for comparison:
# EOM result on the left, original image in the middle, leaf result on the right
fig, (ax_eom, ax_or, ax_leaf) = plt.subplots(1, 3)

# each clustering panel gets the gray noise layer first, then the masked labels
for panel, noise_mask, cluster_labels in (
    (ax_eom, eom_mask, eom_labels_ma),
    (ax_leaf, leaf_mask, leaf_labels_ma),
):
    panel.imshow(noise_mask, cmap='gray', origin='lower')
    panel.imshow(cluster_labels, cmap='jet', origin='lower')
ax_or.imshow(rgb, origin='lower')

# label plots and hide the axes frames
for panel, heading in (
    (ax_eom, "'EOM' Clustering Result"),
    (ax_or, "Hyperspectral Image\n"),
    (ax_leaf, "'Leaf' Clustering Result"),
):
    panel.set_title(heading)
    panel.axis('off')
plt.tight_layout()