In [None]:
# import modules
import numpy as np
import matplotlib.pyplot as plt
import hdbscan

In [None]:
# load in the data
# NOTE(review): the axis move + reshape below assume the file stores the image
# band-first, i.e. with shape (3, rows, cols) -- confirm against the data file
image = np.moveaxis(np.load("Data/3-band-example.npy"), 0, -1)
n_rows, n_cols = image.shape[:2]  # spatial size, reused for the pixel grid below
data = image.reshape(-1, 3)       # one row per pixel, one column per band

# normalise data: divide every pixel by the sum of its three bands
# (a pixel whose bands sum to zero would produce nan/inf here)
data_ = data.astype('float')
data_ /= data_.sum(axis=1)[:, None]

# normalise data, subtracting image background
BACKGROUND_LEVEL = 100  # value subtracted from every band as the background
MIN_BAND_SUM = 10       # lower bound applied to the per-pixel band sum before dividing
data__ = data.astype('float') - BACKGROUND_LEVEL
data__ /= np.maximum(MIN_BAND_SUM, data__.sum(axis=1)[:, None])

# stack colour information with spatial features
# The grid is built from the loaded image's own size rather than a fixed 51x51,
# so the coordinate columns always line up with the flattened pixel rows.
PIXEL_SCALE = 0.1  # multiplier applied to the pixel indices to form x/y features
x, y = np.meshgrid(PIXEL_SCALE * np.arange(n_cols), PIXEL_SCALE * np.arange(n_rows))
dataxy = np.column_stack((x.ravel(), y.ravel(), data__))

In [None]:
# show the un-normalised three-band data as a 51x51 colour image
raw_rgb = data.reshape((51, 51, 3))
plt.imshow(raw_rgb, origin='lower')
plt.title("3-Band Image")
plt.axis('off')

In [None]:
# plot normalised data
# origin='lower' matches the raw-image and clustering plots, so every figure
# in the notebook shows the image in the same vertical orientation
plt.imshow(data_.reshape(51, 51, 3), origin='lower')
plt.title("Normalised 3-Band Image")
plt.axis('off')

In [None]:
# plot normalised data with background subtracted
# origin='lower' matches the raw-image and clustering plots, so every figure
# in the notebook shows the image in the same vertical orientation
plt.imshow(data__.reshape(51, 51, 3), origin='lower')
plt.title("Normalised 3-Band Image with Reduced Background")
plt.axis('off')

In [None]:
# cluster data using HDBSCAN
clusterer = hdbscan.HDBSCAN(min_cluster_size=20, cluster_selection_method='leaf').fit(dataxy)
labels = clusterer.labels_
clusters = np.unique(labels)
# HDBSCAN labels noise points -1, so only the non-negative labels are clusters;
# count those as a plain integer instead of reporting the shape tuple
k = int(np.count_nonzero(clusters >= 0))
print("Number of clusters: " + str(k))

# add mask to data: True where a pixel was assigned to a cluster, False for noise
mask = labels >= 0
labels_ma = np.ma.array(labels, mask=~mask)

# plot data
# the grey mask is drawn first and the masked label image on top of it,
# so pixels without a cluster label show the mask layer instead of a colour
plt.imshow(mask.reshape(51, 51), cmap='gray', origin='lower')
plt.imshow(labels_ma.reshape(51, 51), cmap='jet', origin='lower')
plt.title("HDBSCAN Clustering on 3-Band Image")
plt.axis('off')

In [None]:
# draw the condensed cluster tree of the fitted clusterer,
# with the selected clusters highlighted
condensed_tree = clusterer.condensed_tree_
condensed_tree.plot(select_clusters=True)