In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

<H2>Heatmaps of UMAP embeddings by transform group</H2>

In [None]:
# The project's embeddings code saved the UMAP embeddings as NumPy .npz archives.
# Only the "drop_0" arrays are read here, i.e. the runs where no data were dropped.

# Archive for the untransformed images
umap_no_transforms = np.load('../data/umap_no_transform.npz')
np_umap_no_transform = umap_no_transforms['umap_no_transform_drop_0']

# Archive for the transformed images; one array is pulled out per transform
li_transform_names = ['grayscale', 'little_blur', 'big_blur', 'rotate_180']
umap_transforms = np.load('../data/umap_transforms.npz')
(
    np_umap_grayscale,
    np_umap_little_blur,
    np_umap_big_blur,
    np_umap_rotate,
) = (
    umap_transforms[archive_key]
    for archive_key in (
        'umap_grayscale_drop_0',
        'umap_little_blur_drop_0',
        'umap_big_blur_drop_0',
        'umap_rotate_180_drop_0',
    )
)

In [None]:
# Shared bin edges for both histogram axes: quarter-unit steps from -2 up to 13.75.
# NOTE(review): points lying outside this span would not fall into any bin --
# confirm the embeddings stay within it.
edges = [
    whole + offset
    for whole in range(-2, 14)
    for offset in (0, 0.25, 0.5, 0.75)
]

In [None]:
# Bin every embedding onto the same 2-D grid so the panels are directly comparable.
# Column 0 of each embedding goes on the first histogram axis, column 1 on the second.
(
    heatmap_no_transform,
    heatmap_grayscale,
    heatmap_little_blur,
    heatmap_big_blur,
    heatmap_rotate,
) = (
    np.histogram2d(points[:, 0], points[:, 1], bins=[edges, edges], density=True)[0]
    for points in (
        np_umap_no_transform,
        np_umap_grayscale,
        np_umap_little_blur,
        np_umap_big_blur,
        np_umap_rotate,
    )
)

panel_heatmaps = (
    heatmap_no_transform,
    heatmap_grayscale,
    heatmap_little_blur,
    heatmap_big_blur,
    heatmap_rotate,
)
panel_titles = ('No transform', 'Grayscale', 'Little Blur', 'Big Blur', 'Rotate 180')

# Five equally wide heatmap panels plus a narrow sixth axis reserved for the colorbar.
fig, ax = plt.subplots(ncols=6, figsize=(32, 6), gridspec_kw=dict(width_ratios=[1,1,1,1,1,0.1]))

# One colour scale across all panels, taken from the global extremes.
vmin = min(grid.min() for grid in panel_heatmaps)
vmax = max(grid.max() for grid in panel_heatmaps)

for panel_idx, (grid, panel_title) in enumerate(zip(panel_heatmaps, panel_titles)):
    sns.heatmap(
        grid.T,  # transposed so the first embedding column runs along the x axis
        ax=ax[panel_idx],
        cbar=False,
        vmin=vmin,
        vmax=vmax,
        xticklabels=10,
        # only the left-most panel carries y tick labels
        yticklabels=10 if panel_idx == 0 else False,
    )
    ax[panel_idx].set_title(panel_title)
    # flip the y axis that seaborn draws, so it increases upwards
    ax[panel_idx].invert_yaxis()

# The panels share vmin/vmax, so the first panel's mesh drives the common colorbar.
fig.colorbar(ax[0].collections[0], cax=ax[5])

plt.savefig('../output/transforms_umap.png')

<H2>Heatmaps of UMAP embeddings - no transform and dropped records</H2>

In [None]:
# Pull the remaining arrays out of the no-transform archive opened earlier:
# the "drop_25", "drop_50" and "drop_75" variants.
(
    np_umap_no_transform_25,
    np_umap_no_transform_50,
    np_umap_no_transform_75,
) = (
    umap_no_transforms[archive_key]
    for archive_key in (
        'umap_no_transform_drop_25',
        'umap_no_transform_drop_50',
        'umap_no_transform_drop_75',
    )
)

In [None]:
# Bin the full embedding and each dropped-record embedding onto the same 2-D grid.
(
    heatmap_no_transform,
    heatmap_no_transform_25,
    heatmap_no_transform_50,
    heatmap_no_transform_75,
) = (
    np.histogram2d(points[:, 0], points[:, 1], bins=[edges, edges], density=True)[0]
    for points in (
        np_umap_no_transform,
        np_umap_no_transform_25,
        np_umap_no_transform_50,
        np_umap_no_transform_75,
    )
)

panel_heatmaps = (
    heatmap_no_transform,
    heatmap_no_transform_25,
    heatmap_no_transform_50,
    heatmap_no_transform_75,
)
panel_titles = (
    'All records',
    'Drop 25% of records',
    'Drop 50% of records',
    'Drop 75% of records',
)

# Four equally wide heatmap panels plus a narrow fifth axis reserved for the colorbar.
fig, ax = plt.subplots(
    ncols=5, figsize=(20, 5), gridspec_kw=dict(width_ratios=[1,1,1,1,0.1]))

# One colour scale across all panels, taken from the global extremes.
vmin = min(grid.min() for grid in panel_heatmaps)
vmax = max(grid.max() for grid in panel_heatmaps)

for panel_idx, (grid, panel_title) in enumerate(zip(panel_heatmaps, panel_titles)):
    sns.heatmap(
        grid.T,  # transposed so the first embedding column runs along the x axis
        ax=ax[panel_idx],
        cbar=False,
        vmin=vmin,
        vmax=vmax,
        xticklabels=10,
        # only the left-most panel carries y tick labels
        yticklabels=10 if panel_idx == 0 else False,
    )
    ax[panel_idx].set_title(panel_title)
    # flip the y axis that seaborn draws, so it increases upwards
    ax[panel_idx].invert_yaxis()

# The panels share vmin/vmax, so the first panel's mesh drives the common colorbar.
fig.colorbar(ax[0].collections[0], cax=ax[4])

plt.savefig('../output/drop_data_umap.png')
