In [1]:
!pip install -q medmnist numpy==1.23.1 pillow tensorboard==2.13.0

In [2]:
import numpy as np
import medmnist
from medmnist import INFO
from PIL import Image
import os
from tensorboard.plugins import projector
import shutil


# --- Dataset selection -------------------------------------------------------
dataset = "pathmnist"
info = INFO[dataset]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

# Resolve the dataset class by the name recorded in the MedMNIST INFO table.
DataClass = getattr(medmnist, info['python_class'])

# No transform is passed, so the loop below receives PIL images (it calls
# `.convert` on them directly).
pil_dataset = DataClass(split='train', download=True)

# --- Configuration -----------------------------------------------------------
# Number of samples to embed. The previous `if i > 1000: break` check ran
# *after* appending, so it collected 1002 samples instead of the intended cap.
MAX_SAMPLES = 1000
# Histogram bins per HSV channel; the feature vector has
# HIST_BINS * <number of channels> entries.
HIST_BINS = 4

# --- Feature extraction ------------------------------------------------------
# Collect the thumbnails, a simple colour feature per image, and the labels.
data = {"images": [], "features": [], "labels": []}
for pil_img, label in pil_dataset:
    if len(data["images"]) >= MAX_SAMPLES:
        break

    # Per-channel HSV histogram, concatenated into a single 1D feature vector.
    hsv = np.asarray(pil_img.convert('HSV'))
    channel_hists = [
        np.histogram(hsv[..., channel], bins=HIST_BINS, range=(0, 255))[0]
        for channel in range(hsv.shape[-1])
    ]
    feat = np.concatenate(channel_hists)

    # NOTE: the original code called `pil_img.resize((512, 512))` here but
    # discarded the result (PIL's `resize` returns a new image), so images were
    # always stored at their native size. That behaviour is kept on purpose:
    # a 32x32 grid of 512 px tiles would be a 16384 px sprite, which exceeds
    # the 8192x8192 sprite limit documented for the TensorBoard projector.
    data["images"].append(pil_img)
    data["features"].append(feat)
    data["labels"].append(label)

Using downloaded and verified file: /Users/andreped/.medmnist/pathmnist.npz


In [3]:
def create_sprite_image(pil_images, save_path):
    """Tile equally sized images into one square sprite sheet and save it.

    Images are laid out row by row, left to right, on a grid of
    ``ceil(sqrt(n))`` x ``ceil(sqrt(n))`` cells; unused cells stay black.

    Args:
        pil_images: Non-empty sequence of PIL images. All images are assumed
            to share the size of the first one.
        save_path: Destination file path; the format is chosen by PIL from
            the file extension.

    Raises:
        ValueError: If ``pil_images`` is empty.
    """
    if len(pil_images) == 0:
        raise ValueError("pil_images must contain at least one image")

    # Assuming all images have the same width and height
    img_width, img_height = pil_images[0].size

    # Smallest square grid that can hold every image.
    row_coln_count = int(np.ceil(np.sqrt(len(pil_images))))
    master_img_width = img_width * row_coln_count
    master_img_height = img_height * row_coln_count

    master_image = Image.new(
        mode='RGBA',
        size=(master_img_width, master_img_height),
        color=(0, 0, 0, 0),
    )

    for i, img in enumerate(pil_images):
        # Row-major placement: `row` selects the vertical offset,
        # `col` the horizontal one.
        row, col = divmod(i, row_coln_count)
        master_image.paste(img, (img_width * col, img_height * row))

    # The previous `transparency=0` argument was dropped: the sheet is
    # converted to RGB before saving, JPEG output has no transparency, and an
    # integer is not a valid transparency value for RGB PNG output.
    master_image.convert('RGB').save(save_path)

In [4]:
def write_embedding(log_dir, pil_images, features, labels):
    """Write the files the TensorBoard embedding projector needs to ``log_dir``.

    Produces ``metadata.tsv`` (one label per line), ``features.tsv`` (one
    tab-separated feature vector per line), ``sprite.jpg`` (thumbnail sheet)
    and the projector configuration. Any existing ``log_dir`` is deleted
    first.

    Args:
        log_dir: Output directory; removed and recreated on every call.
        pil_images: Non-empty sequence of PIL images, one per sample. All
            images are assumed to share the size of the first one.
        features: Sequence of 1D feature vectors, one per sample.
        labels: Sequence of labels, one per sample. A label wrapped in a
            single-element array or list (e.g. ``array([3])``) is written as
            its scalar value (``3``).

    Raises:
        ValueError: If ``pil_images`` is empty or the three inputs differ in
            length.
    """
    # Validate before touching the filesystem so that a bad call cannot wipe
    # an existing log directory.
    if len(pil_images) == 0:
        raise ValueError("pil_images must contain at least one image")
    if not (len(pil_images) == len(features) == len(labels)):
        raise ValueError(
            "pil_images, features and labels must have the same length, got "
            "{}, {} and {}".format(len(pil_images), len(features), len(labels))
        )

    metadata_filename = "metadata.tsv"
    tensor_filename = "features.tsv"
    sprite_image_filename = "sprite.jpg"
    img_width, img_height = pil_images[0].size

    if os.path.exists(log_dir):
        # remove existing log directory
        shutil.rmtree(log_dir)

    os.makedirs(log_dir, exist_ok=True)
    with open(os.path.join(log_dir, metadata_filename), "w") as f:
        for label in labels:
            # Unwrap single-element labels so the projector shows "3" rather
            # than "[3]"; anything else keeps its default string form.
            flat_label = np.asarray(label).ravel()
            label_text = flat_label[0].item() if flat_label.size == 1 else label
            f.write("{}\n".format(label_text))
    with open(os.path.join(log_dir, tensor_filename), "w") as f:
        for tensor in features:
            f.write("{}\n".format("\t".join(str(x) for x in tensor)))

    # create sprite images
    sprite_image_path = os.path.join(log_dir, sprite_image_filename)
    create_sprite_image(pil_images, sprite_image_path)

    # configure projector; paths are given relative to log_dir
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.metadata_path = metadata_filename
    embedding.tensor_path = tensor_filename
    embedding.sprite.image_path = sprite_image_filename
    embedding.sprite.single_image_dim.extend([img_width, img_height])

    # Create the configuration file
    projector.visualize_embeddings(log_dir, config)

In [7]:
# Directory that receives the projector files; write_embedding deletes and
# recreates it on every run.
LOG_DIR = os.path.join("logs", "data")
write_embedding(LOG_DIR, data["images"], np.array(data["features"]), np.array(data["labels"]))

# `%load_ext tensorboard` is left disabled in favour of `%reload_ext` below.
# %load_ext tensorboard
%reload_ext tensorboard
 
# {LOG_DIR} is interpolated by IPython with the Python variable defined above.
%tensorboard --logdir {LOG_DIR}

Reusing TensorBoard on port 6006 (pid 92751), started 0:00:19 ago. (Use '!kill 92751' to kill it.)