In [1]:
import os
from pprint import pprint
import numpy as np
from collections import defaultdict
import json

import torch
from torch.utils.data import Dataset, DataLoader
# import torchvision
from torchvision.transforms import v2

import medmnist
from medmnist import INFO

import src.medmnist_dataset as md

In [2]:
# Working directory of the kernel at the moment this cell runs; the dataset
# folder used by the download cell is built relative to it.
CWD = os.getcwd()

In [3]:
# Show which version of the medmnist package is installed in this kernel.
print(medmnist.__version__)

3.0.2


In [4]:
# MedMNIST subsets used in this notebook, identified by their INFO keys.
# (The variable name is spelled "avaliable" on purpose here: other cells
# reference it under exactly this name.)
avaliable_datasets = [
    "pathmnist",
    "dermamnist",
    "octmnist",
    "pneumoniamnist",
    "breastmnist",
    "bloodmnist",
    "tissuemnist",
    "organamnist",
    "organcmnist",
    "organsmnist",
]  # left out: "retinamnist", "chestmnist"

In [5]:
# Print a short summary for every selected dataset, read from the medmnist
# INFO registry: wrapper class name, task type, number of classes, channel
# count, and the label-id -> label-name table.
for dataset_name in avaliable_datasets:
    info = INFO[dataset_name]

    # One print call with a newline separator emits the same seven lines as
    # seven individual print calls would.
    print(
        "-" * 100,
        f"Dataset: {dataset_name}",
        f"Python: {info['python_class']}",
        f"Task: {info['task']}",
        f"Number of classes: {len(info['label'])}",
        f"Channels: {info['n_channels']}",
        "Classes:",
        sep="\n",
    )
    pprint(info['label'])

----------------------------------------------------------------------------------------------------
Dataset: pathmnist
Python: PathMNIST
Task: multi-class
Number of classes: 9
Channels: 3
Classes:
{'0': 'adipose',
 '1': 'background',
 '2': 'debris',
 '3': 'lymphocytes',
 '4': 'mucus',
 '5': 'smooth muscle',
 '6': 'normal colon mucosa',
 '7': 'cancer-associated stroma',
 '8': 'colorectal adenocarcinoma epithelium'}
----------------------------------------------------------------------------------------------------
Dataset: dermamnist
Python: DermaMNIST
Task: multi-class
Number of classes: 7
Channels: 3
Classes:
{'0': 'actinic keratoses and intraepithelial carcinoma',
 '1': 'basal cell carcinoma',
 '2': 'benign keratosis-like lesions',
 '3': 'dermatofibroma',
 '4': 'melanoma',
 '5': 'melanocytic nevi',
 '6': 'vascular lesions'}
----------------------------------------------------------------------------------------------------
Dataset: octmnist
Python: OCTMNIST
Task: multi-class
Number 

In [6]:
# Side length, in pixels, of the image variant to work with; it is also used
# as the name of the sub-folder the archives are stored in.
# NOTE(review): upstream MedMNIST v3 publishes its 2D sets at 28, 64, 128 and
# 224 pixels; the "256" previously listed here looks like a slip for 224.
# Confirm which sizes md.download actually accepts.
image_size = 64 # 28, 64, 128 or 224
# Archives live under <CWD>/datasets/<image_size>/.
datasets_path = os.path.join(CWD, "datasets", str(image_size))
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(datasets_path, exist_ok=True)

# Project helper: obtains the .npz file of every listed dataset at the chosen
# size into datasets_path. Judging by the captured output, files that are
# already present and verified are reused rather than downloaded again.
md.download(avaliable_datasets, datasets_path, image_size)

Using downloaded and verified file: d:\AI\med-mnist\datasets\64\pathmnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\dermamnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\octmnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\pneumoniamnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\breastmnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\bloodmnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\tissuemnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\organamnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\organcmnist_64.npz
Using downloaded and verified file: d:\AI\med-mnist\datasets\64\organsmnist_64.npz


In [7]:
# Preprocessing pipeline applied to each sample, in order:
#   1. v2.ToImage  - wrap the input as a torchvision image tensor
#   2. v2.ToDtype  - cast to float16; scale=True asks torchvision to rescale
#                    the value range to match the new dtype
transform = v2.Compose(
    [
        v2.ToImage(),
        v2.ToDtype(torch.float16, scale=True),
    ]
)

In [8]:
# OrganA/C/S-MNIST share one set of classes (each reports labels 0..10), so
# they are merged into a single "organs" archive with are_unique_classes=False,
# i.e. the label ids are treated as one common label space.
organ_datasets = [
    "organamnist",
    "organcmnist",
    "organsmnist",
]

# The merged archive is written next to the source files as
# organs_<image_size>; the trailing ';' suppresses the notebook's echo of the
# return value.
md.unify_data(
    organ_datasets,
    are_unique_classes=False,
    image_size=image_size,
    datasets_path=datasets_path,
    save_path=datasets_path,
    filename=f"organs_{image_size}",
);

Dataset name: organamnist
Unique classes: [ 0  1  2  3  4  5  6  7  8  9 10]
Dataset name: organcmnist
Unique classes: [ 0  1  2  3  4  5  6  7  8  9 10]
Dataset name: organsmnist
Unique classes: [ 0  1  2  3  4  5  6  7  8  9 10]
All train images: (61468, 3, 64, 64)
All train labels: (61468, 1)
All test images: (34821, 3, 64, 64)
All test labels: (34821, 1)
All val images: (11335, 3, 64, 64)
All val labels: (11335, 1)
Unified dataset saved at d:\AI\med-mnist\datasets\64\organs_64.npz


In [9]:
# Final dataset list: the three organ sets are represented by the merged
# "organs" archive, followed by the remaining individual MedMNIST subsets.
new_avaliable_datasets = [
    "organs",
    "pathmnist",
    "dermamnist",
    "octmnist",
    "pneumoniamnist",
    "breastmnist",
    "bloodmnist",
    "tissuemnist",
]

# Here every dataset has its own classes (are_unique_classes=True); per the
# captured output, each dataset's label ids are shifted into a disjoint range
# (organs 0-10, pathmnist 11-19, ...). The result is saved as
# unified_<image_size>; the trailing ';' suppresses the notebook's echo of the
# return value.
md.unify_data(
    new_avaliable_datasets,
    are_unique_classes=True,
    image_size=image_size,
    datasets_path=datasets_path,
    save_path=datasets_path,
    filename=f"unified_{image_size}",
);


Dataset name: organs
Unique classes: [ 0  1  2  3  4  5  6  7  8  9 10]
After mapping: [ 0  1  2  3  4  5  6  7  8  9 10]
Dataset name: pathmnist
Unique classes: [0 1 2 3 4 5 6 7 8]
After mapping: [11 12 13 14 15 16 17 18 19]
Dataset name: dermamnist
Unique classes: [0 1 2 3 4 5 6]
After mapping: [20 21 22 23 24 25 26]
Dataset name: octmnist
Unique classes: [0 1 2 3]
After mapping: [27 28 29 30]
Dataset name: pneumoniamnist
Unique classes: [0 1]
After mapping: [31 32]
Dataset name: breastmnist
Unique classes: [0 1]
After mapping: [33 34]
Dataset name: bloodmnist
Unique classes: [0 1 2 3 4 5 6 7]
After mapping: [35 36 37 38 39 40 41 42]
Dataset name: tissuemnist
Unique classes: [0 1 2 3 4 5 6 7]
After mapping: [43 44 45 46 47 48 49 50]
All train images: (438627, 3, 64, 64)
All train labels: (438627, 1)
All test images: (96487, 3, 64, 64)
All test labels: (96487, 1)
All val images: (59128, 3, 64, 64)
All val labels: (59128, 1)
Unified dataset saved at d:\AI\med-mnist\datasets\64\unified_