# Setup Enviroment

In [17]:
!python -c "import monai" || pip install -q "monai-weekly"
!pip install -q torch==1.10.2 torchtext==0.11.2 torchvision==0.11.3
!pip install -q torchio==0.18.73
!pip install -q pytorch-lightning==1.5.10
!pip install -q pandas==1.1.5 seaborn==0.11.1
!pip install -q pillow==9.4.0

In [18]:
from datetime import datetime
import os
from pathlib import Path
import tempfile
from glob import glob

import torch
from torch.utils.data import random_split, DataLoader
import monai
import pandas as pd
import torchio as tio
import pytorch_lightning as pl
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
from config import *
monai.config.print_config()

MONAI version: 1.2.dev2312
Numpy version: 1.24.2
Pytorch version: 1.10.2+cpu
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: 400a6a052f1b2925db6f1323a67a7cf4546403eb
MONAI __file__: c:\Users\LESC\Desktop\UNet-SegRD\lesc-env\lib\site-packages\monai\__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
ITK version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: 5.0.1
scikit-image version: NOT INSTALLED or UNKNOWN VERSION.
Pillow version: 9.4.0
Tensorboard version: 2.12.0
gdown version: NOT INSTALLED or UNKNOWN VERSION.
TorchVision version: 0.11.3+cpu
tqdm version: 4.65.0
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 5.9.4
pandas version: 1.1.5
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional

# Configurations and Tensorboard Setup

In [19]:
sns.set()
plt.rcParams["figure.figsize"] = 12, 8
monai.utils.set_determinism()

%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Setup Data Directory

In [20]:
# directory = os.environ.get("MONAI_DATA_DIRECTORY")
# root_dir = tempfile.mkdtemp() if directory is None else directory
# print(root_dir)

root_dir = "./datasets"
print(root_dir)

./datasets


# Data

In [21]:
# Funções para ler imagens e adaptar o conjunto de dados Diaretdb1
def pil_loader(image_path,is_mask=False):
    with open(image_path, 'rb') as f:
        img = Image.open(f)
        h, w = img.size
        if not is_mask:
            return img.resize((h//2, w//2)).convert('RGB')
            # return img.convert('RGB')
        else:
            return img.resize((h//2, w//2)).convert('L')
            # return img.convert('L')


def create_dir(path:Path):
    if not path.exists():
        if not path.parent.exists():
            create_dir(path.parent)
        path.mkdir()


def adaptar_dataset(root_dir: Path, dir_fundus_imgs: Path, dir_groundtruths_imgs: Path, annotations_path: Path):
    """Com base nos arquivos de anotações .txt do dataset diaretdb1_v1.1, essa função cria uma divisão melhor das
    imagens em TESTSET e TRAINSET para facilitar futuras utilizações desses dados"""

    path_base = Path(root_dir/str(annotations_path.stem).upper())
    create_dir(path_base/dir_fundus_imgs.name)
    labels = pd.read_csv(annotations_path, header=None).sort_values(by=0, ascending=True)
    for dir_masks in ['hardexudates', 'hemorrhages', 'redsmalldots', 'softexudates']:
        create_dir(path_base/'ddb1_groundtruth'/dir_masks)

        for label in labels[0]:
            # Salvar a imagem correspondente das anotações na pasta de fundoscopias:
            img_fundus = pil_loader(dir_fundus_imgs/label)
            img_fundus.save(path_base/dir_fundus_imgs.name/label)
            # Salvar a mascara:
            mask = pil_loader(dir_groundtruths_imgs/dir_masks/label)
            mask.save(path_base/'ddb1_groundtruth'/dir_masks/label)
    print(f"Novo diretório {root_dir.name}/{annotations_path.stem} criado.")

In [35]:
class MedicalDecathlonDataModule(pl.LightningDataModule):
    def __init__(self, dataset_name, batch_size, train_val_ratio, lesion):
        '''lesion: EX: hardexsudates, HE: hemorrhages, MA: microaneurysms, SE'''
        super().__init__()
        self.dataset_name = dataset_name
        self.batch_size = batch_size
        self.base_dir = root_dir
        self.dataset_dir = os.path.join(root_dir, dataset_name)
        self.lesion = lesion
        self.train_val_ratio = train_val_ratio
        self.subjects = None
        self.test_subjects = None
        self.preprocess = None
        self.transform = None
        self.train_set = None
        self.val_set = None
        self.test_set = None

    def download_data(self):
        if not os.path.isdir(self.dataset_dir):
            url = "https://www.it.lut.fi/project/imageret/diaretdb1/diaretdb1_v_1_1.zip"
            monai.apps.download_and_extract(url, output_dir="./datasets")
            # TESTSET:
            adaptar_dataset(ROOT_DATASET_PATH, IMGS_FUNDUS_PATH, MASKS_DIR_PATH, ANNOTATIONS_TEST_PATH)
            # TRAINSET:
            adaptar_dataset(ROOT_DATASET_PATH, IMGS_FUNDUS_PATH, MASKS_DIR_PATH, ANNOTATIONS_TRAIN_PATH)

        mask_dir = sorted(os.listdir(os.path.join(self.dataset_dir, "TRAINSET", "ddb1_groundtruth")))[LESIONS[self.lesion]]

        image_training_paths = sorted(glob(os.path.join(self.dataset_dir, "TRAINSET", "ddb1_fundusimages", "*.png")))
        label_training_paths = sorted(glob(os.path.join(self.dataset_dir, "TRAINSET", "ddb1_groundtruth", mask_dir, "*.png")))
        image_test_paths = sorted(glob(os.path.join(self.dataset_dir, "TESTSET", "ddb1_fundusimages", "*.png*")))
        return image_training_paths, label_training_paths, image_test_paths

    def prepare_data(self):
        # torchio.Image -> 
        image_training_paths, label_training_paths, image_test_paths = self.download_data()

        self.subjects = []
        for image_path, label_path in zip(image_training_paths, label_training_paths):
            # 'image' and 'label' are arbitrary names for the images
            subject = tio.Subject(image=tio.ScalarImage(image_path), label=tio.LabelMap(label_path))
            self.subjects.append(subject)

        self.test_subjects = []
        for image_path in image_test_paths:
            subject = tio.Subject(image=tio.ScalarImage(image_path))
            self.test_subjects.append(subject)

    def get_preprocessing_transform(self):
        preprocess = tio.Compose(
            [
                tio.RescaleIntensity((0, 1)),
                # tio.CropOrPad(self.get_max_shape(self.subjects + self.test_subjects)),
                tio.EnsureShapeMultiple(8),  # for the U-Net
                # tio.OneHot(),
            ]
        )
        return preprocess

    def get_augmentation_transform(self):
        augment = tio.Compose(
            [
                tio.RandomAffine(),
                tio.RandomGamma(p=0.5),
                tio.RandomNoise(p=0.5),
                tio.RandomMotion(p=0.1),
                tio.RandomBiasField(p=0.25),
            ]
        )
        return augment

    def setup(self, stage=None):
        num_subjects = len(self.subjects)
        num_train_subjects = int(round(num_subjects * self.train_val_ratio))
        num_val_subjects = num_subjects - num_train_subjects
        splits = num_train_subjects, num_val_subjects
        train_subjects, val_subjects = random_split(self.subjects, splits)

        self.preprocess = self.get_preprocessing_transform()
        augment = self.get_augmentation_transform()
        self.transform = tio.Compose([self.preprocess, augment])

        self.train_set = tio.SubjectsDataset(train_subjects, transform=self.transform)
        print(next(iter(self.train_set)))
        self.val_set = tio.SubjectsDataset(val_subjects, transform=self.preprocess)
        self.test_set = tio.SubjectsDataset(self.test_subjects, transform=self.preprocess)

    def train_dataloader(self):
        return DataLoader(self.train_set, self.batch_size, num_workers=2)

    def val_dataloader(self):
        return DataLoader(self.val_set, self.batch_size, num_workers=2)

    def test_dataloader(self):
        return DataLoader(self.test_set, self.batch_size, num_workers=2)

In [36]:
data = MedicalDecathlonDataModule(
    dataset_name="diaretdb1_v_1_1",
    batch_size=4,
    train_val_ratio=0.8,
    lesion="EX"
)

data.prepare_data()
data.setup()
print("Training:  ", len(data.train_set))
print("Validation: ", len(data.val_set))
print("Test:      ", len(data.test_set))

loader = data.train_dataloader()
# print(next(iter(loader)))
# batch = next(iter(loader))['image']['data'].shape
# print(batch)

Subject(Keys: ('image', 'label'); images: 2)
Training:   22
Validation:  6
Test:       61


# Lightning model

In [30]:
class Model(pl.LightningModule):
    def __init__(self, net, criterion, learning_rate, optimizer_class):
        super().__init__()
        self.lr = learning_rate
        self.net = net
        self.criterion = criterion
        self.optimizer_class = optimizer_class

    def configure_optimizers(self):
        optimizer = self.optimizer_class(self.parameters(), lr=self.lr)
        return optimizer

    def prepare_batch(self, batch):
        return batch["image"][tio.DATA], batch["label"][tio.DATA]

    def infer_batch(self, batch):
        x, y = self.prepare_batch(batch)
        y_hat = self.net(x)
        return y_hat, y

    def training_step(self, batch, batch_idx):
        y_hat, y = self.infer_batch(batch)
        loss = self.criterion(y_hat, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        y_hat, y = self.infer_batch(batch)
        loss = self.criterion(y_hat, y)
        self.log("val_loss", loss)
        return loss

In [110]:
# teste:
import nibabel as nib
import matplotlib.pyplot as plt

# Load image
img = nib.load("C:\\Users\\LESC\\Downloads\\Task04_Hippocampus\\labelsTr\\hippocampus_001.nii")
data = img.get_fdata()
# data.shape



(35, 51, 35)

In [108]:
unet = monai.networks.nets.UNet(
    spatial_dims=2,
    in_channels=3,
    out_channels=2,
    channels=(8, 16, 32, 64),
    strides=(2, 2, 2),
)

model = Model(
    net=unet,
    criterion=monai.losses.DiceCELoss(softmax=True),
    learning_rate=1e-2,
    optimizer_class=torch.optim.AdamW,
)
early_stopping = pl.callbacks.early_stopping.EarlyStopping(
    monitor="val_loss",
)
trainer = pl.Trainer(
    gpus=0,
    precision='bf16',
    callbacks=[early_stopping]
)
trainer.logger._default_hp_metric = False

Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


# Training

In [109]:
start = datetime.now()
print("Training started at", start)
trainer.fit(model=model, datamodule=data)
print("Training duration:", datetime.now() - start)


  | Name      | Type       | Params
-----------------------------------------
0 | net       | UNet       | 41.0 K
1 | criterion | DiceCELoss | 0     
-----------------------------------------
41.0 K    Trainable params
0         Non-trainable params
41.0 K    Total params
0.164     Total estimated model params size (MB)


Training started at 2023-03-24 15:23:23.191020
                                                              

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [8, 3, 3, 3], but got 5-dimensional input of size [4, 3, 752, 576, 8] instead

In [None]:
%tensorboard --logdir lightning_logs

# Plot validation results

In [13]:
model.to("cpu")
all_dices = []
get_dice = monai.metrics.DiceMetric(include_background=False, reduction="none")
with torch.no_grad():
    for batch in data.val_dataloader():
        inputs, targets = model.prepare_batch(batch)
        logits = model.net(inputs.to(model.device))
        labels = logits.argmax(dim=1)
        labels_one_hot = torch.nn.functional.one_hot(labels).permute(0, 4, 1, 2, 3)
        get_dice(labels_one_hot.to(model.device), targets.to(model.device))
    metric = get_dice.aggregate()
    get_dice.reset()
    all_dices.append(metric)
all_dices = torch.cat(all_dices)

In [None]:
records = []
for ant, post in all_dices:
    records.append({"Dice": ant, "Label": "Anterior"})
    records.append({"Dice": post, "Label": "Posterior"})
df = pd.DataFrame.from_records(records)
ax = sns.stripplot(x="Label", y="Dice", data=df, size=10, alpha=0.5)
ax.set_title("Dice scores")

# Test

In [15]:
with torch.no_grad():
    for batch in data.test_dataloader():
        inputs = batch["image"][tio.DATA].to(model.device)
        labels = model.net(inputs).argmax(dim=1, keepdim=True).cpu()
        break
batch_subjects = tio.utils.get_subjects_from_batch(batch)
tio.utils.add_images_from_batch(batch_subjects, labels, tio.LabelMap)

In [None]:
for subject in batch_subjects:
    subject.plot()