In [1]:
# Resolve the project root and make it the working directory.
#
# The notebook is started from a sub-folder of the project (see the
# "Starting path" output), so in that case we step up one level.  PATH is
# bound on BOTH branches: later cells build the data paths from it, and
# re-running this cell after the chdir must not leave it undefined.

from pathlib import Path
import os

PROJECT_DIR_NAME = 'VESUVIUS_Challenge'

print('Starting path:' + os.getcwd())
if os.getcwd().endswith(PROJECT_DIR_NAME):
    # Already at the project root (e.g. the cell was executed a second time).
    PATH = Path().resolve()
else:
    # .parent (unlike .parents[0]) does not raise at the filesystem root.
    PATH = Path().resolve().parent
    os.chdir(PATH)

# The working directory is expected to be the VESUVIUS_Challenge folder now.
print('Current path:' + os.getcwd())

Starting path:/Users/gregory/PROJECT_ML/VESUVIUS_Challenge/jupyter notebooks
Current path:/Users/gregory/PROJECT_ML/VESUVIUS_Challenge


In [2]:
from collections import defaultdict
from io import StringIO
from pathlib import Path
from typing import Tuple

import lovely_numpy as ln
import monai
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as Image
import pytorch_lightning as pl
import seaborn as sns
import torch
from monai.data import CSVDataset
from monai.data import DataLoader
from monai.inferers import sliding_window_inference
from monai.visualize import matshow3d
from torchmetrics import Dice
from torchmetrics import MetricCollection
from tqdm.auto import tqdm

2023-04-18 17:07:00,198 - Created a temporary directory at /var/folders/wc/60y8v25x3ns_jgsx6clbdb180000gn/T/tmpz8stxmjr
2023-04-18 17:07:00,204 - Writing /var/folders/wc/60y8v25x3ns_jgsx6clbdb180000gn/T/tmpz8stxmjr/_remote_module_non_scriptable.py


In [3]:
# Directory layout of the competition data, rooted at the project directory PATH.
KAGGLE_DIR = PATH.joinpath("kaggle")
INPUT_DIR = KAGGLE_DIR.joinpath("input")
COMPETITION_DATA_DIR = INPUT_DIR.joinpath("vesuvius-challenge-ink-detection")

# CSV index files read by the data module / datasets below.
TRAIN_DATA_CSV_PATH = COMPETITION_DATA_DIR.joinpath("data_train_1.0.csv")
TEST_DATA_CSV_PATH = COMPETITION_DATA_DIR.joinpath("data_test_1.0.csv")



In [4]:

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# scroll_1 size = 8181, 6330
# scroll_2 size = 14830, 9506
# scroll_3 size = 7606, 5249



class MONAI_CSV_Scrolls_Dataset(pl.LightningDataModule):
    """Lightning data module serving scroll fragments listed in a CSV file via MONAI.

    The CSV is read with pandas and must contain the columns used below:
    ``stage`` (rows are selected with ``"train"`` / ``"test"``), ``fragment_id``
    and ``volume_npy`` / ``mask_npy`` / ``label_npy``, which are handed to
    ``LoadImaged`` (presumably paths to ``.npy`` files -- confirm against the CSV).

    Args:
        patch_size: Size of the random training crop. An int is treated as a
            square 2D patch; a sequence is passed to MONAI unchanged.
        z_start, z_dim, shared_height, downsampling, train__fragment_id, stage,
            on_gpu: Stored in ``self.hparams`` only; no method of this class
            reads them.
        val_fragment_id: Fragment id, or list of ids, held out for validation.
            All other "train" rows form the training set.
        batch_size: Batch size of every dataloader.
        num_samples: Number of crops ``RandWeightedCropd`` draws per row.
        num_workers: Worker processes of every dataloader.
        data_csv_path: Path of the CSV file. Required despite the ``None`` default.

    Raises:
        ValueError: If ``data_csv_path`` is ``None``.
    """

    def __init__(self,
                 patch_size=512,
                 z_start=0,
                 z_dim=64,
                 shared_height=None,
                 downsampling=None,
                 train__fragment_id=[1],
                 val_fragment_id=[3],
                 stage='train',
                 batch_size=1,
                 num_samples=1,
                 num_workers=0,
                 on_gpu=False,
                 data_csv_path=None,
                 ):
        super().__init__()
        # Captures the constructor arguments into self.hparams.  The list
        # defaults above are never mutated by this class.
        self.save_hyperparameters()

        if data_csv_path is None:
            # pd.read_csv(None) would also raise ValueError, but with a message
            # that does not point at the missing argument.
            raise ValueError(
                "data_csv_path is required: pass the path of the CSV file "
                "describing the fragments."
            )

        self.df = pd.read_csv(data_csv_path)
        self.keys = ("volume_npy", "mask_npy", "label_npy")
        self.train_transform = self.train_transforms()
        self.val_transform = self.val_transforms()
        self.predict_transform = self.predict_transforms()

    def setup(self, stage=None):
        """Build the datasets needed for ``stage`` ("fit", "predict" or None for both)."""
        if stage == "fit" or stage is None:
            train_val_df = self.df[self.df.stage == "train"].reset_index(drop=True)

            # val_fragment_id may be a scalar (3) or a list ([3], the default).
            # Comparing the column to a list with ==/!= fails on a length
            # mismatch, so membership is tested with isin on a normalised list.
            val_ids = self._as_id_list(self.hparams.val_fragment_id)
            is_val = train_val_df.fragment_id.isin(val_ids)

            train_df = train_val_df[~is_val].reset_index(drop=True)
            val_df = train_val_df[is_val].reset_index(drop=True)

            self.train_dataset = self._dataset(train_df, self.train_transform)
            self.val_dataset = self._dataset(val_df, self.val_transform)

            print(f"# train: {len(self.train_dataset)}")
            print(f"# val: {len(self.val_dataset)}")

        if stage == "predict" or stage is None:
            predict_df = self.df[self.df.stage == "test"].reset_index(drop=True)
            self.predict_dataset = self._dataset(predict_df, self.predict_transform)

    @staticmethod
    def _as_id_list(fragment_ids):
        """Return ``fragment_ids`` as a list, wrapping a single id."""
        if pd.api.types.is_list_like(fragment_ids):
            return list(fragment_ids)
        return [fragment_ids]

    def _crop_size(self):
        """Return the crop size in the per-dimension form given to RandWeightedCropd."""
        patch_size = self.hparams.patch_size
        if isinstance(patch_size, int):
            # NOTE(review): RandWeightedCropd appears to need one entry per
            # spatial dimension; an int is expanded to a square 2D patch, the
            # same form as the (512, 512) used in the stand-alone experiment
            # further down -- confirm against the installed MONAI version.
            return (patch_size, patch_size)
        return patch_size

    def _dataset(self, df, transform):
        """Wrap a dataframe of rows in a MONAI ``CSVDataset`` applying ``transform``."""
        return CSVDataset(
            src=df,
            transform=transform,
        )

    def train_transforms_old(self):
        """Earlier training pipeline kept for reference: loading only, no crop or flips."""
        return monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(
                    keys="volume_npy",
                ),
                monai.transforms.LoadImaged(
                    keys=("mask_npy", "label_npy"),
                    ensure_channel_first=True,
                ),
            ]
        )

    def train_transforms(self):
        """Training pipeline: load, mask-weighted random crop, random flips on axes 0 and 1.

        This method was previously a second definition of
        ``train_transforms_old`` (shadowing the first), while ``__init__``
        calls ``train_transforms``.
        """
        return monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(
                    keys="volume_npy",
                ),
                monai.transforms.LoadImaged(
                    keys=("mask_npy", "label_npy"),
                    ensure_channel_first=True,
                ),
                # Crop centres are sampled using the mask as the weight map.
                monai.transforms.RandWeightedCropd(
                    keys=self.keys,
                    spatial_size=self._crop_size(),
                    num_samples=self.hparams.num_samples,
                    w_key="mask_npy",
                ),
                monai.transforms.RandFlipd(
                    keys=self.keys,
                    prob=0.5,
                    spatial_axis=0,
                ),
                monai.transforms.RandFlipd(
                    keys=self.keys,
                    prob=0.5,
                    spatial_axis=1,
                ),
            ]
        )

    def val_transforms(self):
        """Validation pipeline: load volume, mask and label without augmentation."""
        return monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(
                    keys="volume_npy",
                ),
                monai.transforms.LoadImaged(
                    keys=("mask_npy", "label_npy"),
                    ensure_channel_first=True,
                ),
            ]
        )

    def predict_transforms(self):
        """Prediction pipeline: load volume and mask only (no label is loaded)."""
        return monai.transforms.Compose(
            [
                monai.transforms.LoadImaged(
                    keys="volume_npy",
                ),
                monai.transforms.LoadImaged(
                    keys="mask_npy",
                    ensure_channel_first=True,
                ),
            ]
        )

    def train_dataloader(self):
        """Shuffled dataloader over the training dataset."""
        return self._dataloader(self.train_dataset, train=True)

    def val_dataloader(self):
        """Unshuffled dataloader over the validation dataset."""
        return self._dataloader(self.val_dataset)

    def predict_dataloader(self):
        """Unshuffled dataloader over the prediction dataset."""
        return self._dataloader(self.predict_dataset)

    def _dataloader(self, dataset, train=False):
        """Create a MONAI ``DataLoader``; shuffling is enabled only when ``train`` is true."""
        return DataLoader(
            dataset,
            batch_size=self.hparams.batch_size,
            shuffle=train,
            num_workers=self.hparams.num_workers,
        )



In [5]:
def visualize_dataloaders(dataloaders, train=True):
    """Plot volume, mask and label of every sample yielded by each dataloader.

    Args:
        dataloaders: Mapping of stage name to an iterable of batches. Each
            batch is indexed with "volume_npy" and "mask_npy", plus
            "label_npy" when ``train`` is true.
        train: When false, no label is read and the mask is drawn in the
            third panel instead.
    """
    # Display options shared by all three panels.
    panel_options = dict(
        vmin=0.0,
        vmax=1.0,
        every_n=2,
        fill_value=1.0,
        margin=4,
        cmap="gray",
    )

    for stage, loader in dataloaders.items():
        for batch_idx, batch in enumerate(loader):
            volumes = batch["volume_npy"]
            masks = batch["mask_npy"]
            labels = batch["label_npy"] if train else masks

            # One figure with three panels per sample in the batch.
            for sample in zip(volumes, masks, labels):
                _, axes = plt.subplots(1, 3, figsize=(15, 5))
                plt.suptitle(f"stage: {stage}, fragment: {batch_idx}")

                for ax, image in zip(axes, sample):
                    matshow3d(
                        volume=image,
                        fig=ax,
                        title=f"{list(image.shape)}, {image.min().item()}, {image.max().item()}",
                        **panel_options,
                    )

In [6]:
# Build the data module on the training CSV, holding fragment 3 out for validation.
data_module = MONAI_CSV_Scrolls_Dataset(
    data_csv_path=TRAIN_DATA_CSV_PATH,
    patch_size=128,
    val_fragment_id=3,
    batch_size=1,
    num_samples=1,
    num_workers=0,
)
data_module.setup(stage="fit")

# Train loader is created first, then the validation loader.
dataloaders = dict(
    train=data_module.train_dataloader(),
    val=data_module.val_dataloader(),
)

visualize_dataloaders(dataloaders)

# train: 2
# val: 1


RuntimeError: applying transform <monai.transforms.compose.Compose object at 0x29e414f10>

In [7]:
# Iterate the training loader directly and print the volume tensor of each batch.
dataloader = data_module.train_dataloader()
for batch_idx, batch in enumerate(dataloader):
    volumes = batch["volume_npy"]
    masks = batch["mask_npy"]
    print(volumes)

RuntimeError: applying transform <monai.transforms.compose.Compose object at 0x29e414f10>

In [None]:
# Index the training dataset directly (no DataLoader) to look at a single sample;
# presumably used to reproduce the transform RuntimeError recorded above -- confirm.
data_module.train_dataset[0]

In [None]:
# Stand-alone pipeline for experimenting outside the data module: load the
# three arrays, draw three mask-weighted 512x512 crops, then flip each
# spatial axis with probability 0.5.
transform = monai.transforms.Compose(
    [
        monai.transforms.LoadImaged(keys="volume_npy"),
        monai.transforms.LoadImaged(
            keys=("mask_npy", "label_npy"),
            ensure_channel_first=True,
        ),
        monai.transforms.RandWeightedCropd(
            keys=("volume_npy", "mask_npy", "label_npy"),
            spatial_size=(512, 512),
            num_samples=3,
            w_key="mask_npy",
        ),
        # One RandFlipd per spatial axis, axis 0 first and then axis 1.
        *(
            monai.transforms.RandFlipd(
                keys=("volume_npy", "mask_npy", "label_npy"),
                prob=0.5,
                spatial_axis=axis,
            )
            for axis in (0, 1)
        ),
    ]
)
        

In [None]:
# Dataset over every row of the training CSV (no stage / fragment filtering),
# using the stand-alone `transform` pipeline.
df = pd.read_csv(TRAIN_DATA_CSV_PATH)
dataset = CSVDataset(src=df, transform=transform)

In [None]:
# Shape of the label in the first element returned for the first CSV row.  The
# second [0] is presumably needed because the crop transform is configured
# with num_samples=3 and yields several crops per row -- TODO confirm.
dataset[0][0]['label_npy'].shape