# Devel
Developing code, snippets, debugging, etc.

In [1]:
%load_ext autoreload
%autoreload 2

import sys

# Make packages located two directories above this notebook importable.
# Guarded so that re-running the cell does not stack duplicate entries onto
# sys.path.
if '../..' not in sys.path:
    sys.path.append('../..')

### Model sizes
Report the size of different model architectures.

In [None]:
# NOTE(review): deeplabv3_model, UNet and count_parameters are not imported in
# any cell shown above this one - confirm they are in scope on a fresh kernel.
def _report_param_count(label, net):
    """Put ``net`` in eval mode and print its parameter count."""
    net.eval()
    print(f'Number of parameters in {label} model: {count_parameters(net)}.')


# Grayscale DeepLabV3.
dl_model = deeplabv3_model(input_mode='grayscale')
_report_param_count('DeepLabV3', dl_model)

# Single-channel in, single-class out UNet.
un_model = UNet(n_channels=1, n_classes=1)
_report_param_count('UNet', un_model)

In [None]:
# Run one image through both models and summarise the raw outputs.
def _describe_output(label, out):
    """Print the shape and value range of a model output tensor."""
    print(
        f'Output of {label} model is of size: {out.shape}, min of '
        f'{out.min():.3f}, and max of {out.max():.3f}'
    )


sample_fp = (
    '/jcDataStore/Data/NeuroTK-Dash/ml-tissue-detection/images/'
    '1023340_1280.png'
)

# Grayscale -> 256x256 -> tensor -> add a leading batch dimension.
gray = Image.open(sample_fp).convert('L')
img = ToTensor()(Resize((256, 256))(gray)).unsqueeze(0)

# The DeepLabV3 model returns a mapping; the prediction is under 'out'.
dl_out = dl_model(img)['out']
_describe_output('deeplab', dl_out)

# The UNet model returns the prediction directly.
un_out = un_model(img)
_describe_output('UNet', un_out)

### Evaluation Code
When a model is trained we want some helper functions that can be used to get
a report of the model performance on a dataset or set of images.

To do:
* Given a dataset - CSV file, calculate a metric, e.g. DICE, for predictions on all those images
* To the function above, add an optional parameter for saving prediction masks for all those images
* Also need a function that only predicts the masks without calculating the metric, e.g. for datasets that have no ground truth labels

In [4]:
# Load a trained model.
import torch

# NOTE(review): deeplabv3_model is only imported in a later cell
# (from neurotk.torch.models) - confirm it is in scope on a fresh kernel.
model = deeplabv3_model(input_mode='rgb')

# map_location='cpu' lets the checkpoint deserialize on machines that do not
# have the GPU it was saved from; the weights are copied into the (CPU) model
# by load_state_dict either way.
model.load_state_dict(torch.load(
    '/jcDataStore/Data/NeuroTK-Dash/ml-tissue-detection/models/'
    'tissue-model-ml/best.pt',
    map_location='cpu'
))

<All keys matched successfully>

In [26]:
from typing import Union
import numpy as np
from tqdm.notebook import tqdm

import torch.nn as nn
from torch.utils.data import DataLoader

from neurotk import imwrite
from neurotk.utils import create_dirs, get_filename
from neurotk.torch.datasets import BinarySSDataset
from neurotk.torchvision.semantic_segmentation_transforms import (
    Compose, ToTensor, Resize
)

import pandas as pd
from pandas.core.frame import DataFrame

from os import makedirs
from os.path import join


def _resolve_device(device: Union[str, int, torch.device]) -> torch.device:
    """Convert a user-supplied device specifier into a ``torch.device``."""
    if isinstance(device, torch.device):
        return device

    # A bare CUDA index, given either as an int or as a numeric string.
    if isinstance(device, int) or str(device).isdigit():
        return torch.device(f'cuda:{device}')

    # Anything else ('cpu', 'cuda', 'cuda:1', ...) is already a device string.
    return torch.device(device)


def evaluate(
    model: nn.Module, data: Union[str, DataFrame], save_dir: str = '.',
    img_size: int = 512, input_mode: str = 'rgb', batch_size: int = 6,
    device: Union[str, int, torch.device] = 'cpu', thr: float = 0.7,
    save_mask: bool = False
):
    """Predict binary masks for every image of a dataset.

    No metric is computed yet; the function only runs the model and, when
    requested, writes the thresholded predictions to disk.

    Args:
        model: Model. Its forward pass must return a mapping whose 'out' entry
            holds the prediction.
        data: Filepath to csv or the dataframe with the images to evaluate. The
            fp column contains the image filepath and label column contains the
            label mask images (optional).
        save_dir: Directory to save images to. Masks are written to
            <save_dir>/predictions/masks.
        img_size: Image size to use when predicting, images will be resized.
        input_mode: Use 'rgb' or 'grayscale' images. Currently only validated,
            it is not used anywhere else in this function.
        batch_size: Size of batches to use when predicting.
        device: 'cpu', 'cuda', a specific cuda index (e.g. 0, '1'), a full
            device string (e.g. 'cuda:1') or a torch.device.
        thr: Threshold value for predictions for binary conversion. Applied
            directly to the model output.
        save_mask: Save binary prediction masks.

    Returns:
        The string 'Done'.

    Raises:
        AssertionError: If input_mode is not 'grayscale' or 'rgb'.

    """
    assert input_mode in ('grayscale', 'rgb'), (
        f"input_mode must be 'grayscale' or 'rgb', got {input_mode!r}"
    )

    device = _resolve_device(device)

    model.eval()
    model.to(device)

    makedirs(save_dir, exist_ok=True)

    transforms = Compose([ToTensor(), Resize((img_size, img_size))])

    if isinstance(data, str):
        data = pd.read_csv(data)

    # Create dataloader. Order is kept so outputs line up with the input rows.
    dataloader = DataLoader(
        BinarySSDataset(data, transforms=transforms), batch_size=batch_size,
        shuffle=False
    )

    # Create subdirectories to save images.
    pred_dir = join(save_dir, 'predictions/masks')
    create_dirs([pred_dir])

    for batch in tqdm(dataloader):
        # The ground truth mask is not needed for prediction, so it is left
        # where it is instead of being moved to the device.
        image = batch['image'].to(device)
        info = batch['info']

        with torch.no_grad():
            pred = model(image)['out']

        # Threshold the prediction into a 0 / 255 uint8 mask.
        pred = pred.cpu().detach().numpy()
        pred = (pred > thr).astype(np.uint8) * 255

        if save_mask:
            # Loop through each image of the batch and save its first channel
            # under the source image's file name.
            for i, pred_mask in enumerate(pred):
                fn = get_filename(info['fp'][i])

                imwrite(join(pred_dir, fn + '.png'), pred_mask[0])

    return 'Done'

    
# Run the evaluation on the 512 px dataset using GPU index 0. save_mask is
# left at its default, so no masks are written.
dataset_csv = (
    '/jcDataStore/Data/NeuroTK-Dash/ml-tissue-detection/dataset-512.csv'
)

temp = evaluate(model, dataset_csv, save_dir='temp', device='0')
temp

  0%|          | 0/90 [00:00<?, ?it/s]

'Done'

### DeepLabV3: Best Hyperparams from WandB Ray

In [7]:
from pandas import read_csv

LOSS_COL = 'epoch_loss'
DICE_COL = 'epoch_dice coefficient'


def _metrics_line(run):
    """Format the loss / Dice of one exported run for printing."""
    return f"   loss={run[LOSS_COL]:.6f}   dice={run[DICE_COL]:.3f}"


df = read_csv('wandb_export_2023-09-18T10_43_58.816-04_00.csv')

# Top run by each criterion, to check whether both pick the same model:
# lowest loss first, highest Dice first.
r1 = df.sort_values(LOSS_COL).iloc[0]
r2 = df.sort_values(DICE_COL, ascending=False).iloc[0]

print(f"Best model based on loss is {r1['Name']}.")
print(_metrics_line(r1))

print(f"\nBest model based on Dice is {r2['Name']}.")
print(_metrics_line(r2))

print(
    '\nChoosing the model base on loss, because the Dice metric is based on\n'
    'a single threshold while the loss does not use an arbitrary threshold.'
)
display(r1)

Best model based on loss is mild-violet-65.
   loss=0.002063   dice=0.961

Best model based on Dice is generous-sky-58.
   loss=0.003978   dice=0.975

Choosing the model base on loss, because the Dice metric is based on
a single threshold while the loss does not use an arbitrary threshold.


Name                                mild-violet-65
State                                     finished
Notes                                            -
User                                      dagutman
Tags                                           NaN
Created                   2023-09-15T22:20:13.000Z
Runtime                                       2739
Sweep                                          NaN
batch_size                                       8
epochs                                          30
lr                                        0.000228
epoch_dice coefficient                    0.960836
epoch_loss                                0.002063
Name: 15, dtype: object

### Trained Models Evaluation
Use trained semantic segmentation models, mainly DeepLabV3, to validate on datasets.

Report: metrics for the dataset, prediction masks, and overlays for visualization.

In [4]:
# Imports
import torch
import pandas as pd
from sklearn.model_selection import train_test_split

from neurotk.torch.models import deeplabv3_model
from neurotk.torch.validation import validate_semantic_segmentation

In [16]:
# Load a model with pre-trained weights and validate it on the held-out split.
root_dir = '/jcDataStore/Data/NeuroTK-Dash/ml-tissue-detection/'
model_dir = root_dir + 'models/wandb-deeplab-tissue-model/'
model_fp = model_dir + 'best.pt'

model = deeplabv3_model(classes=1, input_mode='rgb')

# map_location='cpu' lets the checkpoint deserialize regardless of which
# device it was saved from; the weights are copied into the (CPU) model by
# load_state_dict either way.
model.load_state_dict(torch.load(model_fp, map_location='cpu'))
model.eval()

# Keep the full table under its own name; `data` is the 20 % test split
# (fixed random_state so the split is reproducible).
dataset_df = pd.read_csv(root_dir + 'datasets/tissue-dataset/dataset.csv')
data = train_test_split(dataset_df, test_size=0.2, random_state=64)[1]

# Outputs are saved next to the model weights.
iou = validate_semantic_segmentation(
    model, data, device='cuda', sigmoid=False, thr=0.7,
    save_dir=model_dir,
    save_figs=True
)

print(f'IoU: {iou}')

100%|██████████| 18/18 [00:17<00:00,  1.06it/s]

IoU: 0.9638580637595507



