In [1]:
import torch
import pandas as pd
import numpy as np

from models.setup import ModelSetup
from models.build import create_multimodal_rcnn_model
from data.load import get_datasets, get_dataloaders

from utils.init import reproducibility, clean_memory_get_device
from data.constants import DEFAULT_REFLACX_LABEL_COLS, XAMI_MIMIC_PATH

## Suppress the assignement warning from pandas.r
pd.options.mode.chained_assignment = None  # default='warn'

## Supress user warning
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

%matplotlib inline

In [2]:
# Select the compute device used by every later `.to(device)` call.
# Per the helper's name it also clears GPU memory first; one of the two helpers
# in this cell prints the selected device.
device = clean_memory_get_device()
# Presumably seeds the RNGs / switches PyTorch to deterministic algorithms so
# reruns give the same numbers -- confirm against utils.init.
reproducibility()

This notebook will running on device: [CUDA]


# Parameters setup

In [3]:
# Evaluation switches: which overlap measure is named in reports and which
# overlap threshold(s) are used.
use_iobb = True
if use_iobb:
    io_type_str = "IoBB"
else:
    io_type_str = "IoU"
labels_cols = DEFAULT_REFLACX_LABEL_COLS
iou_thrs = np.array([0.5])

# Settings shared by every experiment variant (optimiser, scheduler, input
# size, mask usage, ...). Exact semantics of each key live in ModelSetup.
common_args = dict(
    save_early_stop_model=True,
    optimiser="sgd",
    lr=1e-3,
    weight_decay=1e-5,
    image_backbone_pretrained=True,
    fixation_backbone_pretrained=True,
    record_training_performance=True,
    image_size=256,
    batch_size=4,
    warmup_epochs=0,
    lr_scheduler="ReduceLROnPlateau",
    reduceLROnPlateau_factor=0.1,
    reduceLROnPlateau_patience=999,
    reduceLROnPlateau_full_stop=True,
    multiStepLR_milestones=100,
    multiStepLR_gamma=0.1,
    use_mask=True,
    gt_in_train_till=999,
    box_head_dropout_rate=0,
    measure_test=True,
)

# How the image and fixation feature maps are combined.
fusion_add_args = dict(
    fuse_depth=0,
    fusion_residule=False,
    fusion_strategy="add",
)

# Reduced layer widths so the forward test stays small.
small_model_args = dict(
    mask_hidden_layers=64,
    fuse_conv_channels=64,
    representation_size=64,
    backbone_out_channels=64,
)

# Backbone choice: MobileNetV3 without a feature pyramid network.
mobilenet_args = dict(
    backbone="mobilenet_v3",
    using_fpn=False,
)


# [TODO]: clean the model setup for fixation map.
# The four groups have no overlapping keys, so merging them first is
# equivalent to unpacking each one in the call.
model_setup_kwargs = {
    **mobilenet_args,
    **small_model_args,
    **common_args,
    **fusion_add_args,
}
model_setup = ModelSetup(
    name="forward_testing_model",
    use_fixations=True,
    **model_setup_kwargs,
)


# Initiate datasets and dataloaders
The batch size is also defined in this section. For testing purposes, the dataloaders below use a batch size of 1.

In [4]:
# Build the datasets from the same `labels_cols` that is later passed to the
# model factory, so the dataset labels and the model's classes cannot drift
# apart if the parameter cell is edited.
dataset_params_dict = {
    "XAMI_MIMIC_PATH": XAMI_MIMIC_PATH,
    "bbox_to_mask": model_setup.use_mask,
    "labels_cols": labels_cols,
    "with_fixation": True,
}

detect_eval_dataset, train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_params_dict=dataset_params_dict
)

# Batch size used for this forward-pass test only. Note that it differs from
# the `batch_size` entry (4) stored in `model_setup`.
forward_test_batch_size = 1

train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
    train_dataset, val_dataset, test_dataset, batch_size=forward_test_batch_size
)

## Example instance from dataset:
Inside each instance we have:

- Images
- Fixation heatmaps
- Targets (Dictionary)

Inside the target dictionary there are:

- boxes (bounding boxes of the abnormalities)
- labels (disease index; note that class **0** is the background)
- image_id (index used to retrieve that image)
- area (the area covered by each bounding box)
- iscrowd (whether a box covers a crowd of objects; we assume none of the bounding boxes are crowds)

In [5]:
# Fetch the first training sample once: every `train_dataset[0]` goes through
# the dataset's __getitem__, so indexing it twice would load the sample twice.
first_instance = train_dataset[0]

# Number of parts in one instance (images, fixations, targets).
print(f'Size of a training instance - images, fixations, targets = {len(first_instance)}')

# Length of the first part's leading dimension, reported as the channel count.
print(f'Each image has {len(first_instance[0])} channels ')

Size of a training instance - images, fixations, targets = 3
Each image has 3 channels 


In [6]:
# The third element of an instance is its target dictionary; display it.
first_sample = train_dataset[0]
first_sample_targets = first_sample[2]
first_sample_targets

{'boxes': tensor([], size=(0, 4), dtype=torch.float64),
 'labels': tensor([], dtype=torch.int64),
 'image_id': tensor([0]),
 'area': tensor([], dtype=torch.float64),
 'iscrowd': tensor([], dtype=torch.int64),
 'dicom_id': '34cedb74-d0996b40-6d218312-a9174bea-d48dc033',
 'image_path': 'D:\\XAMI-MIMIC\\patient_18111516\\CXR-JPG\\s55032240\\34cedb74-d0996b40-6d218312-a9174bea-d48dc033.jpg',
 'fixations_path': 'D:\\XAMI-MIMIC\\patient_18111516\\REFLACX\\P102R108387\\fixations.csv',
 'masks': tensor([], size=(0, 3056, 2544), dtype=torch.uint8)}

# Define model

In [7]:
# Threshold / NMS / detection-count settings forwarded to the model factory.
detector_kwargs = dict(
    rpn_nms_thresh=0.3,
    box_detections_per_img=10,
    box_nms_thresh=0.2,
    rpn_score_thresh=0.0,
    box_score_thresh=0.05,
)

model = create_multimodal_rcnn_model(labels_cols, model_setup, **detector_kwargs)

# Move the model to the selected device. As the last expression of the cell
# this also displays the full module tree.
model.to(device)


Using pretrained backbone. mobilenet_v3
Using pretrained backbone. mobilenet_v3
forward_testing_model will use mask, [64] layers.


MultimodalMaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
    

# Prepare data to feed
We prepare three inputs to test the model:

- CXR image
- Fixation heatmaps
- Target

Each of them is converted to the format the model expects.

In [8]:
# Take the first batch from the training loader, then let the dataset convert
# it into the model's input format on the selected device.
first_batch = next(iter(train_dataloader))
data = train_dataset.prepare_input_from_data(first_batch, device)

In [9]:
# Display everything in the prepared batch except its last element; the last
# element is what the forward pass below receives as `targets`.
data[:-1]

([tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
           [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
           ...,
           [0.7686, 0.7725, 0.7451,  ..., 0.8667, 0.8627, 0.8588],
           [0.7725, 0.7725, 0.7490,  ..., 0.8706, 0.8627, 0.8627],
           [0.7725, 0.7686, 0.7529,  ..., 0.8588, 0.8471, 0.8510]],
  
          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
           [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
           ...,
           [0.7686, 0.7725, 0.7451,  ..., 0.8667, 0.8627, 0.8588],
           [0.7725, 0.7725, 0.7490,  ..., 0.8706, 0.8627, 0.8627],
           [0.7725, 0.7686, 0.7529,  ..., 0.8588, 0.8471, 0.8510]],
  
          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039],
           [0.0039, 0.

# Test Feedforward (Training)

In [10]:
# Training mode: the forward pass is given the targets and returns the loss
# dictionary alongside the outputs.
model.train()

# The last element of the batch holds the targets; everything before it is
# passed positionally.
*model_inputs, batch_targets = data
loss_dict, outputs = model(*model_inputs, targets=batch_targets)

In [11]:
# Display the individual loss terms returned by the training-mode forward pass.
loss_dict

{'loss_classifier': tensor(1.3321, device='cuda:0', grad_fn=<NllLossBackward0>),
 'loss_box_reg': tensor(0., device='cuda:0', grad_fn=<DivBackward0>),
 'loss_mask': tensor(0., device='cuda:0', grad_fn=<MulBackward0>),
 'loss_objectness': tensor(0.6932, device='cuda:0',
        grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_rpn_box_reg': tensor(0., device='cuda:0', grad_fn=<DivBackward0>)}

In [12]:
# Split the prepared batch into its three parts for the step-by-step
# walk-through of the model in the next cell.
images, fixations, targets = data

In [13]:
# Read the inputs from the untouched batch in `data` instead of from the
# `images` / `targets` names: this cell rebinds both to their transformed
# versions, so reusing them as input would feed already-transformed data back
# into the transform whenever the cell is run a second time.
batch_images, batch_targets = data[0], data[-1]

# Record each image's (height, width) before the transform resizes it.
original_image_sizes = []
for img in batch_images:
    height_width = img.shape[-2:]
    assert len(height_width) == 2
    original_image_sizes.append((height_width[0], height_width[1]))

# Same preprocessing step the model applies internally to its inputs.
images, targets = model.transform(batch_images, batch_targets)

# Run only the image backbone on the batched tensor and inspect its output shape.
img_features = model.backbone(images.tensors)

print(img_features.shape)

torch.Size([1, 64, 8, 8])


# Results
Five different losses are given in the output: `loss_classifier`, `loss_box_reg`, `loss_mask`, `loss_objectness` and `loss_rpn_box_reg`.

We will use these losses to optimise the network while training.



## Test Feedforward

### Detection.

A detection contains *boxes*, *labels*, and *scores*.

- *boxes*: All the bounding boxes for this image.
- *labels*: Labels corresponding to the bounding boxes.
- *scores*: Score (confidence) for each bounding box.

In [14]:
# Display the losses together with the detections returned by the
# training-mode forward pass.
loss_dict, outputs

({'loss_classifier': tensor(1.3321, device='cuda:0', grad_fn=<NllLossBackward0>),
  'loss_box_reg': tensor(0., device='cuda:0', grad_fn=<DivBackward0>),
  'loss_mask': tensor(0., device='cuda:0', grad_fn=<MulBackward0>),
  'loss_objectness': tensor(0.6932, device='cuda:0',
         grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
  'loss_rpn_box_reg': tensor(0., device='cuda:0', grad_fn=<DivBackward0>)},
 [{'boxes': tensor([[ 587.8787,  740.5051, 1699.9360, 1174.5237],
           [ 842.5342,  537.4828, 1411.8909,  725.8585],
           [ 881.6542, 1784.5619, 1420.1460, 2539.0000],
           [   0.0000,  195.6173,  270.5373, 1069.7511],
           [ 494.5350,    9.1685, 1033.6853,  775.6381],
           [  35.8862,    0.0000, 3050.0000, 2518.8296],
           [1626.4299,  535.4458, 2202.1589,  737.1761],
           [ 100.1350,  529.7561,  651.8814,  733.9246],
           [   0.0000,   85.5293,  130.5414,  517.6898],
           [1219.4171,  538.9279, 1783.7306,  732.5659]], device='cud

# Toy example

In [15]:
# Random stand-ins for one image and one fixation heatmap, each wrapped in a
# single-element list and moved to the selected device.
toy_shape = (3, 512, 512)
example_image_input = [torch.randn(toy_shape).to(device)]
example_fixations_input = [torch.randn(toy_shape).to(device)]

In [16]:
# Evaluation mode: no targets are passed, so only detections are produced.
model.eval()

# Autograd is not needed for inference. Without no_grad the returned tensors
# carry a grad_fn and keep the computation graph alive on the device.
with torch.no_grad():
    toy_outputs = model(example_image_input, fixations=example_fixations_input)

toy_outputs

({},
 [{'boxes': tensor([[8.1641e+01, 1.5629e-01, 1.7483e+02, 2.1994e+01],
           [1.0299e+02, 2.7746e-01, 2.8554e+02, 4.6092e+01],
           [2.0970e+02, 1.3748e-01, 3.0301e+02, 2.2135e+01],
           [3.2266e-01, 1.3106e-01, 4.6674e+01, 2.2030e+01],
           [2.7398e+02, 1.4030e-01, 3.6709e+02, 2.2213e+01],
           [4.0171e+02, 4.2666e+02, 4.9510e+02, 4.7021e+02],
           [4.0143e+02, 3.6310e+02, 4.9460e+02, 4.0608e+02],
           [2.4051e-01, 1.4798e+00, 4.6261e+01, 1.5444e+02],
           [3.5861e+02, 6.2046e-01, 5.1200e+02, 4.5563e+01],
           [2.3159e+02, 1.9150e+01, 4.1401e+02, 1.1025e+02]], device='cuda:0',
          grad_fn=<StackBackward0>),
   'labels': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'),
   'scores': tensor([0.2850, 0.2849, 0.2843, 0.2842, 0.2841, 0.2833, 0.2821, 0.2807, 0.2795,
           0.2795], device='cuda:0', grad_fn=<IndexBackward0>),
   'masks': tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
             [0.

# Try to run with training function

In [17]:
from models.dynamic_loss import DynamicWeightedLoss
from utils.train import get_optimiser, get_lr_scheduler, print_params_setup
from utils.engine import xami_train_one_epoch, xami_evaluate, get_iou_types
from  datetime import datetime

In [18]:
# Loss terms that are always weighted.
loss_keys = [
    "loss_classifier",
    "loss_box_reg",
    "loss_objectness",
    "loss_rpn_box_reg",
]

# The mask loss is weighted only when the model setup enables masks. Written
# as an explicit if/else because `a + b if cond else a` is easy to misread.
if model_setup.use_mask:
    dynamic_loss_keys = loss_keys + ["loss_mask"]
else:
    dynamic_loss_keys = loss_keys

dynamic_loss_weight = DynamicWeightedLoss(keys=dynamic_loss_keys)
dynamic_loss_weight.to(device)
print_params_setup(model)

# Hand the optimiser every trainable parameter: the model's and, when a
# dynamic loss module is in use, that module's own parameters too.
params = [p for p in model.parameters() if p.requires_grad]
# Compare against None explicitly instead of relying on the module's truthiness.
if dynamic_loss_weight is not None:
    params += [p for p in dynamic_loss_weight.parameters() if p.requires_grad]

iou_types = get_iou_types(model, model_setup)
optimizer = get_optimiser(params, model_setup)
lr_scheduler = get_lr_scheduler(optimizer, model_setup)

# Timestamp taken once setup is complete (not used within this cell).
current_time = datetime.now()

[model]: 4,749,539
[model.backbone]: 1,258,848
[model.rpn]: 41,803
[model.roi_heads]: 2,190,040
[model.roi_heads.box_head]: 204,928
[model.roi_heads.box_head.fc6]: 200,768
[model.roi_heads.box_head.fc7]: 4,160
[model.roi_heads.box_predictor]: 1,300
[model.roi_heads.mask_head]: 1,917,952
Using SGD as optimizer with lr=0.001


In [19]:
# Switch back to training mode; the toy example above left the model in eval mode.
model.train()

# Arguments for a single training epoch with on-the-fly evaluation disabled.
one_epoch_kwargs = dict(
    model=model,
    optimizer=optimizer,
    data_loader=train_dataloader,
    device=device,
    epoch=0,
    print_freq=10,
    iou_types=iou_types,
    coco=None,
    score_thres=None,
    evaluate_on_run=False,
    params_dict=None,
    dynamic_loss_weight=dynamic_loss_weight,
)

train_loger = xami_train_one_epoch(**one_epoch_kwargs)

Epoch: [0]  [   0/2122]  eta: 0:44:20  lr: 0.001000  loss: 2.0230 (2.0230)  loss_classifier: 1.3305 (1.3305)  loss_box_reg: 0.0000 (0.0000)  loss_mask: 0.0000 (0.0000)  loss_objectness: 0.6925 (0.6925)  loss_rpn_box_reg: 0.0000 (0.0000)  time: 1.2536  data: 0.5017  max mem: 1459
Epoch: [0]  [  10/2122]  eta: 0:20:17  lr: 0.001000  loss: 1.9991 (2.2360)  loss_classifier: 1.2492 (1.2381)  loss_box_reg: 0.0000 (0.0069)  loss_mask: 0.0000 (0.2956)  loss_objectness: 0.6921 (0.6921)  loss_rpn_box_reg: 0.0000 (0.0033)  time: 0.5763  data: 0.3409  max mem: 1816
Epoch: [0]  [  20/2122]  eta: 0:18:49  lr: 0.001000  loss: 1.9413 (2.0940)  loss_classifier: 1.0634 (1.0087)  loss_box_reg: 0.0000 (0.0223)  loss_mask: 0.0000 (0.3673)  loss_objectness: 0.6915 (0.6911)  loss_rpn_box_reg: 0.0000 (0.0044)  time: 0.5014  data: 0.3177  max mem: 1816
Epoch: [0]  [  30/2122]  eta: 0:18:20  lr: 0.001000  loss: 1.3928 (1.7779)  loss_classifier: 0.3050 (0.7387)  loss_box_reg: 0.0143 (0.0282)  loss_mask: 0.3319 (