# Preparations

## Data Mounting

In [1]:
# Check, with local module, whether runtime is colaboratory

try:  # local runtime
    import library_check
except ImportError:  # colab runtime
    library_check = None
    from google.colab import drive  # NOQA
    drive.mount('/content/drive')  # NOQA
    colaboratory = True
else:
    colaboratory = False

## Runtime Check

In [2]:
# System Information

import sys
import platform
print(f"OS version: \t\t{platform.platform()}\n"
      f"Python version:\t\t{sys.version}")

OS version: 		macOS-11.5.2-arm64-arm-64bit
Python version:		3.8.10 | packaged by conda-forge | (default, May 11 2021, 06:27:18) 
[Clang 11.1.0 ]


## Library Installation

In [3]:
# Install required libraries

if colaboratory:  # colab runtime
    !pip install pydicom
    # !pip install mxnet-cu101==1.7.0 d2l==0.16.6
    !git clone https://github.com/kdha0727/lung-opacity-and-covid-chest-x-ray-detection/
    %cd lung-opacity-and-covid-chest-x-ray-detection
    import library_check
    library_check.check()
    import data_prep_utils
    root = "/content/drive/Shareddrives/2021 하계 SAT/"
    data_prep_utils.set_root(root)
else:  # local runtime
    library_check.check()
    import data_prep_utils
data_prep_utils.init()

All required libraries are installed.


In [4]:
# After all installation, import all libraries used.

import inspect
import random
import pydicom as dcm
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import seaborn as sns

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torch
import torchvision
import torchsummary

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2


from timm.models.efficientnet import tf_efficientnet_b4
from effdet import get_efficientdet_config, DetBenchTrain
from effdet.efficientdet import HeadNet, BiFpn, _init_weight

from skimage import io, transform

In [5]:
# And, import custom-defined Lazy Data Wrappers and Utilities

from data_prep_utils import covid_19_radiography_dataset
from data_prep_utils import rsna_pneumonia_detection_challenge

from data_prep_utils.dataset import pil_loader, dicom_loader
import train_utils

# Data Analysis and Processing

* Note: All preprocessing processes are modularized as "Data Wrapper" package

**Class Information**
* Normal: 0
* Lung Opacity: 1
* COVID-19: 2
* Viral Pneumonia: 3

In [6]:
print('\n\t'.join(map(str, ['Labels: [', *covid_19_radiography_dataset.class_to_idx.items()])), '\n]')


Labels: [
	('Normal', 0)
	('Lung_Opacity', 1)
	('COVID', 2)
	('Viral Pneumonia', 3) 
]


# Modeling via PyTorch

In [7]:
plt.ion()

<matplotlib.pyplot._IonContext at 0x14f7c3370>

In [8]:
# Dataset Class Source Code

print(inspect.getsource(data_prep_utils.dataset.ImageWithPandas))
print(inspect.getsource(data_prep_utils.dataset.ImageFolder))

class ImageWithPandas(VisionDataset):
    """A generic data loader where the image path and label is given as pandas DataFrame.

    Args:
        dataframe (pandas.DataFrame): A data table that contains image path, target class,
            and extra outputs.
        label_id (string): Data frame`s image path label string.
        label_target (string): Data frame`s target class label string.
        label_extras (tuple[string] or string, optional): Data frame`s label that will
            be used for extra outputs.
        root (string, optional): Root directory path. Use unless data frame`s column
            contains file folders.
        extension (string, optional): An extension that will be concatenated after
            image file name. Use unless data frame`s column contains extension.
        class_to_idx (dict[str, int], optional): A mapping table that converts class
            label string into integer value. If not given, sorted index value will
            be used as cla

In [9]:
data_transform = torchvision.transforms.Compose([
    # torchvision.transforms.RandomResizedCrop(26),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.45],std=[0.225])
])

In [10]:

def get_train_transforms():
    return A.Compose(
        [
            # A.RandomSizedCrop(min_max_height=(1024, 1024), height=1024, width=1024, p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit= 0.2,
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2, p=0.9),
            ],p=0.9),
            # A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            # A.Resize(height=512, width=512, p=1),
            # A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
            label_fields=['labels']
        )
    )

def get_valid_transforms():
    return A.Compose(
        [
            A.Resize(height=512, width=512, p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
            label_fields=['labels']
        )
    )

In [11]:
classification_dataset_1 = rsna_pneumonia_detection_challenge.torch_classification_dataset(data_transform)

classification_dataset_2 = covid_19_radiography_dataset.torch_classification_dataset(data_transform)

detection_dataset = rsna_pneumonia_detection_challenge.torch_detection_dataset(get_train_transforms())

In [12]:
# Make data loader from dataset

batch_size = 64

train_loader_cls_1 = torch.utils.data.DataLoader(classification_dataset_1,
                                              batch_size=batch_size, shuffle=True,
                                              num_workers=2)
train_loader_cls_2 = torch.utils.data.DataLoader(classification_dataset_2,
                                              batch_size=batch_size, shuffle=True,
                                              num_workers=2)

train_loader_det_1 = torch.utils.data.DataLoader(
        detection_dataset,
        batch_size=batch_size,
        shuffle=True,
        # pin_memory=False,
        num_workers=2
)

train_loaders = [train_loader_cls_1, train_loader_cls_2, train_loader_det_1]

## Design Model Architecture
* Base Model: EfficientNet

In [13]:
# from efficientnet_pytorch import EfficientNet
#
# # Shared Feature Extractor
# feature_extractor_depth = 10
# feature_extractor = EfficientNet.from_pretrained(f'efficientnet-b{feature_extractor_depth}', include_top=False)
# feature_extractor.out_channels = feature_extractor._bn1.num_features
#

In [14]:
# class Classifier(nn.Module):
#
#     def __init__(self, backbone, num_classes, out_channels=None, dropout_rate=0.2):
#         super().__init__()
#         out_channels = out_channels or backbone.out_channels
#         self.feature_extractor = backbone
#         self.dropout = nn.Dropout(dropout_rate)
#         self.fc = nn.Linear(out_channels, num_classes)
#
#     def forward(self, x):
#         x = self.feature_extractor(x)
#         x = self.dropout(x)
#         x = self.fc(x)
#         return x
#

In [15]:
detection_config = get_efficientdet_config('tf_efficientdet_d4')
detection_config.update(num_classes=4)

feature_backbone = tf_efficientnet_b4(
    pretrained=False,  # FIXME
    features_only=True,
    out_indices=(2, 3, 4),
    in_chans = 1,
    **detection_config.backbone_args
)
print(feature_backbone.feature_info)

<timm.models.features.FeatureInfo object at 0x157895040>


In [16]:

class EfficientDet(nn.Module):

    def __init__(self, config, backbone):
        super(EfficientDet, self).__init__()
        self.config = config
        self.backbone = backbone
        self.fpn = BiFpn(self.config, backbone.feature_info.get_dicts(keys=['num_chs', 'reduction']))
        self.class_net = HeadNet(self.config, num_outputs=config.num_classes)  # num_classes
        self.box_net = HeadNet(self.config, num_outputs=4)

        for n, m in self.named_modules():
            if 'backbone' not in n:
                _init_weight(m, n)

    @torch.jit.ignore()
    def toggle_head_bn_level_first(self):
        """ Toggle the head batchnorm layers between being access with feature_level first vs repeat
        """
        self.class_net.toggle_bn_level_first()
        self.box_net.toggle_bn_level_first()

    def forward(self, x):
        x = self.backbone(x)
        x = self.fpn(x)
        x_class = self.class_net(x)
        x_box = self.box_net(x)
        return x_class, x_box

    @torch.no_grad()
    def detect(self, x):
        return self.forward(x)[1]

    @torch.no_grad()
    def classify(self, x):
        return self.forward(x)[0]


net = EfficientDet(detection_config, feature_backbone)

In [17]:
plt.ioff()
plt.show()

In [18]:
from train_utils import Trainer, MultipleOptimizerHandler


class AdvancedFitter(Trainer):

    def __init__(
            self,
            model,
            config,
            opt_c,
            opt_d,
            epoch: int,
            train_iter = None,
            val_iter = None,
            test_iter = None,
            snapshot_dir = None,
            verbose: bool = True,
            timer: bool = False,
            log_interval = 20,
    ) -> None:

        self.train_iter = train_iter
        self.val_iter = val_iter
        self.test_iter = test_iter

        self.model = model
        self.config = config
        self.criterion = DetBenchTrain(model, config)
        self.optimizer = MultipleOptimizerHandler({'c': opt_c, 'd': opt_d})
        self.total_epoch: int = epoch
        self.snapshot_dir: pathlib.Path = pathlib.Path(snapshot_dir).resolve()
        self.verbose: bool = verbose
        self.use_timer: bool = timer
        self.log_interval: int = log_interval
        self.save_and_load: bool = bool(snapshot_dir is not None and val_iter is not None)

        # FIXME
        # self.train_batch_size: int = train_iter.batch_size
        # self.train_loader_length: int = len(train_iter)
        # self.train_dataset_length: int = len(getattr(train_iter, 'dataset', train_iter))

        super().__init__()

        # Do not set attribute of instance.
        print("Advanced Fitter Initialized.")

    def _train(self):

        self._require_context()

        self.model.train()

        verbose = self.verbose
        log_interval = self.log_interval

        total_loss = total_accuracy = 0.
        total_batch = 0
        det_loss = 0.
        det_batch = 0

        datasets = self.train_iter

        for data in datasets:

            whole = len(data)
            for iteration, (images, targets) in enumerate(data, 1):

                if isinstance(targets, dict):
                    l = self._train_detection(images, targets)
                    det_loss += l; det_batch += 1
                    if iteration % log_interval == 0 and verbose:
                        self._log_train_doing(l, iteration, whole)

                else:
                    l, a = self._train_classification(images, targets)
                    total_loss += l; total_accuracy += a; total_batch += 1
                    if iteration % log_interval == 0 and verbose:
                        self._log_train_doing(l, iteration, whole)

        avg_loss = total_loss / total_batch
        avg_accuracy = total_accuracy / total_batch

        self._log_train_done(avg_loss, avg_accuracy)

        det_avg_loss = det_loss / det_batch

        self._log_train_done(det_avg_loss)

        return avg_loss, avg_accuracy

    def _train_classification(self, images, targets):

        images = self._to_apply_tensor(images).float()
        prediction = self.model(images)[0]
        loss = F.binary_cross_entropy_with_logits(prediction, targets)

        with torch.no_grad():
            l = loss.item()
            a = torch.eq(torch.argmax(prediction, 1), targets).float().mean().item()

        loss.backward()

        optimizer = self.optimizer['c']
        optimizer.step()
        optimizer.zero_grad()

        return l, a

    def _train_detection(self, images, targets):

        images = self._to_apply_tensor(images).float()
        boxes = [self._to_apply_tensor(target['boxes']).float() for target in targets]
        labels = [self._to_apply_tensor(target['labels']).float() for target in targets]

        loss, _, _ = self.criterion(images, boxes, labels)

        loss.backward()

        optimizer = self.optimizer['d']
        optimizer.step()
        optimizer.zero_grad()

        return loss.item()

    @torch.no_grad()
    def _evaluate(self, *, test=False):

        self.model.eval()

        datasets = self.test_iter if test else self.val_iter

        total_loss = total_accuracy = 0.
        total_batch = 0
        det_loss = 0.
        det_batch = 0

        for data in datasets:

            for images, targets in data:

                if isinstance(targets, dict):
                    l = self._eval_detection(images, targets)
                    det_loss += l; det_batch += 1

                else:
                    l, a = self._eval_classification(images, targets)
                    total_loss += l; total_accuracy += a; total_batch += 1

        avg_loss = total_loss / total_batch
        avg_accuracy = total_accuracy / total_batch

        self._log_eval(avg_loss, avg_accuracy, test=test)

        det_avg_loss = det_loss / det_batch

        self._log_eval(det_avg_loss, test=test)

        return avg_loss, avg_accuracy

    def _eval_classification(self, images, targets):

        images = self._to_apply_tensor(images).float()
        prediction = self.model(images)[0]
        loss = F.binary_cross_entropy_with_logits(prediction, targets)

        l = loss.item()
        a = torch.eq(torch.argmax(prediction, 1), targets).float().mean().item()

        return l, a

    def _eval_detection(self, images, targets):

        images = self._to_apply_tensor(images).float()
        boxes = [self._to_apply_tensor(target['boxes']).float() for target in targets]
        labels = [self._to_apply_tensor(target['labels']).float() for target in targets]

        loss, _, _ = self.criterion(images, boxes, labels)

        return loss.item()

In [19]:
num_epochs = 3

lr = 0.0002

optimizer = torch.optim.AdamW(net.parameters(), lr=lr)

In [None]:
fitter = AdvancedFitter(
    net, detection_config, optimizer, optimizer,
    num_epochs,
    train_iter = train_loaders,
    val_iter = train_loaders,
    snapshot_dir = 'snapshots',
    verbose=True,
    timer=True,
)
fitter.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
with fitter:
    fitter.run()


Advanced Fitter Initialized.

<Start Learning> 				Total 3 epochs

Epoch 1
