# Preparations

## Data Mounting

In [1]:
# Detect the runtime: the `library_check` module can only be imported when the
# notebook runs locally; an ImportError is treated as "running on Colab".

try:
    import library_check
    colaboratory = False  # local runtime
except ImportError:
    # Colab runtime: keep a placeholder for the module and mount Google Drive.
    library_check = None
    from google.colab import drive  # NOQA
    drive.mount('/content/drive')  # NOQA
    colaboratory = True

## Runtime Check

In [2]:
# Report the host platform and the interpreter version

import platform
import sys

print(
    f"OS version: \t\t{platform.platform()}",
    f"Python version:\t\t{sys.version}",
    sep="\n",
)

OS version: 		macOS-11.5.2-arm64-arm-64bit
Python version:		3.8.10 | packaged by conda-forge | (default, May 11 2021, 06:27:18) 
[Clang 11.1.0 ]


## Library Installation

In [3]:
# Install required libraries

# NOTE(review): on Colab this cell clones the repository and changes the
# working directory, so running it a second time in the same session will
# presumably not behave like the first run — confirm before re-executing.
if colaboratory:  # colab runtime
    # !pip install pydicom albumentations==0.4.6 efficientnet_pytorch effdet
    # !pip install mxnet-cu101==1.7.0 d2l==0.16.6
    !git clone https://github.com/kdha0727/lung-opacity-and-covid-chest-x-ray-detection/
    %cd lung-opacity-and-covid-chest-x-ray-detection
    !pip install -r requirements.txt
    # `library_check` was set to None by the runtime check; it is imported
    # for real here, presumably from the repository that was just cloned.
    import library_check
    library_check.check()
    import data_prep_utils
    # Data root on the mounted Google Drive (shared drive folder).
    root = "/content/drive/Shareddrives/2021 하계 SAT/"
    data_prep_utils.set_root(root)
else:  # local runtime
    # The module was already imported by the runtime check.
    library_check.check()
    import data_prep_utils
# Runs on both runtimes, after the data utilities have been imported.
data_prep_utils.init()

All required libraries are installed.


In [4]:
# After all installation, import all libraries used.

import inspect
import random
import pydicom as dcm
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import seaborn as sns
import os

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torch
import torchvision
import torchsummary

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from timm.models.efficientnet import tf_efficientnet_b4
from effdet import get_efficientdet_config, DetBenchTrain
from effdet.efficientdet import HeadNet, BiFpn, _init_weight

from skimage import io, transform

In [5]:
# And, import custom-defined Lazy Data Wrappers and Utilities

from data_prep_utils import covid_19_radiography_dataset
from data_prep_utils import rsna_pneumonia_detection_challenge

from data_prep_utils.dataset import pil_loader, dicom_loader
import train_utils

# Data Analysis and Processing

* Note: All preprocessing steps are modularized in the "Data Wrapper" package (`data_prep_utils`).

**Class Information**
* Normal: 0
* Lung Opacity: 1
* COVID-19: 2
* Viral Pneumonia: 3

In [6]:
# Show the class-name -> index pairs, one per line, between "Labels: [" and "]"
print(
    '\n\t'.join(
        ['Labels: [']
        + [str(pair) for pair in covid_19_radiography_dataset.class_to_idx.items()]
    ),
    '\n]',
)


Labels: [
	('Normal', 0)
	('Lung_Opacity', 1)
	('COVID', 2)
	('Viral Pneumonia', 3) 
]


# Modeling via PyTorch


In [7]:
# Print the source code of the two dataset classes, one after the other

print(
    *map(
        inspect.getsource,
        (
            data_prep_utils.dataset.ImageWithPandas,
            data_prep_utils.dataset.ImageFolder,
        ),
    ),
    sep='\n',
)

class ImageWithPandas(VisionDataset):
    """A generic data loader where the image path and label is given as pandas DataFrame.

    Args:
        dataframe (pandas.DataFrame): A data table that contains image path, target class,
            and extra outputs.
        label_id (string): Data frame`s image path label string.
        label_target (string): Data frame`s target class label string.
        label_extras (tuple[string] or string, optional): Data frame`s label that will
            be used for extra outputs.
        root (string, optional): Root directory path. Use unless data frame`s column
            contains file folders.
        extension (string, optional): An extension that will be concatenated after
            image file name. Use unless data frame`s column contains extension.
        class_to_idx (dict[str, int], optional): A mapping table that converts class
            label string into integer value. If not given, sorted index value will
            be used as cla

In [8]:
# Classification preprocessing: tensor conversion, resize, random horizontal flip.
data_transform = torchvision.transforms.Compose(
    transforms=[
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Resize(size=256),
        torchvision.transforms.RandomHorizontalFlip(),
        # Normalization is currently switched off:
        # torchvision.transforms.Normalize(mean=[0.45],std=[0.225])
    ],
)

In [9]:

def get_train_transforms():
    """Build the albumentations pipeline used for training samples with boxes.

    The pipeline randomly adjusts brightness/contrast (p=0.9), randomly flips
    horizontally (p=0.5), resizes to 256x256 and converts to a tensor.
    Bounding boxes are declared in ``pascal_voc`` format and their labels are
    read from the ``labels`` field.

    Returns:
        The configured ``A.Compose`` object.
    """
    # Disabled experiments: RandomSizedCrop, ToGray, VerticalFlip, Cutout.
    augmentations = [
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        A.HorizontalFlip(p=0.5),
        A.Resize(height=256, width=256, p=1),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(augmentations, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the albumentations pipeline without random augmentation.

    Samples are only resized to 256x256 and converted to a tensor. Bounding
    boxes are declared in ``pascal_voc`` format and their labels are read from
    the ``labels`` field.

    Returns:
        The configured ``A.Compose`` object.
    """
    steps = [
        A.Resize(height=256, width=256, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

In [10]:
# Both classification datasets go through the torchvision `data_transform`.
classification_dataset_1 = (
    rsna_pneumonia_detection_challenge
    .torch_classification_dataset(data_transform)
)
classification_dataset_2 = (
    covid_19_radiography_dataset
    .torch_classification_dataset(data_transform)
)

# The detection dataset goes through the albumentations training pipeline.
detection_dataset = (
    rsna_pneumonia_detection_challenge
    .torch_detection_dataset(get_train_transforms())
)


In [11]:
# Build the data loaders from the datasets

from data_prep_utils.samplers import ImbalancedDatasetSampler

batch_size = 1

# Classification loaders draw through ImbalancedDatasetSampler rather than shuffle=True.
train_loader_cls_1 = torch.utils.data.DataLoader(
    classification_dataset_1,
    batch_size=batch_size,
    sampler=ImbalancedDatasetSampler(classification_dataset_1),
    num_workers=2,
)
train_loader_cls_2 = torch.utils.data.DataLoader(
    classification_dataset_2,
    batch_size=batch_size,
    sampler=ImbalancedDatasetSampler(classification_dataset_2),
    num_workers=2,
)

# Detection loader wrapped around the dataset object built earlier.
train_loader_det_1 = torch.utils.data.DataLoader(
    detection_dataset, batch_size=2, shuffle=True, num_workers=2,
)

# Loader options handed to the dataset helper itself
# (presumably it then returns a ready-made loader — TODO confirm).
train_loader_det_1_prime = rsna_pneumonia_detection_challenge.torch_detection_dataset(
    get_train_transforms(), batch_size=4, shuffle=True, num_workers=2,
)

# Loaders used for training; train_loader_cls_2 is deliberately left out for now.
train_loaders = [train_loader_cls_1, train_loader_det_1_prime]

## Design Model Architecture
* Base Model: EfficientNet

In [12]:
from models.efficientnet import get_efficientnet_backbone

# FIXME: set `pretrained` to True; for now it simply follows the Colab flag.
efficientnet_backbone = get_efficientnet_backbone(
    depth=4,
    in_channels=1,
    image_size=None,
    pretrained=colaboratory,
)

In [13]:
class Classifier(nn.Module):
    """Classification head stacked on a feature-extracting backbone.

    The input is passed through the backbone, a dropout layer, a flatten layer
    and a final linear layer that produces ``num_classes`` outputs.

    Args:
        backbone: Feature extractor module. Its ``out_channels`` attribute is
            read when ``out_channels`` is not supplied.
        num_classes: Number of outputs of the linear layer.
        out_channels: Input width of the linear layer; a falsy value falls
            back to ``backbone.out_channels``.
        dropout_rate: Probability handed to ``nn.Dropout``.
    """

    def __init__(self, backbone, num_classes, out_channels=None, dropout_rate=0.2):
        super().__init__()
        in_features = out_channels or backbone.out_channels
        # Sub-modules are registered in the order in which forward() applies them.
        self.feature_extractor = backbone
        self.dropout = nn.Dropout(p=dropout_rate)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(in_features=in_features, out_features=num_classes)

    def forward(self, x):
        """Apply backbone, dropout, flatten and the linear layer in turn."""
        for stage in (self.feature_extractor, self.dropout, self.flatten, self.fc):
            x = stage(x)
        return x


def get_classifier(backbone, num_classes):
    """Return a ``Classifier`` on ``backbone`` with ``num_classes`` outputs.

    The remaining ``Classifier`` arguments keep their default values.
    """
    classifier = Classifier(backbone, num_classes)
    return classifier


In [14]:
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models.detection.transform import GeneralizedRCNNTransform


def get_detector(backbone, num_classes=2, max_size=800, min_size=1333, **kwargs):
    """Build a Faster R-CNN detector on top of ``backbone``.

    A single-level anchor generator (sizes 32/64/128/256, aspect ratios
    0.5/1.0/2.0) is used, and the detector's input transform is replaced by one
    that normalizes with a one-element mean/std.

    Args:
        backbone: Feature extractor passed to ``FasterRCNN``.
        num_classes: Number of classes passed to ``FasterRCNN``.
        max_size: Upper bound for the transform's resize.
        min_size: Lower bound for the transform's resize.
        **kwargs: Forwarded unchanged to ``FasterRCNN``.

    Returns:
        The configured ``FasterRCNN`` instance.

    Note:
        The default values are inverted (``min_size`` is larger than
        ``max_size``). To keep the signature stable, the two bounds are
        swapped whenever both are plain numbers and out of order, so the
        transform always receives the smaller value as its minimum.
    """
    bounds_are_numbers = isinstance(min_size, (int, float)) and isinstance(max_size, (int, float))
    if bounds_are_numbers and min_size > max_size:
        # Guard against inverted bounds (this includes the function's own defaults).
        min_size, max_size = max_size, min_size

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    detector = FasterRCNN(backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs)
    # Replace the transform created by the constructor with one that uses a
    # one-element mean/std.
    detector.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean=[0.485], image_std=[0.229])
    return detector


In [15]:
# The classifier and the detector are built on the same backbone instance.
model_c = get_classifier(backbone=efficientnet_backbone, num_classes=4)
model_d = get_detector(backbone=efficientnet_backbone)


In [16]:
num_epochs = 3

# One AdamW optimizer per model, each with its own learning rate.
opt_c = torch.optim.AdamW(params=model_c.parameters(), lr=5e-5)
opt_d = torch.optim.AdamW(params=model_d.parameters(), lr=1e-4)


In [17]:
from torch.utils.data import ConcatDataset

# Chain both classification datasets into a single dataset.
dset = ConcatDataset((classification_dataset_1, classification_dataset_2))


In [18]:
from train_utils import AdvancedFitter

# Hand both models, both optimizers, the epoch count and the loaders to the
# project's AdvancedFitter.
fitter = AdvancedFitter(
    model_c, model_d,
    opt_c, opt_d,
    num_epochs,
    train_iter = train_loaders,
    # NOTE(review): validation reuses the training loaders — confirm this is intended.
    val_iter = train_loaders,
    snapshot_dir = 'snapshots',
    verbose=True,
    timer=True,
    log_interval=1,
)
# Use CUDA when it is available, otherwise stay on the CPU.
fitter.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
# NOTE(review): the fitter is used as a context manager around run(); what it
# sets up and tears down is defined in train_utils and not visible here.
with fitter:
    fitter.run()


Advanced Fitter Initialized.

<Start Learning> 				Total 1 epochs

Epoch 1
12
52
11
68
81
[Train]	 Progress: 1/100 (01.00%), 	Loss: 1.285691 88
[Train]	 Progress: 2/100 (02.00%), 	Loss: 1.531008 54
[Train]	 Progress: 3/100 (03.00%), 	Loss: 1.437010 24
[Train]	 Progress: 4/100 (04.00%), 	Loss: 1.264676 40
[Train]	 Progress: 5/100 (05.00%), 	Loss: 1.300494 5
[Train]	 Progress: 6/100 (06.00%), 	Loss: 1.169945 84
[Train]	 Progress: 7/100 (07.00%), 	Loss: 1.139261 26
[Train]	 Progress: 8/100 (08.00%), 	Loss: 1.089011 23
[Train]	 Progress: 9/100 (09.00%), 	Loss: 1.114672 44
[Train]	 Progress: 10/100 (10.00%), 	Loss: 1.100678 34
[Train]	 Progress: 11/100 (11.00%), 	Loss: 1.144141 49
[Train]	 Progress: 12/100 (12.00%), 	Loss: 0.911792 70
[Train]	 Progress: 13/100 (13.00%), 	Loss: 0.764387 24
[Train]	 Progress: 14/100 (14.00%), 	Loss: 1.295549 33
[Train]	 Progress: 15/100 (15.00%), 	Loss: 0.832546 67
[Train]	 Progress: 16/100 (16.00%), 	Loss: 1.193624 46
[Train]	 Progress: 17/100 (17.00%), 	Los

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x11ff71160>
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1324, in __del__
    self._shutdown_workers()
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1297, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/python-dl/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/opt


<Stop Learning> 	Least loss: inf	Duration: 23.65s


KeyboardInterrupt: 

In [None]:
def get_next(loader=None, max_attempts=10):
    """Fetch one batch from a loader, retrying when a ``RuntimeError`` occurs.

    Each attempt builds a fresh iterator over ``loader`` and pulls its first
    item. The number of attempts is bounded, so a persistent failure surfaces
    as a ``RuntimeError`` instead of recursing until the interpreter's
    recursion limit is hit.

    Args:
        loader: Iterable to draw from. Defaults to ``train_loaders[0]``.
        max_attempts: Maximum number of attempts; must be at least 1.

    Returns:
        The first item produced by the loader.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If every attempt raised ``RuntimeError``; the last
            failure is attached as the cause.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")
    if loader is None:
        loader = train_loaders[0]

    last_error = None
    for _ in range(max_attempts):
        try:
            return next(iter(loader))
        except RuntimeError as error:  # keep drawing until no error occurs
            last_error = error
    raise RuntimeError(
        f"could not fetch a batch after {max_attempts} attempts"
    ) from last_error

    
# Fetch one batch and show the shape of every entry in it.
nxt = get_next()
print(dict((key, value.shape) for key, value in nxt.items()))

In [None]:
# Draw one batch from the first training loader and display it as the cell output.
next(iter(train_loaders[0]))