## Step - 4 - Inference

<span style="color: #006bff; font-family: Segoe UI; font-size: 1.9em; font-weight: 300;">🏷️ Environment Setup</span>


In [None]:
# pydicom is used to read and resize the .dcm images for inference.
# The packages below are installed one at a time from local .tar.bz2 archives in the
# `pydicom-conda-helper` input dataset; `-y` answers the confirmation prompt automatically.
# NOTE(review): the archive names pin py37 builds and exact versions — confirm they still
# match the kernel image this notebook runs on.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
# Python dependencies for the classifier cells below. None of the installs is version-pinned.
# (fastai and wandb upgrades are intentionally left disabled.)
# !pip install -U fastai --upgrade
!pip install -q git+https://github.com/rwightman/pytorch-image-models.git
!pip install -q iterative-stratification
# !pip install -q wandb --upgrade
!pip install -q nbdev

# NOTE(review): timm is requested twice — from the pytorch-image-models GitHub repository above
# and by package name here. Confirm which build is intended and drop the other.
!pip install timm

In [None]:
import os


def detect_device(env=None):
    """Return 'TPU', 'GPU' or 'CPU' based on accelerator environment variables.

    Args:
        env: Mapping of environment variables to inspect. Defaults to ``os.environ``.

    Returns:
        'TPU' when ``TPU_NAME`` and ``XRT_TPU_CONFIG`` are both set;
        'GPU' when ``TPU_NAME`` is absent and ``CUDA_VERSION`` is set;
        'CPU' otherwise.
    """
    env = os.environ if env is None else env
    if 'TPU_NAME' in env:
        # Use .get(): indexing a missing XRT_TPU_CONFIG would raise KeyError instead of
        # falling back to CPU.
        return 'TPU' if env.get('XRT_TPU_CONFIG') is not None else 'CPU'
    if env.get('CUDA_VERSION') is not None:
        return 'GPU'
    return 'CPU'


device = detect_device()
print(device)

In [None]:
import pandas as pd
import timm
from timm import *

from fastai.vision.all import *
from fastai.vision.learner import _update_first_layer
from fastai.callback.wandb import *
from nbdev.showdoc import show_doc

# Run Predictions on Test Image Subset

Outline of Model Inference:  
* Study Prediction - 4 Class (EffDet)  
* Study Prediction - 2 Class (EffDet)  
  - TODO: it is not yet clear why the 2-class evaluation is needed
* Image Object Detection - YOLOv5

In [None]:
#Predictions on Test
# Inference adapted from: https://www.kaggle.com/bnapora/siim-cov19-efnb7-yolov5-infer

# os and pandas are already imported in earlier cells.
import numpy as np
import pydicom
from PIL import Image
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm.auto import tqdm

### Prepare Test Images

In [None]:
# Load the sample submission; its row count decides which path the rest of the notebook takes.
df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Exactly 2477 rows switches on the fast path, which works on a tiny hand-written frame
# (two study ids and two image ids) instead of the full submission file.
fast_sub = len(df) == 2477
if fast_sub:
    fast_df = pd.DataFrame({
        'id': ['00086460a852_study',
               '000c9c05fd14_study',
               '65761e66de9f_image',
               '51759b5579bc_image'],
        'PredictionString': ['negative 1 0 0 1 1',
                             'negative 1 0 0 1 1',
                             'none 1 0 0 1 1',
                             'none 1 0 0 1 1'],
    })
    print(fast_df.head())


In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixel data as an 8-bit array scaled to 0-255.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Location of the DICOM file.
        voi_lut: When True, transform the raw pixels with ``apply_voi_lut`` (the VOI LUT,
            if provided by the DICOM device, gives a "human-friendly" view).
        fix_monochrome: When True, invert images whose ``PhotometricInterpretation`` is
            "MONOCHROME1" so they do not look inverted.

    Returns:
        ``np.uint8`` array, min-max scaled to the 0-255 range. An image whose pixels are
        all equal is returned as all zeros.
    """
    # `dcmread` is pydicom's supported reader; `read_file` is its deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would yield NaNs and an undefined uint8 cast.
        return np.zeros(data.shape, dtype=np.uint8)

    data = data / peak
    return (data * 255).astype(np.uint8)

def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image that fits a `size` x `size` box.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    With `keep_ratio` the image is shrunk in place via `thumbnail`; otherwise it is
    resized to exactly (`size`, `size`). `resample` is forwarded to PIL in both cases.
    """
    target = (size, size)
    picture = Image.fromarray(array)

    if not keep_ratio:
        return picture.resize(target, resample)

    # thumbnail() modifies the image in place and returns None.
    picture.thumbnail(target, resample)
    return picture

In [None]:
# Edge length (pixels) passed to `resize` when the study images are converted.
image_resize = 512
# Competition sub-folder that is walked for DICOM files; also part of the output path.
split = 'test'
# Root output folder. The trailing slash matters: later cells append 'study/' and 'image/'
# to this string directly.
save_dir = f'/kaggle/working/test-images-{image_resize}/{split}/'
os.makedirs(save_dir, exist_ok=True)

In [None]:
# Convert DICOMs to '<study id>_study.png' files inside `save_dir_study`, which is where the
# study prediction cell builds its image paths from.
save_dir_study = save_dir + 'study/'
os.makedirs(save_dir_study, exist_ok=True)

if fast_sub:
    # Fast path: one fixed training DICOM per study id.
    fast_study_dicoms = {
        '00086460a852': '/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm',
        '000c9c05fd14': '/kaggle/input/siim-covid19-detection/train/000c9c05fd14/e555410bd2cd/51759b5579bc.dcm',
    }
    for study_id, dicom_path in fast_study_dicoms.items():
        xray = read_xray(dicom_path)
        im = resize(xray, size=image_resize)
        study = study_id + '_study.png'
        im.save(os.path.join(save_dir_study, study))
else:
    for dirname, _, filenames in tqdm(os.walk(f'/kaggle/input/siim-covid19-detection/{split}')):
        for file in filenames:
            if not file.endswith('.dcm'):
                continue  # only DICOM files can be read by read_xray
            # set keep_ratio=True to have original aspect ratio
            xray = read_xray(os.path.join(dirname, file))
            im = resize(xray, size=image_resize)
            # Layout is <split>/<study>/<series>/<image>.dcm, so the study id is two levels up.
            study = dirname.split('/')[-2] + '_study.png'
            # Save into the study folder (not `save_dir`), matching the paths read later.
            # A study with several images keeps only the last one written.
            im.save(os.path.join(save_dir_study, study))


In [None]:
# Per-image metadata collected while converting; combined into `meta` at the end of the cell.
image_id = []
dim0 = []
dim1 = []
splits = []
save_dir_image = save_dir + 'image/'
os.makedirs(save_dir_image, exist_ok=True)


def save_image_png(dicom_path):
    """Convert one DICOM file to '<image id>_image.png' and record its original size.

    The PNG is written to `save_dir_image`; the image id, the two dimensions of the
    array returned by `read_xray`, and the current `split` are appended to the
    module-level lists `image_id`, `dim0`, `dim1` and `splits`.
    """
    stem = os.path.basename(dicom_path).replace('.dcm', '')
    xray = read_xray(dicom_path)
    # Use the shared `image_resize` setting so image and study PNGs stay the same size.
    im = resize(xray, size=image_resize)
    im.save(os.path.join(save_dir_image, stem + '_image.png'))
    image_id.append(stem)
    dim0.append(xray.shape[0])
    dim1.append(xray.shape[1])
    splits.append(split)


if fast_sub:
    # Fast path: the two fixed training DICOMs that match the ids in `fast_df`.
    for dicom_path in (
        '/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm',
        '/kaggle/input/siim-covid19-detection/train/000c9c05fd14/e555410bd2cd/51759b5579bc.dcm',
    ):
        save_image_png(dicom_path)
else:
    for dirname, _, filenames in tqdm(os.walk(f'/kaggle/input/siim-covid19-detection/{split}')):
        for file in filenames:
            if file.endswith('.dcm'):  # only DICOM files can be read by read_xray
                save_image_png(os.path.join(dirname, file))

meta = pd.DataFrame.from_dict({'image_id': image_id, 'dim0': dim0, 'dim1': dim1, 'split': splits})

meta.head()

## Study - Predictions

In [None]:
import numpy as np 
import pandas as pd

# Work on the hand-written frame in fast mode, otherwise on the real sample submission.
df = fast_df.copy() if fast_sub else pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Keep the last character of every id; ids ending in 'y' are the ones counted as studies.
id_laststr_list = [row_id[-1] for row_id in df['id']]
df['id_last_str'] = id_laststr_list

study_len = int((df['id_last_str'] == 'y').sum())
print('Count of Studies: ', study_len)

In [None]:
if fast_sub:
    sub_df = fast_df.copy()
else:
    sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# The first `study_len` rows are the study rows. Copy the slice so the column assignments
# below write to an independent frame instead of a view (avoids SettingWithCopyWarning).
sub_df = sub_df[:study_len].copy()
test_paths = save_dir_study + sub_df['id'] +'.png'

# One placeholder column per study class; filled with predictions later.
sub_df['negative'] = 0
sub_df['typical'] = 0
sub_df['indeterminate'] = 0
sub_df['atypical'] = 0

# Capture the class columns BEFORE adding `image_path`, so that `label_cols` holds exactly
# the four prediction columns and not the helper path column.
label_cols = sub_df.columns[2:]

sub_df['image_path'] = test_paths

sub_df.head()

In [None]:
class Config:
    """Settings read by the cells that build the DataLoaders and the study learner."""

    # --- run identification ---
    seed_val = 111
#     seed_everything(seed_val)
    fold_num = 0
    job = 1
    kfold = 5

    # --- model / data ---
    model_arch = "efficientnetv2_rw_s"
    num_classes = 4
    input_dims = 512
    batch_size = 16
    num_workers = 0

    # --- objective and metrics ---
    loss_func = CrossEntropyLossFlat() # or LabelSmoothingCrossEntropyFlat()
    metrics = [error_rate, accuracy, RocAuc(average='macro'), F1Score(average='macro')]

    # --- naming (printed once, when the class body is executed) ---
    job_name = f'{model_arch}_fold{fold_num}_job{job}'
    print("Job Name:", job_name)
    wandb_project = 'SIIM_classifier_public'
    wandb_run_name = job_name

    # Mixed precision is requested only when the detected device is a GPU.
    fp16 = device=='GPU'

cfg = Config()

In [None]:
# Adapted from https://walkwithfastai.com/vision.external.timm

from fastai.vision.learner import _add_norm

def create_timm_body(arch:str, pretrained=True, cut=None, n_in=3):
    """Creates a body from any model in the `timm` library.

    The model is created without a classifier (`num_classes=0`, `global_pool=''`) and its
    first layer is adapted to `n_in` input channels.

    `cut` selects where the body ends:
    - None: cut at the last child for which `has_pool_type` is true;
    - int: keep the children before that index;
    - callable: return `cut(model)`.

    Raises `NameError` when `cut` is none of the above.
    """
    model = create_model(arch, pretrained=pretrained, num_classes=0, global_pool='')
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        ll = list(enumerate(model.children()))
        cut = next(i for i,o in reversed(ll) if has_pool_type(o))
    if isinstance(cut, int): return nn.Sequential(*list(model.children())[:cut])
    elif callable(cut): return cut(model)
    # `NamedError` does not exist; raise the built-in so the intended message is reported.
    else: raise NameError("cut must be either integer or function")

def create_timm_model(arch:str, n_out, cut=None, pretrained=True, n_in=3,
                      init=nn.init.kaiming_normal_, custom_head=None,
                      concat_pool=True, **kwargs):
    """Create custom architecture using `arch`, `n_in` and `n_out` from the `timm` library

    Returns `nn.Sequential(body, head)`. When `custom_head` is None a head is built with
    `create_head` (receiving `concat_pool` and `**kwargs`); otherwise `custom_head` is
    used as given. `init`, when not None, is applied to the head only.
    """
    # Note: `cut` is accepted but not forwarded; the body is always built with cut=None.
    body = create_timm_body(arch, pretrained, None, n_in)

    head = custom_head
    if head is None:
        n_features = num_features_model(nn.Sequential(*body.children()))
        head = create_head(n_features, n_out, concat_pool=concat_pool, **kwargs)

    model = nn.Sequential(body, head)
    if init is not None:
        apply_init(model[1], init)
    return model

def timm_learner(dls, arch:str, loss_func=None, pretrained=True, cut=None, splitter=None,
                y_range=None, config=None, n_out=None, normalize=True, fp16=False, **kwargs):
    """Build a convnet style learner from `dls` and `arch` using the `timm` library

    Args:
        dls: DataLoaders given to the `Learner`; also used to infer `n_out` when it is None.
        arch: Name of the `timm` architecture.
        loss_func: Loss function given to the `Learner`.
        pretrained: Forwarded to `create_timm_model`; when true the learner is frozen.
        cut: Forwarded to `create_timm_model`.
        splitter: Parameter-group splitter; `default_split` is used when None.
        y_range: Forwarded to `create_timm_model`; taken from `config['y_range']` when
            not given explicitly.
        config: Extra keyword arguments for `create_timm_model`. Not modified.
        n_out: Number of outputs; inferred with `get_c(dls)` when None.
        normalize: Accepted for signature compatibility; not used by this function.
        fp16: When true, switch the learner to non-native mixed precision.
        **kwargs: Forwarded to `Learner`.
    """
    # Work on a copy so popping `y_range` never mutates the caller's dict.
    config = {} if config is None else dict(config)
    if n_out is None: n_out = get_c(dls)
    assert n_out, "`n_out` is not defined, and could not be inferred from data, set `dls.c` or pass `n_out`"
    # Always remove `y_range` from config so it cannot be passed twice below.
    config_y_range = config.pop('y_range', None)
    if y_range is None: y_range = config_y_range
    # Pass `cut` (not the splitter) in the `cut` position of create_timm_model.
    model = create_timm_model(arch, n_out, cut, pretrained, y_range=y_range, **config)
    # Honour a caller-supplied splitter; fall back to the previous fixed default.
    learn = Learner(dls, model, loss_func=loss_func,
                    splitter=default_split if splitter is None else splitter, **kwargs)
    if pretrained: learn.freeze()
    
    # Enable Mixed Precision Training
    if fp16: learn.to_non_native_fp16()
#     if fp16: learn.to_fp16(growth_factor=1.0)
    return learn

In [None]:
# Batch-level transforms handed to the DataBlock: normalisation built from `imagenet_stats`.
batch_tfms = [Normalize.from_stats(*imagenet_stats)]

In [None]:
# val_indices = list(df_annotations[df_annotations['fold'] == cfg.fold_num].image_path.unique())

# DataBlock for (image, category) items. Images are read from the 'image_path' column and
# normalised with `batch_tfms`. The splitter, `get_y` and `item_tfms` arguments are disabled.
# NOTE(review): with `get_y` disabled the CategoryBlock has no label getter — confirm that
# DataLoaders built from this block behave as intended for inference.
data_block = DataBlock(blocks=(ImageBlock, CategoryBlock),
#                    splitter=MaskSplitter(list(df_annotations['fold'] == fold)),
                   get_x=ColReader('image_path'),
#                    get_y=ColReader('labels'),
#                    item_tfms=item_tfms,
                   batch_tfms=batch_tfms)



In [None]:
# Build DataLoaders over the study rows in `sub_df`, using the batch size and worker count
# from `cfg`.
dls = data_block.dataloaders(sub_df,
                            bs=cfg.batch_size,
                            num_workers=cfg.num_workers)

# Visual sanity check: up to 8 items laid out in 2 rows.
dls.show_batch(figsize=(18,15), max_n=8, nrows=2)

In [None]:
# Prepare new learner object
# - `cbs` (the callbacks list) is never defined in this notebook, so it is no longer passed.
# - `n_out` is set from the config because the DataBlock above has no active `get_y`,
#   so the head size should not depend on what can be inferred from `dls`.
learn_study = timm_learner(dls,
                     cfg.model_arch,
                     loss_func=cfg.loss_func,
                     pretrained=True,
                     opt_func=ranger,
#                      splitter=default_split,
                     n_out=cfg.num_classes,
                     fp16=cfg.fp16,
                     metrics=cfg.metrics)
# learn_study.load('/kaggle/working/models/efficientnetv2_rw_s_fold0_job1')
# `Learner.load` appends '.pth' to the name it is given, so pass the checkpoint path
# without its extension (otherwise it looks for '...-4Cl.pth.pth').
learn_study.load('/kaggle/input/step1siimeffnetstudyclass4cl/models/step1-StudyClass-4Cl')

In [None]:
# Predict - Study Label
test_dl = learn_study.dls.test_dl(test_paths)
preds = learn_study.get_preds(dl=test_dl)

# Update submission df.
# `label_cols` may also contain the helper 'image_path' column; only the class columns
# receive predictions, so the number of target columns matches the prediction columns.
class_cols = [col for col in label_cols if col != 'image_path']
# NOTE(review): this writes prediction column k into class_cols[k]; confirm the model's
# class order matches the column order (negative, typical, indeterminate, atypical).
sub_df[class_cols] = preds[0].cpu().numpy()
print(sub_df.head())


In [None]:
# Rename by column name rather than by position: `sub_df` also carries the helper
# 'image_path' column, so assigning a fixed list of six names would not match its width.
# The helper column is dropped (if present) so it does not leak into the merged frame.
sub_df = (sub_df
          .rename(columns={'PredictionString': 'PredictionString1'})
          .drop(columns=['image_path'], errors='ignore'))
# Left merge keeps every submission row; rows without a study prediction get NaN.
df = pd.merge(df, sub_df, on = 'id', how = 'left')
df.head()

## Detection 
### study string

In [None]:
# Write the study-level PredictionString for the first `study_len` rows: every class name is
# followed by its predicted value and the fixed box '0 0 1 1'.
class_names = ('negative', 'typical', 'indeterminate', 'atypical')
for i in range(study_len):
    negative, typical, indeterminate, atypical = (df.loc[i, name] for name in class_names)
    df.loc[i, 'PredictionString'] = f'negative {negative} 0 0 1 1 typical {typical} 0 0 1 1 indeterminate {indeterminate} 0 0 1 1 atypical {atypical} 0 0 1 1'

# Keep only the two submission columns.
df_study = df.loc[:, ['id', 'PredictionString']]
df_study.head()

### 2 class  
- Model trained with only 2 classes

In [None]:
if fast_sub:
    sub_df = fast_df.copy()
else:
    sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Rows after the first `study_len` are the image rows. Copy the slice so the 'none' column
# is added to an independent frame instead of a view (avoids SettingWithCopyWarning).
sub_df = sub_df[study_len:].copy()
test_paths = save_dir_image + sub_df['id'] +'.png'

# Placeholder for the image-level 'none' score.
sub_df['none'] = 0
print(sub_df.head())

# A single column name here (the third column, 'none'), not a list of columns.
label_cols = sub_df.columns[2]

In [None]:
# Predict on the image-level PNG paths built in the previous cell.
# NOTE(review): this reuses `learn_study`, the learner loaded from the 4-class study
# checkpoint, although this section is titled "2 class" — confirm which model is intended.
test_dl = learn_study.dls.test_dl(test_paths)
preds = learn_study.get_preds(dl=test_dl)

# Update submission df
# NOTE(review): `label_cols` is the single 'none' column at this point — confirm that
# preds[0] has a shape that can be assigned to one column.
sub_df[label_cols] = preds[0]
# print(sub_df.head())

# Re-number the rows from 0 (the image rows were sliced from the end of the submission).
df_2class = sub_df.reset_index(drop=True)
print(df_2class)