In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import HTML
import glob
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import math
import ast
import cv2

def css_styling():
    '''Read the notebook stylesheet and wrap it for display.

    Returns an IPython HTML object built from the contents of
    ../input/titlestyle/style2.css.'''
    # The context manager closes the file handle as soon as it is read,
    # instead of leaving it open until garbage collection.
    with open("../input/titlestyle/style2.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)
# Build the stylesheet HTML object; as the last expression of the cell it is
# what the notebook displays.
css_styling()

<div class="heading">
   <h1><span style="color: white">Intro</span></h1>
</div>
<div class="content">

<u>📔 Public notebooks - 546+</u><br>

<u>🥇 Gold medals - 26+</u><br>

<u>🥈 Silver medals - 40+</u><br>

<u>🥉 Bronze medals - 94+</u><br>
    
1st place solution - <a href="https://www.kaggle.com/c/siim-covid19-detection/discussion/263658">https://www.kaggle.com/c/siim-covid19-detection/discussion/263658</a><br>
    
2nd place solution - <a href="https://www.kaggle.com/c/siim-covid19-detection/discussion/263674">https://www.kaggle.com/c/siim-covid19-detection/discussion/263674</a><br>
    
3rd place solution - <a href="https://www.kaggle.com/c/siim-covid19-detection/discussion/263654">https://www.kaggle.com/c/siim-covid19-detection/discussion/263654</a><br>


Wordcloud made of notebook titles:
<img src="https://i.imgur.com/S3I8ap4.png" alt="img1"/>
</div>

<div style = "font-family: Arial;font-size:1.6em;color: #0a6121;background: #ace6bc;padding:5px;border-style: solid;border-color:#0a6121;">
<b>Summa summarum: EfficientNet, YOLOv5 and dicom files</b> 
</div>

<div class="heading">
   <h1><span style="color: white">Goal</span></h1>
</div>
<div class='content'>
    <h3><b>😷 Categorize chest radiographs as negative for pneumonia, typical, indeterminate, or atypical for COVID-19. If some abnormalities are found, provide the bounding boxes.</b> </h3>
    <h3><b>😷 An image can have a positive value for only one label but can have multiple bounding boxes. This competition involves both object detection and classification.</b></h3>
</div>

In [None]:
# Load the study-level and image-level training tables from the
# competition input directory.
train_study = pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")
train_image = pd.read_csv("../input/siim-covid19-detection/train_image_level.csv")
# Preview the first rows of the study-level table.
train_study.head()

In [None]:
# Preview the first rows of the image-level table.
train_image.head()

In [None]:
def fix_inverted_radiograms(data, img):
    '''Fixes inverted radiograms - with PhotometricInterpretation == "MONOCHROME1" -
    and rescales the pixel values to the uint8 range [0, 255].
    data: the .dcm dataset (only PhotometricInterpretation is read)
    img: the .dcm pixel_array

    Returns a new uint8 array with the same shape as img. An image whose
    pixels are all equal is returned as all zeros.'''

    # Compute in float64 so the subtractions below cannot overflow when the
    # pixel data uses a narrow signed integer type.
    img = np.asarray(img, dtype=np.float64)

    if data.PhotometricInterpretation == "MONOCHROME1":
        img = np.amax(img) - img

    # Shift so the darkest pixel is 0, then scale so the brightest is 1.
    img = img - np.min(img)
    peak = np.max(img)
    if peak == 0:
        # Constant image: dividing by zero would produce NaN and an
        # undefined uint8 cast, so return a well-defined blank image.
        return np.zeros(img.shape, dtype=np.uint8)

    img = img / peak
    img = (img * 255).astype(np.uint8)

    return img

def get_image_metadata(study_id, df):
    '''Returns the label and bounding boxes (if any)
    for a specific study id.

    study_id: value matched against the "study_id" column of df
    df: table with "study_id", "boxes" and the label columns
        "Negative for Pneumonia", "Typical Appearance" and
        "Indeterminate Appearance"

    Returns (label, bbox) where bbox is the list of "boxes" values of every
    row belonging to the study.
    Raises ValueError if no row of df matches study_id.'''

    data = df[df["study_id"] == study_id]

    if data.empty:
        raise ValueError(f"study_id {study_id!r} not found in df")

    # A study may own several rows (one per image). Comparing the whole
    # column to 1 inside `if` is ambiguous for more than one row, so read
    # the labels from the first matching row.
    # NOTE(review): assumes the label columns are identical across all rows
    # of one study - confirm against the merge that builds df.
    first_row = data.iloc[0]

    # Any study that matches none of the three columns falls back to "atypical".
    label = "atypical"
    for column, name in (("Negative for Pneumonia", "negative_for_pneumonia"),
                         ("Typical Appearance", "typical"),
                         ("Indeterminate Appearance", "indeterminate")):
        if first_row[column] == 1:
            label = name
            break

    bbox = list(data["boxes"].values)

    return label, bbox

def return_coords(box):
    '''Parse a stringified list of box dictionaries and return the corner
    coordinates (x1, y1, x2, y2) of its first entry as integers.'''
    # Only the first dictionary of the parsed list is used
    first = ast.literal_eval(box)[0]
    # Top-left corner comes straight from the dictionary
    left, top = first["x"], first["y"]
    # Bottom-right corner is the top-left corner offset by the box size
    right = left + first["width"]
    bottom = top + first["height"]
    # Truncate every coordinate to an integer
    return tuple(int(value) for value in (left, top, right, bottom))

def show_dcm_info(study_ids, df, nrows=2, ncols=3):
    '''Show .dcm images along with description.

    study_ids: study ids to plot; at most nrows * ncols of them are shown
    df: table handed to get_image_metadata to look up label and boxes
    nrows, ncols: shape of the subplot grid (the defaults give the 2x3
        layout with a 21x10 inch figure)

    For each study the first file found under its train directory is read,
    inverted radiograms are fixed, every annotated bounding box is drawn with
    the label, and the image is plotted with its label, sex and body part.'''

    # squeeze=False keeps `axes` two-dimensional even for one row or column.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
                             figsize=(7 * ncols, 5 * nrows), squeeze=False)
    panels = list(axes.flat)

    # Handle one study at a time so only a single DICOM is held in memory.
    shown = 0
    for study_id, ax in zip(study_ids, panels):
        # Get the .dcm path (first file found for the study)
        dcm_path = glob.glob(f"../input/siim-covid19-detection/train/{study_id}/*/*")[0]
        data = pydicom.dcmread(dcm_path)
        img = apply_voi_lut(data.pixel_array, data)

        # Fix inverted images
        img = fix_inverted_radiograms(data, img)

        label, bbox = get_image_metadata(study_id, df)

        # NOTE(review): bbox holds the "boxes" value of every row of the
        # study, while only one image of the study is plotted - confirm the
        # boxes belong to the plotted image when a study has several images.
        for boxes_str in bbox:
            # Rows without annotations hold NaN (a float) rather than a
            # string, so skip anything that is not a string.
            if not isinstance(boxes_str, str):
                continue
            # Each string encodes a list of dictionaries; draw all of them,
            # not just the first one.
            for box in ast.literal_eval(boxes_str):
                x1, y1 = int(box["x"]), int(box["y"])
                x2 = int(box["x"] + box["width"])
                y2 = int(box["y"] + box["height"])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 80, 255), 15)
                cv2.putText(img, label, (x1, y1-14),
                            cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 0, 0), 4)

        # Tolerate files that lack these optional header fields.
        sex = getattr(data, "PatientSex", "N/A")
        body_part = getattr(data, "BodyPartExamined", "N/A")

        # Plot the image
        ax.imshow(img, cmap="binary")
        ax.set_title(f"Label: {label} \n Sex: {sex} | Body Part: {body_part}",
                     fontsize=14, weight='bold')
        ax.axis('off')
        shown += 1

    # Hide any grid cells that did not receive an image.
    for ax in panels[shown:]:
        ax.axis('off')
        
# The study id is the part of `id` before the first underscore.
train_study["study_id"] = train_study["id"].apply(lambda raw_id: raw_id.split("_")[0])

# Join image rows with their study rows, then drop the id columns that are
# no longer needed once `study_id` is present.
train = pd.merge(
    train_image,
    train_study,
    left_on="StudyInstanceUID",
    right_on="study_id",
).drop(columns=["id_x", "StudyInstanceUID", "id_y"])

# Plot six example studies.
show_dcm_info(
    study_ids=[
        "72044bb44d41", "f807cd855d31", "b949689a9ef1",
        "612ea5194007", "082cafb03942", "d3e83031ebea",
    ],
    df=train,
)

In [None]:
# DICOM file example: reading one training file and displaying the parsed
# dataset shows the metadata stored alongside the image.
pydicom.dcmread('../input/siim-covid19-detection/train/72044bb44d41/c39be19d56ba/7452ddd5b44b.dcm')

<div class="heading">
   <h1><span style="color: white">Data preparation</span></h1>
</div>
<div class='content'>
    
<b>DICOM stands for Digital Imaging and Communications in Medicine</b>. It is a standard, internationally accepted format for viewing, storing, retrieving and sharing medical images. A DICOM file consists of a header and image data packed into a single file. A notebook for preparing DICOM files and extracting their metadata can be found here <a href="https://www.kaggle.com/andradaolteanu/siim-covid-19-box-detect-dcm-metadata">https://www.kaggle.com/andradaolteanu/siim-covid-19-box-detect-dcm-metadata</a><br><br>
    
<b>Data augmentation</b> is a strategy that enables practitioners to significantly increase the diversity of data available for training models, without actually collecting new data. Some methods such as random contrast, random crop, RGB shift and others are presented here <a href="https://www.kaggle.com/ruchi798/siim-covid-19-detection-eda-data-augmentation">https://www.kaggle.com/ruchi798/siim-covid-19-detection-eda-data-augmentation</a><br><br>
    
Image preprocessing with plotting of boxes <a href="https://www.kaggle.com/yujiariyasu/catch-up-on-positive-samples-plot-submission-csv">https://www.kaggle.com/yujiariyasu/catch-up-on-positive-samples-plot-submission-csv</a><br><br>
    
    
Very informative notebook with several <b>image transformation techniques including histogram equalization, CLAHE, noise reduction</b> <a href="https://www.kaggle.com/yujiariyasu/catch-up-on-positive-samples-plot-submission-csv">https://www.kaggle.com/yujiariyasu/catch-up-on-positive-samples-plot-submission-csv</a><br><br>

Great notebook with lung <b>segmentation</b> and a list of other processing notebooks <a href="https://www.kaggle.com/davidbroberts/lung-segmentation-without-cnn">https://www.kaggle.com/davidbroberts/lung-segmentation-without-cnn</a><br><br>
    
<b>Data augmentation using albumentations package</b> <a href="https://www.kaggle.com/boltcoder/siim-covid19-simple-data-augmentation-techniques">https://www.kaggle.com/boltcoder/siim-covid19-simple-data-augmentation-techniques</a><br><br>
    

</div>

<div class="heading">
   <h1><span style="color: white">Models</span></h1>
</div>
<div class='content'>
    
<b>YOLO</b>, an acronym for 'You Only Look Once', is an object detection algorithm that divides images into a grid system. Each cell in the grid is responsible for detecting objects within itself. This model enables real-time object detection with convolutional neural networks. How to prepare data and run YOLOv5 is explained here <a href="https://www.kaggle.com/ayuraj/train-covid-19-detection-using-yolov5">https://www.kaggle.com/ayuraj/train-covid-19-detection-using-yolov5</a><br><br>
    
Convert dicom to png and use <b>Efficientnet with Keras</b> for classification and YOLOv5 for detection <a href="https://www.kaggle.com/h053473666/siim-cov19-efnb7-yolov5-infer">https://www.kaggle.com/h053473666/siim-cov19-efnb7-yolov5-infer</a><br><br>
    
<b>MMDetection</b> is an open-source toolbox based on PyTorch for object detection and segmentation tasks. The toolbox supports more than 50 baselines. One of the models, CascadeRCNN, is presented here <a href="https://www.kaggle.com/sreevishnudamodaran/siim-mmdetection-cascadercnn-weight-bias">https://www.kaggle.com/sreevishnudamodaran/siim-mmdetection-cascadercnn-weight-bias</a><br><br>
    
<b>FasterRCNN</b> using PyTorch <a href="https://www.kaggle.com/piantic/train-siim-covid-19-detection-fasterrcnn">https://www.kaggle.com/piantic/train-siim-covid-19-detection-fasterrcnn</a><br><br>
    
Training several <b>EfficientNet models using TensorFlow</b> <a href="https://www.kaggle.com/sreevishnudamodaran/siim-effnetv2-keras-study-train-tpu-cv0-805">https://www.kaggle.com/sreevishnudamodaran/siim-effnetv2-keras-study-train-tpu-cv0-805</a><br><br>
    
<b>EfficientNet using PyTorch and timm</b> <a href="https://www.kaggle.com/heyytanay/siim-pytorch-classification-only-training-effnets">https://www.kaggle.com/heyytanay/siim-pytorch-classification-only-training-effnets</a><br><br>
    
<b>YOLOv5</b> object detection training with easy to follow explanation <a href="https://www.kaggle.com/ammarnassanalhajali/covid-19-detection-yolov5-3classes-training">https://www.kaggle.com/ammarnassanalhajali/covid-19-detection-yolov5-3classes-training</a><br><br>
    
    
<b>Vision Transformer with Grad-CAM algorithm</b> used to interpret the model's predictions <a href="https://www.kaggle.com/basu369victor/covid19-detection-with-vit-and-heatmap">https://www.kaggle.com/basu369victor/covid19-detection-with-vit-and-heatmap</a>, and one more notebook with Grad-CAM <a href="https://www.kaggle.com/sinamhd9/where-s-your-model-looking-at-grad-cam">https://www.kaggle.com/sinamhd9/where-s-your-model-looking-at-grad-cam</a>
</div>

<!-- <div class="heading">
   <h1><span style="color: white">Transformers</span></h1>
</div>
<div class='content'>
</div> -->

<div class="heading">
   <h1><span style="color: white">Other useful notebooks</span></h1>
</div>
<div class='content'>
Load 3D CT scans using dicom files and work with them <a href="https://www.kaggle.com/allunia/pulmonary-dicom-preprocessing">https://www.kaggle.com/allunia/pulmonary-dicom-preprocessing</a>
    
Convert dicom to jpg <a href="https://www.kaggle.com/xhlulu/siim-covid-19-convert-to-jpg-256px">https://www.kaggle.com/xhlulu/siim-covid-19-convert-to-jpg-256px</a>
</div>