# SIIM-FISABIO-RSNA COVID-19 Detection

> **Identify and localize COVID-19 abnormalities on chest radiographs**

In this competition, you’ll identify and localize COVID-19 abnormalities on chest radiographs. In particular, you'll categorize the radiographs as negative for pneumonia or typical, indeterminate, or atypical for COVID-19. You and your model will work with imaging data and annotations from a group of radiologists.


**If you liked this notebook, please feel free to upvote. It is very much appreciated.**

In [None]:
!pip -q install gdcm

# Imports

In [None]:
import os
import random
import pydicom
from pathlib import Path
from glob import glob
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
from skimage import exposure
from pydicom.pixel_data_handlers.util import apply_voi_lut

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colors for the output plots, one per class id.
# COLOR_PALETTE holds them as hex strings; LABEL2COLOR holds the same colors
# as (R, G, B) integer tuples, decoded from the hex strings so the two lists
# cannot drift apart.
COLOR_PALETTE = ["#F9C00C", "#00B9F1", "#7200DA", "#F9320C"]
LABEL2COLOR = [
    tuple(int(hex_color[pos:pos + 2], 16) for pos in (1, 3, 5))
    for hex_color in COLOR_PALETTE
]

In [None]:
# Reading the input data
BASE_DATA_PATH = Path("../input/siim-covid19-detection/")
!ls {BASE_DATA_PATH}

df_train_img = pd.read_csv(BASE_DATA_PATH / "train_image_level.csv")
df_train_study = pd.read_csv(BASE_DATA_PATH / "train_study_level.csv")
df_sub = pd.read_csv(BASE_DATA_PATH / "sample_submission.csv")

In [None]:
# Preview the first rows of the image-level table (bbox labels per image).
df_train_img.head()

In [None]:
# Preview the first rows of the study-level table (labels of the studies).
df_train_study.head()

# Merging the Image and Study CSVs

In [None]:
# Human-readable name for each integer class id.
CLASS_MAP = {
    0: "Negative for Pneumonia",
    1: "Typical Appearance",
    2: "Indeterminate Appearance",
    3: "Atypical Appearance"
}

# Create a label column: the position of the largest value among all columns
# after the first one (the first column is skipped as the id column).
df_train_study["class_id"] = df_train_study.iloc[:, 1:].values.argmax(1)

# Remove the study part from the ids (drops the last 6 characters,
# presumably a "_study" suffix -- verify against the CSV).
df_train_study["StudyInstanceUID"] = df_train_study["id"].apply(lambda x: x[:-6])
df_train_study = df_train_study[["StudyInstanceUID", "class_id"]]

# Merge the two train csvs together.
df_train = pd.merge(df_train_img, df_train_study, on="StudyInstanceUID")

# Map the class ids to original names for plotting.
df_train["class_label"] = df_train["class_id"].map(CLASS_MAP)

# Generating the image paths.
train_dir = BASE_DATA_PATH / "train"


def _find_image_path(row):
    """Return the DICOM path that belongs to one row of ``df_train``.

    A study folder can hold more than one image, so taking the first file
    found in the study would give every image of that study the same path.
    The file is therefore looked up by the row's own image id first.

    NOTE(review): assumes image ids look like "<name>_image" and the file is
    stored as "<study>/<series>/<name>.<ext>" -- confirm against the dataset.
    If no such file exists, the first file of the study (sorted, so the
    choice is deterministic) is used, which is the previous behaviour.
    An IndexError is raised if the study folder holds no files at all.
    """
    image_name = row["id"]
    if image_name.endswith("_image"):
        image_name = image_name[:-len("_image")]

    study_dir = os.path.join(train_dir, row["StudyInstanceUID"])
    matches = sorted(glob(os.path.join(study_dir, "*", image_name + ".*")))
    if not matches:
        matches = sorted(glob(os.path.join(study_dir, "*", "*")))
    return matches[0]


df_train["path"] = df_train.apply(_find_image_path, axis=1)

In [None]:
# Preview the first rows of the merged image + study table.
df_train.head()

In [None]:
# Number of rows per class id (see CLASS_MAP for the class names).
df_train["class_id"].value_counts()

# Class Distributions

In [None]:
# Bar chart of the number of rows per class, with each bar annotated by the
# share of the whole training table it represents.
plt.figure(figsize=(21, 10))
ax = sns.countplot(x="class_label", data=df_train, palette=COLOR_PALETTE)
plt.title('Percentage of the Classes', fontsize=20)

total = len(df_train)

for p in ax.patches:
    # Bar height is the class count; convert it to a percentage of all rows.
    percentage = f"{100 * p.get_height() / total:.1f}%"
    # Place the text a third of the way into the bar, slightly above its top.
    x = p.get_x() + p.get_width() / 3
    y = p.get_height() + 10
    ax.annotate(percentage, (x, y), weight="bold", fontsize=20)

plt.show()

# Bbox Distributions

In [None]:
# Occurrences of the word "opacity" in each label string, used as the
# number of boxes per image.
bbox_counts = df_train["label"].str.count("opacity")

# Images with at least one opacity box versus all the remaining images.
opacity_count = bbox_counts.gt(0).sum()
none_count = len(df_train) - opacity_count

df_bbox_counts = pd.DataFrame(
    {
        "label": ["opacity", "none"],
        "count": [opacity_count, none_count],
    }
)
df_bbox_counts

In [None]:
# Horizontal bars: one bar per label ("opacity" / "none"), length = count.
plt.figure(figsize=(14, 8))
sns.barplot(x="count", y="label", data=df_bbox_counts, ax=plt.gca());

# BBox Distribution per Image

In [None]:
# Histogram of the per-image box counts.
plt.figure(figsize=(14, 8))
sns.histplot(data=bbox_counts, ax=plt.gca());

In [None]:
# Smallest and largest per-image box count.
fewest_boxes, most_boxes = min(bbox_counts), max(bbox_counts)
print(f"Minimum number of bboxes per image: {fewest_boxes}")
print(f"Maximum number of bboxes per image: {most_boxes}")

# Helper Functions

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixels as a ``uint8`` array in 0..255.

    Args:
        path: Location of the DICOM file.
        voi_lut: If True, pass the raw pixels through ``apply_voi_lut``
            (VOI LUT, if available by DICOM device, transforms raw DICOM
            data to a "human-friendly" view). If False, use raw pixels.
        fix_monochrome: If True and the file's PhotometricInterpretation is
            "MONOCHROME1", invert the intensities so the X-ray does not
            look inverted.

    Returns:
        The pixel data min-max scaled to 0..255 as ``np.uint8``. A constant
        image (no intensity range) is returned as all zeros.
    """
    # `dcmread` is the documented reader; `read_file` is a deprecated alias.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # Shift so the darkest pixel is 0, then scale the brightest to 255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing would give 0/0 = NaN and an undefined
        # uint8 cast, so return an explicit black image instead.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)
    return data
        
    
def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Show a single image in a new figure.

    Args:
        img: Image to display; handed to ``plt.imshow`` unchanged.
        size: Figure size passed to ``plt.figure`` as ``figsize``.
        is_rgb: Accepted for interface compatibility; not used.
        title: Text shown as the figure's super-title.
        cmap: Colormap passed to ``plt.imshow``.
    """
    figure = plt.figure(figsize=size)
    figure.suptitle(title)
    plt.imshow(img, cmap=cmap)
    plt.show()


def plot_imgs(imgs, labels=None, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show several images in a grid with ``cols`` columns.

    Args:
        imgs: Sequence of images to display.
        labels: Optional per-image titles, paired with ``imgs`` in order;
            a ``None`` entry leaves that image untitled.
        cols: Number of grid columns.
        size: Side length of one grid cell, used to compute the figure size.
        is_rgb: Accepted for interface compatibility; not used.
        title: Text shown as the figure's super-title.
        cmap: Colormap passed to ``plt.imshow``.
        img_size: Size passed to ``cv2.resize`` for every image before it is
            drawn, or ``None`` to draw the images as they are.
    """
    # Ceiling division: exactly as many rows as needed. The previous
    # `len(imgs)//cols + 1` added an empty row whenever the image count was
    # a multiple of `cols`. Keep at least one row so an empty `imgs` still
    # produces a valid figure.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    if labels is None:
        labels = [None] * len(imgs)

    for i, (img, label) in enumerate(zip(imgs, labels)):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        # Subplot positions are 1-based.
        fig.add_subplot(rows, cols, i+1)
        plt.imshow(img, cmap=cmap)
        if label is not None:
            plt.title(label)
    plt.suptitle(title)
    plt.show()

# Plotting X-ray Images

In [None]:
# Decode the first four radiographs of the training table and show them.
first_paths = df_train["path"].iloc[:4]
imgs = [dicom2array(dicom_path) for dicom_path in first_paths]
plot_imgs(imgs)

# Plotting the Bounding Boxes

In [None]:
img_ids = df_train['id'].values
class_ids = df_train['class_id'].unique()

scale = 5        # images are shrunk by this factor before drawing
thickness = 7    # line thickness passed to cv2.rectangle
n_samples = 8    # number of random images to show


def _parse_boxes(label):
    """Split a label string into consecutive chunks of 6 tokens.

    Tokens 2..5 of each chunk are used below as the two box corners
    (x1, y1, x2, y2) in original-image pixels; token 0 is presumably the
    box name ("opacity" or "none") -- verify against the CSV.
    """
    tokens = label.split()
    return [tokens[idx:idx + 6] for idx in range(0, len(tokens), 6)]


paths = df_train["path"]
all_boxes = df_train.label.apply(_parse_boxes)

imgs, labels = [], []

# Sample distinct row positions so the same image is not shown twice.
sample_positions = np.random.choice(
    len(df_train), size=min(n_samples, len(df_train)), replace=False
)

for i in sample_positions:
    # The sampled values are positions, so use positional access throughout.
    img = dicom2array(path=paths.iloc[i])
    img = cv2.resize(img, None, fx=1/scale, fy=1/scale)
    # Grayscale -> 3 channels so the boxes can be drawn in color.
    img = np.stack([img, img, img], axis=-1)

    # All boxes of an image share the class of its study.
    class_id = int(df_train.class_id.iloc[i])
    color = LABEL2COLOR[class_id]

    for box in all_boxes.iloc[i]:
        # Only draw real opacity boxes; anything else (e.g. a "none"
        # placeholder or a truncated chunk) would just paint a blob at the
        # image corner.
        if len(box) < 6 or box[0] != "opacity":
            continue
        img = cv2.rectangle(
            img,
            (int(float(box[2]) / scale), int(float(box[3]) / scale)),
            (int(float(box[4]) / scale), int(float(box[5]) / scale)),
            color, thickness
        )
    img = cv2.resize(img, (500, 500))
    imgs.append(img)
    # Take the caption from the row's class id directly rather than from a
    # variable left over from the inner loop.
    labels.append(CLASS_MAP[class_id])

plot_imgs(imgs, labels, cmap=None)

### To be continued...