# Helpers

In [None]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import sys


import numpy as np
import matplotlib.pyplot as plt
import scipy
import SimpleITK as sitk
from tqdm import tqdm

# Resolve the directory two levels above the current working directory and
# put it on the import path (once), presumably so the `multitask_method`
# imports further down can be found.
module_path = os.path.abspath('../..')

if module_path not in sys.path:
    sys.path.append(module_path)

    
from multitask_method.plotting_utils import display_cross_section, display_normalised_cross_section

# VinDr-CXR

## Raw

In [None]:
from multitask_method.preprocessing.vindr_cxr_preproc import raw_root, gen_vindr_structure, TRAIN, TEST

# Unpack the four values describing the raw dataset layout under `raw_root`.
# Later cells index the first two to obtain CSV paths and treat the last two
# as directories of DICOM files.
(
    raw_annotations_dict,
    raw_image_labels,
    raw_test_dir,
    raw_train_dir,
) = gen_vindr_structure(raw_root)

In [None]:
import pandas as pd
import pydicom

from multitask_method.preprocessing.vindr_cxr_preproc import vindr_preproc_func, generate_vindr_mask

# Annotation table for the test split.
test_anno_df = pd.read_csv(raw_annotations_dict[TEST])
# Image-level labels, indexed by image id so a sample can be looked up by name.
# NOTE(review): annotations are selected with TEST but labels with the literal
# index 1 -- confirm both refer to the same split.
test_label_df = pd.read_csv(raw_image_labels[1], index_col='image_id')

# Every entry of the raw test directory, in sorted order.
raw_test_samples = sorted(raw_test_dir.iterdir())

def plot_raw_cxr(curr_img, curr_ax):
    """Draw ``curr_img`` in grayscale on ``curr_ax`` and add a colorbar for it.

    The colour limits are set to the image's own minimum and maximum.

    Args:
        curr_img: Image data accepted by ``imshow`` that also provides
            ``min()`` and ``max()``.
        curr_ax: Matplotlib axes to draw the image on.
    """
    curr_ax_im = curr_ax.imshow(curr_img, vmin=curr_img.min(), vmax=curr_img.max(), cmap='gray')
    # Name the parent axes explicitly so the colorbar is placed beside this
    # image instead of relying on pyplot's implicit choice of axes, which
    # matters because the callers draw into multi-panel figures.
    plt.colorbar(curr_ax_im, ax=curr_ax)

# Preview the first six raw test files: raw pixels, preprocessed pixels and
# the generated annotation mask side by side, titled with the labels set to 1.
for i, dicom_path in enumerate(raw_test_samples[:6]):
    sample_id = dicom_path.stem
    raw_dicom = pydicom.dcmread(dicom_path)

    raw_arr = raw_dicom.pixel_array.astype(float)
    preproc_arr = vindr_preproc_func(raw_arr, raw_dicom)

    # Only the annotation rows belonging to this image are used for the mask.
    sample_annotations = test_anno_df[test_anno_df['image_id'] == sample_id]
    raw_annotation = generate_vindr_mask(sample_annotations, raw_arr)

    # Names of the label columns whose value is 1 for this image.
    sample_class_row = test_label_df.loc[sample_id]
    positive_labels = sample_class_row[sample_class_row == 1]
    sample_class = ', '.join(positive_labels.index.tolist())

    fig, ax = plt.subplots(ncols=3, figsize=(20, 6))
    for panel, panel_img in zip(ax[:2], (raw_arr, preproc_arr)):
        plot_raw_cxr(panel_img, panel)
    ax[2].imshow(raw_annotation)
    fig.suptitle(sample_class)

In [None]:
import pandas as pd
import pydicom

from multitask_method.preprocessing.vindr_cxr_preproc import vindr_preproc_func, generate_vindr_mask

# Annotation and image-level label tables for the training split.
train_anno_df = pd.read_csv(raw_annotations_dict[TRAIN])
train_label_df = pd.read_csv(raw_image_labels[0])

# Sum the 'No finding' column over all rows that share an image id and keep
# the ids whose total is exactly 3.
# NOTE(review): presumably three readers per image, i.e. unanimous
# 'No finding' -- confirm against the dataset description.
train_labels_sum = train_label_df.groupby('image_id')['No finding'].sum()
selected_ids = train_labels_sum[train_labels_sum == 3].index
train_sample_ids = sorted(selected_ids.tolist())

# One '<image_id>.dicom' path inside the raw training directory per kept id.
raw_train_samples = [raw_train_dir / f'{image_id}.dicom' for image_id in train_sample_ids]

def plot_raw_cxr(curr_img, curr_ax):
    """Draw ``curr_img`` in grayscale on ``curr_ax`` and add a colorbar for it.

    The colour limits are set to the image's own minimum and maximum.

    Args:
        curr_img: Image data accepted by ``imshow`` that also provides
            ``min()`` and ``max()``.
        curr_ax: Matplotlib axes to draw the image on.
    """
    curr_ax_im = curr_ax.imshow(curr_img, vmin=curr_img.min(), vmax=curr_img.max(), cmap='gray')
    # Name the parent axes explicitly so the colorbar is placed beside this
    # image instead of relying on pyplot's implicit choice of axes, which
    # matters because the callers draw into multi-panel figures.
    plt.colorbar(curr_ax_im, ax=curr_ax)

# Preview the first six selected training files: raw pixels, preprocessed
# pixels and the generated annotation mask side by side, titled with the id.
for i, dicom_path in enumerate(raw_train_samples[:6]):
    sample_id = dicom_path.stem
    raw_dicom = pydicom.dcmread(dicom_path)

    raw_arr = raw_dicom.pixel_array.astype(float)
    preproc_arr = vindr_preproc_func(raw_arr, raw_dicom)

    # Only the annotation rows belonging to this image are used for the mask.
    sample_annotations = train_anno_df[train_anno_df['image_id'] == sample_id]
    raw_annotation = generate_vindr_mask(sample_annotations, raw_arr)

    fig, ax = plt.subplots(ncols=3, figsize=(20, 6))
    for panel, panel_img in zip(ax[:2], (raw_arr, preproc_arr)):
        plot_raw_cxr(panel_img, panel)
    ax[2].imshow(raw_annotation)
    fig.suptitle(sample_id)

## Preprocessed

In [None]:
from multitask_method.data.vindr_cxr import VinDrCXRDatasetCoordinator
from multitask_method.preprocessing.vindr_cxr_preproc import base_output_dir

# NOTE(review): `test_samples` is not referenced by the calls below, which each
# pass their own literal [5] -- confirm whether it was meant to be used.
test_samples = [5]

# Four containers built from the preprocessed output directory. The coordinator
# flags are positional; judging by the variable names the first flag presumably
# selects full resolution and the last two the training split -- confirm
# against VinDrCXRDatasetCoordinator.
full_res_test_container = VinDrCXRDatasetCoordinator(
    base_output_dir, True, False, False
).make_container([5])
low_res_test_container = VinDrCXRDatasetCoordinator(
    base_output_dir, False, False, False
).make_container([5])

full_res_train_container = VinDrCXRDatasetCoordinator(
    base_output_dir, True, True, True
).make_container([5])
low_res_train_container = VinDrCXRDatasetCoordinator(
    base_output_dir, False, True, True
).make_container([5])

In [None]:
# Show every item of the full-resolution test container: the image on the left
# and the second returned element (presumably its mask) on the right.
for i in range(len(full_res_test_container)):
    full_res_test_pp_img, full_res_test_pp_m, sample_id = full_res_test_container[i]

    fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
    # Only index 0 along the first axis is displayed, while the colour limits
    # are taken over the whole array.
    ax_im = ax[0].imshow(full_res_test_pp_img[0], vmin=full_res_test_pp_img.min(), vmax=full_res_test_pp_img.max(), cmap='gray')
    # Name the image axes explicitly so the colorbar is placed beside the image
    # rather than depending on pyplot's implicit choice in this two-panel figure.
    plt.colorbar(ax_im, ax=ax[0])
    ax[1].imshow(full_res_test_pp_m)
    fig.suptitle(sample_id)

In [None]:
# Show every item of the low-resolution test container: the image on the left
# and the second returned element (presumably its mask) on the right.
for i in range(len(low_res_test_container)):
    low_res_test_pp_img, low_res_test_pp_m, sample_id = low_res_test_container[i]

    fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
    # Only index 0 along the first axis is displayed, while the colour limits
    # are taken over the whole array.
    ax_im = ax[0].imshow(low_res_test_pp_img[0], vmin=low_res_test_pp_img.min(), vmax=low_res_test_pp_img.max(), cmap='gray')
    # Name the image axes explicitly so the colorbar is placed beside the image
    # rather than depending on pyplot's implicit choice in this two-panel figure.
    plt.colorbar(ax_im, ax=ax[0])
    ax[1].imshow(low_res_test_pp_m)
    fig.suptitle(sample_id)

In [None]:
# Show every item of the low-resolution training container, one figure each;
# the second element of each item is ignored.
for i in range(len(low_res_train_container)):
    low_res_train_pp_img, _, sample_id = low_res_train_container[i]

    # Index 0 along the first axis is drawn; colour limits span the whole array.
    ax_im = plt.imshow(
        low_res_train_pp_img[0],
        vmin=low_res_train_pp_img.min(),
        vmax=low_res_train_pp_img.max(),
        cmap='gray',
    )
    plt.colorbar(ax_im)
    plt.title(sample_id)
    plt.show()

In [None]:
# Show every item of the full-resolution training container, one figure each;
# the second element of each item is ignored.
for i in range(len(full_res_train_container)):
    full_res_train_pp_img, _, sample_id = full_res_train_container[i]

    # Index 0 along the first axis is drawn; colour limits span the whole array.
    ax_im = plt.imshow(
        full_res_train_pp_img[0],
        vmin=full_res_train_pp_img.min(),
        vmax=full_res_train_pp_img.max(),
        cmap='gray',
    )
    plt.colorbar(ax_im)
    plt.title(sample_id)
    plt.show()