In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [None]:
!pip uninstall torch --yes

In [None]:
# install dependencies: (use cu113 because has problems with cu12)
!pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
# install mmcv-full thus we could use CUDA operators
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.12.0/index.html

# Install mmdetection
!rm -rf mmdetection
!git clone https://github.com/open-mmlab/mmdetection --branch 2.x
%cd mmdetection

!pip install -e .



In [None]:
# Sanity-check the PyTorch install: version string and whether CUDA is usable.
import torch, torchvision
torch_version = torch.__version__
cuda_available = torch.cuda.is_available()
print(torch_version, cuda_available)

# Sanity-check the MMDetection install.
import mmdet
print(mmdet.__version__)

# Sanity-check mmcv: print what its CUDA / compiler version helpers report.
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
for report in (get_compiling_cuda_version, get_compiler_version):
    print(report())

In [None]:
# Best-effort attempt to wipe the Kaggle working directory.
# NOTE(review): `shutil` is not imported in any cell shown in this notebook, so
# this call appears to raise NameError, which the bare `except` then swallows -
# i.e. the cell most likely does nothing. Simply importing `shutil` would make it
# delete /kaggle/working/, which is where later cells expect the mmdetection
# checkout (config paths under /kaggle/working/mmdetection). Confirm the intent
# before changing this.
try:
    shutil.rmtree('/kaggle/working/')
except:
    pass

In [None]:
# We download the pre-trained checkpoints for inference and finetuning.
!mkdir checkpoints
!wget -c https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco-70e1a046.pth \
    -O checkpoints/detr_r50_coco.pth


In [None]:
import mmcv
from mmcv.runner import load_checkpoint

from mmdet.apis import inference_detector, show_result_pyplot
from mmdet.models import build_detector

# Inputs for the demo: the DETR config file, its checkpoint, and the target device.
config_file = '/kaggle/working/mmdetection/configs/detr/detr_r50_8x2_150e_coco.py'
checkpoint_file = 'checkpoints/detr_r50_coco.pth'  # relative to the current directory
device = 'cuda:0'

# Parse the config. `pretrained` is cleared because the weights come from the
# checkpoint loaded below.
config = mmcv.Config.fromfile(config_file)
config.model.pretrained = None

# Build the detector, then restore its weights from the checkpoint.
model = build_detector(config.model)
checkpoint = load_checkpoint(model, checkpoint_file, map_location=device)

# Attach the class names stored in the checkpoint and the config; both are
# needed on the model object for inference.
model.CLASSES = checkpoint['meta']['CLASSES']
model.cfg = config

# Move the model to the GPU and switch it to evaluation mode.
model.to(device)
model.eval()

In [None]:
# Run the detector on a single image.
# The path is relative, so it resolves against the current working directory.
img = 'demo/demo.jpg'
result = inference_detector(model, img)


In [None]:
# Draw the detections on the image; score_thr=0.3 is the score threshold passed to the plotting helper.
show_result_pyplot(model, img, result, score_thr=0.3)

In [None]:
%cp -r /kaggle/input/rsna-pneumonia-detection-challenge/stage_2_test_images /kaggle/working/

In [None]:
%cp -r /kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_images /kaggle/working/

In [None]:
import pydicom as dicom # read dcm images from dataset
import cv2 # convert images
from tqdm import tqdm # progress bar

In [None]:
# Enter the copied training-image folder: the conversion loop in the next cell
# lists and writes files relative to the current directory.
%cd /kaggle/working/stage_2_train_images

In [None]:
# Convert every DICOM file in the current directory (train images) into a JPEG
# with the same base name.
# Only *.dcm entries are processed: the directory also receives the JPEG output,
# and passing a non-DICOM file to dcmread would abort the loop on a re-run.
dicom_files = [name for name in os.listdir('.') if name.lower().endswith('.dcm')]
for filename in tqdm(dicom_files):  # train images
    ds = dicom.dcmread(filename)
    # assumes a single-channel image of shape (height, width) - TODO confirm
    img = np.asarray(ds.pixel_array)
    # GRAY2RGB replicates the grey channel -> (height, width, 3); it accepts the
    # 2-D array directly, so no explicit channel axis has to be added first.
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    jpg_name = os.path.splitext(filename)[0] + '.jpg'
    # imwrite signals failure through its return value instead of raising; stop
    # here so a later `rm *.dcm` cannot delete a source that was never converted.
    if not cv2.imwrite(jpg_name, img):
        raise IOError('could not write ' + jpg_name)

In [None]:
%rm *.dcm # removing dcm files

In [None]:
# Enter the copied test-image folder: the conversion loop in the next cell
# lists and writes files relative to the current directory.
%cd /kaggle/working/stage_2_test_images

In [None]:
# Convert every DICOM file in the current directory (test images) into a JPEG
# with the same base name.
# Only *.dcm entries are processed: the directory also receives the JPEG output,
# and passing a non-DICOM file to dcmread would abort the loop on a re-run.
dicom_files = [name for name in os.listdir('.') if name.lower().endswith('.dcm')]
for filename in tqdm(dicom_files):  # test images
    ds = dicom.dcmread(filename)
    # assumes a single-channel image of shape (height, width) - TODO confirm
    img = np.asarray(ds.pixel_array)
    # GRAY2RGB replicates the grey channel -> (height, width, 3); it accepts the
    # 2-D array directly, so no explicit channel axis has to be added first.
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    jpg_name = os.path.splitext(filename)[0] + '.jpg'
    # imwrite signals failure through its return value instead of raising; stop
    # here so a later `rm *.dcm` cannot delete a source that was never converted.
    if not cv2.imwrite(jpg_name, img):
        raise IOError('could not write ' + jpg_name)

In [None]:
%rm *.dcm

In [None]:
import mmcv
import matplotlib.pyplot as plt

# Load one converted training image and display it as a visual sanity check.
sample_path = '/kaggle/working/stage_2_train_images/e65d66fe-2835-4e27-859b-a65065758cab.jpg'
img = mmcv.imread(sample_path)
plt.imshow(img)
plt.show()


In [None]:
# Dimensions of the array held in `img` (set by the cell that loaded the sample image).
img.shape # image shape

In [None]:
!cat /kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv # if you want to see labels

In [None]:
# Number of directory entries in the training-image folder.
len(os.listdir('/kaggle/working/stage_2_train_images'))

In [None]:
# Return to the working directory; the next cell writes labels.json using a relative path.
%cd /kaggle/working

In [None]:
import numpy as np
import json
import pandas as pd

# Input CSV with the labels, and the COCO-style JSON file to produce.
path = '/kaggle/input/rsna-pneumonia-detection-challenge/stage_2_train_labels.csv' # the path to the CSV file
save_json_path = 'labels.json'

data = pd.read_csv(path)

# Containers for the three COCO sections. `categories` starts with a
# placeholder entry that owns id 0.
images, annotations = [], []
categories = [{"supercategory": 'none', "id": 0, "name": 'None'}]

# Integer ids derived from the CSV:
#   fileid     - one code per distinct patientId
#   categoryid - code of the Target value, shifted by one (id 0 belongs to the placeholder)
#   annid      - the row index
data['fileid'] = data['patientId'].astype('category').cat.codes
data['categoryid'] = pd.Categorical(data['Target'], ordered=True).codes + 1
data['annid'] = data.index

def image(row, height=1024, width=1024):
    """Build the COCO ``images`` entry for one row.

    Args:
        row: record exposing ``fileid`` and ``patientId`` attributes
            (e.g. a tuple produced by ``DataFrame.itertuples()``).
        height: image height in pixels. Defaults to 1024, the value that
            was previously hard-coded.
        width: image width in pixels. Defaults to 1024, the value that
            was previously hard-coded.

    Returns:
        dict with the keys ``height``, ``width``, ``id`` and ``file_name``
        (in that order); ``file_name`` is the patient id plus ``.jpg``.
    """
    return {
        "height": height,
        "width": width,
        "id": row.fileid,
        "file_name": row.patientId + '.jpg',
    }

def category(row):
    """Build the COCO ``categories`` entry for one row.

    Args:
        row: record exposing ``categoryid`` and ``Target`` attributes
            (e.g. a tuple produced by ``DataFrame.itertuples()``).

    Returns:
        dict with the keys ``supercategory``, ``id`` and ``name`` (in that
        order); ``name`` carries the row's ``Target`` value.
    """
    return {
        "supercategory": 'None',
        "id": row.categoryid,
        # Read Target by name: the former positional lookup (row[6]) silently
        # picks another column as soon as the frame's column layout changes.
        "name": row.Target,
    }

def annotation(row):
    """Build the COCO ``annotations`` entry for one row.

    Args:
        row: record exposing ``x``, ``y``, ``width``, ``height``, ``fileid``,
            ``categoryid`` and ``annid`` attributes.

    Returns:
        dict with the keys ``segmentation``, ``iscrowd``, ``area``,
        ``image_id``, ``bbox``, ``category_id`` and ``id`` (in that order).
        ``bbox`` is ``[x, y, width, height]`` and ``area`` is width * height.
    """
    # NOTE(review): nothing here checks for missing coordinates, so a NaN in
    # x/y/width/height propagates into `bbox` and `area` - confirm the
    # downstream tooling tolerates that.
    box = [row.x, row.y, row.width, row.height]
    return {
        "segmentation": [],
        "iscrowd": 0,
        "area": row.width * row.height,
        "image_id": row.fileid,
        "bbox": box,
        "category_id": row.categoryid,
        "id": row.annid,
    }

# One annotation entry per CSV row.
annotations.extend(annotation(row) for row in data.itertuples())

# One image entry per distinct fileid, in ascending fileid order.
imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
images.extend(image(row) for row in imagedf.itertuples())

# One category entry per distinct categoryid, appended after the entries
# already present in `categories`.
catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
categories.extend(category(row) for row in catdf.itertuples())

data_coco = {
    "images": images,
    "categories": categories,
    "annotations": annotations,
}

# A context manager guarantees the file is flushed and closed before any later
# cell reads labels.json (the original left the handle from open() unclosed).
with open(save_json_path, "w") as json_file:
    json.dump(data_coco, json_file, indent=4)

In [None]:
# Presumably provides the `coco-split` command invoked in a later cell - verify.
# NOTE(review): the version is not pinned, so results may change between runs.
!pip install echo1-coco-split

In [None]:
# Work from /kaggle/working/: the following cells refer to train.json / test.json / valid.json by relative name.
%cd /kaggle/working/

In [None]:
%rm train.json
%rm test.json
%rm valid.json

In [None]:
!coco-split \
    --has_annotations \
    --valid_ratio .2 \
    --test_ratio .1 \
    --annotations_file /kaggle/working/labels.json

In [None]:
from mmcv import Config

# Base config to customise: faster_rcnn_r50_caffe_fpn_mstrain_1x_coco from the mmdetection checkout.
base_config_path = '/kaggle/working/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'
cfg = Config.fromfile(base_config_path)

In [None]:
# List the top-level sections of the loaded config.
cfg.keys()

In [None]:
from mmdet.apis import set_random_seed

# Dataset type, data root and device.
cfg.dataset_type = 'CocoDataset'
cfg.data_root = '/kaggle/working/'
cfg.device = 'cuda'
classes = (0,1,) #

# The three splits share everything except the annotation file. All of them
# read images from the training-image folder, since the split files were all
# derived from labels.json.
image_root = '/kaggle/working/stage_2_train_images'
split_annotations = (
    ('test', '/kaggle/working/test.json'),
    ('train', '/kaggle/working/train.json'),
    ('val', '/kaggle/working/valid.json'),
)
for split_name, ann_file in split_annotations:
    split_cfg = cfg.data[split_name]
    split_cfg.type = 'CocoDataset'
    split_cfg.classes = classes
    split_cfg.data_root = image_root
    split_cfg.ann_file = ann_file
    split_cfg.img_prefix = ''

# The box head predicts two classes.
cfg.model.roi_head.bbox_head.num_classes = 2
# Fine-tuning starts from this pre-trained detector checkpoint. The file has to
# exist at this exact path - verify that the checkpoint download cell fetches it.
cfg.load_from = '/kaggle/working/mmdetection/checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth'

# Directory for checkpoints and logs.
cfg.work_dir = '/kaggle/working/logs_exps'

# The reference learning rate targets 8-GPU training; it is divided by 8 for a
# single GPU. Warm-up is disabled and a log line is emitted every 10 iterations.
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# Evaluate with the bbox metric after every epoch, and checkpoint every epoch.
cfg.evaluation.metric = 'bbox'
cfg.evaluation.interval = 1
cfg.checkpoint_config.interval = 1

# Only 2 epochs, to get a quick result.
cfg.runner.max_epochs = 2

# Fixed seed for more reproducible results; a single GPU (id 0).
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

# Log to text and to TensorBoard.
cfg.log_config.hooks = [
    dict(type='TextLoggerHook'),
    dict(type='TensorboardLoggerHook'),
]

# Show the final config that will be used for training.
print(f'Config:\n{cfg.pretty_text}')

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
import os.path as osp

# Build the training split; it is wrapped in a list because that is the form
# later passed to train_detector.
train_dataset = build_dataset(cfg.data.train)
datasets = [train_dataset]

# Build the detector from the customised config.
model = build_detector(cfg.model)

# Human-readable class names attached to the model for visualization.
classes = ('nothing', 'pneumonia')
model.CLASSES = classes





In [None]:
# Show the dataset list built in the previous cell.
print(datasets)

In [None]:
# Make sure the output directory exists before training starts.
work_dir = osp.abspath(cfg.work_dir)
mmcv.mkdir_or_exist(work_dir)

# Non-distributed training with validation enabled.
train_detector(model, datasets, cfg, validate=True, distributed=False)

In [None]:
# Run the fine-tuned detector on one image and plot the detections.
sample_path = '/kaggle/working/stage_2_train_images/00436515-870c-4b36-a041-de91049b9ab4.jpg'
img = mmcv.imread(sample_path)
# The training config is attached to the model before calling the inference helper.
model.cfg = cfg
result = inference_detector(model, img)
show_result_pyplot(model, img, result)

In [None]:
# Second sample image: detect and plot.
sample_path = '/kaggle/working/stage_2_train_images/00569f44-917d-4c86-a842-81832af98c30.jpg'
img3 = mmcv.imread(sample_path)
result = inference_detector(model, img3)
show_result_pyplot(model, img3, result)

In [None]:
# Third sample image (noted by the author as coming from test.json): detect and plot.
sample_path = '/kaggle/working/stage_2_train_images/0572881e-d1dd-4757-a54e-b240b30da946.jpg'
img4 = mmcv.imread(sample_path)
result = inference_detector(model, img4)
show_result_pyplot(model, img4, result)

In [None]:
# Fourth sample image (noted by the author as coming from test.json): detect and plot.
sample_path = '/kaggle/working/stage_2_train_images/0d121525-812e-4ab2-b29f-ff04b3d97ffa.jpg'
img5 = mmcv.imread(sample_path)
result = inference_detector(model, img5)
show_result_pyplot(model, img5, result)
