In [1]:
!pip install pydicom
!pip install pylibjpeg
!pip install pylibjpeg-libjpeg
!pip install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg

[0mCollecting pylibjpeg
  Downloading pylibjpeg-1.4.0-py3-none-any.whl (28 kB)
Installing collected packages: pylibjpeg
Successfully installed pylibjpeg-1.4.0
[0mCollecting pylibjpeg-libjpeg
  Downloading pylibjpeg_libjpeg-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: pylibjpeg-libjpeg
Successfully installed pylibjpeg-libjpeg-1.3.4
Collecting pylibjpeg-openjpeg
  Downloading pylibjpeg_openjpeg-1.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: pylibjpeg-openjpeg
Successfully installed pylibjpeg-openjpeg-1.3.2
[0m

In [2]:
import os
import sys
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image
import cv2
import re
import gc
from tqdm import tqdm
import math

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

import skimage.transform as skTrans
from skimage import exposure

import albumentations as alb
from albumentations.pytorch import ToTensorV2

import pydicom as dicom

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import OneCycleLR

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold

import warnings
warnings.filterwarnings("ignore")

## Configs

In [3]:
# --- run-wide settings -------------------------------------------------------
SEED = 1927550
base_path = '/kaggle/input'
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# --- data geometry -----------------------------------------------------------
IMG_SIZE = 512   # default side length used by load_dicom when resizing a slice
tot_slice = 29   # number of slices sampled per vertebra for one voxel
mid_slice = 13   # position (within the sampled slices) whose mask is stored

# --- training hyper-parameters -----------------------------------------------
BATCH = 5
EPOCH = 7
CLASS = 13       # labels run from 0 to 12
folds = 5
hidden1 = 128
hidden2 = 64
best_acc = 0

# --- per-epoch metric logs (four independent lists) ---------------------------
trainlosslog, trainacclog, validlosslog, validacclog = [], [], [], []

In [4]:
# Maps the string form of each vertebra label ('0' .. '12') to its integer class.
vert_class = {str(label): label for label in range(13)}

## Load CSV

In [5]:
# Per-slice vertebra labels: one row per slice id together with its vertebra class.
vert_df = pd.read_csv(f'{base_path}/sagittal-preprocess/vert_list.csv')
vert_df

Unnamed: 0,id,vertebrae
0,1.2.826.0.1.3680043.1363_0,10.0
1,1.2.826.0.1.3680043.1363_1,10.0
2,1.2.826.0.1.3680043.1363_2,10.0
3,1.2.826.0.1.3680043.1363_3,10.0
4,1.2.826.0.1.3680043.1363_4,10.0
...,...,...
29827,1.2.826.0.1.3680043.26990_237,0.0
29828,1.2.826.0.1.3680043.26990_238,0.0
29829,1.2.826.0.1.3680043.26990_239,0.0
29830,1.2.826.0.1.3680043.26990_240,0.0


In [6]:
# Derive the study UID from each slice id. Ids have the form
# '<StudyInstanceUID>_<slice index>', so the part before the first underscore
# is the study UID.
# A single vectorised column assignment replaces the per-row chained assignment
# (vert_df['StudyInstanceUID'][idx] = ...), which pandas reports as
# SettingWithCopyWarning and which does not write through under copy-on-write.
vert_df['StudyInstanceUID'] = vert_df['id'].str.split('_').str[0]

In [7]:
# Display the frame to check the StudyInstanceUID column.
vert_df

Unnamed: 0,id,vertebrae,StudyInstanceUID
0,1.2.826.0.1.3680043.1363_0,10.0,1.2.826.0.1.3680043.1363
1,1.2.826.0.1.3680043.1363_1,10.0,1.2.826.0.1.3680043.1363
2,1.2.826.0.1.3680043.1363_2,10.0,1.2.826.0.1.3680043.1363
3,1.2.826.0.1.3680043.1363_3,10.0,1.2.826.0.1.3680043.1363
4,1.2.826.0.1.3680043.1363_4,10.0,1.2.826.0.1.3680043.1363
...,...,...,...
29827,1.2.826.0.1.3680043.26990_237,0.0,1.2.826.0.1.3680043.26990
29828,1.2.826.0.1.3680043.26990_238,0.0,1.2.826.0.1.3680043.26990
29829,1.2.826.0.1.3680043.26990_239,0.0,1.2.826.0.1.3680043.26990
29830,1.2.826.0.1.3680043.26990_240,0.0,1.2.826.0.1.3680043.26990


In [8]:
# Cropping a voxel is only possible when the index of each image slice is known.
# With my stage 1 prediction, cropped-voxel preprocessed images cannot be obtained.
# To work around this, no voxel cropping is done here; prediction on the labels
# is done instead.

# Attach the per-study rows of train.csv to every slice row (left join on StudyInstanceUID).
train_df = pd.read_csv(f'{base_path}/rsna-2022-cervical-spine-fracture-detection/train.csv')
df = vert_df.merge(train_df, on='StudyInstanceUID', how='left')

In [19]:
# Convert the float vertebra labels (e.g. 10.0) to integer-valued strings (e.g. '10')
# so that they can be used as keys of vert_class.
df['vertebrae'] = (pd.to_numeric(df['vertebrae'], downcast='integer')).astype(str)
df

Unnamed: 0,id,vertebrae,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7
0,1.2.826.0.1.3680043.1363_0,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
1,1.2.826.0.1.3680043.1363_1,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
2,1.2.826.0.1.3680043.1363_2,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
3,1.2.826.0.1.3680043.1363_3,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
4,1.2.826.0.1.3680043.1363_4,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
29827,1.2.826.0.1.3680043.26990_237,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29828,1.2.826.0.1.3680043.26990_238,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29829,1.2.826.0.1.3680043.26990_239,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29830,1.2.826.0.1.3680043.26990_240,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1


In [20]:
# Distinct vertebra labels present after the conversion (sorted as strings).
np.unique(df['vertebrae'])

array(['0', '1', '10', '11', '12', '2', '3', '4', '5', '6', '7', '8', '9'],
      dtype=object)

## Drop Bad Scans

In [21]:
# Studies that are excluded from the dataset.
bad_scans = ['1.2.826.0.1.3680043.20574','1.2.826.0.1.3680043.29952']

# Remove every row that belongs to an excluded study and renumber the index.
# The result is assigned back to df: reset_index returns a new frame, so calling
# it without assignment only changed what the cell displayed, not df itself.
df = df[~df['StudyInstanceUID'].isin(bad_scans)].reset_index(drop=True)
df

Unnamed: 0,id,vertebrae,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7
0,1.2.826.0.1.3680043.1363_0,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
1,1.2.826.0.1.3680043.1363_1,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
2,1.2.826.0.1.3680043.1363_2,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
3,1.2.826.0.1.3680043.1363_3,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
4,1.2.826.0.1.3680043.1363_4,10,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
29827,1.2.826.0.1.3680043.26990_237,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29828,1.2.826.0.1.3680043.26990_238,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29829,1.2.826.0.1.3680043.26990_239,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
29830,1.2.826.0.1.3680043.26990_240,0,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1


## Keep only vertebrae 1 to 7

In [22]:
verts = np.unique(df['vertebrae'])
class_verts = [1, 2, 3, 4, 5, 6, 7]

# Labels whose integer class is not in class_verts. vert_class.get() makes a label
# that is missing from the mapping removable instead of raising KeyError.
remove_verts = np.unique([vert for vert in verts if vert_class.get(vert) not in class_verts])

# Filter once with isin and assign the re-indexed frame back to df; the original
# discarded the result of reset_index, so df kept its gapped index.
df = df[~df['vertebrae'].isin(remove_verts)].reset_index(drop=True)
df

Unnamed: 0,id,vertebrae,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7
0,1.2.826.0.1.3680043.1363_44,7,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
1,1.2.826.0.1.3680043.1363_46,7,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
2,1.2.826.0.1.3680043.1363_47,7,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
3,1.2.826.0.1.3680043.1363_48,7,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
4,1.2.826.0.1.3680043.1363_49,7,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
17007,1.2.826.0.1.3680043.26990_199,2,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
17008,1.2.826.0.1.3680043.26990_200,2,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
17009,1.2.826.0.1.3680043.26990_201,1,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1
17010,1.2.826.0.1.3680043.26990_202,2,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1


## Save cropped voxel images

In [23]:
# Root of the competition dataset; slice_len and load_dicom read train_images/ below it.
work_path = '/kaggle/input/rsna-2022-cervical-spine-fracture-detection'

In [24]:
def slice_len(ID):
    """Return how many directory entries the study ``ID`` has under ``train_images``."""
    study_dir = os.path.join(f'{work_path}/train_images', ID)
    return len(os.listdir(study_dir))

In [25]:
def load_dicom(uid, idx, size=IMG_SIZE):
    """Read one DICOM slice and return it resized and min-max scaled to uint8.

    Args:
        uid: StudyInstanceUID, used as the directory name under ``train_images``.
        idx: Slice file stem; the file that is read is ``{idx}.dcm``.
        size: Target width and height passed to ``cv2.resize``.

    Returns:
        ``np.uint8`` array resized to ``(size, size)`` whose values are scaled to
        0..255 using the slice's own minimum and maximum.
    """
    filename = f'{work_path}/train_images/{uid}/{idx}.dcm'

    # dcmread is the supported reader; read_file is a deprecated alias of it.
    img = dicom.dcmread(filename).pixel_array
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
    # Switch to float before subtracting the minimum so the subtraction cannot
    # wrap around in the stored integer dtype when the value range is wide.
    img = img.astype(np.float64)
    img = img - np.min(img)
    img = img / (np.max(img) + 1e-7)  # epsilon avoids division by zero on constant images

    return (img * 255).astype(np.uint8)

In [None]:
# One record per saved voxel: [id, StudyInstanceUID, vertebra name, relative save path].
new_voxel_df = []
count = 0
# unique() keeps first-appearance order, so studies are processed in the same
# order on every run (a set gave an arbitrary order).
train_df_uid_list = list(df['StudyInstanceUID'].unique())

# Create the output directory; exist_ok makes re-running the cell harmless without
# hiding other errors (e.g. permission problems) the way a bare except did.
# NOTE(review): the directory is created relative to the working directory while the
# files below are written under /kaggle/working — assumes those are the same place.
os.makedirs('voxel_image', exist_ok=True)

for uid in tqdm(train_df_uid_list):
    this_df = df[df['StudyInstanceUID']==uid].reset_index(drop=True)

    for each_vert in class_verts:
        each_vert_df = this_df[this_df['vertebrae']==f'{each_vert}'].reset_index(drop=True)
        if len(each_vert_df) < 1:
            # No slice is labelled with this vertebra: skip it, but keep going so the
            # remaining vertebrae of the study are still exported.
            continue
        # tot_slice evenly spaced row positions across this vertebra's slices
        # (positions repeat when there are fewer rows than tot_slice).
        slices = np.linspace(0, len(each_vert_df)-1, tot_slice, dtype=int)

        cropped_voxel = []
        for each_slice in slices:
            # ids look like '<uid>_<slice index>'; the part after '_' names the .dcm file
            index = each_vert_df.loc[each_slice]['id'].split('_')[1]
            image = load_dicom(uid, index)
            cropped_voxel.append(image)

        # Append the last channel of the segmentation stored for the slice at
        # position mid_slice of the sampled slices.
        # NOTE(review): the mask goes into the same list as the images and the list is
        # saved as one array — presumably the mask has the same shape as the resized
        # slices; confirm.
        mask_id = each_vert_df.loc[slices[mid_slice]]['id']
        mask =  np.load(f'{base_path}/axial-preprocessed-windowing/segmentations/{mask_id}.npz')['arr_0'][:,:,-1]
        cropped_voxel.append(mask)

        save_path = f'voxel_image/{uid}_C{each_vert}.npz'
        with open(f'/kaggle/working/{save_path}', 'wb') as file:
            np.savez_compressed(file, cropped_voxel)

        del cropped_voxel
        new_voxel_df.append([f'{uid}_C{each_vert}', uid, f'C{each_vert}', save_path])

  1%|          | 1/87 [00:12<18:10, 12.69s/it]

In [None]:
import shutil

output_filename = 'voxel_image'
dir_name = 'voxel_image'
# Pack the directory into voxel_image.zip in the working directory.
shutil.make_archive(output_filename, 'zip', dir_name)

# Delete the uncompressed directory now that the archive exists. rmtree removes the
# directory and everything in it in one call, replacing the manual per-file loop
# followed by os.rmdir (which failed if anything other than plain files was left).
# NOTE(review): the archive is built from the relative 'voxel_image' while this is an
# absolute path — assumes the working directory is /kaggle/working.
path = '/kaggle/working/voxel_image'
shutil.rmtree(path)

In [None]:
# Write the index of saved voxels (one row per exported .npz file) to voxel_crop_df.csv.
# NOTE(review): the last column name is spelled 'iamge_path' (sic). It is left unchanged
# because the name is written into the CSV and readers of that file may depend on it —
# confirm before renaming to 'image_path'.
save_voxel_df = pd.DataFrame(new_voxel_df, columns=['id', 'StudyInstanceUID', 'vertebrae', 'iamge_path'])
save_voxel_df.to_csv('voxel_crop_df.csv', index=False)