# Load Packages

In [2]:
# !pip install monai --no-index --find-links=file:///kaggle/input/neckfracpackages/monai
# !pip install pydicom --no-index --find-links=file:///kaggle/input/neckfracpackages/pydicom
# !pip install python-gdcm --no-index --find-links=file:///kaggle/input/neckfracpackages/pydicom
# !pip install pylibjpeg --no-index --find-links=file:///kaggle/input/neckfracpackages/pydicom
# !pip install torchvision --no-index --find-links=file:///kaggle/input/neckfracpackages/torchvision

In [3]:
!pip install warmup_scheduler
!pip install monai
!pip install -U "python-gdcm" pydicom pylibjpeg
!pip install -U torchvision
!pip install opencv-python
!pip install albumentations

[0m

In [7]:
!pip install -U pydicom
!pip install albumentations
!pip install monai

[0mCollecting monai
  Using cached monai-1.0.0-202209161346-py3-none-any.whl (1.1 MB)
Installing collected packages: monai
Successfully installed monai-1.0.0
[0m

In [5]:
# Libraries
import os
import re
import gc
import cv2
import wandb
import PIL
from PIL import Image
from sklearn.metrics import classification_report
import random
import math
import shutil
from glob import glob
from tqdm import tqdm
from pprint import pprint
from time import time
import warnings
import itertools
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
from matplotlib import cm
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.patches import Rectangle
from IPython.display import display_html
plt.rcParams.update({'font.size': 16})

# .dcm handling
import pydicom
# import nibabel as nib
from pydicom.pixel_data_handlers.util import apply_voi_lut

# Environment check
warnings.filterwarnings("ignore")

In [8]:
# PyTorch
import torch
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
import torchvision 
import torchvision.transforms as transforms
# from warmup_scheduler import GradualWarmupScheduler
import albumentations

from sklearn.model_selection import GroupKFold, train_test_split, StratifiedKFold
from sklearn.metrics import roc_auc_score, cohen_kappa_score, confusion_matrix

# MONAI 3D
from monai.transforms import Randomizable, apply_transform
from monai.transforms import Compose, Resize, ScaleIntensity, ToTensor, RandAffine
from monai.networks.nets import densenet

### Helper Functions

In [9]:
def read_data():
    '''Reads in all .csv files.'''
    
    train = pd.read_csv(f"{INPUT}/train.csv")
    train_bbox = pd.read_csv(f"{INPUT}/train_bounding_boxes.csv")
    test = pd.read_csv(f"{INPUT}//test.csv")
    # ss = pd.read_csv(f"{INPUT}//sample_submission.csv")
    
    return train, train_bbox, test#, ss

def get_csv_info(csv, name="Default"):
    '''Prints main information for the speciffied .csv file.'''
    
    print(f"=== {name} ===")
    print(f"Shape:", csv.shape)
    print(f"Missing Values:", csv.isna().sum().sum(), "total missing datapoints.")
    print("Columns:", list(csv.columns), "\n")
    
    display_html(csv.head())
    print("\n")
    
def set_seed(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)  
    torch.cuda.manual_seed(seed)  
    torch.cuda.manual_seed_all(seed)  
    torch.backends.cudnn.deterministic = True
    
def atoi(text):
    return int(text) if text.isdigit() else text

def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [ atoi(c) for c in re.split(r'(\d+)', text) ]

# Configure

In [3]:
# Environment check
warnings.filterwarnings("ignore")
# os.environ["WANDB_SILENT"] = "true"
CONFIG = {'competition': 'RSNA_SpineFracture', '_wandb_kernel': 'aot'}

# set seed
set_seed(0)

# set GPU
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", DEVICE)

# Kaggle Notebook Setup
DF_SIZE = 0.01
N_SPLITS = 5
KERNEL_TYPE = 'densenet121_baseline'
IMG_RESIZE = 100
STACK_RESIZE = 50
use_amp = False
NUM_WORKERS = 1
BATCH_SIZE = 2
LR = 0.05
OUT_DIM = 8
EPOCHS = 2

target_cols = ['C1', 'C2', 'C3', 
               'C4', 'C5', 'C6', 'C7',
               'patient_overall']

competition_weights = {
    '-' : torch.tensor([1, 1, 1, 1, 1, 1, 1, 7], dtype=torch.float, device=DEVICE),
    '+' : torch.tensor([2, 2, 2, 2, 2, 2, 2, 14], dtype=torch.float, device=DEVICE),
}

INPUT = "../input/rsna-2022-cervical-spine-fracture-detection"
CHECKPOINT_PATH='./densenet121_baseline_best_fold0.pth'

NameError: name 'warnings' is not defined

In [11]:
train, train_bbox, test_df = read_data()

In [12]:
# test RSNA dataset

class RSNADataset_test(Dataset, Randomizable):
    
    def __init__(self, csv, mode, transform=None):
        self.csv = csv
        self.mode = mode
        self.transform = transform
        
    def __len__(self):
        return self.csv.shape[0]
    
    def randomize(self) -> None:
        '''-> None is a type annotation for the function that states 
        that this function returns None.'''
        
        MAX_SEED = np.iinfo(np.uint32).max + 1
        self.seed = self.R.randint(MAX_SEED, dtype="uint32")
        
    def __getitem__(self, index):
        # Set Random Seed
        self.randomize()
        
        dt = self.csv.iloc[index, :]
        study_paths = glob(f"{INPUT}/train_images/{dt.StudyInstanceUID}/*")
        study_paths.sort(key=natural_keys)
        
        # Load images
        study_images = [cv2.imread(path)[:,:,::-1] for path in study_paths]
        # Stack all scans into 1
        stacked_image = np.stack([img.astype(np.float32) for img in study_images], 
                                 axis=2).transpose(3,0,1,2)
        
        # Apply transforms
        if self.transform:
            if isinstance(self.transform, Randomizable):
                self.transform.set_random_state(seed=self.seed)
                
            stacked_image = apply_transform(self.transform, stacked_image)
        
        # Determine output of dataset
        if self.mode=="test":
            return {"image": stacked_image,
                   "patient":dt.StudyInstanceUID}
        else:
            targets = torch.tensor(dt[target_cols]).float()
            return {"image": stacked_image,
                    "targets": targets}

In [13]:
# send the data to GPU
def data_to_device(data):
    image, patient = data.values()
    return image.to(DEVICE), patient

In [14]:
# transform
test_transforms = Compose([ScaleIntensity(), 
                          Resize((IMG_RESIZE, IMG_RESIZE, STACK_RESIZE)), 
                          ToTensor()])

In [15]:
# Instantiate Dataset object
test_dataset = RSNADataset_test(csv=train, mode="test", transform=test_transforms)
# The Dataloader
test_dataloader = DataLoader(test_dataset, batch_size=3, shuffle=False)

# Load trained model

In [16]:
%%capture
model = densenet.densenet121(spatial_dims=3, in_channels=3,
                                 out_channels=OUT_DIM)
model.class_layers.out = nn.Sequential(nn.Linear(in_features=1024, out_features=OUT_DIM), 
                                           nn.Softmax(dim=1))
model.to(DEVICE)

DenseNet121(
  (features): Sequential(
    (conv0): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(2, 2, 2), padding=(3, 3, 3), bias=False)
    (norm0): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool3d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (layers): Sequential(
          (norm1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv3d(64, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
          (norm2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv3d(128, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        )
      )
      (denselayer2): _DenseLayer(
        (layers): Sequential(
 

In [17]:
%%capture
# Load checkpoint
if torch.cuda.is_available():
    checkpoint = torch.load(CHECKPOINT_PATH)
else:
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=torch.device('cpu'))

# Load states
model.load_state_dict(checkpoint)

# Evaluation mode
model.eval()
model.to(DEVICE)

DenseNet121(
  (features): Sequential(
    (conv0): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(2, 2, 2), padding=(3, 3, 3), bias=False)
    (norm0): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool3d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (layers): Sequential(
          (norm1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv3d(64, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
          (norm2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv3d(128, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
        )
      )
      (denselayer2): _DenseLayer(
        (layers): Sequential(
 

# Inference

In [None]:
test_preds = []
t0 = time()
for i, data in enumerate(test_dataloader):
        print(f'Iteration {i+1}/{len(test_dataloader)}')
        # Send to device
        imgs, patient = data_to_device(data)
        
        # Make predictions
        preds = model(imgs)
        
        # output to device
        test_preds.append(preds.detach().cpu().numpy())
        
tf = time()
inference_time = tf-t0
print(f"""
    Inference time = {inference_time/3600} hrs
    Number of predictions = {len(test_preds)}
    """)

[2022-10-15 15:29:11.779 pytorch-1-10-gpu-py-ml-g5-12xlarge-d2e622e224858f9cf0efc077a1e9:46 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2022-10-15 15:29:11.897 pytorch-1-10-gpu-py-ml-g5-12xlarge-d2e622e224858f9cf0efc077a1e9:46 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
Iteration 1/673
Iteration 2/673
Iteration 3/673
Iteration 4/673
Iteration 5/673
Iteration 6/673
Iteration 7/673
Iteration 8/673
Iteration 9/673
Iteration 10/673
Iteration 11/673
Iteration 12/673
Iteration 13/673
Iteration 14/673
Iteration 15/673
Iteration 16/673
Iteration 17/673
Iteration 18/673
Iteration 19/673
Iteration 20/673
Iteration 21/673
Iteration 22/673
Iteration 23/673
Iteration 24/673
Iteration 25/673
Iteration 26/673
Iteration 27/673
Iteration 28/673
Iteration 29/673
Iteration 30/673
Iteration 31/673
Iteration 32/673
Iteration 33/673
Iteration 34/673
Iteration 35/673
Iteration 36/673
Iteration 37/673
Iteration 38/673


In [33]:
preds = []
for i in test_preds:
    for j in i:
        preds.append(j)

## Process predictions

In [38]:
def process_test(df, preds):
    cols = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'patient_overall']
    patients = df.StudyInstanceUID.to_list()
    
    df_sub = pd.DataFrame()
    
    for i, p in enumerate(patients):
        scores = list(preds[i])
        if len(scores) < 8:
            scores.append(preds[i].max() + preds[i].mean())
        
        df_temp = pd.DataFrame({'StudyInstanceUID': [p]*len(cols), 'prediction_type': cols, 'fractured': scores})
        df_sub = pd.concat([df_sub, df_temp])
        
        del df_temp
    
    df_sub['row_id'] = df_sub['StudyInstanceUID'] + '_' + df_sub['prediction_type']
    
    return df_sub[['row_id', 'fractured']].reset_index(drop = True)
    

In [39]:
df_sub = process_test(train,preds)

In [42]:
train_melt = pd.melt(train,
                     id_vars=['StudyInstanceUID'],
                     value_vars = ['patient_overall','C1','C2','C3','C4','C5','C6','C7'],
                     value_name = 'actual',
                    )
train_melt['row_id'] = train_melt['StudyInstanceUID'] + '_' + train_melt['variable']
train_melt = train_melt.drop(columns = ['StudyInstanceUID','variable'])
train_melt[train_melt.row_id.str.contains('1.2.826.0.1.3680043.6200',regex=False)]

Unnamed: 0,actual,row_id
0,1,1.2.826.0.1.3680043.6200_patient_overall
2019,1,1.2.826.0.1.3680043.6200_C1
4038,1,1.2.826.0.1.3680043.6200_C2
6057,0,1.2.826.0.1.3680043.6200_C3
8076,0,1.2.826.0.1.3680043.6200_C4
10095,0,1.2.826.0.1.3680043.6200_C5
12114,0,1.2.826.0.1.3680043.6200_C6
14133,0,1.2.826.0.1.3680043.6200_C7


In [None]:
# df_sub = process_test(train,preds)
# df_sub.to_csv('submission.csv', index=False)
# df_sub

In [47]:
model_results = pd.merge(train_melt,df_sub, on ='row_id',how='outer')
model_results.head()

Unnamed: 0,actual,row_id,fractured
0,1,1.2.826.0.1.3680043.6200_patient_overall,0.0
1,1,1.2.826.0.1.3680043.27262_patient_overall,0.0
2,1,1.2.826.0.1.3680043.21561_patient_overall,0.0
3,0,1.2.826.0.1.3680043.12351_patient_overall,1.229994e-15
4,1,1.2.826.0.1.3680043.1363_patient_overall,0.0


In [53]:
model_results.to_csv(f"{CHECKPOINT_PATH[2:-4]}_train_results.csv",index=False)

In [6]:
import pandas as pd
model_results = pd.read_csv(f'./{CHECKPOINT_PATH[2:-4]}_train_results.csv')

In [127]:
def define_eval_metrics(df,verbose = False):
    cls_report = classification_report(y_true=df.actual, y_pred = df.fractured, zero_division=0, output_dict=True)

    recall = cls_report['1']['recall']
    precision = cls_report['1']['precision']

    tn, fp, fn, tp = confusion_matrix(y_true=df.actual, y_pred = df.fractured).ravel()

    fpr = fp / (fp+tn)
    fnr = fn / (fn+tn)

    pred_pos_rate = np.mean(df.fractured == 1)
    actual_pos_rate = np.mean(df.actual == 1)

    if verbose:

        print('\nClassification Report')
        pprint(cls_report)

        print(f"""
        TN: {tn}
        FP: {fp}
        FN: {fn}
        TP: {tp}
        """)

        # print FPR and FNR
        print(f"FPR: {fpr :.4}")
        print(f"FNR: {fnr :.4}")

        # value count + value count normalized for predictions.
        print('Number of fractured vs non-fractured predictions')
        print(df.fractured.value_counts())
        print('Ratio of fractured vs non-fractured predictions')
        print(df.fractured.value_counts(normalize=True))

        # What is the predicted positive fracture rate compared to the actual?
        print(f"""
        Predicted positive fracture rate: {np.round(pred_pos_rate,4)}
        Actual fracture rate {np.round(actual_pos_rate,4)}"""
        )
    
    return([recall,precision,tn,fp,fn,tp,fpr,fnr])

def eval_model(df):
    
    try:
        print(f'Average inference time : {inference_time/(len(df)/8)} s per patient')
    except:
        print('No time data available')

    # initialize patient and vertebrae df
    df['fractured'] = round(df.fractured)
    patient_df = df[df.row_id.str.contains("patient_overall")]
    vert_df =  df[~df.row_id.str.contains("patient_overall")]

    # store eval metrics
    eval_metrics = dict()
    eval_metrics['eval_metric'] = ['recall','precision','tn','fp','fn','tp','fpr','fnr']

    print(f"{'*' * 18} Evaluating model at patient-level {'*' * 18}")
    eval_metrics['patient_level'] = define_eval_metrics(patient_df)

    print(f"{'*' * 18} Evaluating model at vertebrae-level {'*' * 18}")
    eval_metrics['vertebrae_level'] = define_eval_metrics(vert_df)
    
    # which vertebrae are missed the most? FP FN?
    vert_df.loc[:,'vertebrae'] = vert_df.row_id.str.split('_',).apply(lambda x: x[-1])

    for vertebra in vert_df.vertebrae.unique():
        print(f"{'*'*18} {vertebra} {'*'*18}")
        eval_metrics[vertebra] = define_eval_metrics(
            vert_df.loc[vert_df.vertebrae == vertebra])

    
    # issues with patient overall not match patient vertebrae prediction?
    
    return(pd.DataFrame.from_dict(eval_metrics))


In [128]:
eval_model(model_results)

No time data available
****************** Evaluating model at patient-level ******************
****************** Evaluating model at vertebrae-level ******************
****************** C1 ******************
****************** C2 ******************
****************** C3 ******************
****************** C4 ******************
****************** C5 ******************
****************** C6 ******************
****************** C7 ******************


  fnr = fn / (fn+tn)


Unnamed: 0,eval_metric,patient_level,vertebrae_level,C1,C2,C3,C4,C5,C6,C7
0,recall,0.0,0.272161,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,precision,0.0,0.194651,0.0,0.0,0.0,0.0,0.0,0.0,0.194651
2,tn,1058.0,11063.0,1873.0,1734.0,1946.0,1911.0,1857.0,1742.0,0.0
3,fp,0.0,1626.0,0.0,0.0,0.0,0.0,0.0,0.0,1626.0
4,fn,961.0,1051.0,146.0,285.0,73.0,108.0,162.0,277.0,0.0
5,tp,0.0,393.0,0.0,0.0,0.0,0.0,0.0,0.0,393.0
6,fpr,0.0,0.128142,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,fnr,0.475978,0.086759,0.072313,0.141159,0.036157,0.053492,0.080238,0.137197,


In [120]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
pd.set_option("mode.chained_assignment", None)
from pprint import pprint