In [1]:
import os

os.environ["KERAS_BACKEND"] = "tensorflow"  # @param ["tensorflow", "jax", "torch"]

from tensorflow import data as tf_data
import tensorflow_datasets as tfds
import keras
import keras_cv
import numpy as np
from keras_cv import bounding_box
import os
from keras_cv import visualization
import tqdm
import pandas as pd
import pydicom
import tensorflow as tf
import tensorflow_io as tfio

import warnings
warnings.filterwarnings('ignore')

2024-06-12 08:36:07.386800: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-12 08:36:07.387031: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-12 08:36:07.545033: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
BASE_DIR = '/kaggle/input/rsna-2024-lumbar-spine-degenerative-classification/'
TRAIN_DIR = BASE_DIR+'train_images/'
TEST_DIR = BASE_DIR+'test_images/'

PRETRAINED = 'efficientnetv2_s_imagenet'
BATCH_SIZE = 24

IMG_SIZE = [320,320]

In [3]:
studies = os.listdir(TEST_DIR)

In [4]:
labels = pd.read_csv(BASE_DIR+'train.csv')
labels.study_id = labels.study_id.astype(str)

conditions = np.unique(labels.columns[1:])
classes = []
for c in conditions:
    classes.append(c+'_normal')
    classes.append(c+'_moderate')
    classes.append(c+'_severe')
classes_map = {classes[i]:i for i in range(len(classes))}
class_mapping = {i:classes[i] for i in range(len(classes))}
N_CLASSES = len(class_mapping)

In [5]:
def prepare_data(studies):
    image_paths = []
    study_ids = []
    for study_id in studies:
        study_dir = TEST_DIR+study_id+'/'
        for series_id in os.listdir(study_dir):
            series_dir = study_dir+series_id+'/'
            for z in os.listdir(series_dir):
                path = series_dir+z
                study_ids.append(study_id)
                image_paths.append(path)
    
    return tf.data.Dataset.from_tensor_slices((np.array(image_paths), np.array(study_ids)))

In [6]:
test_data = prepare_data(studies)

In [7]:
def load_image(image_path):
    raw_image = tf.io.read_file(image_path)
    sp = tf.strings.split(tf.gather(tf.strings.split(image_path, 'images/'), 1), '/')
    N = tf.size(sp)
    LEN = tf.strings.length(tf.gather(sp, 0))+tf.strings.length(tf.gather(sp, 2))
    
    # Add missing file metadata to avoid warnnigs flooding
    if   LEN==12: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==13: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x92\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==14: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==15: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x94\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==16: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==17: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x96\x00\x00\x00\x02\x00\x01\x00')
    elif LEN==18: raw_image = tf.strings.regex_replace(raw_image, pattern=b'DICM\x02\x00\x01\x00', rewrite=b'DICM\x02\x00\x00\x00UL\x04\x00\x98\x00\x00\x00\x02\x00\x01\x00')
    
    img = tfio.image.decode_dicom_image(raw_image, scale='auto', dtype=tf.float32)
    m, M=tf.math.reduce_min(img), tf.math.reduce_max(img)
    img = (tf.image.grayscale_to_rgb(img)-m)/(M-m)
    img = tf.image.resize(img, IMG_SIZE)[0]
    return img

def load_dataset(image_path, study_id):
    image = load_image(image_path)
    return {"images": tf.cast(image, tf.float32), "study_id":study_id}

In [8]:
test_ds = test_data.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.ragged_batch(BATCH_SIZE, drop_remainder=True)

In [9]:
def dict_to_tuple(inputs):
    return inputs["images"], inputs["study_id"]

test_ds = test_ds.map(dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

In [10]:
backbone = keras_cv.models.EfficientNetV2Backbone.from_preset(PRETRAINED)
model = keras.Sequential(
    [
        keras.layers.Input(shape=(None, None, 3)),
        backbone,
        keras.layers.GlobalMaxPooling2D(),
        keras.layers.Dropout(rate=0.3),
        keras.layers.Dense(N_CLASSES, activation="sigmoid"),
    ]
)
model.load_weights("/kaggle/input/rsna-keras-training-starter/best_model.weights.h5")

Attaching 'config.json' from model 'keras/efficientnetv2/keras/efficientnetv2_s_imagenet/2' to your Kaggle notebook...
Attaching 'config.json' from model 'keras/efficientnetv2/keras/efficientnetv2_s_imagenet/2' to your Kaggle notebook...
Attaching 'model.weights.h5' from model 'keras/efficientnetv2/keras/efficientnetv2_s_imagenet/2' to your Kaggle notebook...


In [11]:
Pred = {}
preds = []
study_ids = []
for img, study_id in test_ds.as_numpy_iterator():
    preds.append(model.predict(img))
    study_ids.append(study_id)
    
preds = np.concatenate(preds)
study_ids = np.concatenate(study_ids)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


In [12]:
for p, study_id in zip(preds, study_ids):
    study_id = str(int(study_id))
    if study_id in Pred: Pred[study_id].append(p)
    else: Pred[study_id] = []

In [13]:
submission = pd.DataFrame(columns=['normal_mild', 'moderate', 'severe'])

for study_id in Pred:
    pred = np.array(Pred[study_id]).max(axis=0)
    for i in range(N_CLASSES):
        condition = '_'.join(class_mapping[i].split('_')[:-1])
        intensity = class_mapping[i].split('_')[-1].replace('normal', 'normal_mild')
        row_id = study_id+'_'+condition
        submission.loc[row_id, intensity] = pred[i]

submission = submission.reset_index().rename(columns={'index':'row_id'})

In [14]:
submission

Unnamed: 0,row_id,normal_mild,moderate,severe
0,44036939_left_neural_foraminal_narrowing_l1_l2,0.975957,0.136873,0.004486
1,44036939_left_neural_foraminal_narrowing_l2_l3,0.933156,0.321707,0.032803
2,44036939_left_neural_foraminal_narrowing_l3_l4,0.606356,0.560908,0.084951
3,44036939_left_neural_foraminal_narrowing_l4_l5,0.437151,0.578838,0.162822
4,44036939_left_neural_foraminal_narrowing_l5_s1,0.566973,0.355131,0.275664
5,44036939_left_subarticular_stenosis_l1_l2,0.97336,0.167179,0.05913
6,44036939_left_subarticular_stenosis_l2_l3,0.92639,0.33872,0.132133
7,44036939_left_subarticular_stenosis_l3_l4,0.773044,0.410251,0.34347
8,44036939_left_subarticular_stenosis_l4_l5,0.460911,0.448591,0.459029
9,44036939_left_subarticular_stenosis_l5_s1,0.734016,0.346567,0.160652


In [15]:
submission.to_csv('submission.csv', index=False)