In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!pip install pydicom

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pydicom
  Downloading pydicom-2.3.0-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 7.7 MB/s 
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.3.0


In [4]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.18.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 9.1 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.18.0


In [5]:
!pip install classification-models-3D
!pip install keras_applications

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting classification-models-3D
  Downloading classification_models_3D-1.0.6-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.3 MB/s 
[?25hInstalling collected packages: classification-models-3D
Successfully installed classification-models-3D-1.0.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras_applications
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 3.6 MB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8


In [4]:
!nvidia-smi

Thu Sep 29 11:25:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [8]:
import os
import glob
import random
import collections
import gc
import math

import numpy as np
import pandas as pd

import plotly.offline as py
import plotly.graph_objs as go

import pydicom
import matplotlib.pyplot as plt
import cv2
import scipy
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import backend as K
from tensorflow import keras
from tensorflow.keras import layers as L

from sklearn.model_selection import KFold, StratifiedKFold

In [6]:
#set desired image size and depth (number of patient's images to load)
class Config:
    img_size = 256
    depth = 128
    train_one_fold = True
    epochs = 100


IMG_PATH_TRAIN = '/content/gdrive/MyDrive/train_arrays/'

TRAIN_CSV_PATH = '/content/gdrive/MyDrive/hj/train_200.csv'
# TEST_CSV_PATH = '../input/rsna-2022-cervical-spine-fracture-detection/test.csv'

train_images = os.listdir(IMG_PATH_TRAIN)

train=pd.read_csv(TRAIN_CSV_PATH)
# test=pd.read_csv(TEST_CSV_PATH)


In [7]:
train['numpy_path'] = train['StudyInstanceUID'].apply(lambda x: f'{IMG_PATH_TRAIN}{x}.npy')

In [8]:
train.head(5)

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,total_fractures,numpy_path
0,1.2.826.0.1.3680043.6200,1,1,1,0,0,0,0,0,2,/content/gdrive/MyDrive/train_arrays/1.2.826.0...
1,1.2.826.0.1.3680043.27262,1,0,1,0,0,0,0,0,1,/content/gdrive/MyDrive/train_arrays/1.2.826.0...
2,1.2.826.0.1.3680043.21561,1,0,1,0,0,0,0,0,1,/content/gdrive/MyDrive/train_arrays/1.2.826.0...
3,1.2.826.0.1.3680043.12351,0,0,0,0,0,0,0,0,0,/content/gdrive/MyDrive/train_arrays/1.2.826.0...
4,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0,1,/content/gdrive/MyDrive/train_arrays/1.2.826.0...


In [9]:
class DataLoader(tf.keras.utils.Sequence):
    def __init__(self, df: pd.DataFrame, batch_size, resample_rate: float = None, steps_per_epoch: int = 10000, is_train=True, shuffle=True):
        self.is_train      = is_train
        self.numpy_path  = df.numpy_path
        self.df  = df
        self.batch_size = batch_size
        self.length = len(df)
        self.resample = resample_rate
        self.shuffle = shuffle
        self.steps_per_epoch= steps_per_epoch
        
    def __len__(self):
        return  min(int(np.ceil(self.length / float(self.batch_size))), self.steps_per_epoch)
    
    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)
            self.numpy_path  = self.df.numpy_path
    
    def __getitem__(self, index):
                  
        if self.is_train:         
            
            batch_x = []
            batch_y = []
            
            targets = self.df[['patient_overall', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']]

            for i in range(self.batch_size):
                cur_ind = self.batch_size*index + i
                if cur_ind < self.length:
                    batch_x.append(np.load(self.numpy_path.iloc[cur_ind]))
                    batch_y.append(targets.iloc[cur_ind])
              
            if self.resample is not None:
                n_images = batch_x[0].shape[0]
                im_ids = sorted(np.random.choice(list(range(n_images)), int(n_images * self.resample), replace=False))
                batch_x = np.array(batch_x)[:,im_ids]
                   
            #return np.array(batch_x), np.expand_dims(np.array(batch_y), -1).astype(np.float32)
            return np.array(batch_x), np.array(batch_y).astype(np.float32)

        else:
            batch_x = []
            for i in range(self.batch_size):
                cur_ind = self.batch_size*index + i
                if cur_ind < self.length:
                    batch_x.append(np.load(self.numpy_path.iloc[cur_ind]))
            
            return np.array(batch_x)
              



In [14]:
def weighted_loss(y_true, y_pred):

    weighted_losses = {
        '-' : tf.constant([7, 1, 1, 1, 1, 1, 1, 1], dtype=tf.float32),
        '+' : tf.constant([14, 2, 2, 2, 2, 2, 2, 2], dtype=tf.float32)
    }
    
    loss = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(tf.expand_dims(y_true, -1),tf.expand_dims(y_pred,-1))
    weights  = y_true*weighted_losses['+'] + (1-y_true)*weighted_losses['-'] 
    
    loss = tf.reduce_mean(tf.reduce_sum(loss * weights, axis=1)) / tf.reduce_sum(weights)
    return loss
    
    

In [11]:
from classification_models_3D.tfkeras import Classifiers

model_arch = 'resnet50' 
#model_arch = 'resnet34'

def concat_max_avg(tensor):
    _avr = L.GlobalAveragePooling3D()(tensor)
    _max = L.GlobalMaxPooling3D()(tensor)
    return L.Concatenate()([_avr, _max])

def create_model(input_shape, num_classes):
    inputs = L.Input((*input_shape, 1), name='inputs')
    x = L.Rescaling(1/255.)(inputs)
    x = L.Conv3D(3, (1, 1, 1), padding='same')(x)
    
    net, preprocess_input = Classifiers.get(model_arch)
    #x = preprocess_input(x)
    x = net(input_shape=(*input_shape, 3), include_top=False, weights='imagenet')(x)
    
    x = concat_max_avg(x)
    x = L.Dropout(rate=0.1)(x)
    x = L.Dense(512, activation='relu')(x)
    x = L.Dropout(rate=0.1)(x)
    x = L.Dense(128, activation='relu')(x)
    x = L.Dropout(rate=0.1)(x)
    x = L.Dense(32, activation='relu')(x)
    
    outputs = L.Dense(num_classes, activation='sigmoid')(x)
    model  = tf.keras.Model(inputs, outputs)
   
    model.compile(loss= weighted_loss, #'binary_crossentropy',
                      optimizer=tf.optimizers.Adam(learning_rate=1e-3),
                       metrics=['AUC',tfa.metrics.F1Score(num_classes=num_classes, threshold=0.25, average='macro')])
    return model

create_model((None, Config.img_size, Config.img_size), num_classes = 7).summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputs (InputLayer)            [(None, None, 256,   0           []                               
                                256, 1)]                                                          
                                                                                                  
 rescaling (Rescaling)          (None, None, 256, 2  0           ['inputs[0][0]']                 
                                56, 1)                                                            
                                                                                                  
 conv3d (Conv3D)                (None, None, 256, 2  6           ['rescaling[0][0]']              
                                56, 3)                                                      

In [12]:
LR_MAX = 1e-3
LR_DECAY = 0.80

def lr_func(epoch):   
    lr = np.pi/2 * LR_DECAY**epoch 
    lr = np.sin(lr) * LR_MAX
    return lr


LR = tf.keras.callbacks.LearningRateScheduler(lr_func, verbose = True)

In [13]:
import keras
from datetime import datetime
logdir = "/content/gdrive/MyDrive/checkpoint-latest/logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)


In [14]:
n_folds = 5
pred_columns = ['patient_overall_pred', 'C1_pred', 'C2_pred', 'C3_pred', 'C4_pred', 'C5_pred', 'C6_pred', 'C7_pred']
#pred_columns = ['C1_pred', 'C2_pred', 'C3_pred', 'C4_pred', 'C5_pred', 'C6_pred', 'C7_pred']
train[pred_columns] = 0

estop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=0, mode='min',restore_best_weights=True)
kf_split = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)

for fold,(tr_idx, val_idx) in enumerate(kf_split.split(train, train.patient_overall)):
    
    X_train = train.iloc[tr_idx]
    X_test = train.iloc[val_idx]
    
    train_data = DataLoader(X_train, 2 , steps_per_epoch = 50, resample_rate = 0.8, shuffle = True)
    test_data = DataLoader(X_test, 2, resample_rate = 0.8, shuffle = False)
    
    path_to_checkpoints = f"/content/gdrive/MyDrive/checkpoints/_{model_arch}_best_fold_{fold+1}.hdf5"
    checkpointer = tf.keras.callbacks.ModelCheckpoint("/content/gdrive/MyDrive/checkpoint-latest/checkpoints", monitor="val_loss", mode='min', verbose=1, save_best_only=True)

    #model = get_model()
    model = create_model((None, Config.img_size, Config.img_size), num_classes = 8)

    print(f'Training Model Fold {fold+1}...')
    
    history = model.fit(
        train_data,
        epochs = Config.epochs,
        callbacks = [estop, LR, checkpointer,tensorboard_callback],
        validation_data = (test_data),
    )   
    
    del X_train, train_data
    
    model.save(f"/content/gdrive/MyDrive/checkpoints/_{model_arch}_end_fold_{fold+1}")
    
    test_preds = model.predict(test_data)
    train.loc[val_idx, pred_columns] = test_preds
    
    del X_test,test_preds, test_data
    K.clear_session()
    gc.collect()
    
    if Config.train_one_fold: break

Training Model Fold 1...

Epoch 1: LearningRateScheduler setting learning rate to 0.001.
Epoch 1/100
Epoch 1: val_loss improved from inf to 2.52508, saving model to /content/gdrive/MyDrive/checkpoint-latest/checkpoints

Epoch 2: LearningRateScheduler setting learning rate to 0.0009510565162951536.
Epoch 2/100
Epoch 2: val_loss did not improve from 2.52508

Epoch 3: LearningRateScheduler setting learning rate to 0.0008443279255020151.
Epoch 3/100
Epoch 3: val_loss improved from 2.52508 to 0.33453, saving model to /content/gdrive/MyDrive/checkpoint-latest/checkpoints

Epoch 4: LearningRateScheduler setting learning rate to 0.000720309024887907.
Epoch 4/100
Epoch 4: val_loss improved from 0.33453 to 0.30951, saving model to /content/gdrive/MyDrive/checkpoint-latest/checkpoints

Epoch 5: LearningRateScheduler setting learning rate to 0.0005999176501510881.
Epoch 5/100
Epoch 5: val_loss did not improve from 0.30951

Epoch 6: LearningRateScheduler setting learning rate to 0.00049228987725745

**Inference**



In [10]:

IMG_PATH_TEST = '/content/gdrive/MyDrive/hj/test_arrays_3/'

IMG_PATH = '/content/gdrive/MyDrive/hj/test_images_3/'

TEST_CSV_PATH = '/content/gdrive/MyDrive/hj/test_3.csv'


test_images = os.listdir(IMG_PATH)


test=pd.read_csv(TEST_CSV_PATH)

In [11]:
test_patients = sorted(os.listdir(IMG_PATH))

In [12]:
test = pd.DataFrame({'StudyInstanceUID': test_patients})

test['StudyInstanceUID'] = test_patients
test['numpy_path'] = test['StudyInstanceUID'].apply(lambda x: f'{IMG_PATH_TEST}{x}.npy')
test

Unnamed: 0,StudyInstanceUID,numpy_path
0,1.2.826.0.1.3680043.15151,/content/gdrive/MyDrive/hj/test_arrays_3/1.2.8...
1,1.2.826.0.1.3680043.28990,/content/gdrive/MyDrive/hj/test_arrays_3/1.2.8...
2,1.2.826.0.1.3680043.8458,/content/gdrive/MyDrive/hj/test_arrays_3/1.2.8...


In [15]:
model = tf.keras.models.load_model('/content/gdrive/MyDrive/checkpoints/_resnet50_end_fold_1', custom_objects = {'weighted_loss': weighted_loss})

In [16]:
data_test = DataLoader(test, 4, shuffle = False, is_train = False)

In [17]:
pred = model.predict(data_test)


In [18]:
def inference(df, preds):
    cols = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']
    cols = ['patient_overall', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']
    patients = df.StudyInstanceUID.to_list()
    
    df_sub = pd.DataFrame()
    
    for i, p in enumerate(patients):
        scores = list(preds[i])
        print(scores)
        if len(scores) < 8:
            scores.append(preds[i].max() + preds[i].mean())
        
        df_temp = pd.DataFrame({'StudyInstanceUID': [p]*len(cols), 'prediction_type': cols, 'fractured': scores})
        df_sub = pd.concat([df_sub, df_temp])
        
        del df_temp
    
    df_sub['row_id'] = df_sub['StudyInstanceUID'] + '_' + df_sub['prediction_type']
    
    return df_sub[['row_id', 'fractured']].reset_index(drop = True)

In [19]:
df_sub = inference(test, pred)

[0.6623369, 0.14846219, 0.10944817, 0.06489923, 0.085810535, 0.1513077, 0.3350138, 0.50822896]
[0.6623334, 0.14847547, 0.10945757, 0.06490802, 0.08581772, 0.15131795, 0.33501837, 0.508227]
[0.6623359, 0.14847027, 0.10945337, 0.064904004, 0.08581395, 0.15131491, 0.3350165, 0.5082282]


In [20]:
df_sub.to_csv('submission.csv', index=False)
df_sub

Unnamed: 0,row_id,fractured
0,1.2.826.0.1.3680043.15151_patient_overall,0.662337
1,1.2.826.0.1.3680043.15151_C1,0.148462
2,1.2.826.0.1.3680043.15151_C2,0.109448
3,1.2.826.0.1.3680043.15151_C3,0.064899
4,1.2.826.0.1.3680043.15151_C4,0.085811
5,1.2.826.0.1.3680043.15151_C5,0.151308
6,1.2.826.0.1.3680043.15151_C6,0.335014
7,1.2.826.0.1.3680043.15151_C7,0.508229
8,1.2.826.0.1.3680043.28990_patient_overall,0.662333
9,1.2.826.0.1.3680043.28990_C1,0.148475


In [22]:
!cp "/content/resnet50_end_fold_2" -r "/content/gdrive/MyDrive/checkpoints"
