In [15]:
'''
https://www.kaggle.com/code/aiinradiology/rsna-model
'''

'\nhttps://www.kaggle.com/code/aiinradiology/rsna-model\n'

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import pydicom
import cv2 
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # 필요한 만큼만 메모리를 사용하도록 설정
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            
        # 특정 GPU에 연산을 할당
        tf.config.set_visible_devices(gpus[0], 'GPU')
        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        
    except RuntimeError as e:
        print(e)


1 Physical GPUs, 1 Logical GPU


In [3]:
train_df = pd.read_pickle('train_data.pkl')

In [4]:
train_df.head()

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,category,img_file_path,processed_images
46219,4075603869,1361414844,8,Spinal Canal Stenosis,L3/L4,180.215054,145.519713,1,spine/train_images/4075603869/1361414844/8.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
3641,314041963,277371578,12,Spinal Canal Stenosis,L1/L2,376.0573,114.386185,1,spine/train_images/314041963/277371578/12.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
25382,2288221191,1504692102,13,Right Subarticular Stenosis,L2/L3,292.706422,343.905636,1,spine/train_images/2288221191/1504692102/13.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
45671,4029974537,2684154860,37,Right Subarticular Stenosis,L5/S1,195.5,250.12844,1,spine/train_images/4029974537/2684154860/37.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
47246,4163587601,3190560134,41,Right Subarticular Stenosis,L5/S1,142.390671,161.399417,0,spine/train_images/4163587601/3190560134/41.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...


In [5]:
val_df= pd.read_pickle('val_data.pkl')
val_df.head()

Unnamed: 0,study_id,series_id,instance_number,condition,level,x,y,category,img_file_path,processed_images
11118,984887785,3830185810,6,Left Neural Foraminal Narrowing,L4/L5,161.786988,197.361854,2,spine/train_images/984887785/3830185810/6.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
44346,3895795003,4014059471,7,Spinal Canal Stenosis,L4/L5,144.424779,165.522124,1,spine/train_images/3895795003/4014059471/7.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
32761,2888359875,2906456873,13,Left Subarticular Stenosis,L4/L5,274.669492,257.084746,2,spine/train_images/2888359875/2906456873/13.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
43677,3859825215,3632938463,13,Left Neural Foraminal Narrowing,L5/S1,239.483871,303.362429,0,spine/train_images/3859825215/3632938463/13.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...
22510,2026420722,2823687618,4,Left Neural Foraminal Narrowing,L4/L5,175.771785,324.564481,1,spine/train_images/2026420722/2823687618/4.dcm,b'\x80\x04\x95\x83\x00\x00\x00\x00\x00\x00\x00...


In [6]:
print(f"The training data loaded sucessfully. The train data has {train_df.shape[0]} rows and {train_df.shape[1]} columns.")
print(f"The tvalidatin data loaded sucessfully. The validation data has {val_df.shape[0]} rows and {val_df.shape[1]} columns.")

The training data loaded sucessfully. The train data has 38925 rows and 10 columns.
The tvalidatin data loaded sucessfully. The validation data has 9732 rows and 10 columns.


In [7]:
# 사용자 정의 손실 함수를 정의합니다. keras 라이브러리에서 백킹된 함수를 가져올 것입니다. 
# 이 손실 함수는 우리가 완전히 채워지도록 보장합니다
# 이 경기의 요구사항(특정 체중에 대한 것)

In [8]:
import tensorflow.keras.backend as K
def weighted_log_loss(y_true, y_pred):
    class_weights = K.constant([1.0, 2.0, 4.0])
    y_true = K.cast(y_true, y_pred.dtype)
    weights = K.sum(y_true * class_weights, axis=-1)
    loss = K.sum(y_true * K.log(y_pred + K.epsilon()), axis=-1)
    weighted_loss = -weights * loss
    return K.mean(weighted_loss)

In [9]:
# train_data 및 val_data가 'img_file_path' 및 'category' 열이 있는 DataFrame이라고 가정
train_df['category'] = train_df['category'].astype(int)
val_df['category'] = val_df['category'].astype(int)

In [10]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 38925 entries, 46219 to 22933
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   study_id          38925 non-null  int64  
 1   series_id         38925 non-null  int64  
 2   instance_number   38925 non-null  int64  
 3   condition         38925 non-null  object 
 4   level             38925 non-null  object 
 5   x                 38925 non-null  float64
 6   y                 38925 non-null  float64
 7   category          38925 non-null  int32  
 8   img_file_path     38925 non-null  object 
 9   processed_images  38925 non-null  object 
dtypes: float64(2), int32(1), int64(3), object(4)
memory usage: 3.1+ MB


In [11]:
val_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9732 entries, 11118 to 30476
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   study_id          9732 non-null   int64  
 1   series_id         9732 non-null   int64  
 2   instance_number   9732 non-null   int64  
 3   condition         9732 non-null   object 
 4   level             9732 non-null   object 
 5   x                 9732 non-null   float64
 6   y                 9732 non-null   float64
 7   category          9732 non-null   int32  
 8   img_file_path     9732 non-null   object 
 9   processed_images  9732 non-null   object 
dtypes: float64(2), int32(1), int64(3), object(4)
memory usage: 798.3+ KB


In [12]:
# We will create custom data generator because the pakaged imagedata generator which comes with keras do not have option of creating check points.
# out data is big and by adding check points we will be saivng the progress of the model everytime it runs even if it fails to rune all the epochs (due various reasons such internet failure)

from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator

class DicomDataGenerator(Sequence):
    def __init__(self, dataframe, x_col, y_col, batch_size, target_size, shuffle=True, augment=False):
        self.dataframe = dataframe
        self.x_col = x_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.augment = augment
        self.on_epoch_end()
        
        self.datagen = ImageDataGenerator(
            rotation_range=20,
            zoom_range=0.15,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.15,
            horizontal_flip=True,
            fill_mode="nearest"
        )

    def __len__(self):
        return int(np.floor(len(self.dataframe) / self.batch_size))

    def __getitem__(self, index):
        batch = self.dataframe.iloc[index*self.batch_size:(index+1)*self.batch_size]
        x, y = self.__data_generation(batch)
        return x, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.dataframe))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch):
        images = []
        labels = []

        for _, row in batch.iterrows():
            dicom_path = row[self.x_col]
            dicom = pydicom.dcmread(dicom_path)
            image = dicom.pixel_array
            image = cv2.resize(image, self.target_size)
            image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
            image = image.astype('float32') / 255.0
            image = np.expand_dims(image, axis=-1)
            if self.augment:
                image = self.datagen.random_transform(image)
            images.append(image)
            labels.append(row[self.y_col])
        
        x = np.array(images)
        y = tf.keras.utils.to_categorical(labels, num_classes=3)
        return x, y

# Assuming train_data and val_data are your DataFrames with 'img_file_path' and 'category' columns
train_df['category'] = train_df['category'].astype(int)
val_df['category'] = val_df['category'].astype(int)

# Create data generators
train_generator = DicomDataGenerator(
    dataframe=train_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=True,
    augment=True
)

val_generator = DicomDataGenerator(
    dataframe=val_df,
    x_col='img_file_path',
    y_col='category',
    batch_size=32,
    target_size=(224, 224),
    shuffle=False,
    augment=False
)

In [13]:
#  Buidling model.

from tensorflow.keras.layers import Input

# Model Architecture
model = tf.keras.Sequential([
    Input(shape=(224, 224, 1)),  # Define input shape here
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # Adjust the output layer based on the number of classes
])

In [14]:
from tensorflow.keras.callbacks import ModelCheckpoint, LambdaCallback
from tensorflow.keras.models import load_model

# Define the checkpoint callback for best loss
checkpoint_loss = ModelCheckpoint('model_checkpoint_best_loss.weights.h5', 
                                  save_best_only=True, save_weights_only=True, 
                                  monitor='val_loss', mode='min', verbose=1)

# Define the checkpoint callback for best accuracy
checkpoint_accuracy = ModelCheckpoint('model_checkpoint_best_accuracy.weights.h5', 
                                      save_best_only=True, save_weights_only=True, 
                                      monitor='val_accuracy', mode='max', verbose=1)

# Save model after each epoch
save_after_epoch = LambdaCallback(on_epoch_end=lambda epoch, logs: model.save('model_checkpoint_epoch.h5'))

# Compile the Model with the custom weighted log loss
model.compile(optimizer='adam', loss=weighted_log_loss, metrics=['accuracy'])

# Load the last saved weights if they exist
try:
    model.load_weights('model_checkpoint_epoch.h5')
    print("Model weights loaded from the last checkpoint.")
except:
    try:
        model.load_weights('model_checkpoint_best_loss.weights.h5')
        print("Model weights loaded from best validation loss checkpoint.")
    except:
        try:
            model.load_weights('model_checkpoint_best_accuracy.weights.h5')
            print("Model weights loaded from best validation accuracy checkpoint.")
        except:
            print("No checkpoint found. Starting training from scratch.")

# Train the model with both checkpoint callbacks
history = model.fit(train_generator, epochs=50, validation_data=val_generator, 
                    callbacks=[checkpoint_loss, checkpoint_accuracy, save_after_epoch])

# Save the fully trained model
model.save('my_trained_model.h5')

Model weights loaded from the last checkpoint.
Epoch 1/50
Epoch 1: val_loss improved from inf to 0.97544, saving model to model_checkpoint_best_loss.weights.h5

Epoch 1: val_accuracy improved from -inf to 0.79153, saving model to model_checkpoint_best_accuracy.weights.h5
Epoch 2/50
Epoch 2: val_loss did not improve from 0.97544

Epoch 2: val_accuracy did not improve from 0.79153
Epoch 3/50
Epoch 3: val_loss improved from 0.97544 to 0.97515, saving model to model_checkpoint_best_loss.weights.h5

Epoch 3: val_accuracy improved from 0.79153 to 0.79266, saving model to model_checkpoint_best_accuracy.weights.h5
Epoch 4/50
Epoch 4: val_loss improved from 0.97515 to 0.97201, saving model to model_checkpoint_best_loss.weights.h5

Epoch 4: val_accuracy did not improve from 0.79266
Epoch 5/50
Epoch 5: val_loss improved from 0.97201 to 0.95944, saving model to model_checkpoint_best_loss.weights.h5

Epoch 5: val_accuracy improved from 0.79266 to 0.79276, saving model to model_checkpoint_best_accur

In [16]:
# Evaluate the Model
val_loss, val_accuracy = model.evaluate(val_generator)
print(f'Validation Accuracy: {val_accuracy}')

Validation Accuracy: 0.7927631735801697
