In [3]:
'''
https://www.kaggle.com/code/mazenahmed11/rsna-2024-model
'''

'\nhttps://www.kaggle.com/code/mazenahmed11/rsna-2024-model\n'

In [4]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # 필요한 만큼만 메모리를 사용하도록 설정
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            
        # 특정 GPU에 연산을 할당
        tf.config.set_visible_devices(gpus[0], 'GPU')
        
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        
    except RuntimeError as e:
        print(e)


1 Physical GPUs, 1 Logical GPU


In [5]:
import pandas as pd
import numpy as np
import pydicom
import os
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Load CSV files
train_labels = pd.read_csv('spine/train.csv')
train_coords = pd.read_csv('spine/train_label_coordinates.csv')

# Function to load DICOM images
def load_dicom_image(file_path):
    dicom = pydicom.dcmread(file_path)
    image = dicom.pixel_array
    image = cv2.resize(image, (128, 128))  # Resize image to 128x128 for consistency
    image = image / np.max(image)  # Normalize pixel values
    return image

In [6]:
# Normalize condition names and levels
def normalize_condition(condition):
    return condition.replace('_', ' ').title()

def normalize_level(level):
    return level.replace('_', '/').upper()

# Prepare features and labels with debug statements
def prepare_data(train_labels, train_coords, image_dir):
    images = []
    labels = []
    
    for _, row in train_labels.iterrows():
        study_id = row['study_id']
        for col in train_labels.columns[1:]:
            condition_level = col.split('_')
            condition = '_'.join(condition_level[:-2])
            level = '_'.join(condition_level[-2:])
            severity = row[col]
            
            # Skip if severity is NaN
            if pd.isna(severity):
                continue
            
            # Normalize condition and level for matching
            normalized_condition = normalize_condition(condition)
            normalized_level = normalize_level(level)
            
            # Find corresponding coordinates
            coords = train_coords[(train_coords['study_id'] == study_id) &
                                  (train_coords['condition'] == normalized_condition) &
                                  (train_coords['level'] == normalized_level)]
            
            if not coords.empty:
                series_id = coords.iloc[0]['series_id']
                instance_number = coords.iloc[0]['instance_number']
                img_path = f'{image_dir}/{study_id}/{series_id}/{instance_number}.dcm'
                
                if os.path.exists(img_path):
                    image = load_dicom_image(img_path)
                    images.append(image)
                    labels.append(severity)
                else:
                    print(f'Image not found: {img_path}')
            else:
                print(f'No coordinates found for {study_id}, {normalized_condition}, {normalized_level}')
    
    return np.array(images), np.array(labels)

In [7]:
# Example: Preparing data
image_dir = 'spine/train_images'  # Update with correct path
X, y = prepare_data(train_labels, train_coords, image_dir)

# Print the shapes of X and y to verify data loading
print(f'Shape of X: {X.shape}')
print(f'Shape of y: {y.shape}')

No coordinates found for 74782131, Spinal Canal Stenosis, L1/L2
No coordinates found for 267842058, Spinal Canal Stenosis, L1/L2
No coordinates found for 267989673, Spinal Canal Stenosis, L1/L2
No coordinates found for 293713262, Spinal Canal Stenosis, L1/L2
No coordinates found for 293713262, Spinal Canal Stenosis, L2/L3
No coordinates found for 296083289, Spinal Canal Stenosis, L1/L2
No coordinates found for 305152236, Spinal Canal Stenosis, L1/L2
No coordinates found for 344297746, Spinal Canal Stenosis, L1/L2
No coordinates found for 376723024, Spinal Canal Stenosis, L1/L2
No coordinates found for 376723024, Spinal Canal Stenosis, L2/L3
No coordinates found for 390498354, Spinal Canal Stenosis, L1/L2
No coordinates found for 390498354, Spinal Canal Stenosis, L2/L3
No coordinates found for 434488359, Spinal Canal Stenosis, L1/L2
No coordinates found for 434488359, Spinal Canal Stenosis, L2/L3
No coordinates found for 597329259, Spinal Canal Stenosis, L1/L2
No coordinates found for 6

In [8]:
X

array([[[0.02762431, 0.02394107, 0.02394107, ..., 0.        ,
         0.        , 0.        ],
        [0.02578269, 0.02394107, 0.01841621, ..., 0.        ,
         0.        , 0.        ],
        [0.02946593, 0.02025783, 0.01657459, ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.01657459, 0.01657459, 0.02762431, ..., 0.        ,
         0.        , 0.        ],
        [0.01473297, 0.02025783, 0.01289134, ..., 0.        ,
         0.        , 0.        ],
        [0.01289134, 0.01104972, 0.01657459, ..., 0.        ,
         0.        , 0.        ]],

       [[0.02762431, 0.02394107, 0.02394107, ..., 0.        ,
         0.        , 0.        ],
        [0.02578269, 0.02394107, 0.01841621, ..., 0.        ,
         0.        , 0.        ],
        [0.02946593, 0.02025783, 0.01657459, ..., 0.        ,
         0.        , 0.        ],
        ...,
        [0.01657459, 0.01657459, 0.02762431, ..., 0.        ,
         0.        , 0.        ],
        [0.0

In [9]:
y

array(['Normal/Mild', 'Normal/Mild', 'Normal/Mild', ..., 'Normal/Mild',
       'Normal/Mild', 'Normal/Mild'], dtype='<U11')

In [10]:
# Check if the dataset is empty before proceeding
if X.size > 0 and y.size > 0:
    # Encode labels
    severity_mapping = {'Normal/Mild': 0, 'Moderate': 1, 'Severe': 2}
    y_encoded = np.array([severity_mapping[severity] for severity in y])
    y_categorical = to_categorical(y_encoded, num_classes=3)

    # Train-test split
    X_train, X_val, y_train, y_val = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

    # Add a channel dimension to the images
    X_train = np.expand_dims(X_train, axis=-1)
    X_val = np.expand_dims(X_val, axis=-1)

In [11]:
# Define CNN model
def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
        
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [16]:
# Create and train the model
input_shape = (128, 128, 1)
model = create_cnn_model(input_shape)
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model
val_loss, val_acc = model.evaluate(X_val, y_val)
print(f'Validation Accuracy: {val_acc:.4f}')

MemoryError: Unable to allocate 2.38 GiB for an array with shape (38925, 128, 128, 1) and data type float32

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
# Generate predictions on validation set
y_pred = model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_val, axis=1)

# Print confusion matrix and classification report
print(confusion_matrix(y_true_classes, y_pred_classes))
print(classification_report(y_true_classes, y_pred_classes, 
                            target_names=['Normal/Mild', 'Moderate', 'Severe']))

In [None]:
# Prepare submission
def prepare_submission(test_image_dir, model):
    test_files = []
    test_images = []
    
    for root, dirs, files in os.walk(test_image_dir):
        for file in files:
            if file.endswith(".dcm"):
                file_path = os.path.join(root, file)
                test_files.append(file_path)
                test_images.append(load_dicom_image(file_path))
    
    test_images = np.array(test_images)
    test_images = np.expand_dims(test_images, axis=-1)  # Add channel dimension
    predictions = model.predict(test_images)
    pred_classes = np.argmax(predictions, axis=1)
    
    severity_mapping_rev = {0: 'normal_mild', 1: 'moderate', 2: 'severe'}
    submission = []
    
    for file_path, pred in zip(test_files, pred_classes):
        # Extract study_id and instance_number from file path
        parts = file_path.split('/')
        study_id = parts[-3]
        series_id = parts[-2]
        instance_number = parts[-1].replace('.dcm', '')
        row_id = f'{study_id}_{series_id}_{instance_number}'
        
        submission.append({
            'row_id': row_id,
            'normal_mild': 1 if pred == 0 else 0,
            'moderate': 1 if pred == 1 else 0,
            'severe': 1 if pred == 2 else 0,
        })
    
    submission_df = pd.DataFrame(submission)
    return submission_df

# Update test_image_dir with the actual path
test_image_dir = 'spine/test_images'
submission_df = prepare_submission(test_image_dir, model)

# Save submission file
submission_df.to_csv('submission.csv', index=False)
print(submission_df.head())