In [1]:
import os
import tensorflow as tf
from tensorflow import keras
import sys
import random

random.seed(42)

In [2]:
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [3]:
normal_root = '/Users/alex/Desktop/NeuroRex/data/ct_hemorrhage_classifier/Normal'
hemorrhagic_root = '/Users/alex/Desktop/NeuroRex/data/ct_hemorrhage_classifier/Hemorrhagic'

In [4]:
normal_patients = [os.path.join(normal_root, p) for p in os.listdir(normal_root) if os.path.isdir(os.path.join(normal_root, p))]
hemorrhagic_patients = [os.path.join(hemorrhagic_root, p) for p in os.listdir(hemorrhagic_root) if os.path.isdir(os.path.join(hemorrhagic_root, p))]

In [5]:
print(f'Found {len(normal_patients)} normal patients.')
print(f'Found {len(hemorrhagic_patients)} hemorrhagic patients.')

Found 27 normal patients.
Found 18 hemorrhagic patients.


In [6]:
def split_patients(patients, train_ratio = 0.7, val_ratio = 0.15):
    random.shuffle(patients)
    total = len(patients)
    train_end = int(total*train_ratio)
    val_end = train_end + int(total*val_ratio)
    
    train_patients = patients[:train_end]
    val_patients = patients[train_end:val_end]
    test_patients = patients[val_end:]
    
    return train_patients, val_patients, test_patients

normal_train, normal_val, normal_test = split_patients(normal_patients)
hemorrhagic_train, hemorrhagic_val, hemorrhagic_test = split_patients(hemorrhagic_patients)

In [7]:
train_set = [(p, 0) for p in normal_train] + [(p, 1) for p in hemorrhagic_train]
val_set = [(p, 0) for p in normal_val] + [(p, 1) for p in hemorrhagic_val]
test_set = [(p, 0) for p in normal_test] + [(p, 1) for p in hemorrhagic_test]

random.shuffle(train_set)
random.shuffle(val_set)
random.shuffle(test_set)

print(f'Train set: {len(train_set)} patients')
print(f'Val set: {len(val_set)} patients')
print(f'Test set: {len(test_set)} patients')

Train set: 30 patients
Val set: 6 patients
Test set: 9 patients


In [8]:
import json

'''splits = {
    'train':[{'patient_dir':p, 'label':label} for (p, label) in train_set],
    'val':[{'patient_dir':p, 'label':label} for (p, label) in val_set],
    'test':[{'patient_dir':p, 'label':label} for (p, label) in test_set]
}

with open('ct_hemorrhage_splits.json', 'w') as f:
    json.dump(splits, f, indent=4)'''

"splits = {\n    'train':[{'patient_dir':p, 'label':label} for (p, label) in train_set],\n    'val':[{'patient_dir':p, 'label':label} for (p, label) in val_set],\n    'test':[{'patient_dir':p, 'label':label} for (p, label) in test_set]\n}\n\nwith open('ct_hemorrhage_splits.json', 'w') as f:\n    json.dump(splits, f, indent=4)"

In [9]:
def load_patients_slices(patient_dir, label, img_size=(224, 224)):
    slice_paths = []
    for root, dirs, files in os.walk(patient_dir):
        for fname in files:
            if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
                slice_paths.append(os.path.join(root, fname))
    
    def _load_slice(path):
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, img_size)
        img = tf.cast(img, tf.float32)/255.0
        return img, label
    
    dataset = tf.data.Dataset.from_tensor_slices(slice_paths)
    dataset = dataset.map(lambda x: _load_slice(x), num_parallel_calls=tf.data.AUTOTUNE)
    return dataset

In [10]:
def create_full_dataset(split, base_path, img_size=(224, 224)):
    datasets = []
    for patient in split:
        patient_dir = os.path.join(base_path, patient['patient_dir'])
        label = patient['label']
        patient_ds = load_patients_slices(patient_dir, label, img_size)
        datasets.append(patient_ds)
        
    full_ds = datasets[0]
    for ds in datasets[1:]:
        full_ds = full_ds.concatenate(ds)
        
    return full_ds.shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)

In [11]:
with open('ct_hemorrhage_splits.json', 'r') as f:
    splits = json.load(f)
    
base_path = '/Users/alex/Desktop/NeuroRex/data/ct_hemorrhage_classifier'

train_ds = create_full_dataset(splits['train'], base_path)
val_ds = create_full_dataset(splits['val'], base_path)
test_ds = create_full_dataset(splits['test'], base_path)

2025-04-27 13:09:54.290272: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2025-04-27 13:09:54.290293: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-04-27 13:09:54.290299: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-04-27 13:09:54.290520: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-04-27 13:09:54.290754: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [12]:
print(f'Train data: {len(train_ds)} slices')
print(f'Validation data: {len(val_ds)} slices')
print(f'Test data {len(test_ds)} slices')

Train data: 145 slices
Validation data: 33 slices
Test data 36 slices


In [13]:
def ct_hemorrhage_classifier(input_shape=(224, 224, 3), num_classes=2):
    base_model = tf.keras.applications.EfficientNetB0(input_shape=input_shape,
                                                   include_top=False,
                                                   weights='imagenet')
    
    base_model.trainable = False
    
    model = tf.keras.models.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

model = ct_hemorrhage_classifier()
model.build((None, 224, 224, 3))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb0 (Functional  (None, 7, 7, 1280)        4049571   
 )                                                               
                                                                 
 global_average_pooling2d (  (None, 1280)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 128)               163968    
                                                                 
 dense_1 (Dense)             (None, 2)                 258       
                                                                 
Total params: 4213797 (16.07 MB)
Trainable params: 16422

In [14]:
for _, label in train_ds.take(1):
    print(label)

tf.Tensor([0 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1], shape=(32,), dtype=int32)


In [15]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

train_labels = []
for _, batch_labels in train_ds:
    batch_labels = batch_labels.numpy()
    for label in batch_labels:
        train_labels.append(label)

train_labels = np.array(train_labels).flatten()
class_names = [0, 1]
unique_classes = np.unique(train_labels)
class_weight = compute_class_weight(class_weight='balanced', classes=unique_classes, y=train_labels)

class_weight_dict = {i:w for i,w in zip(unique_classes, class_weight)}

In [16]:
print(class_weight_dict)

{0: 0.8275615851481614, 1: 1.2632152588555858}


In [17]:
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(1e-3),
              loss = tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics = ['accuracy'])

In [18]:
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_accuracy',
    patience=10,
    mode='max',
    restore_best_weights=True
)

In [19]:
lr_decay = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3
)

In [20]:
history = model.fit(train_ds, 
                    validation_data = val_ds, 
                    epochs=100, 
                    callbacks=[early_stop, lr_decay],
                    class_weight=class_weight_dict)

Epoch 1/100


2025-04-27 13:10:05.805497: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
 14/145 [=>............................] - ETA: 43s - loss: 0.6905 - accuracy: 0.4286

KeyboardInterrupt: 

In [None]:
model.evaluate(test_ds)



[1.02595055103302, 0.5996472835540771]