In [1]:
from keras import layers

from models import create_model
from models.layers import RandomSwitch
from utils import plot_metrics
from utils.data import from_dir, over_sampling, random_resample, from_zip

# Relative path of the training split on disk; handed to from_dir when the
# dataset is loaded.
data_path = 'data/chest_xray/train'

2025-02-13 01:02:47.921509: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-13 01:02:47.921820: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-13 01:02:47.923473: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-13 01:02:47.927778: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739404967.935167  196489 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739404967.93

In [2]:
# Square image size forwarded to both from_dir (loading) and create_model
# (network construction), so the two always agree.
INPUT_SIDE = 299
resize = (INPUT_SIDE, INPUT_SIDE)

In [3]:
def training_selector(s):
    """Return True when ``s`` starts with 'chest_xray/train/' and ends with '.jpeg'.

    NOTE(review): nothing in the visible notebook calls this helper (from_dir
    does not receive it); it presumably served an archive-based loader.
    Confirm before removing.
    """
    return s.startswith('chest_xray/train/') and s.endswith('.jpeg')


# Load the dataset from disk. from_dir returns a pair that is unpacked into
# x and y; y is later indexed per row by class position.
# presumably x holds the resized images and y the matching label rows - TODO confirm
x, y = from_dir(data_path, resize=resize, resample=random_resample)

In [4]:
# Positions of the two classes inside each label row.
NEG_IDX = 0
POS_IDX = 1

# Class balance of the whole dataset: a row counts towards a class when its
# entry at that class position is greater than zero.
TOTAL = len(y)
N = sum(1 for label_row in y if label_row[NEG_IDX] > 0)
P = sum(1 for label_row in y if label_row[POS_IDX] > 0)

print(f'#Positives = {P} ({P/TOTAL*100:.2f}%)')
print(f'#Negatives = {N} ({N/TOTAL*100:.2f}%)')

#Positives = 3883 (74.22%)
#Negatives = 1349 (25.78%)


In [5]:
from sklearn.model_selection import train_test_split


def _class_counts(labels):
    """Return ``(total, negatives, positives)`` for a sequence of label rows.

    A row is counted for a class when its entry at NEG_IDX / POS_IDX is
    greater than zero.
    """
    negatives = sum(1 for row in labels if row[NEG_IDX] > 0)
    positives = sum(1 for row in labels if row[POS_IDX] > 0)
    return len(labels), negatives, positives


# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_val, Y_train, Y_val = train_test_split(x, y, test_size=0.2, random_state=42)

# Class balance of the training split.
total_train, N_train, P_train = _class_counts(Y_train)
print(f'#Positives (train) = {P_train} ({P_train/total_train*100:.2f}%)')
print(f'#Negatives (train) = {N_train} ({N_train/total_train*100:.2f}%)')

# Class balance of the validation split.
total_val, N_val, P_val = _class_counts(Y_val)
print(f'#Positives (val)   = {P_val} ({P_val/total_val*100:.2f}%)')
print(f'#Negatives (val)   = {N_val} ({N_val/total_val*100:.2f}%)')

#Positives (train) = 3095 (73.95%)
#Negatives (train) = 1090 (26.05%)
#Positives (val)   = 788 (75.26%)
#Negatives (val)   = 259 (24.74%)


In [6]:
# Oversample the training split only (X_val / Y_val are not passed in), then
# report the resulting class balance.
xs_extras, ys_extras = over_sampling(X_train, Y_train)

total = len(ys_extras)
N_extras = sum(1 for row in ys_extras if row[NEG_IDX] > 0)
P_extras = sum(1 for row in ys_extras if row[POS_IDX] > 0)

# These counts describe the oversampled *training* data; the labels previously
# said "(val)", a leftover from the validation report.
print(f'#Positives (train, oversampled) = {P_extras} ({P_extras/total*100:.2f}%)')
print(f'#Negatives (train, oversampled) = {N_extras} ({N_extras/total*100:.2f}%)')

#Positives (val)   = 3095 (50.00%)
#Negatives (val)   = 3095 (50.00%)


In [7]:
# Every augmentation layer receives the same data_format keyword.
_CHANNELS_LAST = {'data_format': 'channels_last'}

# Individual random transforms, each configured with a 0.2 factor.
tr = layers.RandomTranslation(0.2, 0.2, **_CHANNELS_LAST)
rr = layers.RandomRotation(0.2, **_CHANNELS_LAST)
# NOTE(review): RandomBrightness is used with its default value range here;
# confirm that matches the pixel scale produced by from_dir.
rb = layers.RandomBrightness(0.2, **_CHANNELS_LAST)
rc = layers.RandomContrast(0.2, **_CHANNELS_LAST)
rz = layers.RandomZoom(0.2, 0.2, **_CHANNELS_LAST)

# Two-step combinations that reuse the layer instances created above.
p1 = layers.Pipeline([rr, rz])
p2 = layers.Pipeline([tr, rr])
p3 = layers.Pipeline([rr, rc])

# RandomSwitch is a project-local layer; presumably it picks one of the given
# transforms per call and 0.7 is the probability of applying any - TODO confirm.
augmentation_choices = [tr, rr, rb, rc, rz, p1, p2, p3]
data_augmentation = RandomSwitch(augmentation_choices, 0.7)

2025-02-13 01:02:51.861590: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [8]:
# Build the 'inception-v3' variant through the project's model factory, with
# the augmentation layer and the image size used when loading the data.
v1 = create_model(
    'inception-v3',
    resize=resize,
    data_augmentation=data_augmentation,
)

In [9]:
from keras import losses, metrics, optimizers

# Metrics tracked during training and validation.
#
# Label rows have one column per class (NEG_IDX / POS_IDX). Without class_id,
# Precision and Recall pool the counts of both columns, so they would not
# describe the positive class. class_id=POS_IDX restricts them to that column.
# The metric names are unchanged.
METRICS = [
    metrics.BinaryCrossentropy(name='cross entropy'),  # same as model's loss
    # metrics.MeanSquaredError(name='Brier score'),
    # metrics.TruePositives(name='tp'),
    # metrics.FalsePositives(name='fp'),
    # metrics.TrueNegatives(name='tn'),
    # metrics.FalseNegatives(name='fn'),
    # metrics.BinaryAccuracy(name='accuracy'),
    metrics.Precision(name='precision', class_id=POS_IDX),
    metrics.Recall(name='recall', class_id=POS_IDX),
    metrics.AUC(name='auc'),
    metrics.AUC(name='prc', curve='PR'),  # precision-recall curve
]

# Adam with default settings, binary cross-entropy loss, and the metrics above.
v1.compile(optimizer=optimizers.Adam(), loss=losses.BinaryCrossentropy(), metrics=METRICS)
v1.summary()

In [10]:
from keras import callbacks

# Upper bound on training epochs and the mini-batch size passed to fit().
EPOCHS = 100
BATCH_SIZE = 64

# Stop after 10 epochs without improvement of the monitored quantity and
# restore the best weights seen. 'val_loss' is the callback's default monitor,
# spelled out here for readability.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

In [11]:
# Train on the oversampled training data and evaluate on the untouched
# validation split after every epoch (validation_freq=1).
validation_set = (X_val, Y_val)
history = v1.fit(
    xs_extras,
    ys_extras,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=validation_set,
    validation_freq=1,
    callbacks=[early_stopping],
)

Epoch 1/100


KeyboardInterrupt: 

In [None]:
# Plot the loss and a subset of the tracked metrics from the training history.
PLOTTED_METRICS = ['loss', 'precision', 'recall', 'auc']
plot_metrics(history, metrics=PLOTTED_METRICS)