In [1]:
import random
from typing import Iterable

import keras
from sklearn.model_selection import train_test_split
from keras import layers
from tensorflow.python.feature_column.feature_column import InputLayer

from models import create_model
from models.layers import RandomSwitch
from utils.data import from_dir, over_sampling, random_resample, from_zip

data_path = 'data/chest_xray/train'
training_selector = lambda s: s.startswith('chest_xray/train/') and s.endswith('.jpeg')
resize = (96, 96)
(x, y) = from_dir(data_path, resize=resize, resample=random_resample)
X_train, X_val, Y_train, Y_val = train_test_split(x, y, test_size=0.2, random_state=42)

2025-02-23 18:20:56.270170: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-23 18:20:56.270475: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-23 18:20:56.272052: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-23 18:20:56.276190: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740331256.282831   67014 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740331256.28

In [2]:
model = create_model('resnet-18', resize)

2025-02-23 18:20:59.195710: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [3]:
import tensorflow as tf
import math
import numpy as np


def random_crop(w, h, l):
    rw = int(w * math.sqrt(1-l))
    rh = int(h * math.sqrt(1-l))
    rx = int(np.random.uniform(0, w))
    ry = int(np.random.uniform(0, h))
    return rx, ry, rw, rh


def bbox(w, h, l):
    rx, ry, rw, rh = random_crop(w, h, l)
    x1 = np.clip(rx - rw // 2, 0, w)
    x2 = np.clip(rx + rw // 2, 0, w)
    x2 -= x1
    if x2 == 0:
        x2 = 1
    y1 = np.clip(ry - rh // 2, 0, h)
    y2 = np.clip(ry + rh // 2, 0, h)
    y2 -= y1
    if y2 == 0:
        y2 = 1
    return x1, y1, x2, y2


def remove_patch(img, x1, y1, w1, h1):
    m = np.ones(img.shape)
    m[x1:w1, y1:h1] = 0
    return img * m


def read_patch(img, x1, y1, w1, h1):
    m = np.zeros(img.shape)
    m[x1:w1, y1:h1] = 1
    return img * m


def cutmix(sample_a, sample_b):
    img_a, y_a = sample_a
    img_b, y_b = sample_b
    (w, h, _) = img_a.shape
    lambd_ = np.random.uniform(0,1)
    x1, y1, w1, h1 = bbox(w, h, lambd_)
    img_c = remove_patch(img_a, x1, y1, w1, h1)
    patch_b = read_patch(img_b, x1, y1, w1, h1)
    y_a = tf.cast(y_a, tf.float32)
    y_b = tf.cast(y_b, tf.float32)
    l = tf.cast(1 - float(w1 * h1) / (w * h), dtype=tf.float32)
    y = l * y_a + (1 - l) * y_b
    return img_c + patch_b, y

In [4]:
import numpy as np

def preprocessing(x, y):
    shf = list(range(len(x)))
    np.random.shuffle(shf)
    x_mix = []
    y_mix = []
    for i in range(len(x)):
        a = (x[i], y[i])
        b = (x[shf[i]], y[shf[i]])
        xc, yc = cutmix(a, b)
        x_mix.append(xc)
        y_mix.append(yc)
    return np.asarray(x_mix), np.asarray(y_mix)



In [5]:
x_batch, y_batch = X_train[:64], Y_train[:64]
x_batch, y_batch = preprocessing(x_batch, y_batch)
print(x_batch.shape, y_batch.shape)

(64, 96, 96, 1) (64, 1)


In [6]:
from keras import callbacks, losses, metrics, optimizers
from models import loop


loop.training_loop(
    X_train, Y_train, X_val, Y_val,
    model,
    1, 64,
    [metrics.BinaryCrossentropy(name='loss'), metrics.BinaryCrossentropy(name='val_loss')],
    optimizers.Adam(),
    [callbacks.EarlyStopping(patience=3, monitor='val_loss')],
    losses.BinaryCrossentropy(), preprocessing)

Start of epoch 0
Training loss (for one batch) at step 0: 0.5364
Seen so far: 64 samples


2025-02-23 18:21:17.291943: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


loss: 0.5495,val_loss: 1.3629


In [39]:
from keras import metrics
from typing import Iterable

class AggregateMetrics:
    def __init__(self, l: Iterable[metrics.Metric]):
        self.metrics = sorted(l, key=lambda metric: metric.name)

    def __str__(self):
        def to_str(metric: metrics.Metric):
            return f'{metric.name()}: {metric.result():.4f}'

        return ','.join(map(to_str, self.metrics))

    def which(self, metric: metrics.Metric, w: str) -> bool:
        if w == '*':
            return True
        elif w == 'train' and not metric.name.startswith('val'):
            return True
        elif w == 'val' and metric.name.startswith('val'):
            return True
        return False

    def update(self, y_batch, y_pred, which: str='train'):
        for metric in self.metrics:
            if self.which(metric, which):
                metric.update_state(y_batch, y_pred)

    def reset(self, which: str='train'):
        for metric in self.metrics:
            if self.which(metric, which):
                metric.reset_state()


@tf.function
def train_step(x, y, model, loss_fn, optimizer,  aggr: AggregateMetrics):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        aggr.update(y, logits)


@tf.function
def val_step(x, y, model, aggr: AggregateMetrics):
    val_logits = model(x, training=False)
    aggr.update(y, val_logits, which='val')


def training_loop(
        X_train, Y_train, X_val, Y_val,
        model,
        epochs,
        batch_size,
        metrics: AggregateMetrics,
        optimizer,
        callbacks,
        loss_fn,
        verbose: bool=True,
):
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
    val_dataset = val_dataset.shuffle(buffer_size=1024).batch(batch_size)
    for epoch in range(epochs):
        if verbose:
            print(f'Start of epoch {epoch}')
        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
            x_batch_train, y_batch_train = preprocessing(x_batch_train, y_batch_train)
            train_step(x_batch_train, y_batch_train, model, loss_fn, optimizer, metrics)
            # Log every 200 batches.
            if verbose and step % 200 == 0:
                print(f'Training loss (for one batch) at step {step}: {float(loss_value):.4f}')
                print(f'Seen so far: {((step + 1) * batch_size)} samples')
        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in val_dataset:
            val_step(x_batch_val, y_batch_val, model, metrics)
        print(str(metrics))
        metrics.reset(which='*')

In [40]:
from keras import optimizers, losses

aggr = AggregateMetrics([metrics.BinaryAccuracy(name='train_accuracy'),metrics.BinaryAccuracy(name='train_accuracy')])
training_loop(X_train, Y_train, X_val, Y_val, model, 1, 64, aggr, optimizers.Adam(), None, losses.BinaryCrossentropy())

Start of epoch 0
Training loss (for one batch) at step 0: 0.3369
Seen so far: 64 samples


TypeError: 'str' object is not callable

In [29]:
from keras import losses, metrics, optimizers
import tensorflow as tf

def get_metrics():
    return {
        'train_accuracy': metrics.BinaryAccuracy(name='train_accuracy'),
        'val_accuracy': metrics.BinaryAccuracy(name='train_accuracy'),,
        'train_precision': metrics.Precision(name='train_precision'),
        'val_precision': metrics.Precision(name='val_precision'),
        'train_recall': metrics.Recall(name='train_precision'),
        'val_recall': metrics.Recall(name='val_recall')
    }


def display_metrics(m):
    for k in sorted(m.keys()):
        print('%s: %.4f' % (k, m[k].result()), sep=' ')


def reset_metrics(m):
    for v in m.values():
        v.reset_state()


def update_metrics(m, y_batch, logits, category: str='train'):
    for k, v in m.items():
        if k.startswith(category):
            v.update_state(y_batch, logits)

epochs = 1
batch_size = 64

loss_fn = losses.BinaryCrossentropy()
optimizer = optimizers.Adam()
m = get_metrics()
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        x_batch_train, y_batch_train = preprocessing(x_batch_train, y_batch_train)
        with tf.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            print(logits[:10])
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        update_metrics(m, y_batch_train, logits, 'train')

        reset_metrics(m)
        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * batch_size))

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        update_metrics(m, y_batch_val, val_logits, 'val')

    display_metrics(m)
    reset_metrics(m)


Start of epoch 0
tf.Tensor(
[[0.9829008 ]
 [0.54234654]
 [0.9670155 ]
 [0.21392596]
 [0.94785976]
 [0.9893938 ]
 [0.9811459 ]
 [0.91917557]
 [0.7873801 ]
 [0.9530228 ]], shape=(10, 1), dtype=float32)
Training loss (for one batch) at step 0: 0.3618
Seen so far: 64 samples
tf.Tensor(
[[0.9728605 ]
 [0.67820805]
 [0.15616277]
 [0.68743193]
 [0.88999325]
 [0.77818334]
 [0.92738235]
 [0.8303805 ]
 [0.10357093]
 [0.9508287 ]], shape=(10, 1), dtype=float32)
tf.Tensor(
[[0.8298379 ]
 [0.9518264 ]
 [0.86792547]
 [0.9739022 ]
 [0.26927617]
 [0.41255066]
 [0.9620909 ]
 [0.9717414 ]
 [0.04008002]
 [0.9642554 ]], shape=(10, 1), dtype=float32)
tf.Tensor(
[[0.9400545 ]
 [0.96379   ]
 [0.904133  ]
 [0.94975203]
 [0.9453976 ]
 [0.9631613 ]
 [0.26777136]
 [0.8956694 ]
 [0.87130547]
 [0.860685  ]], shape=(10, 1), dtype=float32)
tf.Tensor(
[[0.95758015]
 [0.9773675 ]
 [0.96190584]
 [0.06291236]
 [0.0838152 ]
 [0.9393035 ]
 [0.84961015]
 [0.3941529 ]
 [0.8731751 ]
 [0.8406267 ]], shape=(10, 1), dtype=floa

KeyboardInterrupt: 

In [None]:
from