In [1]:
# Mounting Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# import libraries to build the network
!pip install tensorflow



In [3]:
import os
import numpy as np
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.utils import plot_model


In [5]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# for processing the data before feeding it into the model

def data_generator(input_folder, ground_truth_folder, batch_size, input_size=(320, 640)):
    input_files = sorted(os.listdir(input_folder))
    ground_truth_files = sorted(os.listdir(ground_truth_folder))

    while True:
        for i in range(0, len(input_files), batch_size):
            batch_input_files = input_files[i:i + batch_size]
            batch_ground_truth_files = ground_truth_files[i:i + batch_size]

            batch_inputs = []
            batch_ground_truths = []

            for input_file, gt_file in zip(batch_input_files, batch_ground_truth_files):
                input_image = load_img(os.path.join(input_folder, input_file), color_mode="grayscale", target_size=input_size)
                input_image = img_to_array(input_image) / 255.0
                batch_inputs.append(input_image)

                ground_truth_image = load_img(os.path.join(ground_truth_folder, gt_file), color_mode="grayscale", target_size=input_size)
                ground_truth_image = img_to_array(ground_truth_image) / 255.0
                batch_ground_truths.append(ground_truth_image)

            yield np.array(batch_inputs), np.array(batch_ground_truths)

In [None]:
# network implmentation

def attention_block(x, g, inter_channel):
    """
    Attention block for the U-Net.

    Args:
        x: Input tensor from the encoder.
        g: Input tensor from the decoder.
        inter_channel: Number of intermediate channels.
    Returns:
        Output tensor after applying attention.
    """
    theta_x = layers.Conv2D(inter_channel, (1, 1), padding="same")(x)
    phi_g = layers.Conv2D(inter_channel, (1, 1), padding="same")(g)
    f = layers.Activation('relu')(layers.Add()([theta_x, phi_g]))
    psi = layers.Conv2D(1, (1, 1), padding="same")(f)
    psi = layers.Activation('sigmoid')(psi)
    return layers.Multiply()([x, psi])

def attention_unet(input_shape=(320, 640, 1)):
    inputs = layers.Input(input_shape)

    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c3)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(p3)
    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c4)

    u1 = layers.UpSampling2D((2, 2))(c4)
    att1 = attention_block(c3, u1, 128)
    u1 = layers.Concatenate()([u1, att1])
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c5)

    u2 = layers.UpSampling2D((2, 2))(c5)
    att2 = attention_block(c2, u2, 64)
    u2 = layers.Concatenate()([u2, att2])
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u2)
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c6)

    u3 = layers.UpSampling2D((2, 2))(c6)
    att3 = attention_block(c1, u3, 32)
    u3 = layers.Concatenate()([u3, att3])
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u3)
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c7)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c7)

    return Model(inputs, outputs)

attention_model = attention_unet()


In [None]:
attention_model.compile(optimizer=Adam(learning_rate=1e-4), loss="mean_squared_error", metrics=["accuracy"])

In [None]:
# Paths
train_input_folder = os.path.join("/content/drive/MyDrive/Training", "input")
train_gt_folder = os.path.join("/content/drive/MyDrive/Training", "ground_truth")
val_input_folder = os.path.join("/content/drive/MyDrive/Validation", "input")
val_gt_folder = os.path.join("/content/drive/MyDrive/Validation", "ground_truth")

batch_size = 16
epochs = 1

train_gen = data_generator(train_input_folder, train_gt_folder, batch_size)
val_gen = data_generator(val_input_folder, val_gt_folder, batch_size)

train_steps = len(os.listdir(train_input_folder)) // batch_size
val_steps = len(os.listdir(val_input_folder)) // batch_size


In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]


In [None]:
#checking if the GPU is available
import tensorflow as tf
print("GPUs available:", len(tf.config.list_physical_devices('GPU')))

GPUs available: 1


In [None]:
!nvidia-smi

Sun Jan  5 01:01:04 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0              48W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

[1m435/435[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2675s[0m 6s/step - accuracy: 0.0032 - loss: 0.6020 - val_accuracy: 0.0096 - val_loss: 0.5685


---------------------------------------

**ABOVE CODE WAS FOR T4/ BELOW IS L4**

---------------------------------------

In [None]:
# Paths
train_input_folder = os.path.join("/content/drive/MyDrive/Training", "input")
train_gt_folder = os.path.join("/content/drive/MyDrive/Training", "ground_truth")
val_input_folder = os.path.join("/content/drive/MyDrive/Validation", "input")
val_gt_folder = os.path.join("/content/drive/MyDrive/Validation", "ground_truth")

batch_size = 24
epochs = 1

train_gen = data_generator(train_input_folder, train_gt_folder, batch_size)
val_gen = data_generator(val_input_folder, val_gt_folder, batch_size)

train_steps = len(os.listdir(train_input_folder)) // batch_size
val_steps = len(os.listdir(val_input_folder)) // batch_size

In [None]:
print(train_steps)
print(val_steps)

290
68


In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=52,  # Save every 5 epochs
        filepath="best_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1063s[0m 3s/step - accuracy: 0.0047 - loss: 0.6164 - val_accuracy: 0.0097 - val_loss: 0.5750


In [None]:
# got this error when using batch size 32,64

In [None]:
---------------------------------------------------------------------------
ResourceExhaustedError                    Traceback (most recent call last)
<ipython-input-12-ae02d2b40640> in <cell line: 1>()
----> 1 history = attention_model.fit(
      2     train_gen,
      3     steps_per_epoch=train_steps,
      4     validation_data=val_gen,
      5     validation_steps=val_steps,

1 frames
/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     51   try:
     52     ctx.ensure_initialized()
---> 53     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     54                                         inputs, attrs, num_outputs)
     55   except core._NotOkStatusException as e:

ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-12-ae02d2b40640>", line 1, in <cell line: 1>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

Out of memory while trying to allocate 32681838224 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_one_step_on_iterator_8924]


--------------------------------

**Trying A100 GPU**

In [None]:
# Set mixed precision policy (if you are using mixed precision)
import tensorflow as tf
from tensorflow.keras.models import load_model

from tensorflow.keras.mixed_precision import Policy, set_global_policy


model_path = "/content/drive/MyDrive/best_attention_model.keras"  # Update with your saved model path
attention_model = load_model(model_path, compile=True)

attention_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [None]:
# Paths
train_input_folder = os.path.join("/content/drive/MyDrive/Training", "input")
train_gt_folder = os.path.join("/content/drive/MyDrive/Training", "ground_truth")
val_input_folder = os.path.join("/content/drive/MyDrive/Validation", "input")
val_gt_folder = os.path.join("/content/drive/MyDrive/Validation", "ground_truth")

batch_size = 32
epochs = 1

train_gen = data_generator(train_input_folder, train_gt_folder, batch_size)
val_gen = data_generator(val_input_folder, val_gt_folder, batch_size)

train_steps = len(os.listdir(train_input_folder)) // batch_size
val_steps = len(os.listdir(val_input_folder)) // batch_size

In [None]:
print(train_steps)
print(val_steps)

217
51


In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs
)

[1m 28/217[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m44:31[0m 14s/step - accuracy: 4.5853e-04 - loss: 0.5662

KeyboardInterrupt: 

---------------------------------------------------------------------------

**FINAL TRAINING RUN**

---------------------------------------------------------------------------

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model

In [None]:
# Load the saved model
model_path = "/content/drive/MyDrive/best_attention_model.keras"  # Update with your saved model path
attention_model = load_model(model_path, compile=True)

In [None]:
# Confirming the previous loss and optimizer configuration was loaded
print("Optimizer:", attention_model.optimizer.get_config())
print("Loss function:", attention_model.loss)

Optimizer: {'name': 'adam', 'learning_rate': 9.999999747378752e-05, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
Loss function: binary_crossentropy


In [None]:
########################## FINAL DATA LOADER #############################
train_input_folder = os.path.join("/content/drive/MyDrive/Training", "input")
train_gt_folder = os.path.join("/content/drive/MyDrive/Training", "ground_truth")
val_input_folder = os.path.join("/content/drive/MyDrive/Validation", "input")
val_gt_folder = os.path.join("/content/drive/MyDrive/Validation", "ground_truth")

batch_size = 24

train_gen = data_generator(train_input_folder, train_gt_folder, batch_size)
val_gen = data_generator(val_input_folder, val_gt_folder, batch_size)

train_steps = len(os.listdir(train_input_folder)) // batch_size
val_steps = len(os.listdir(val_input_folder)) // batch_size

In [None]:
print(train_steps)
print(val_steps)

290
68


In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=5,  # Save every 5 epochs
        filepath="best_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/52
[1m  5/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:58[0m 837ms/step - accuracy: 4.3703e-04 - loss: 0.5605

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1532s[0m 5s/step - accuracy: 0.0047 - loss: 0.5609 - val_accuracy: 0.0097 - val_loss: 0.5712
Epoch 2/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 984ms/step - accuracy: 0.0047 - loss: 0.5570 - val_accuracy: 0.0097 - val_loss: 0.5608
Epoch 3/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 903ms/step - accuracy: 0.0046 - loss: 0.5532 - val_accuracy: 0.0097 - val_loss: 0.5589
Epoch 4/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 898ms/step - accuracy: 0.0046 - loss: 0.5511 - val_accuracy: 0.0097 - val_loss: 0.5574
Epoch 5/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 897ms/step - accuracy: 0.0046 - loss: 0.5496 - val_accuracy: 0.0097 - val_loss: 0.5561
Epoch 6/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 897ms/step - accuracy: 0.0045 - loss: 0.5489 - val_accuracy: 0.0097 - val_loss: 0.5560
Epoch 7/52
[1m29

In [None]:
!nvidia-smi

Sun Jan  5 13:54:55 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |
| N/A   70C    P0              32W /  72W |  21207MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

------------------------------------------------------------------------------

**Training Run After Changes**

-----------------------------------------------------------------------------

In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=5,  # Save every 5 epochs
        filepath="/content/drive/MyDrive/best_new_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1
)
callbacks.append(reduce_lr)

In [None]:
# tweaking the attention block and changing model architecture

def attention_block(x, g, inter_channel):
    theta_x = layers.Conv2D(inter_channel, (1, 1), padding="same")(x)
    phi_g = layers.Conv2D(inter_channel, (1, 1), padding="same")(g)
    f = layers.Activation('relu')(layers.Add()([theta_x, phi_g]))
    f = layers.Conv2D(inter_channel, (3, 3), padding="same", activation="relu")(f)  # Extra layer
    psi = layers.Conv2D(1, (1, 1), padding="same")(f)
    psi = layers.Activation('sigmoid')(psi)
    return layers.Multiply()([x, psi])


def attention_unet(input_shape=(320, 640, 1)):
    inputs = layers.Input(input_shape)

    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c3)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(p3)
    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c4)
    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c4)  # New layer

    u1 = layers.UpSampling2D((2, 2))(c4)
    att1 = attention_block(c3, u1, 128)
    u1 = layers.Concatenate()([u1, att1])
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c5)

    u2 = layers.UpSampling2D((2, 2))(c5)
    att2 = attention_block(c2, u2, 64)
    u2 = layers.Concatenate()([u2, att2])
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u2)
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c6)

    u3 = layers.UpSampling2D((2, 2))(c6)
    att3 = attention_block(c1, u3, 32)
    u3 = layers.Concatenate()([u3, att3])
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u3)
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c7)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c7)

    return Model(inputs, outputs)

attention_model = attention_unet()


In [None]:
attention_model.compile(optimizer=Adam(learning_rate=1e-4), loss="mean_squared_error", metrics=["accuracy"])

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/52
[1m  5/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:44[0m 998ms/step - accuracy: 4.9237e-04 - loss: 0.0766

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m328s[0m 1s/step - accuracy: 0.0041 - loss: 0.0405 - val_accuracy: 0.0097 - val_loss: 0.0178 - learning_rate: 1.0000e-04
Epoch 2/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0014 - loss: 0.0174 - val_accuracy: 0.0097 - val_loss: 0.0176 - learning_rate: 1.0000e-04
Epoch 3/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0037 - loss: 0.0155 - val_accuracy: 0.0097 - val_loss: 0.0173 - learning_rate: 1.0000e-04
Epoch 4/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0040 - loss: 0.0140 - val_accuracy: 0.0097 - val_loss: 0.0170 - learning_rate: 1.0000e-04
Epoch 5/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0039 - loss: 0.0131 - val_accuracy: 0.0097 - val_loss: 0.0162 - learning_rate: 1.0000e-04
Epoch 6/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

KeyboardInterrupt: 

In [None]:
attention_model.save('/content/drive/MyDrive/model_with_mse.keras')

In [None]:
#changing the loss function to ssim_loss and continuing training

In [None]:
attention_model = load_model("/content/drive/MyDrive/model_with_mse.keras")

In [None]:
import tensorflow as tf

def ssim_loss(y_true, y_pred):
    return 1 - tf.reduce_mean(tf.image.ssim(y_true, y_pred, max_val=1.0))

In [None]:
attention_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=ssim_loss,
    metrics=[tf.image.psnr]  # Add PSNR for monitoring quality
)

In [None]:
history = attention_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/52
[1m  5/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:49[0m 1s/step - accuracy: 3.7150e-04 - loss: 0.0075

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 1s/step - accuracy: 0.0048 - loss: 0.0100 - val_accuracy: 0.0097 - val_loss: 0.0123 - learning_rate: 1.0000e-04
Epoch 2/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0047 - loss: 0.0101 - val_accuracy: 0.0097 - val_loss: 0.0126 - learning_rate: 1.0000e-04
Epoch 3/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0047 - loss: 0.0101 - val_accuracy: 0.0097 - val_loss: 0.0124 - learning_rate: 1.0000e-04
Epoch 4/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0047 - loss: 0.0100
Epoch 4: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.0047 - loss: 0.0100 - val_accuracy: 0.0097 - val_loss: 0.0123 - learning_rate: 1.0000e-04
Epoch 5/52
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

KeyboardInterrupt: 

In [None]:
attention_model.save('/content/drive/MyDrive/model_with_ssim.keras')

-----------------------------------------------------------------------------

**TRAINED FOR (14.2+13.2) EPOCHS, TRAINING FOR MORE**

----------------------------------------------------------------------------

In [None]:
from tensorflow.keras.models import load_model

In [None]:
    # Encoder
def attention_block(x, g, inter_channel):
    theta_x = layers.Conv2D(inter_channel, (1, 1), padding="same")(x)
    phi_g = layers.Conv2D(inter_channel, (1, 1), padding="same")(g)
    f = layers.Activation('relu')(layers.Add()([theta_x, phi_g]))
    f = layers.Conv2D(inter_channel, (3, 3), padding="same", activation="relu")(f)  # Extra layer
    psi = layers.Conv2D(1, (1, 1), padding="same")(f)
    psi = layers.Activation('sigmoid')(psi)
    return layers.Multiply()([x, psi])


def attention_unet(input_shape=(320, 640, 1)):
    inputs = layers.Input(input_shape)

    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c3)
    p3 = layers.MaxPooling2D((2, 2))(c3)

    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(p3)
    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c4)
    c4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(c4)  # New layer

    u1 = layers.UpSampling2D((2, 2))(c4)
    att1 = attention_block(c3, u1, 128)
    u1 = layers.Concatenate()([u1, att1])
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
    c5 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c5)

    u2 = layers.UpSampling2D((2, 2))(c5)
    att2 = attention_block(c2, u2, 64)
    u2 = layers.Concatenate()([u2, att2])
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u2)
    c6 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c6)

    u3 = layers.UpSampling2D((2, 2))(c6)
    att3 = attention_block(c1, u3, 32)
    u3 = layers.Concatenate()([u3, att3])
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u3)
    c7 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c7)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c7)

    return Model(inputs, outputs)

In [None]:
extra_20_epoch_model = attention_unet()

Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op FloorMod in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomGetKeyCounter in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomUniformV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AddV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:loca

In [None]:
extra_20_epoch_model = load_model("/content/drive/MyDrive/model_with_mse.keras")

Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op FloorMod in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomGetKeyCounter in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomUniformV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AddV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:loca

In [None]:
import tensorflow as tf

def ssim_loss(y_true, y_pred):
    return 1 - tf.reduce_mean(tf.image.ssim(y_true, y_pred, max_val=1.0))

In [None]:
attention_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=ssim_loss,
    metrics=[tf.image.psnr]  # Add PSNR for monitoring quality
)

In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=3,  # Save every 5 epochs
        filepath="/content/drive/MyDrive/best_new_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]


from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1
)
callbacks.append(reduce_lr)

In [None]:
epochs = 20

In [None]:
# STOPPED THIS TRAINING RUN CUZ MAYBE NOT COMPILING THE MODEL MADE THE EPOCHS GO
history = extra_20_epoch_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)


Epoch 1/20
[1m  5/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:23:30[0m 30s/step - accuracy: 4.4194e-04 - loss: 0.0084

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m 27/290[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:39:53[0m 36s/step - accuracy: 4.5009e-04 - loss: 0.0086

In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
tf.debugging.set_log_device_placement(False)

Num GPUs Available:  1


In [None]:
extra_20_epoch_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=ssim_loss,
    metrics=[tf.image.psnr]  # Add PSNR for monitoring quality
)

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0


In [None]:
history = extra_20_epoch_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Epoch 1/20
Executing op AnonymousIteratorV3 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
E

TypeError: Missing required positional argument

In [None]:
new_extra_20_epoch_model = load_model("/content/drive/MyDrive/model_with_mse.keras")

Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op FloorMod in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomGetKeyCounter in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomUniformV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AddV2 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:loca

In [None]:
history = new_extra_20_epoch_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Epoch 1/20
Executing op AnonymousIteratorV3 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Pack in device /job:localhost/replica:0/task:0/device:

In [None]:
# Suppress TensorFlow logging
import os
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress info and warning messages
tf.get_logger().setLevel('ERROR')         # Suppress TensorFlow execution logs


In [None]:
final_model = load_model("/content/drive/MyDrive/model_with_ssim.keras")

In [None]:
epochs = 25

In [None]:
history = final_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/25
[1m  3/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:14:25[0m 28s/step - accuracy: 4.5341e-04 - loss: 0.0078

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 1s/step - accuracy: 0.0047 - loss: 0.0091 - val_accuracy: 0.0097 - val_loss: 0.0118 - learning_rate: 1.2500e-05
Epoch 2/25
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 1s/step - accuracy: 0.0047 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0117 - learning_rate: 1.2500e-05
Epoch 3/25
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0046 - loss: 0.0095 - val_accuracy: 0.0097 - val_loss: 0.0117 - learning_rate: 1.2500e-05
Epoch 4/25
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0046 - loss: 0.0094
Epoch 4: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0046 - loss: 0.0095 - val_accuracy: 0.0097 - val_loss: 0.0117 - learning_rate: 1.2500e-05
Epoch 5/25
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

KeyboardInterrupt: 

In [None]:
final_model.save('/content/drive/MyDrive/final_model.keras')

In [None]:
final_model = load_model("/content/drive/MyDrive/final_model.keras")

In [None]:
epochs = 20

In [None]:
history = final_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/20
[1m  1/290[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m42:57[0m 9s/step - accuracy: 4.5797e-04 - loss: 0.0091

  self._save_model(epoch=self._current_epoch, batch=batch, logs=logs)


[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 1s/step - accuracy: 0.0052 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0110 - learning_rate: 6.2500e-06
Epoch 2/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0051 - loss: 0.0093 - val_accuracy: 0.0097 - val_loss: 0.0108 - learning_rate: 6.2500e-06
Epoch 3/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0051 - loss: 0.0093 - val_accuracy: 0.0097 - val_loss: 0.0108 - learning_rate: 6.2500e-06
Epoch 4/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0050 - loss: 0.0093 - val_accuracy: 0.0097 - val_loss: 0.0108 - learning_rate: 6.2500e-06
Epoch 5/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0050 - loss: 0.0093
Epoch 5: ReduceLROnPlateau reducing learning rate to 3.12499992105586e-06.
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [None]:
final_model.save('/content/drive/MyDrive/final_model_1.keras')

In [None]:
history = final_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 1s/step - accuracy: 0.0049 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0113 - learning_rate: 1.5625e-06
Epoch 2/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0049 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0113 - learning_rate: 1.5625e-06
Epoch 3/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0048 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0113 - learning_rate: 1.5625e-06
Epoch 4/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0048 - loss: 0.0093
Epoch 4: ReduceLROnPlateau reducing learning rate to 1e-06.
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0048 - loss: 0.0093 - val_accuracy: 0.0097 - val_loss: 0.0113 - learning_rate: 1.5625e-06
Epoch 5/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [None]:
final_model.save('/content/drive/MyDrive/final_model_2.keras')

In [None]:
history = final_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 1s/step - accuracy: 0.0048 - loss: 0.0095 - val_accuracy: 0.0097 - val_loss: 0.0112 - learning_rate: 1.0000e-06
Epoch 2/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0047 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0112 - learning_rate: 1.0000e-06
Epoch 3/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 1s/step - accuracy: 0.0047 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0112 - learning_rate: 1.0000e-06
Epoch 4/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0047 - loss: 0.0094 - val_accuracy: 0.0097 - val_loss: 0.0111 - learning_rate: 1.0000e-06
Epoch 5/20
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 1s/step - accuracy: 0.0046 - loss: 0.0095 - val_accuracy: 0.0097 - val_loss: 0.0112 - learning_rate: 1.0000e-06


In [None]:
final_model.save('/content/drive/MyDrive/final_model_3.keras')

In [None]:
callbacks = [
    ModelCheckpoint("best_attention_model.keras", save_best_only=True, monitor="val_loss", mode="min"),
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]

class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=3,  # Save every 5 epochs
        filepath="/content/drive/MyDrive/best_new_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]


from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1
)
callbacks.append(reduce_lr)

In [None]:
history = final_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

In [None]:
#

In [None]:
# try re-compiling and starting if loss and validation loss isnt increasing.

In [None]:
recompiled_model = load_model("/content/drive/MyDrive/model_with_ssim.keras")

In [None]:
# didnt use the one below ---> for some reason the one below was throwing error during
recompiled_model.compile(optimizer=Adam(learning_rate=1e-5), loss=ssim_loss)




In [None]:
history = recompiled_model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    validation_data=val_gen,
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=callbacks
)

Epoch 1/30
[1m 56/290[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m4:00[0m 1s/step - loss: 0.4845

KeyboardInterrupt: 

----------------------------------------------------------------------------

----------------------------------------------------------------------------

----------------------------------------------------------------------------

----------------------------------------------------------------------------





**CODE BELOW IS OPTIMISED FOR GPU L4/A100 (AINT WORKING THO)**

------------------------------------------------------------------

In [None]:
class SaveEveryNEpochs(ModelCheckpoint):
    def __init__(self, save_freq, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_freq = save_freq

    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.save_freq == 0:
            super().on_epoch_end(epoch, logs)

callbacks = [
    SaveEveryNEpochs(
        save_freq=5,  # Save every 5 epochs
        filepath="best_attention_model.keras",
        save_best_only=True,
        monitor="val_loss",
        mode="min"
    ),
    EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True
    )
]

In [None]:
# Enable mixed precision
from tensorflow.keras.mixed_precision import set_global_policy, Policy

policy = Policy('mixed_float16')  # Use mixed precision policy
set_global_policy(policy)

print("Mixed Precision Policy:", policy)

Mixed Precision Policy: <DTypePolicy "mixed_float16">


In [None]:
# Paths
train_input_paths = os.path.join("/content/drive/MyDrive/Training", "input")
train_target_paths = os.path.join("/content/drive/MyDrive/Training", "ground_truth")
val_input_paths = os.path.join("/content/drive/MyDrive/Validation", "input")
val_target_paths = os.path.join("/content/drive/MyDrive/Validation", "ground_truth")

train_input_folder = os.path.join("/content/drive/MyDrive/Training", "input")
val_input_folder = os.path.join("/content/drive/MyDrive/Validation", "input")
BATCH_SIZE = 24
epochs = 6


train_steps = len(os.listdir(train_input_folder)) // BATCH_SIZE
val_steps = len(os.listdir(val_input_folder)) // BATCH_SIZE


In [None]:
print(train_steps)
print(val_steps)

290
68


In [None]:
# Create Dataset
import tensorflow as tf

def create_dataset(input_paths, target_paths, batch_size):
    def load_data(input_path, target_path):
        input_img = tf.image.decode_png(tf.io.read_file(input_path), channels=1)
        target_img = tf.image.decode_png(tf.io.read_file(target_path), channels=1)
        input_img = tf.image.resize(input_img, (320, 640)) / 255.0
        target_img = tf.image.resize(target_img, (320, 640)) / 255.0
        return input_img, target_img

    dataset = tf.data.Dataset.from_tensor_slices((input_paths, target_paths))
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.cache()
    dataset = dataset.shuffle(len(input_paths))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

train_dataset = create_dataset(train_input_paths, train_target_paths, batch_size=BATCH_SIZE)
val_dataset = create_dataset(val_input_paths, val_target_paths, batch_size=BATCH_SIZE)


ValueError: Unbatching a tensor is only supported for rank >= 1

In [None]:
# Example file paths
import tensorflow as tf
import os

train_input_paths = ["/content/drive/MyDrive/Training/input/MRIT_450_slice_013.png", "/content/drive/MyDrive/Training/input/MRIT_450_slice_012.png"]  # Replace with actual file paths
train_target_paths = ["/content/drive/MyDrive/Training/ground_truth/MRIT_450_slice_013.png", "/content/drive/MyDrive/Training/ground_truth/MRIT_450_slice_012.png"]  # Replace with actual file paths
BATCH_SIZE = 16

def create_dataset(input_paths, target_paths, batch_size):
    def load_data(input_path, target_path):
        input_img = tf.image.decode_png(tf.io.read_file(input_path), channels=1)
        target_img = tf.image.decode_png(tf.io.read_file(target_path), channels=1)
        input_img = tf.image.resize(input_img, (320, 640)) / 255.0
        target_img = tf.image.resize(target_img, (320, 640)) / 255.0
        return input_img, target_img

    assert len(input_paths) == len(target_paths), "Input and target paths must have the same length"

    dataset = tf.data.Dataset.from_tensor_slices((input_paths, target_paths))
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.cache()
    dataset = dataset.shuffle(len(input_paths))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

print("Train Input Paths:", train_input_paths[:5])
print("Train Target Paths:", train_target_paths[:5])

train_dataset = create_dataset(train_input_paths, train_target_paths, batch_size=BATCH_SIZE)


Train Input Paths: ['/content/drive/MyDrive/Training/input/MRIT_450_slice_013.png', '/content/drive/MyDrive/Training/input/MRIT_450_slice_012.png']
Train Target Paths: ['/content/drive/MyDrive/Training/ground_truth/MRIT_450_slice_013.png', '/content/drive/MyDrive/Training/ground_truth/MRIT_450_slice_012.png']


In [None]:
# Example file paths
import tensorflow as tf
import os

BATCH_SIZE = 64
train_input_paths = sorted([f"/content/drive/MyDrive/Training/input/{f}" for f in os.listdir("/content/drive/MyDrive/Training/input")])
train_target_paths = sorted([f"/content/drive/MyDrive/Training/ground_truth/{f}" for f in os.listdir("/content/drive/MyDrive/Training/ground_truth")])
val_input_paths = sorted([f"/content/drive/MyDrive/Validation/input/{f}" for f in os.listdir("/content/drive/MyDrive/Validation/input")])
val_target_paths = sorted([f"/content/drive/MyDrive/Validation/ground_truth/{f}" for f in os.listdir("/content/drive/MyDrive/Validation/ground_truth")])

print(f"Number of input paths: {len(train_input_paths)}")
print(f"Number of target paths: {len(train_target_paths)}")
assert len(train_input_paths) == len(train_target_paths), "Input and target path lists are not of the same length!"

for path in train_input_paths + train_target_paths:
    if not tf.io.gfile.exists(path):
        print(f"File not found: {path}")

def create_dataset(input_paths, target_paths, batch_size):
    def load_data(input_path, target_path):
        input_img = tf.image.decode_png(tf.io.read_file(input_path), channels=1)
        target_img = tf.image.decode_png(tf.io.read_file(target_path), channels=1)
        input_img = tf.image.resize(input_img, (320, 640)) / 255.0
        target_img = tf.image.resize(target_img, (320, 640)) / 255.0
        return input_img, target_img

    dataset = tf.data.Dataset.from_tensor_slices((input_paths, target_paths))
    dataset = dataset.map(load_data, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.cache()
    dataset = dataset.shuffle(len(input_paths))
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

train_dataset = create_dataset(train_input_paths, train_target_paths, batch_size=BATCH_SIZE)
val_dataset = create_dataset(train_input_paths, train_target_paths, batch_size=BATCH_SIZE)


Number of input paths: 6964
Number of target paths: 6964


In [None]:
print(f"Number of input paths: {len(val_input_paths)}")
print(f"Number of target paths: {len(val_target_paths)}")

Number of input paths: 1652
Number of target paths: 1652


In [None]:
#for input_img, target_img in train_dataset.take(1):  # Fetch one batch

In [None]:
# Train using Dataset
history = attention_model.fit(
    train_dataset,
    epochs=6,
    validation_data=val_dataset,
    callbacks=callbacks
)

Epoch 1/6


ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-25-ddc237ec005c>", line 2, in <cell line: 2>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 320, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

Out of memory while trying to allocate 32681838224 bytes.
	 [[{{node StatefulPartitionedCall}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_one_step_on_iterator_10640]

In [None]:
## to later reload the model

from tensorflow.keras.models import load_model

saved_model = load_model("best_attention_model.keras")

inputs = saved_model.input
outputs = saved_model.layers[-2].output

new_outputs = layers.Conv2D(1, (1, 1), activation='sigmoid', dtype='float32')(outputs)

new_model = Model(inputs=inputs, outputs=new_outputs)

new_model.save("best_attention_model_updated.keras")

ValueError: File not found: filepath=best_attention_model.keras. Please ensure the file is an accessible `.keras` zip file.

In [None]:
# Create the Attention U-Net model
from tensorflow.keras.utils import plot_model

attention_model = attention_unet(input_shape=(512, 512, 1))

attention_model.summary()

plot_model(attention_model, to_file="attention_unet.png", show_shapes=True, dpi=100)


In [None]:
# specify input to the layer

In [None]:
# break it down into batchsize

In [None]:
# reduced model for later

In [None]:
    # Encoder
def attention_block(x, g, inter_channel):
    """
    Attention block for the U-Net.

    Args:
        x: Input tensor from the encoder.
        g: Input tensor from the decoder.
        inter_channel: Number of intermediate channels.
    Returns:
        Output tensor after applying attention.
    """
    theta_x = layers.Conv2D(inter_channel, (1, 1), padding="same")(x)
    phi_g = layers.Conv2D(inter_channel, (1, 1), padding="same")(g)
    f = layers.Activation('relu')(layers.Add()([theta_x, phi_g]))
    psi = layers.Conv2D(1, (1, 1), padding="same")(f)
    psi = layers.Activation('sigmoid')(psi)
    return layers.Multiply()([x, psi])


def attention_unet_reduced(input_shape=(320, 640, 1)):
    """
    Reduced Attention U-Net architecture with 2 encoder blocks, 1 bottleneck, and 2 decoder blocks.

    Args:
        input_shape: Shape of the input images (height, width, channels).
    Returns:
        Compiled Keras model.
    """
    inputs = layers.Input(input_shape)

    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(c3)

    u1 = layers.UpSampling2D((2, 2))(c3)
    att1 = attention_block(c2, u1, 64)
    u1 = layers.Concatenate()([u1, att1])
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(u1)
    c4 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c4)

    u2 = layers.UpSampling2D((2, 2))(c4)
    att2 = attention_block(c1, u2, 32)
    u2 = layers.Concatenate()([u2, att2])
    c5 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(u2)
    c5 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c5)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c5)

    return Model(inputs, outputs)


attention_model_reduced = attention_unet_reduced()
attention_model_reduced.summary()


In [6]:
pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-4.22.0-py3-none-any.whl.metadata (3.2 kB)
Collecting tensorflow-decision-forests>=1.5.0 (from tensorflowjs)
  Downloading tensorflow_decision_forests-1.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Collecting packaging~=23.1 (from tensorflowjs)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting tensorflow<3,>=2.13.0 (from tensorflowjs)
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting wurlitzer (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading wurlitzer-3.1.1-py3-none-any.whl.metadata (2.5 kB)
Collecting tf-keras>=2.13.0 (from tensorflowjs)
  Downloading tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting ydf>=0.11.0 (from tensorflow-decision-forests>=1.5.0->tensorflowjs)
  Downloading ydf-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Collec