In [16]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices are visible to this process.
gpus = tf.config.list_physical_devices('GPU')

if not gpus:
    print("Failed to detect a GPU.")
else:
    for gpu in gpus:
        # The details mapping may lack 'device_name'; fall back to a placeholder then.
        gpu_details = tf.config.experimental.get_device_details(gpu)
        gpu_name = gpu_details.get('device_name', 'Unknown GPU')
        print(f"Found a GPU with ID: {gpu}, Name: {gpu_name}")

Found a GPU with ID: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), Name: NVIDIA GeForce RTX 3060


In [None]:
import os
import math
import numpy as np
import tensorflow as tf
import time
import glob

from keras import backend as K
from keras.layers import Input
from keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from networks.Sqeeze_Expand_Excite import SEE_Unet
import random

from networks.squeezeunet import SqueezeUNet
from networks.unet3 import unet
from networks.Sqeeze_Expand_Excite import SEE_Unet

# Set seeds for reproducibility
# `seed` is also the default argument of set_seeds() and set_global_determinism() below.
seed = 42
# os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)  # NumPy's global RNG (data_generator samples batch indices from it)
tf.random.set_seed(seed)  # TensorFlow's global seed

def set_seeds(seed=seed):
    """Seed every random number source this notebook relies on.

    Writes PYTHONHASHSEED into the process environment, then seeds Python's
    ``random`` module, TensorFlow's global seed and NumPy's global RNG.

    Args:
        seed (int): Seed value; defaults to the module-level ``seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, tf.random.set_seed, np.random.seed):
        seeder(seed)

# Activate Tensorflow deterministic behavior
def set_global_determinism(seed=seed):
    """Seed all RNGs and switch TensorFlow to its deterministic settings.

    Calls ``set_seeds`` first, then sets the TF_DETERMINISTIC_OPS and
    TF_CUDNN_DETERMINISTIC environment variables to '1' and limits both
    TensorFlow thread pools (inter-op and intra-op) to one thread.

    Args:
        seed (int): Seed forwarded to ``set_seeds``; defaults to the
            module-level ``seed``.
    """
    set_seeds(seed=seed)

    os.environ.update({
        'TF_DETERMINISTIC_OPS': '1',
        'TF_CUDNN_DETERMINISTIC': '1',
    })

    threading_cfg = tf.config.threading
    threading_cfg.set_inter_op_parallelism_threads(1)
    threading_cfg.set_intra_op_parallelism_threads(1)

# Apply the settings once for this session.
set_global_determinism(seed=seed)

# --------------------------------------------------------------------------------
# Set hyperparameters and paths
# --------------------------------------------------------------------------------
# Spatial size every image/mask is resized to, and the number of image channels.
img_rows, img_cols = 256, 256
channels = 3
num_classes = 1  # Binary segmentation (cloud vs. non-cloud)
epochs = 100

# Samples per batch for the training and validation generators.
train_batch_size, val_batch_size = 12, 17

# Directories scanned for "*.jpg" images and "*_mask.png" masks.
train_dir, test_dir = 'combined/train', 'combined/test'

def load_images_and_masks(root_dir):
    """
    Collect image and mask file paths from a single directory.

    No pixel data is read here: the function only lists ``root_dir`` and returns
    paths. Files ending in ".jpg" are treated as images and files ending in
    "_mask.png" as masks; everything else is ignored.

    Both lists are ordered by the file name with its suffix removed (the stem).
    Sorting the full file names is not enough to keep the two lists aligned,
    because '.' sorts before digits while '_' sorts after them: "img1.jpg" comes
    before "img10.jpg", but "img1_mask.png" comes after "img10_mask.png".

    Args:
        root_dir (str): Directory containing both the images and their masks.

    Returns:
        tuple[list[str], list[str]]: ``(images, masks)`` path lists. Entry ``i``
        of both lists refers to the same sample when every image ``<stem>.jpg``
        has a mask ``<stem>_mask.png`` (assumed naming convention; this function
        does not verify that the two sets of stems match).
    """
    image_suffix = ".jpg"
    mask_suffix = "_mask.png"

    # Map stem -> path; stems are unique per suffix because file names are unique.
    images_by_stem = {}
    masks_by_stem = {}

    for filename in os.listdir(root_dir):
        file_path = os.path.join(root_dir, filename)

        if filename.endswith(image_suffix):
            images_by_stem[filename[:-len(image_suffix)]] = file_path
        elif filename.endswith(mask_suffix):
            masks_by_stem[filename[:-len(mask_suffix)]] = file_path

    images = [images_by_stem[stem] for stem in sorted(images_by_stem)]
    masks = [masks_by_stem[stem] for stem in sorted(masks_by_stem)]

    return images, masks

# Collect the image/mask path lists for the training directory.
train_images, train_masks = load_images_and_masks(train_dir)
# NOTE: the "validation" lists are read from `test_dir`.
val_images, val_masks = load_images_and_masks(test_dir)

# Print the counts for verification
# (image and mask counts must match per split; data_generator asserts this).
print(f"Number of training images: {len(train_images)}")
print(f"Number of training masks: {len(train_masks)}")
print(f"Number of validation images: {len(val_images)}")
print(f"Number of validation masks: {len(val_masks)}")

Number of training images: 318
Number of training masks: 318
Number of validation images: 35
Number of validation masks: 35


In [None]:
# Number of whole batches per pass; a trailing partial batch is not counted.
train_steps_per_epoch = len(train_images) // train_batch_size
val_steps_per_epoch = len(val_images) // val_batch_size
(train_steps_per_epoch, val_steps_per_epoch)

In [None]:
import numpy as np
import cv2 as cv

def data_generator(imglist, maplist, batchsize, size=(32, 32)):
    """
    Endless batch generator for binary segmentation, without augmentation.

    Every batch is drawn independently: ``batchsize`` distinct indices are
    sampled from NumPy's global RNG. Samples never repeat inside one batch, but
    consecutive batches may overlap, so ``len(imglist) // batchsize`` batches are
    not guaranteed to visit every file.

    Args:
        imglist (list): List of image file paths.
        maplist (list): List of corresponding mask file paths (same order).
        batchsize (int): Number of images per batch; at most ``len(imglist)``.
        size (tuple): Target size for resizing (height, width).

    Yields:
        tuple: ``(img_batch, mask_batch)`` float32 arrays of shape
        ``(batchsize, h, w, 3)`` and ``(batchsize, h, w, 1)``, both divided by
        255. Image channels stay in the order returned by ``cv.imread``, which is
        BGR, not RGB.

    Raises:
        AssertionError: If ``imglist`` and ``maplist`` differ in length.
        ValueError: If ``batchsize`` exceeds the number of samples.
        OSError: If an image or mask file cannot be read by OpenCV.

    Because this is a generator, these errors surface when a batch is requested,
    not when ``data_generator`` is called.
    """
    assert len(imglist) == len(maplist), "Mismatch between image and mask counts!"
    if batchsize > len(imglist):
        # Sampling without replacement cannot produce more indices than samples.
        raise ValueError(
            f"batchsize ({batchsize}) exceeds the number of available samples ({len(imglist)})"
        )

    h, w = size
    while True:
        # Initialize batch arrays
        img_batch = np.zeros((batchsize, h, w, 3), dtype=np.float32)  # 3-channel images
        mask_batch = np.zeros((batchsize, h, w, 1), dtype=np.float32)  # For binary masks

        # Randomly sample distinct indices for the batch
        indices = np.random.choice(len(imglist), batchsize, replace=False)

        for i, idx in enumerate(indices):
            # cv.imread returns None instead of raising when a file is missing or
            # cannot be decoded; fail here with the offending path.
            img = cv.imread(imglist[idx], cv.IMREAD_COLOR)  # 3-channel, BGR order
            if img is None:
                raise OSError(f"Could not read image file: {imglist[idx]}")
            mask = cv.imread(maplist[idx], cv.IMREAD_GRAYSCALE)  # single channel
            if mask is None:
                raise OSError(f"Could not read mask file: {maplist[idx]}")

            # Resize to target size (cv.resize expects (width, height)).
            # Nearest-neighbour keeps mask values from being blended.
            img = cv.resize(img, (w, h), interpolation=cv.INTER_CUBIC)
            mask = cv.resize(mask, (w, h), interpolation=cv.INTER_NEAREST)

            # Scale 8-bit values to [0, 1]; masks get a trailing channel axis.
            img_batch[i] = img / 255.0
            mask_batch[i] = np.expand_dims(mask / 255.0, axis=-1)

        yield img_batch, mask_batch


# Corrected helper function to estimate FLOPS
def estimate_flops(model):
    """Sum a rough per-layer operation estimate over a model's Conv2D and Dense layers.

    Each contribution is derived from the layer's *input* tensor shape with the
    leading (batch) axis dropped:

    * Conv2D: ``prod(input dims) * filters * prod(kernel_size)``
    * Dense:  ``prod(input dims) * units``

    Layers of any other type contribute nothing.

    Args:
        model: Object exposing a ``layers`` iterable of Keras layers.

    Returns:
        The accumulated estimate (``0`` when no Conv2D/Dense layer is present).
    """
    def _input_volume(lyr):
        # Product of every input dimension except the first one.
        return np.prod(lyr.input.shape[1:])

    total = 0
    for lyr in model.layers:
        if isinstance(lyr, tf.keras.layers.Conv2D):
            total += _input_volume(lyr) * lyr.filters * np.prod(lyr.kernel_size)
        elif isinstance(lyr, tf.keras.layers.Dense):
            total += _input_volume(lyr) * lyr.units
    return total

In [None]:
# Training batches: `train_batch_size` samples each, resized to (img_rows, img_cols).
train_generator = data_generator(
    train_images,
    train_masks,
    train_batch_size,
    size=(img_rows, img_cols)
    
)

# Validation batches come from the same generator function, so they are also
# randomly sampled each time a batch is requested.
validation_generator = data_generator(
    val_images,
    val_masks,
    val_batch_size,
    size=(img_rows, img_cols)
)

In [None]:
# --------------------------------------------------------------------------------
# Build and compile the model
# --------------------------------------------------------------------------------
# Place the 3-channel axis last or first, following the backend's image data format.
if K.image_data_format() == 'channels_last':
    input_shape = (img_rows, img_cols, 3)
else:
    input_shape = (3, img_rows, img_cols)

input_tensor = Input(shape=input_shape)

In [None]:
# Select model architecture
# Supported values for `model_name`: 'sq-unet', 'see_unet', 'unet3'.
model_name = 'unet3'
if model_name == 'sq-unet':
    model = SqueezeUNet(inputs=input_tensor, num_classes=num_classes, dropout=0.5, activation='sigmoid')
elif model_name == 'see_unet':
    model = SEE_Unet(inputs=input_tensor, num_classes=num_classes, dropout=0.5, activation='sigmoid')
elif model_name == 'unet3':
    model = unet(input_tensor)
else:
    # Without this branch a typo would leave `model` undefined, or silently keep
    # a model built by an earlier run of this cell.
    raise ValueError(
        f"Unknown model_name {model_name!r}; expected 'sq-unet', 'see_unet' or 'unet3'."
    )

# Checkpoint file name is derived from the selected architecture.
model_file = f'{model_name}.keras'

In [None]:
import os
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping, CSVLogger
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K


# Custom callback to log learning rate
class LearningRateLogger(Callback):
    """Keras callback that reports the optimizer's learning rate after every epoch.

    The value is printed and, when a ``logs`` dict is supplied, stored under the
    ``'learning_rate'`` key so callbacks that run later in the callback list can
    read it.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Read the current learning rate, record it in ``logs`` and print it.

        Args:
            epoch (int): Zero-based epoch index; printed as ``epoch + 1``.
            logs (dict | None): Metric dict for this epoch. It is updated in
                place when provided; ``None`` (the default) is tolerated.
        """
        # Access the learning rate
        lr = float(K.get_value(self.model.optimizer.learning_rate))
        if logs is not None:
            logs['learning_rate'] = lr  # Add learning rate to logs
        print(f"\nEpoch {epoch + 1}: Learning rate is {lr:.6f}")

# Model checkpoint configuration
model_file = f'{model_name}.keras'

# Log directory (one sub-directory per architecture)
log_dir = "logs/{}/".format(model_name)

# exist_ok=True creates the directory when missing and is a no-op otherwise,
# without the check-then-create race of a separate os.path.exists() test.
os.makedirs(log_dir, exist_ok=True)

# Resume from an existing checkpoint when one is present
if os.path.exists(model_file):
    print(f"Loading weights from checkpoint: {model_file}")
    model.load_weights(model_file)
else:
    print("No checkpoint found. Starting training from scratch.")

# Keep only the checkpoint with the lowest validation loss
cp = ModelCheckpoint(model_file, save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Early stopping configuration
# NOTE(review): patience equals `epochs` (100), so with these settings early
# stopping cannot end training before the last epoch — confirm this is intended.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True
)

# Learning rate schedule
# lr_schedule = ExponentialDecay(initial_learning_rate=1e-3, decay_steps=10000, decay_rate=0.9)

# Compile the model
optimizer = Adam(learning_rate=1e-3)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])

# Callbacks
# append=True keeps rows from earlier runs in the same training.log.
csvlogger = CSVLogger(os.path.join(log_dir, "training.log"), separator=',', append=True)
lr_logger = LearningRateLogger()  # Instantiate the learning rate logger

start = time.time()
# Train the model
# NOTE(review): lr_logger is listed after csvlogger, so presumably the
# 'learning_rate' entry it adds to `logs` is not written to training.log.
# Moving it first would change the CSV columns of an existing appended log.
model_history = model.fit(
    train_generator,
    steps_per_epoch=train_steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=val_steps_per_epoch,
    callbacks=[cp, csvlogger, early_stopping, lr_logger]
)
end = time.time()

In [None]:
# Wall-clock seconds between the `start` and `end` timestamps taken around model.fit.
print('Training Time (s)', end-start)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Path to the CSVLogger file
log_dir = "logs/{}/".format(model_name)
log_file = os.path.join(log_dir, "training.log")

# Check if the log file exists
if not os.path.exists(log_file):
    raise FileNotFoundError(f"No log file found at {log_file}. Ensure training has been performed and logs are saved.")

# Load the CSVLogger file
log_data = pd.read_csv(log_file)

# Extract the epoch, training loss, and validation loss.
# The epoch column gets its own name: rebinding the global `epochs` here would
# replace the integer hyperparameter with a pandas Series, and re-running the
# training cell afterwards would pass that Series to model.fit(epochs=...).
logged_epochs = log_data['epoch']
training_loss = log_data['loss']
validation_loss = log_data['val_loss']

# Plot the loss graph
fig, ax = plt.subplots()
ax.plot(logged_epochs, training_loss, 'r', label='Training Loss')
ax.plot(logged_epochs, validation_loss, 'bo', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Value')
ax.set_ylim(0, 1)  # Adjust limits based on your data
ax.legend()
plt.show()


Evaluation

In [None]:
model_file

In [None]:
import os
import time
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from keras.models import load_model
from tqdm import tqdm
import matplotlib.pyplot as plt

model_file = f'{model_name}.keras'
# NOTE(review): this rebinds `log_dir` to the parent "logs" directory; it is not
# referenced again in this cell.
log_dir = "logs"

# Set seeds for reproducibility
seed = 42

# Re-apply seeding and the TensorFlow determinism settings before evaluation.
# set_seeds() and set_global_determinism() are defined once in the setup cell at
# the top of the notebook; they are reused here instead of being redefined.
# set_global_determinism() seeds Python's `random`, NumPy and TensorFlow itself.
set_global_determinism(seed=seed)


# --------------------------------------------------------------------------------
# Constants
# --------------------------------------------------------------------------------
model_file = f'{model_name}.keras'

checkpoint_path = model_file
threshold = 0.5  # probability cut-off used to binarize predictions and masks
# model.load_weights(checkpoint_path)
# --------------------------------------------------------------------------------
# Load the best model after training
# --------------------------------------------------------------------------------
print("Loading the best model...")
# SECURITY NOTE: safe_mode=False disables Keras' safe deserialization checks.
# Only load checkpoint files that were produced by this notebook.
model = load_model(checkpoint_path, safe_mode=False)

# --------------------------------------------------------------------------------
# Generate predictions and measure latency
# --------------------------------------------------------------------------------
start_time = time.time()
val_predictions = model.predict(validation_generator, steps=val_steps_per_epoch)
end_time = time.time()

inference_time = end_time - start_time
# Divide by the number of images actually predicted (steps * batch size). This
# can be smaller than len(val_images) when the set is not a multiple of the
# batch size.
num_val_samples = len(val_predictions)
average_latency = inference_time / num_val_samples

print(f"Number of validation predictions: {len(val_predictions)}")
print(f"Inference time for all validation samples: {inference_time:.2f} seconds")
print(f"Average latency per image: {average_latency:.4f} seconds")

# --------------------------------------------------------------------------------
# Binarize predictions
# --------------------------------------------------------------------------------
val_predictions_binary = (val_predictions > threshold).astype(np.uint8)
print(f"Number of validation predictions (binary): {len(val_predictions_binary)}")

# --------------------------------------------------------------------------------
# Evaluate model on validation set
# --------------------------------------------------------------------------------
loss, accuracy = model.evaluate(validation_generator, steps=val_steps_per_epoch)
print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

# --------------------------------------------------------------------------------
# Calculate additional metrics
# --------------------------------------------------------------------------------
precision_scores, recall_scores, f1_scores, accuracy_scores, dice_scores = [], [], [], [], []

# Fresh generator for the metric loop.
# NOTE: data_generator samples every batch at random, so the batches scored
# below are not the ones used by predict()/evaluate() above and need not cover
# every validation image exactly once.
validation_generator = data_generator(
    val_images,
    val_masks,
    val_batch_size,
    size=(img_rows, img_cols)
)

output_dir = os.path.join("predictions", model_name)
os.makedirs(output_dir, exist_ok=True)

# Process each batch
for i in tqdm(range(val_steps_per_epoch)):
    imgs, masks = next(validation_generator)
    # verbose=0 keeps Keras' per-call progress bar from interleaving with tqdm.
    predictions = model.predict(imgs, verbose=0)

    # Pixel-wise scores: flatten the whole batch into one label vector.
    predictions_flat = (predictions > threshold).astype(int).flatten()
    masks_flat = (masks > threshold).astype(int).flatten()

    precision_scores.append(precision_score(masks_flat, predictions_flat, zero_division=0))
    recall_scores.append(recall_score(masks_flat, predictions_flat, zero_division=0))
    f1_scores.append(f1_score(masks_flat, predictions_flat, zero_division=0))
    accuracy_scores.append(accuracy_score(masks_flat, predictions_flat))

    # Dice = 2|A∩B| / (|A| + |B|); the epsilon avoids 0/0 on empty masks.
    intersection = np.sum(masks_flat * predictions_flat)
    batch_dice = (2. * intersection) / (np.sum(masks_flat) + np.sum(predictions_flat) + 1e-7)
    dice_scores.append(batch_dice)

    # Save images
    for j in range(len(imgs)):
        fig, ax = plt.subplots(1, 3, figsize=(12, 4))

        # Batches hold OpenCV's BGR channel order; reverse the last axis so
        # matplotlib, which expects RGB, shows the true colours.
        ax[0].imshow(imgs[j][..., ::-1])
        ax[0].set_title("Input Image")
        ax[0].axis("off")

        ax[1].imshow(masks[j].squeeze(), cmap='gray')
        ax[1].set_title("Ground Truth Mask")
        ax[1].axis("off")

        ax[2].imshow(predictions[j].squeeze() > threshold, cmap='gray')
        ax[2].set_title("Predicted Mask")
        ax[2].axis("off")

        # Save the figure, then close it so figures do not pile up in memory.
        plt.savefig(os.path.join(output_dir, f"batch_{i}_sample_{j}.png"), bbox_inches='tight')
        plt.close(fig)

# Calculate average metrics (unweighted mean over batches)
avg_precision = np.mean(precision_scores)
avg_recall = np.mean(recall_scores)
avg_f1 = np.mean(f1_scores)
avg_accuracy = np.mean(accuracy_scores)
error_rate = 1 - avg_accuracy
dice_score = np.mean(dice_scores)

print(f"Average Precision: {avg_precision:.4f}")
print(f"Average Recall: {avg_recall:.4f}")
print(f"Average F1 Score: {avg_f1:.4f}")
print(f"Average Accuracy: {avg_accuracy:.4f}")
print(f"Average Error Rate: {error_rate:.4f}")
print(f"Average Dice Score: {dice_score:.4f}")

# --------------------------------------------------------------------------------
# Measure the model size in MB
# --------------------------------------------------------------------------------
model_size = os.path.getsize(checkpoint_path) / (1024 * 1024)
print(f"Model size: {model_size:.2f} MB")