In [1]:
%%capture
!pip install --upgrade keras==2.15.0
!pip install -U git+https://github.com/UN-GCPDS/python-gcpds.image_segmentation.git

In [None]:
# General Libraries
import os
import sys
import cv2
import yaml
import h5py
import shutil
import random
import inspect
import imageio
import zipfile
import warnings
import argparse
import numpy as np
import pandas as pd
from PIL import Image
import albumentations as albu
from enum import auto, Enum
import matplotlib.pyplot as plt

# Tensorflow Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.losses import Loss
from tensorflow.keras.metrics import Metric
from tensorflow.python.framework.ops import EagerTensor
from tensorflow.keras import Model, layers, regularizers

# GCPDS Libraries
from gcpds.image_segmentation.datasets.segmentation import OxfordIiitPet

In [None]:
# Download OxfordPet dataset.
# Calling the OxfordIiitPet instance yields three splits, unpacked here in the
# order train / validation / test. Later cells unpack every sample of a split
# as (img, mask, label, id_img).

dataset = OxfordIiitPet()
train_dataset, val_dataset, test_dataset = dataset()

## Original masks

In [4]:
# Number of samples per batch produced by map_dataset.
BATCH_SIZE = 5
# Size passed to tf.image.resize for every image and mask.
TARGET_SHAPE = 512, 512

def fussion_mask(mask: EagerTensor) -> EagerTensor:
    """Fuse the object and border channels of a mask into a single channel.

    Axis 2 of ``mask`` is split into three channels, read in the order
    (object, background, border). The object and border channels are added
    element-wise, the background channel is discarded, and a trailing axis of
    size 1 is appended to the sum.

    Parameters:
        mask (EagerTensor): Rank-3 tensor whose axis 2 has exactly three
            entries. When the function runs inside ``Dataset.map`` the value
            is a symbolic tensor; the annotation is kept unchanged for
            backward compatibility.

    Returns:
        EagerTensor: Tensor with the two leading dimensions of ``mask`` and a
            last axis of size 1 holding ``object + border``.

    """
    obj, _, border = tf.unstack(mask, axis=2)
    # expand_dims appends the channel axis directly, so the static shape of
    # `mask` no longer has to be read and patched as the former
    # stack + reshape combination required.
    return tf.expand_dims(obj + border, axis=-1)


def map_dataset(dataset, target_shape, batch_size):
    """Build the single-mask (image, mask) pipeline for one dataset split.

    Three ``Dataset.map`` stages run in order, each with
    ``num_parallel_calls=tf.data.AUTOTUNE``:

        1. keep only the image and the mask of every
           (img, mask, label, id_img) sample;
        2. resize image and mask to ``target_shape`` with ``tf.image.resize``;
        3. replace the mask by ``fussion_mask(mask)``.

    The result is then grouped into batches.

    Args:
        dataset (tf.data.Dataset): Split whose elements unpack as
            (img, mask, label, id_img).
        target_shape (tuple): Size handed to ``tf.image.resize``.
        batch_size (int): Number of samples per batch.

    Returns:
        The mapped and batched dataset of (image, mask) pairs.

    """
    autotune = tf.data.AUTOTUNE

    def keep_image_and_mask(img, mask, label, id_img):
        return img, mask

    def resize_pair(img, mask):
        return tf.image.resize(img, target_shape), tf.image.resize(mask, target_shape)

    def fuse_mask(img, mask):
        return img, fussion_mask(mask)

    pipeline = dataset
    for stage in (keep_image_and_mask, resize_pair, fuse_mask):
        pipeline = pipeline.map(stage, num_parallel_calls=autotune)

    return pipeline.batch(batch_size)

# Single-mask versions of the three splits, resized to TARGET_SHAPE and batched.
# These are the masks written to the *.h5 files and to the Val/Test mask folders below.
original_train = map_dataset(train_dataset, TARGET_SHAPE, BATCH_SIZE)
original_val = map_dataset(val_dataset, TARGET_SHAPE, BATCH_SIZE)
original_test = map_dataset(test_dataset, TARGET_SHAPE, BATCH_SIZE)

In [None]:
# Show the first sample of the first training batch next to its fused mask.
for img, mask in original_train.take(1):
    print(mask.shape)
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

    # Left panel: image. Right panel: the only channel of the fused mask.
    axes[0].imshow(img[0])
    axes[1].imshow(mask[0, :, :, 0])

    axes[0].set_title('Image')
    axes[1].set_title('Original mask')

    axes[0].axis('off')
    axes[1].axis('off')

## Synthetic masks

In [None]:
# Download trained Unet network for OxfordPet segmentation task from Drive

model_url = "https://drive.google.com/file/d/1x39L3QNDMye1SJhKh1gf4YS-HRFLTs6G/view?usp=drive_link"
model_uri = model_url.split("/")[5]
!gdown $model_uri

model_extension = "keras"
paths = []

for file in os.listdir("."):
  if file.endswith(model_extension):
    paths.append(file)

model_path = paths[0]
print(f"Loading {model_path}...")
model_ann  = tf.keras.models.load_model(model_path, compile = False)

In [None]:
# Find last encoder convolution layer

def find_last_encoder_conv_layer(model):
    '''
    Returns the position of the last Conv2D layer located before the decoder.

    ``model.layers`` is walked in order. Every ``Conv2D`` instance updates the
    remembered position, and the walk stops at the first ``UpSampling2D``
    layer, which is treated as the start of the decoder. When the model has no
    ``UpSampling2D`` layer, all layers are scanned.

    Parameters:
    model (keras.Model): Model whose ``layers`` list is inspected.

    Returns:
    int: Index into ``model.layers`` of the last ``Conv2D`` seen before the
         first ``UpSampling2D``; 0 when no ``Conv2D`` was seen.
    '''
    encoder_conv_index = 0
    for position, candidate in enumerate(model.layers):
        if isinstance(candidate, keras.layers.Conv2D):
            encoder_conv_index = position
        if isinstance(candidate, keras.layers.UpSampling2D):
            break
    return encoder_conv_index

# Kept as a notebook-level variable: produce_disturbed_models reads it to
# decide which layer receives the noise.
last_conv_encoder_layer = find_last_encoder_conv_layer(model_ann)
# Bare expression so the index is shown as the cell output.
last_conv_encoder_layer

In [8]:
# Compute and add noise to the target layer

def compute_snr(signal: np.ndarray, noise_std: float) -> float:
    """Compute the Signal-to-Noise Ratio (SNR) in decibels.

    The signal power is the mean of the squared entries of ``signal`` and the
    noise power is ``noise_std ** 2``; the result is
    ``10 * log10(signal_power / noise_power)``.

    Parameters:
        signal (array-like): Signal samples, e.g. a weight tensor. Scalars,
            lists and NumPy arrays are accepted.
        noise_std (float): The standard deviation of the noise.

    Returns:
        float: The Signal-to-Noise Ratio (SNR) in decibels.

    """
    # asarray lets plain lists/tuples through; ndarrays are used as they are.
    signal = np.asarray(signal)
    return 10 * np.log10(np.mean(signal ** 2) / noise_std ** 2)

class SnrType(Enum):
    """Scale in which a requested SNR value is expressed.

    ``add_noise_to_layer_weights`` uses the member to convert the requested
    SNR into a noise power.
    """
    # The requested SNR is in decibels: noise_power = sig_power / 10 ** (snr / 10).
    log = 0
    # The requested SNR is a plain power ratio: noise_power = sig_power / snr.
    linear = 1

def add_noise_to_layer_weights(model, layer, noise_snr, snr_type: SnrType = SnrType.log, verbose=0):
    """Add Gaussian noise to the first weight array of one layer, in place.

    The first array returned by ``model.layers[layer].get_weights()`` is
    treated as the signal. Its mean square gives the signal power, the
    requested SNR gives the noise power, and standard-normal samples scaled by
    the resulting standard deviation are added to every entry of that array.
    The remaining weight arrays of the layer are written back unchanged.

    Parameters:
        model (tf.keras.Model): The model to modify. Only ``model.layers`` and
            the layer's ``get_weights`` / ``set_weights`` are used.
        layer (int): Index of the layer whose weights are read and written.
        noise_snr (float): Desired signal-to-noise ratio for the added noise.
        snr_type (SnrType): Scale of ``noise_snr``: ``SnrType.log`` (decibels)
            or ``SnrType.linear`` (power ratio). Defaults to SnrType.log.
        verbose (int): If greater than 0, prints the requested SNR and the
            signal and noise powers. Defaults to 0.

    Returns:
        float: The SNR in decibels computed from the original weights and the
            noise standard deviation. It is evaluated before the noise is
            drawn, so it is the nominal value rather than one measured on the
            noisy weights.

    Raises:
        ValueError: If ``snr_type`` is neither ``SnrType.log`` nor
            ``SnrType.linear``.

    """
    layer_weights = model.layers[layer].get_weights()
    kernel = layer_weights[0]

    sig_power = np.mean(kernel ** 2)

    if snr_type == SnrType.log:
        noise_power = sig_power / (10 ** (noise_snr / 10))
    elif snr_type == SnrType.linear:
        noise_power = sig_power / noise_snr
    else:
        # Previously this fell through and failed later with an UnboundLocalError.
        raise ValueError(f"Unsupported snr_type: {snr_type!r}")

    noise_std = noise_power ** (1 / 2)

    snr = compute_snr(kernel, noise_std)

    if verbose > 0:
        print(f"Adding noise for SNR: {noise_snr}\n\n")
        print(f"Signal power: {sig_power}")
        print(f"Noise power: {noise_power}\n\n")

    # Draw the noise with the kernel's own shape instead of a fixed (128, 128)
    # block per spatial position, so kernels of any size are supported.
    noise = np.random.randn(*kernel.shape) * noise_std
    layer_weights[0] = (kernel + noise).astype(kernel.dtype)

    # Write back to the layer that was read; the global
    # `last_conv_encoder_layer` used before ignored the `layer` argument.
    model.layers[layer].set_weights(layer_weights)
    return snr

In [9]:
# Define the signal-to-noise ratio values for each synthetic annotator.
# One disturbed model is built per entry. The values are passed to
# add_noise_to_layer_weights with its default snr_type (SnrType.log),
# so they are interpreted on the logarithmic (dB) scale.
values_to_test = [20,0,-15]

# Creation of the different models and their perturbations starting from the base model
def produce_disturbed_models(values_to_test, base_model_path, layer=None):
    """Produce one noisy copy of the base model per requested SNR value.

    For every entry of ``values_to_test`` the base model is loaded again from
    ``base_model_path`` and ``add_noise_to_layer_weights`` is applied to it
    with that entry as the requested SNR (default SNR scale).

    Parameters:
        values_to_test (list): SNR values, one per disturbed model.
        base_model_path (str): The file path to the base model to load.
        layer (int, optional): Index of the layer that receives the noise.
            When omitted, the notebook-level ``last_conv_encoder_layer`` is
            used, which is what the function always did before this
            parameter existed.

    Returns:
        Tuple containing two lists:
            - List of disturbed models, in the order of ``values_to_test``.
            - List of the SNR values returned by ``add_noise_to_layer_weights``
              for each model.

    """
    # Resolve the default lazily so existing two-argument calls keep relying
    # on the notebook-level index exactly as before.
    target_layer = last_conv_encoder_layer if layer is None else layer

    models = []
    snr_values = []

    for value in values_to_test:
        # Reload from disk on every iteration: the noise is written into the
        # model through set_weights, so each annotator needs its own copy.
        model_ = tf.keras.models.load_model(base_model_path, compile=False)
        snr_values.append(add_noise_to_layer_weights(model_, target_layer, value))
        models.append(model_)

    return models, snr_values


# One noisy copy of the base model per entry of values_to_test, plus the SNR
# reported for each. map_dataset_MA reads `disturbance_models` as a global.
disturbance_models, snr_values = produce_disturbed_models(values_to_test, model_path)

In [10]:
# Disturbance processing with different SNR ratios values for each database partition using the modified networks

# BATCH_SIZE and TARGET_SHAPE are re-assigned here with the same values they
# received in the earlier cell.
BATCH_SIZE = 5
TARGET_SHAPE = (512, 512)
# Size the images are resized to before being passed to the disturbed models.
ORIGINAL_MODEL_SHAPE = 256, 256
# Used for plotting and for the channel count when masks are flattened;
# it equals the number of entries in values_to_test (3).
NUM_ANNOTATORS = 3

def disturb_mask(model, image, model_shape, target_shape):
    """Predict a mask for ``image`` with ``model`` and resize it.

    The image is resized to ``model_shape``, passed through ``model`` by
    calling it directly, and the model output is resized to ``target_shape``.
    Both resizes use ``tf.image.resize`` with its default settings.

    Parameters:
        model (tf.keras.Model): Callable model that produces the mask.
        image (tf.Tensor): Input image tensor.
        model_shape (tuple): Size the image is resized to before the call.
        target_shape (tuple): Size the model output is resized to.

    Returns:
        The resized model output.

    """
    model_input = tf.image.resize(image, model_shape)
    prediction = model(model_input)
    return tf.image.resize(prediction, target_shape)


def mix_channels(mask, num_annotators):
    """Pair a mask with its complement along a new second-to-last axis.

    ``mask`` and ``1 - mask`` are stacked with ``tf.stack(..., axis=-2)``, so
    the result has one more axis than the input: a new axis of size 2 placed
    just before the last one. Index 0 of that axis holds ``mask`` and index 1
    holds ``1 - mask``.

    Parameters:
        mask (tensor): Input mask tensor.
        num_annotators (int): Not used by the function; kept so that existing
            calls keep working.

    Returns:
        The stacked tensor described above.

    """
    complement = 1 - mask
    return tf.stack((mask, complement), axis=-2)


def add_noisy_annotators(img: EagerTensor, models, model_shape, target_shape) -> EagerTensor:
    """Collect the mask predicted by every annotator model for one image.

    ``disturb_mask`` is called once per model with the same image; the
    resulting list is handed to ``tf.transpose`` with the permutation
    ``[2, 3, 1, 4, 0]``, which moves the per-model axis (axis 0 of the packed
    list) to the last position.

    Parameters:
        img (EagerTensor): Image passed unchanged to every model.
        models (list): Annotator models; each yields one mask.
        model_shape: Size the image is resized to before each model call.
        target_shape: Size every predicted mask is resized to.

    Returns:
        EagerTensor: The transposed stack of predictions, with one entry per
        model along the last axis.

    """
    per_model_masks = []
    for annotator_model in models:
        per_model_masks.append(
            disturb_mask(annotator_model, img, model_shape=model_shape, target_shape=target_shape)
        )

    # The permutation has five entries, so each prediction is expected to be
    # rank 4 once the list is packed along a new leading axis.
    axis_order = [2, 3, 1, 4, 0]
    return tf.transpose(per_model_masks, axis_order)


def map_dataset_MA(dataset, target_shape, model_shape, batch_size, num_annotators):
    """Build the multi-annotator (image, masks) pipeline for one dataset split.

    Six ``Dataset.map`` stages run in order, each with
    ``num_parallel_calls=tf.data.AUTOTUNE``:

        1. keep only the image and mask of every (img, mask, label, id_img) sample;
        2. resize both to ``target_shape``;
        3. replace the mask by the predictions of the notebook-level
           ``disturbance_models`` (see ``add_noisy_annotators``);
        4. reshape those predictions to (dim 0, dim 1, 1, last dim);
        5. pair every mask with its complement via ``mix_channels``;
        6. squeeze axis 2.

    The result is then grouped into batches.

    Parameters:
        dataset (tf.data.Dataset): Split whose elements unpack as
            (img, mask, label, id_img).
        target_shape (tuple): Size used for the images and the predicted masks.
        model_shape (tuple): Size the images are resized to before the models are called.
        batch_size (int): Number of samples per batch.
        num_annotators (int): Forwarded to ``mix_channels``.

    Returns:
        The mapped and batched dataset.

    """
    autotune = tf.data.AUTOTUNE

    def keep_image_and_mask(img, mask, label, id_img):
        return img, mask

    def resize_pair(img, mask):
        return tf.image.resize(img, target_shape), tf.image.resize(mask, target_shape)

    def predict_annotator_masks(img, mask):
        # The incoming mask is dropped here and replaced by the predictions.
        predictions = add_noisy_annotators(tf.expand_dims(img, 0),
                                           disturbance_models,
                                           model_shape=model_shape,
                                           target_shape=target_shape)
        return img, predictions

    def collapse_to_four_axes(img, mask):
        new_shape = (mask.shape[0], mask.shape[1], 1, mask.shape[-1])
        return img, tf.reshape(mask, new_shape)

    def add_complement(img, mask):
        return img, mix_channels(mask, num_annotators)

    def drop_singleton_axis(img, mask):
        return img, tf.squeeze(mask, axis=2)

    stages = (
        keep_image_and_mask,
        resize_pair,
        predict_annotator_masks,
        collapse_to_four_axes,
        add_complement,
        drop_singleton_axis,
    )

    pipeline = dataset
    for stage in stages:
        pipeline = pipeline.map(stage, num_parallel_calls=autotune)

    return pipeline.batch(batch_size)



# Multi-annotator version of the training split. Only the training split gets
# synthetic annotators; validation and test use the single-mask pipelines.
synthetic_train = map_dataset_MA(
    train_dataset,
    target_shape=TARGET_SHAPE,
    model_shape=ORIGINAL_MODEL_SHAPE,
    batch_size=BATCH_SIZE,
    num_annotators=NUM_ANNOTATORS)

In [None]:
# Plotting the different perturbations to a sample and the resulting dimensions

# For the first sample of the first batch: one column per annotator,
# top row = first entry of the class axis, bottom row = last entry.
for img, mask in synthetic_train.take(1):
    print(f"Mask shape: {mask.shape} (batch_size * h * w * k * r) Img shape {img.shape}")
    fig, axes = plt.subplots(nrows=2, ncols=NUM_ANNOTATORS, figsize=(16, 7))
    for i in range(NUM_ANNOTATORS):
        axes[0, i].imshow(mask[0, :, :, 0, i])
        axes[0, i].set_title(f"Mask for annotator {i}")
        axes[0, i].axis('off')
        axes[1, i].imshow(mask[0, :, :, -1, i])
        axes[1, i].axis('off')

### Saving the different parts of the dataset to disk

In [12]:
def save_image_in_folder(tensor_images, tensor_masks, folder_path, index, batch_size=5):
    """
    Saves one batch of images and their binarized masks as PNG files.

    The images go to ``folder_path[0]``. Every further entry of
    ``folder_path`` receives one mask channel: channel ``k`` of
    ``tensor_masks`` is written to ``folder_path[k + 1]``. With four folders
    this writes channels 0, 1 and 2; with two folders it writes channel 0
    only. Masks are thresholded at 0.5 and stored as uint8 values 0/1.

    Files are named ``sample_<n>.png`` with ``n = batch_size * index + offset``,
    where ``offset`` is the position of the sample inside the batch, so an
    image and its masks share the same file name across folders.

    Parameters:
    tensor_images (tf.Tensor): Batch of images; the first axis indexes samples.
    tensor_masks (tf.Tensor): Batch of masks with the channel on the last axis
                              (indexed as [sample, :, :, channel]). It needs at
                              least ``len(folder_path) - 1`` channels.
    folder_path (list): Folder for the images followed by one folder per mask
                        channel to save. Missing folders are created.
    index (int): Zero-based index of the batch; sets the first file number.
    batch_size (int): Nominal number of samples per batch, used for the file
                      numbering. Defaults to 5. A batch holding fewer samples
                      (e.g. the last one of a split) is saved in full instead
                      of raising an indexing error.

    Returns:
    None
    """
    image_dir, *mask_dirs = folder_path

    # Create each folder independently; exist_ok keeps re-runs and partially
    # created trees working (the earlier check only looked at folder_path[0]).
    for directory in folder_path:
        os.makedirs(directory, exist_ok=True)

    tensor_masks = tf.where(tensor_masks > 0.5, tf.ones_like(tensor_masks), tf.zeros_like(tensor_masks))

    # Never index past the samples actually present in this batch.
    samples_in_batch = min(batch_size, int(tensor_images.shape[0]))

    for offset in range(samples_in_batch):
        sample_name = f'sample_{batch_size * index + offset}.png'

        # Save image
        image_array = tensor_images[offset].numpy()
        plt.imsave(os.path.join(image_dir, sample_name), image_array, format='png')

        # Save one mask channel per remaining folder
        for channel, mask_dir in enumerate(mask_dirs):
            mask_array = tensor_masks[offset, :, :, channel].numpy()
            imageio.imwrite(os.path.join(mask_dir, sample_name), mask_array.astype(np.uint8))

In [13]:
def save_chunk_to_hdf5(filename, dataset_name, chunk_data, start_index, end_index):
    """
    Write a block of rows into a dataset of an HDF5 file.

    The file is opened with h5py in append mode (``'a'``), the dataset is
    looked up by name and rows ``start_index`` (inclusive) to ``end_index``
    (exclusive) of its first axis are overwritten with ``chunk_data``. The
    assignment indexes four axes, so the dataset is expected to be 4-D.

    Parameters:
        filename (str): Path of the HDF5 file.
        dataset_name (str): Name of the dataset inside the file.
        chunk_data (numpy.ndarray): Values to store in the selected rows.
        start_index (int): First row to overwrite.
        end_index (int): Row after the last one to overwrite.

    Returns:
        None
    """
    rows = slice(start_index, end_index)
    with h5py.File(filename, 'a') as h5_file:
        target = h5_file[dataset_name]
        target[rows, :, :, :] = chunk_data

In [None]:
# Save the training images and the synthetic annotators' masks as PNG files.

# folder_path[0] receives the images; the remaining three entries receive one
# mask per annotator (see save_image_in_folder).
folder_path = ['/kaggle/working/patches/Train', '/kaggle/working/masks/Train/annotator_1', '/kaggle/working/masks/Train/annotator_2', '/kaggle/working/masks/Train/annotator_3']

# Batch index
i = 0

# 205 batches of BATCH_SIZE (5) samples = 1025 samples.
for img, mask in synthetic_train.take(205):

    # Reshape masks and save these and the images in files.
    # The two trailing mask axes are merged into NUM_ANNOTATORS*2 channels;
    # assuming the usual row-major reshape, channels 0..NUM_ANNOTATORS-1 are
    # the first class-axis entry for annotators 0, 1, 2, which are the
    # channels save_image_in_folder writes out.
    # NOTE(review): the fixed BATCH_SIZE in the target shape fails on a batch
    # holding fewer than BATCH_SIZE samples.
    mask_1 = tf.reshape(mask,[BATCH_SIZE, TARGET_SHAPE[0], TARGET_SHAPE[1], NUM_ANNOTATORS*2])
    save_image_in_folder(img, mask_1, folder_path, i)
    i+= 1

print('Section for training with annotations of dataset saved successfully')

In [None]:
# Layout of the HDF5 arrays for the training split
total_samples = 1025  # rows allocated in each file
channels = [3,1]      # size of the last axis: [images, masks]
height = 512          # height of each image
width = 512           # width of each image
chunk_size = 5        # rows written per batch

# Row offset of the next batch inside the HDF5 datasets
i = 0

# Create both files up front with their final shape; the batches are then
# written into them block by block.
with h5py.File('images_train.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[0]), dtype='float32')

with h5py.File('masks_train.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[1]), dtype='float32')

# total_samples // chunk_size batches fill the datasets exactly.
for img, mask in original_train.take(total_samples // chunk_size):

    # Images of the batch
    chunk_data = img.numpy()
    save_chunk_to_hdf5('images_train.h5', 'dataset', chunk_data, i, i + chunk_size)

    # Masks of the batch, thresholded at 0.5 to values 0/1
    chunk_data = (tf.where(mask > 0.5, tf.ones_like(mask), tf.zeros_like(mask))).numpy()
    save_chunk_to_hdf5('masks_train.h5', 'dataset', chunk_data, i, i + chunk_size)

    i += chunk_size

print('Section for training of dataset saved successfully')

In [None]:
# Save the validation split both as PNG files and as HDF5 arrays

# [image folder, mask folder]
folder_path = ['/kaggle/working/patches/Val', '/kaggle/working/masks/Val']

# Layout of the HDF5 arrays for the validation split
total_samples = 255   # rows allocated in each file
channels = [3,1]      # size of the last axis: [images, masks]
height = 512          # height of each image
width = 512           # width of each image
chunk_size = 5        # rows written per batch

i = 0    # batch index, used for the PNG file numbering
i_2 = 0  # row offset of the next batch inside the HDF5 datasets

# Create both files up front with their final shape
with h5py.File('images_val.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[0]), dtype='float32')

with h5py.File('masks_val.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[1]), dtype='float32')

# total_samples // chunk_size batches fill the datasets exactly.
for img, mask in original_val.take(total_samples // chunk_size):

    # PNG copies of the images and masks
    save_image_in_folder(img, mask, folder_path, i)
    i += 1

    # Images of the batch
    chunk_data = img.numpy()
    save_chunk_to_hdf5('images_val.h5', 'dataset', chunk_data, i_2, i_2 + chunk_size)

    # Masks of the batch, thresholded at 0.5 to values 0/1
    chunk_data = (tf.where(mask > 0.5, tf.ones_like(mask), tf.zeros_like(mask))).numpy()
    save_chunk_to_hdf5('masks_val.h5', 'dataset', chunk_data, i_2, i_2 + chunk_size)

    i_2 += chunk_size

print('Section for validation of dataset saved successfully')

In [None]:
# Save the test split both as PNG files and as HDF5 arrays

# [image folder, mask folder]
folder_path = ['/kaggle/working/patches/Test', '/kaggle/working/masks/Test']

# Layout of the HDF5 arrays for the test split
total_samples = 255   # rows allocated in each file
channels = [3,1]      # size of the last axis: [images, masks]
height = 512          # height of each image
width = 512           # width of each image
chunk_size = 5        # rows written per batch

i = 0    # batch index, used for the PNG file numbering
i_2 = 0  # row offset of the next batch inside the HDF5 datasets

# Create both files up front with their final shape
with h5py.File('images_test.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[0]), dtype='float32')

with h5py.File('masks_test.h5', 'w') as f:
    f.create_dataset('dataset', shape=(total_samples, height, width, channels[1]), dtype='float32')

# total_samples // chunk_size batches fill the datasets exactly.
for img, mask in original_test.take(total_samples // chunk_size):

    # PNG copies of the images and masks
    save_image_in_folder(img, mask, folder_path, i)
    i += 1

    # Images of the batch
    chunk_data = img.numpy()
    save_chunk_to_hdf5('images_test.h5', 'dataset', chunk_data, i_2, i_2 + chunk_size)

    # Masks of the batch, thresholded at 0.5 to values 0/1
    chunk_data = (tf.where(mask > 0.5, tf.ones_like(mask), tf.zeros_like(mask))).numpy()
    save_chunk_to_hdf5('masks_test.h5', 'dataset', chunk_data, i_2, i_2 + chunk_size)

    i_2 += chunk_size

print('Section for testing of dataset saved successfully')

In [None]:
# Sanity check: reload the first training mask saved for annotator_1.
image_path = '/kaggle/working/masks/Train/annotator_1/sample_0.png'

# Load the image from the PNG file
image = Image.open(image_path)
image_array = np.array(image)

plt.imshow(image_array)
plt.show()

# The masks were written as uint8 after thresholding to 0/1, so the printed
# range is expected to stay within 0..1.
print(image_array.min(),image_array.max())

In [None]:
# Sanity check: reload the first training image that was saved as PNG.
image_path = '/kaggle/working/patches/Train/sample_0.png'

# Load the image from the PNG file
image = Image.open(image_path)
image_array = np.array(image)

plt.imshow(image_array)
plt.show()

# Value range of the decoded PNG.
print(image_array.min(),image_array.max())

In [None]:
# Content of config.yaml: run settings (data, logging, model).
# The keys are consumed outside this notebook; nothing here defines their meaning.
config_data = {
    'data': {
        # NOTE(review): this path uses the folder name "OxfordPet", while the
        # files are later moved to and zipped from "/kaggle/working/OxfordPets".
        # Confirm which name the consumer of this file expects.
        'dataset_config': '/kaggle/working/OxfordPet/dataset_config.yaml'
    },
    'logging': {
        'interval': 10,
        # NOTE(review): absolute path outside /kaggle/working — presumably
        # specific to another machine; verify before use.
        'mlruns_folder': '/work/work_mik/mlflow_server'
    },
    'model': {
        'alpha': 0.4,
        'backbone': 'unet',
        'batch_size': 5,
        'crowd_type': 'image',
        'decoder': {
            'activation': 'softmax'
        },
        'encoder': {
            'backbone': 'resnet34',
            'weights': 'imagenet'
        },
        'epochs': 20,
        'identity_reg': False,
        'learning_rate': 0.0001,
        'loss': 'ce',
        # NOTE(review): 50 is larger than 'epochs' (20); if the decay only
        # starts after this epoch it would never trigger — confirm.
        'lr_decay_after_epoch': 50,
        'lr_decay_param': 0.1,
        'min_trace': False,
        'optimizer': 'adam',
        'seed': 12
    }
}

# Content of dataset_config.yaml: description of the exported dataset.
# The keys are consumed outside this notebook; nothing here defines their meaning.
# NOTE(review): every absolute path below uses the folder name "OxfordPet",
# while the files are later moved to and zipped from "/kaggle/working/OxfordPets".
dataset_config_data = {
    'data': {
        'dataset_name': 'OxfordPet',
        'class_no': 2,
        'class_names': ['background', 'pet'],
        # NOTE(review): the commented-out list has five weights although
        # class_no is 2 — presumably left over from another dataset.
        'class_weights': [1.0, 1.0],  # [4.7607, 0.4756, 0.5844, 1.5684, 3.1598]
        'crowd': True,
        'sr_experiment': '/kaggle/working/OxfordPet/experiments/',
        'sr_path': '/kaggle/working/OxfordPet/experiments/',
        'ignore_last_class': False,  # index of class in gt to be deleted, else 'None'
        'path': '/kaggle/working/OxfordPet/',
        'train': {
            'images': 'patches/Train',
            # The training masks were saved one level deeper, in
            # masks/Train/annotator_1 .. annotator_3.
            'masks': 'masks/Train'
        },
        'val': {
            'images': 'patches/Val',
            'masks': 'masks/Val'
        },
        'test': {
            'images': 'patches/Test',
            'masks': 'masks/Test'
        },
        # File names follow the sample_<n>.png pattern written by save_image_in_folder.
        'visualize_images': {
            'train': [
                'sample_0.png',
                'sample_1.png',
                'sample_2.png',
                'sample_3.png',
                'sample_4.png'
            ],
            'val': [
                'sample_0.png',
                'sample_1.png',
                'sample_2.png',
                'sample_3.png',
                'sample_4.png',
                'sample_5.png',
                'sample_6.png'
            ],
            'test': [
                'sample_0.png',
                'sample_1.png',
                'sample_2.png',
                'sample_3.png',
                'sample_4.png'
            ]
        },
        'normalization': False,
        'augmentation': {
            'use_augmentation': False,  # switch everything off and on, if True automatically flips and 90 degree rotations are used in this case
            'gaussian_blur_kernel': 5,  # range 3-7
            'brightness_limit': 0.2,
            'contrast_limit': 0.2,
            'hue_shift_limit': 20,
            'sat_shift_limit': 30
        }
    }
}

# Save both dictionaries as YAML files. The names are relative, so the files
# land in the current working directory; default_flow_style=False writes
# nested mappings in block style.
with open('config.yaml', 'w') as file:
    yaml.dump(config_data, file, default_flow_style=False)

with open('dataset_config.yaml', 'w') as file:
    yaml.dump(dataset_config_data, file, default_flow_style=False)

print("yaml files created successfully.")

In [21]:
# Folder that collects every exported artefact before it is zipped.
# exist_ok=True replaces the separate existence check: the call does nothing
# when the directory is already there and still fails loudly if the path is
# occupied by a regular file.
# NOTE(review): the YAML files written above refer to "/kaggle/working/OxfordPet"
# (no trailing "s") — confirm which folder name is intended.
os.makedirs('/kaggle/working/OxfordPets', exist_ok=True)

In [22]:
def move_files(source_dir, destination_dir):
    """
    Moves a file or directory to another location, reporting the outcome.

    The move is delegated to ``shutil.move``. On success a confirmation line
    is printed; any exception raised by the move is caught and printed as
    ``Error: <message>``, so the function itself does not raise.

    Parameters:
    source_dir (str): Path of the file or directory to move.
    destination_dir (str): Path it is moved to (an existing directory receives
                           the source inside it).

    Returns:
    None
    """
    try:
        shutil.move(source_dir, destination_dir)
    except Exception as err:
        print(f'Error: {err}')
    else:
        print(f'Carpet moved from {source_dir} to {destination_dir}')

In [None]:
# Gather every exported artefact inside /kaggle/working/OxfordPets.
# The *.h5 and *.yaml files were written with relative names, so these
# absolute source paths assume the working directory is /kaggle/working.
# move_files prints an "Error: ..." line instead of raising when a move fails.
move_files('/kaggle/working/masks','/kaggle/working/OxfordPets')
move_files('/kaggle/working/patches','/kaggle/working/OxfordPets')
move_files('/kaggle/working/masks_train.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/images_train.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/masks_val.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/images_val.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/masks_test.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/images_test.h5','/kaggle/working/OxfordPets')
move_files('/kaggle/working/config.yaml','/kaggle/working/OxfordPets')
move_files('/kaggle/working/dataset_config.yaml','/kaggle/working/OxfordPets')

In [24]:
def zip_folder(folder_path, output_path):
    """
    Compresses the files of a folder tree into a zip archive.

    The folder is walked with ``os.walk`` and every file found is added to a
    ``ZIP_DEFLATED`` archive under its path relative to ``folder_path``.
    Only files are added, so empty directories do not appear in the archive.
    The archive itself is skipped when ``output_path`` lies inside
    ``folder_path``, so it is never added to itself.

    Parameters:
    folder_path (str): The path to the folder to be compressed.
    output_path (str): The path where the output zip file will be saved.

    Returns:
    None
    """
    archive_abspath = os.path.abspath(output_path)

    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # The archive already exists on disk at this point; adding it
                # would store a partial copy of itself.
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                # Add file to zip with a path relative to the base folder
                arcname = os.path.relpath(file_path, start=folder_path)
                zipf.write(file_path, arcname)

    print('Folder successfully zipped')

In [None]:
# Zip the collected folder. The archive is written next to the folder, not
# inside it. Note that `folder_path` held a list of folders in earlier cells
# and is re-used here as a single path string.
folder_path = '/kaggle/working/OxfordPets'
output_path = '/kaggle/working/OxfordPets.zip'

zip_folder(folder_path, output_path)