In [27]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np

import os
import requests
from io import BytesIO
from PIL import Image
import numpy as np

import os
import random
import shutil
import matplotlib.pyplot as plt

import cv2
import gc


In [28]:
project_id = 'axial-glow-456914-n5'


from google.colab import auth
auth.authenticate_user()

In [31]:
from google.cloud import storage
storage_client = storage.Client(project=project_id)

bucket_name = 'cis-difusion-dataset'
bucket = storage_client.bucket(bucket_name)


In [None]:
def check_folder_contents(bucket, folder_name):
    """
    Checks and lists files inside a specific folder in a GCS bucket.

    Args:
        bucket: The GCS bucket object
        folder_name: The name of the folder to check

    Returns:
        A list of file names in the folder
    """
    # Make sure the folder name ends with a slash
    if not folder_name.endswith('/'):
        folder_name += '/'

    # List blobs with the specified prefix
    blobs = list(bucket.list_blobs(prefix=folder_name))

    # Filter out the folder itself
    files = [blob.name for blob in blobs if blob.name != folder_name]

    # Print results
    print(f"Checking folder: {folder_name}")

    if files:
        print(f"Found {len(files)} files in the folder:")
        for file in files:
            print(f"- {file}")
    else:
        print(f"The folder '{folder_name}' is empty or doesn't exist.")

    return files

# Example usage:
check_folder_contents(bucket, "DIV2K_train_HR")



In [32]:
folder_name = 'DIV2K_train_HR'  # If you want to download a specific folder
local_path = '/dataset/'  # Local path to save the downloaded data

def download_files_from_gcs(bucket_name, folder_name, local_path):
    """
    Downloads files from a Google Cloud Storage folder to a local directory.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    os.makedirs(local_path, exist_ok=True)

    blobs = bucket.list_blobs(prefix=folder_name)
    for blob in blobs:
        if not blob.name.endswith('/'):
            file_name = os.path.basename(blob.name)
            blob.download_to_filename(os.path.join(local_path, file_name))
            print(f'Downloaded: {blob.name} to {os.path.join(local_path, file_name)}')

    print('Download complete.')


In [33]:
def download_image_batch(bucket, folder_name, local_path, batch_size=100, start_index=0):
    """Downloads a batch of images from Google Cloud Storage."""
    # Create local directory if it doesn't exist
    os.makedirs(local_path, exist_ok=True)

    blobs = list(bucket.list_blobs(prefix=folder_name))
    image_paths = []
    count = 0

    for i, blob in enumerate(blobs):
        if i < start_index:
            continue  # Skip already downloaded images
        if not blob.name.endswith('/') and count < batch_size:
            file_name = os.path.basename(blob.name)
            local_file_path = os.path.join(local_path, file_name)
            blob.download_to_filename(local_file_path)
            image_paths.append(local_file_path)  # Ensure this is a string
            count += 1
            print(f'Downloaded: {blob.name} to {local_file_path}')
        else:
            break

    print(f'Downloaded {count} images.')
    return image_paths

In [34]:
def prepare_dataset(bucket_name, folder_name, noise_std=0.1, img_size=(128, 128), is_training=True):  # Added is_training flag
    """
    Prepares the dataset for training by adding noise to the images.
    """
    def load_and_preprocess_image(path):
        # Load and preprocess image
        img = tf.io.read_file(path)
        img = tf.image.decode_png(img, channels=3)
        # Resize the image
        img = tf.image.resize(img, img_size)
        img = tf.cast(img, tf.float32) / 255.0  # Normalize to [0,1]

        if is_training:
            # Add Gaussian noise
            noise = tf.random.normal(shape=tf.shape(img), mean=0.0, stddev=noise_std)
            noisy_img = img + noise

            # Clip values to keep between [0,1]
            noisy_img = tf.clip_by_value(noisy_img, 0.0, 1.0)

            return noisy_img, img  # Return noisy and original for training
        else:
            return img, img  # Return original twice for testing

    # List paths of all images in the folder
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blobs = bucket.list_blobs(prefix=folder_name)
    image_paths = [f'gs://{bucket_name}/{blob.name}' for blob in blobs if not blob.name.endswith('/')]

    # Create dataset
    dataset = tf.data.Dataset.from_tensor_slices(image_paths)
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(4)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset



In [35]:
def prepare_dataset_local(local_image_paths, noise_std=0.1, img_size=(128, 128), is_training=True):
    """
    Prepara o dataset para treinamento usando imagens locais baixadas.
    """
    # First, ensure all paths are strings
    local_image_paths = [str(path) for path in local_image_paths]

    def load_and_preprocess_image(path):
        # Ensure path is a string (TensorFlow operations might convert it)
        path = tf.convert_to_tensor(path, dtype=tf.string)

        # Carregar e pré-processar imagem
        img = tf.io.read_file(path)
        img = tf.image.decode_png(img, channels=3)
        # Redimensionar a imagem
        img = tf.image.resize(img, img_size)
        img = tf.cast(img, tf.float32) / 255.0  # Normalizar para [0,1]

        if is_training:
            # Adicionar ruído Gaussiano
            noise = tf.random.normal(shape=tf.shape(img), mean=0.0, stddev=noise_std)
            noisy_img = img + noise

            # Limitar valores para manter entre [0,1]
            noisy_img = tf.clip_by_value(noisy_img, 0.0, 1.0)

            return noisy_img, img  # Retornar ruidosa e original para treinamento
        else:
            return img, img  # Retornar original duas vezes para teste

    # Criar dataset
    dataset = tf.data.Dataset.from_tensor_slices(local_image_paths)
    dataset = dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(4)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

In [42]:
class DnCNN(Model):
    def __init__(self, D, C=64):
        super(DnCNN, self).__init__()
        self.D = D
        # Create convolution layers
        self.conv_layers = [layers.Conv2D(C, kernel_size=3, padding='same', input_shape=(None, None, 3))]
        self.conv_layers.extend([layers.Conv2D(C, kernel_size=3, padding='same') for _ in range(D)])
        self.conv_layers.append(layers.Conv2D(3, kernel_size=3, padding='same'))
        # BatchNormalization doesn't take an activation parameter
        self.bn_layers = [layers.BatchNormalization() for _ in range(D)]

    def call(self, x, training=False):
        h = tf.nn.relu(self.conv_layers[0](x))
        for i in range(self.D):
            # Apply batch normalization
            h = self.bn_layers[i](self.conv_layers[i + 1](h), training=training)
            # Apply ReLU activation separately
            h = tf.nn.relu(h)
        y = self.conv_layers[-1](h) + x
        return y


In [43]:
@tf.function
def train_model(model, train_dataset, epochs=200, learning_rate=1e-3):
    optimizer = tf.keras.optimizers.Adam(learning_rate)
    loss_fn = tf.keras.losses.MeanSquaredError()

    model.compile(optimizer=optimizer, loss=loss_fn)
    model.fit(train_dataset, epochs=epochs)


In [44]:
def train_model_in_batches(model, bucket, train_folder_name, local_path, batch_size=100, epochs_per_batch=10):
    """Treina o modelo em batches de imagens baixadas do GCS."""
    num_batches = 5  # Você pode ajustar isso com base no número desejado de batches

    for batch_index in range(num_batches):
        print(f"Treinando no batch {batch_index + 1}/{num_batches}")

        # Baixar um batch de imagens do conjunto de treinamento
        start_index = batch_index * batch_size  # Calcular índice inicial para download

        # Usar train_folder_name para treinamento
        image_paths = download_image_batch(bucket, train_folder_name, local_path, batch_size, start_index)

        # Preparar dataset para o batch atual usando as imagens baixadas
        train_dataset = prepare_dataset_local(image_paths, noise_std=0.1, img_size=(128, 128), is_training=True)

        # Treinar o modelo para o batch atual
        train_model(model, train_dataset, epochs=epochs_per_batch)

        # Salvar pesos após cada batch
        model.save_weights('/content/weights.weights.h5')
        print(f"Treinamento do batch {batch_index + 1} completo. Pesos salvos.")

        # Apagar imagens baixadas para liberar espaço
        shutil.rmtree(local_path)
        os.makedirs(local_path, exist_ok=True)
        gc.collect()

In [45]:
def test_network_batch(model, bucket_name, test_folder_name, noise_std=0.1, batch_size=4):
    """Tests the model on a batch of images loaded from GCS."""

    # Load the saved weights into the model
    if os.path.exists('/content/weights.weights.h5'):  # Check if weights file exists
        model.load_weights('/content/weights.weights.h5')
        print("Loaded saved weights for testing.")
    else:
        print("Warning: Weights file not found. Using untrained model for testing.")

    # Definir image_paths antes de usá-lo
    local_path = '/content/test_dataset/'
    os.makedirs(local_path, exist_ok=True)
    image_paths = download_image_batch(storage_client.bucket(bucket_name), test_folder_name, local_path, batch_size)

    # Preparar o dataset de teste
    test_dataset = prepare_dataset_local(image_paths, is_training=False, noise_std=noise_std, img_size=(128, 128))

    for noisy_images, clean_images in test_dataset.take(1):
        denoised_images = model(noisy_images, training=False)

        for i in range(min(5, noisy_images.shape[0])):
            # Visualize the results
            plt.figure(figsize=(15, 5))

            plt.subplot(1, 3, 1)
            plt.imshow(tf.squeeze(clean_images[i]).numpy())
            plt.title("Original Image")
            plt.axis("off")

            plt.subplot(1, 3, 2)
            plt.imshow(tf.squeeze(noisy_images[i]).numpy())
            plt.title("Noisy Image")
            plt.axis("off")

            plt.subplot(1, 3, 3)
            plt.imshow(tf.squeeze(denoised_images[i]).numpy())
            plt.title("Denoised Image")
            plt.axis("off")

            plt.show()

In [46]:
def test_network(model, image_path, noise_std=0.1):
    # Load and preprocess the image
    img = tf.io.read_file(image_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.resize(img, (128, 128))
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.expand_dims(img, axis=0)
    # Add Gaussian noise
    noise = tf.random.normal(shape=tf.shape(img), mean=0.0, stddev=noise_std)
    noisy_img = img + noise
    noisy_img = tf.clip_by_value(noisy_img, 0.0, 1.0)  # Keep values in [0, 1]

    # Predict noise using the model
    denoised_img = model(noisy_img, training=False)

    # Denoise the image by subtracting the predicted noise
    denoised_img = tf.clip_by_value(denoised_img, 0.0, 1.0)

    # Visualize the results
    plt.figure(figsize=(15, 5))

    plt.subplot(2, 3, 1)
    plt.imshow(tf.squeeze(img).numpy())
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(2, 3, 2)
    plt.imshow(tf.squeeze(noisy_img).numpy())
    plt.title("Noisy Image")
    plt.axis("off")

    plt.subplot(2, 3, 3)
    plt.imshow(tf.squeeze(denoised_img).numpy())
    plt.title("Denoised Image")
    plt.axis("off")

    plt.show()
    return (img, noisy_img, denoised_img)




In [48]:
# Definir parâmetros
model = DnCNN(D=8)
train_folder_name = f"{folder_name}/DIV2K_train_HR"  # Nome da pasta de treinamento
local_path = '/content/dataset/'  # Caminho local para salvar os dados baixados
os.makedirs('/content/dataset/', exist_ok=True)
batch_size = 100  # Tamanho do batch
epochs_per_batch = 10  # Épocas por batch

# Treinar o modelo em batches
train_model_in_batches(model, bucket, train_folder_name, local_path, batch_size, epochs_per_batch)

# Testar o modelo
test_folder_name = f"{folder_name}/DIV2K_valid_HR"  # Nome da pasta de teste
result = test_network_batch(model, bucket_name, test_folder_name, noise_std=0.1)

Treinando no batch 1/5
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0001.png to /content/dataset/0001.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0002.png to /content/dataset/0002.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0003.png to /content/dataset/0003.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0004.png to /content/dataset/0004.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0005.png to /content/dataset/0005.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0006.png to /content/dataset/0006.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0007.png to /content/dataset/0007.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0008.png to /content/dataset/0008.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0009.png to /content/dataset/0009.png
Downloaded: DIV2K_train_HR/DIV2K_train_HR/DIV2K_train_HR/0010.png to /content/dataset/0010.png
Downloaded: DIV2K_train_HR/

TypeError: in user code:

    File "<ipython-input-43-319b02983d4b>", line 7, in train_model  *
        model.fit(train_dataset, epochs=epochs)
    File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/data_adapters/tf_dataset_adapter.py", line 68, in num_batches
        cardinality = int(self._dataset.cardinality())

    TypeError: int() argument must be a string, a bytes-like object or a real number, not 'SymbolicTensor'


In [None]:
def apply_median_filter(result, kernel_size=3):
    # Convert TensorFlow tensor to numpy array
    img = tf.squeeze(result[0]).numpy()
    noisy_img = tf.squeeze(result[1]).numpy()


    # Apply median filter
    denoised_img = np.zeros_like(noisy_img)
    for i in range(3):  # Apply filter to each channel independently
        denoised_img[:, :, i] = cv2.medianBlur(noisy_img[:, :, i], kernel_size)
    plt.figure(figsize=(15, 5))

    plt.subplot(2, 3, 4)
    plt.imshow(tf.squeeze(img).numpy())
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(2, 3, 5)
    plt.imshow(tf.squeeze(noisy_img).numpy())
    plt.title("Noisy Image")
    plt.axis("off")

    plt.subplot(2, 3, 6)
    plt.imshow(tf.squeeze(denoised_img).numpy())
    plt.title("Denoised Image")
    plt.axis("off")

    plt.show()



In [None]:
# Test the network on a sample image
result = test_network_batch(model, "images/test/image_138.png", noise_std=0.1)
median = apply_median_filter(result)