# Denoising (DAE)

## Introduction

## 0. Imports & variables

In [1]:
import os
import tensorflow as tf
import numpy as np
import pandas as  pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
dataset_path = "processed_data/photos/"

## 1. Chargement des données

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

def load_and_noisify_images(folder, noise_factor=0.5):
    images_list = []
    noisy_images_list = []
    
    # Barre de progression pour le chargement des images
    filelist = os.listdir(folder)
    for filename in tqdm(filelist, desc="Loading and Noising Images"):
        img_path = os.path.join(folder, filename)
        
        try:
            # Lire et redimensionner l'image
            img = tf.io.read_file(img_path)
            img = tf.image.decode_image(img, channels=3, dtype=tf.float32)
            img = tf.image.resize(img, [100, 100])
            img = img / 255.0  # Normalisation

            # Bruitage
            noisy = img + noise_factor * tf.random.normal(shape=img.shape, mean=0., stddev=1.)
            noisy = tf.clip_by_value(noisy, clip_value_min=0., clip_value_max=1.)
            
            images_list.append(img)
            noisy_images_list.append(noisy)
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

    return tf.stack(images_list), tf.stack(noisy_images_list)

# Chemin d'accès au dossier contenant les images
folder_path = "processed_data/photos"

# Chargement et bruitage des images
original_images, noisy_images = load_and_noisify_images(folder_path)

Loading and Noising Images:   0%|          | 0/29979 [00:00<?, ?it/s]

Error processing aug_photo_5977_0_1039.jpeg: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run ExpandDims: Dst tensor is not initialized. [Op:ExpandDims]
Error processing aug_photo_5977_0_5654.jpeg: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run ExpandDims: Dst tensor is not initialized. [Op:ExpandDims]
Error processing aug_photo_5978_0_2259.jpeg: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run ExpandDims: Dst tensor is not initialized. [Op:ExpandDims]
Error processing aug_photo_5978_0_3069.jpeg: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run ExpandDims: Dst tensor is not initialized. [Op:ExpandDims]
Error processing

In [None]:
from sklearn.model_selection import train_test_split

# Diviser les données en ensembles d'entraînement et de validation
X_train, X_valid, y_train, y_valid = train_test_split(noisy_images, original_images, test_size=0.15, random_state=42)

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model

input_img = Input(shape=(150, 150, 3))

# Encodeur
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Décodeur
x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
history = autoencoder.fit(X_train, y_train,
                          epochs=50,
                          batch_size=128,
                          shuffle=True,
                          validation_data=(X_valid, y_valid),
                          verbose=1)

In [None]:
df = pd.DataFrame({'image': list(images_np)})

df['height'] = df['image'].apply(lambda x: x.shape[0])
df['width'] = df['image'].apply(lambda x: x.shape[1])
df['channels'] = df['image'].apply(lambda x: x.shape[2])

In [None]:
print(df.head())

## 2. Exploration des données

In [None]:
images_np = images_np.astype(np.float32)
images_np /= 255.0

print("Après normalisation:")
print("Valeur minimale:", images_np.min())
print("Valeur maximale:", images_np.max())

In [None]:
sample_images = images_np[:5]

fig, axes = plt.subplots(1, 5, figsize=(15, 5))

for img, ax in zip(sample_images, axes):
    ax.imshow(img)
    ax.axis('off')

plt.show()

In [None]:
plt.figure(figsize=(10, 8))

# Scatter plot
plt.scatter(df['height'], df['width'], alpha=0.6, edgecolors="w", linewidth=0.5)

# Configuration du graphique
plt.xlabel('Hauteur (pixels)')
plt.ylabel('Largeur (pixels)')
plt.title('Distribution des dimensions des images')
plt.grid(True)

# Pour afficher une ligne de référence diagonale (utile si vous voulez voir combien d'images sont carrées)
plt.plot([0, max(df['height'])], [0, max(df['width'])], 'r--')

plt.show()

In [None]:
# Pour exécution eager
tf.config.run_functions_eagerly(True)

def resize_images_with_tf(image_arrays, target_size):
    resized_images = []
    input_images = tf.convert_to_tensor(image_arrays, dtype=tf.float32)
    batch_resized = tf.image.resize(input_images, target_size)
    
    resized_images.extend([img.cpu().numpy() if tf.is_tensor(img) else img for img in batch_resized])
    return resized_images

# Je suppose que la colonne contenant les images s'appelle "image" dans votre DataFrame
image_arrays = df['image'].tolist()

# Redimensionnez les images à 128x128 pixels
target_size = (128, 128)
resized_images = resize_images_with_tf(image_arrays, target_size)

# Convertir la liste des images redimensionnées en DataFrame
df_resized = pd.DataFrame({'image': [img.tolist() for img in resized_images]})

# Extraire la hauteur et la largeur des images
df_resized['height'] = df_resized['image'].apply(lambda img: np.array(img).shape[0])
df_resized['width'] = df_resized['image'].apply(lambda img: np.array(img).shape[1])

In [None]:
# Visualiser la distribution des dimensions
plt.figure(figsize=(10, 7))
plt.scatter(df_resized['height'], df_resized['width'], alpha=0.6, edgecolors="w", linewidth=0.5)
plt.title("Distribution des images en fonction de leur hauteur et de leur largeur (redimensionnées)")
plt.grid(True)
plt.xlabel("Hauteur (pixels)")
plt.ylabel("Largeur (pixels)")
plt.plot([0, max(df_resized['height'])], [0, max(df_resized['width'])], 'r--')
plt.show()

In [None]:
def load_images_from_folder(folder):
    images = []
    filenames = []
    
    for filename in tqdm(os.listdir(folder), desc="Loading images"):
        img_path = os.path.join(folder, filename)
        
        try:
            # Lire l'image en couleur (RGB) avec TensorFlow
            img = tf.io.read_file(img_path)
            img = tf.image.decode_image(img, channels=3)
            if img is not None:
                images.append(img.numpy())
                filenames.append(filename)
        except Exception as e:
            print(f"Error loading {filename}: {e}")

    return images, filenames

photos_folder = "processed_data/photos/"
photos, photo_filenames = load_images_from_folder(photos_folder)

In [None]:
def add_noise_to_image(img, noise_factor=0.5):
    noisy_img = img + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=img.shape)
    return np.clip(noisy_img, 0, 255).astype(np.uint8)

noisy_photos = [add_noise_to_image(photo) for photo in tqdm(photos, desc="Adding noise")]

In [None]:
noisy_photos_folder = "processed_data/noisy_photos/"
os.makedirs(noisy_photos_folder, exist_ok=True)

for noisy_photo, filename in tqdm(zip(noisy_photos, photo_filenames), total=len(noisy_photos), desc="Saving noisy images"):
    output_path = os.path.join(noisy_photos_folder, filename)
    tf.io.write_file(output_path, tf.image.encode_jpeg(noisy_photo))

In [None]:
# Convertir votre liste de photos bruitées en DataFrame
df_noisy = pd.DataFrame({'image': noisy_photos})

def display_images(original, noisy, num=10):
    plt.figure(figsize=(20, 4))
    for i in range(num):
        # Afficher les images originales
        ax = plt.subplot(2, num, i + 1)
        plt.imshow(original[i])
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        
        # Afficher les images bruitées
        ax = plt.subplot(2, num, i + 1 + num)
        plt.imshow(noisy[i])
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

# Extraire des échantillons des images originales et bruitées pour la visualisation
original_images = photos[:10]
noisy_images = df_noisy['image'].tolist()[:10]

In [None]:
display_images(original_images, noisy_images)

In [None]:
batch_size = 500  # réduire encore plus la taille des lots

for i in range(0, len(df_noisy), batch_size):
    df_noisy.loc[i:i+batch_size-1, 'image'] = df_noisy.iloc[i:i+batch_size]['image'].transform(lambda x: (x / 255.).astype(np.float32))

for i in range(0, len(df), batch_size):
    df.loc[i:i+batch_size-1, 'image'] = df.iloc[i:i+batch_size]['image'].transform(lambda x: (x / 255.).astype(np.float32))

In [None]:
from sklearn.model_selection import train_test_split

# Diviser les données en ensembles d'entraînement et de validation
X_train, X_valid, y_train, y_valid = train_test_split(df_noisy['image'].tolist(), df['image'].tolist(), test_size=0.15, random_state=42)

# Convertir les listes en arrays numpy pour l'entraînement
X_train = np.array(X_train)
X_valid = np.array(X_valid)
y_train = np.array(y_train)
y_valid = np.array(y_valid)

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model

input_img = Input(shape=(150, 150, 3))  # adapt this if using `channels_first` image data format

# Encodeur
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Décodeur
x = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
history = autoencoder.fit(X_train, y_train,
                          epochs=50,
                          batch_size=128,
                          shuffle=True,
                          validation_data=(X_valid, y_valid),
                          verbose=1)

## 3. Pré-processing

## 4. Création du modèle

## 5. Entraînement du modèle

## 6. Evaluation du modèle