# Task di Segmentazione tramite Unet

## Imports

In [None]:
from google.colab import files
from google.colab import drive

# Mount Google Drive under /content/gdrive/ (the prefix used by the dataset
# and model paths in this notebook). force_remount=True asks Colab to mount
# again even when a mount is already active at that path.
drive.mount('/content/gdrive/', force_remount=True)

In [None]:
from google.colab import drive

# Mount Drive at /content/gdrive/, the mount point that every dataset and
# model path in this notebook is built on. The previous target
# ('/content/drive') is not referenced by any path in the notebook, so running
# only this cell left all of those paths unresolved.
drive.mount('/content/gdrive/')

In [None]:
import tifffile
import matplotlib.pyplot as plt
import numpy as np
from skimage.io import imread
import os

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split

# **Esplorazione Dataset**




## Dataset Training

**Composizione del dataset per il training**


*   **images:** Contiene 1000 immagini nei formati TIFF, TIF, e PNG.
Queste immagini rappresentano le immagini originali delle cellule acquisite tramite microscopio.
Sono utilizzate come input per addestrare la rete neurale U-Net, che dovrà imparare a segmentare le immagini future.
*   **labels:** Contiene le maschere corrispondenti alle immagini presenti nella cartella images. (1000 totali)

Ogni maschera è un'immagine segmentata che evidenzia le diverse parti dell'immagine originale, separando le cellule dal background e distinguendo ogni cellula come un'entità unica.

La maschera rappresenta la ground truth, ovvero ciò che ci aspettiamo che la U-Net produca come output dopo il training.










**Chiarificazione**

Per ogni immagine presente nella cartella images, esiste una corrispondente maschera nella cartella labels.
Le due cartelle sono allineate, ovvero ogni immagine in images ha la sua maschera associata con lo stesso nome di file nella cartella labels.

**Obiettivo principale**

Durante il training:

*   La rete neurale prende come input le immagini dalla cartella images.
*   Confronta il proprio output con le maschere nella cartella labels per imparare a:
    *   identificare le cellule;
    *   separarle dallo sfondo;
    *   distinguere le diverse cellule tra loro.

Alla fine del training, la rete sarà in grado di segmentare nuove immagini di cellule in modo automatico, producendo una maschera simile a quelle nella cartella labels.

In [None]:
# Sample pair to inspect: one TIFF training image and its label mask
image_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/images/cell_00459.tif"
mask_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/labels/cell_00459_label.tiff"

# Read both files with tifffile
image = tifffile.imread(image_path)
mask = tifffile.imread(mask_path)

# Report the array shapes and a preview of the first label values in the mask
print("Image Shape:", image.shape)
print("Mask Shape:", mask.shape)
print("Unique Values in Mask:", np.unique(mask)[:10], "...")

# Draw the image (left) and its mask (right) in one figure
plt.figure(figsize=(16, 8))
panels = (
    (image, "gray", "Original Image"),
    (mask, "nipy_spectral", "Segmentation Mask"),  # colormap that separates label values
)
for position, (pixels, colormap, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(pixels, cmap=colormap)
    plt.title(heading)
    plt.axis("off")

plt.show()

In [None]:
# Directory holding the training label masks
folder_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/labels"

# Tally the TIFF files in that directory; problems are reported, not raised
try:
    # Keep only the entries whose lower-cased name ends with a TIFF extension
    image_files = list(
        filter(
            lambda entry: entry.lower().endswith(('.tif', '.tiff')),
            os.listdir(folder_path),
        )
    )
    num_images = len(image_files)

    print(f"Number of images in the folder '{folder_path}': {num_images}")
except FileNotFoundError:
    print(f"The folder '{folder_path}' does not exist. Please check the path.")
except Exception as e:
    print(f"An error occurred: {e}")

In [None]:
import os

# Directory holding the training images
folder_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/images"

# Tally the image files in that directory; problems are reported, not raised
try:
    # str.endswith accepts a tuple, so one call checks every extension
    image_files = [
        entry
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp'))
    ]
    num_images = len(image_files)

    print(f"Number of images in the folder '{folder_path}': {num_images}")
except FileNotFoundError:
    print(f"The folder '{folder_path}' does not exist. Please check the path.")
except Exception as e:
    print(f"An error occurred: {e}")


In [None]:
from skimage.io import imread
import matplotlib.pyplot as plt
import numpy as np

# Sample pair to inspect: one PNG training image and its TIFF label mask
image_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/images/cell_00188.png"
mask_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/labels/cell_00188_label.tiff"

# Read both files with skimage.io.imread
image = imread(image_path)
mask = imread(mask_path)

# Report shape and dtype of both arrays, plus a preview of the first label values
print("Image Shape:", image.shape)
print("Image Data Type:", image.dtype)
print("Mask Shape:", mask.shape)
print("Mask Data Type:", mask.dtype)
print("Unique Values in Mask:", np.unique(mask)[:10], "...")

# Draw the image (left) and its mask (right) in one figure
plt.figure(figsize=(16, 8))
panels = (
    (image, "gray", "Original Image"),
    (mask, "nipy_spectral", "Segmentation Mask"),  # colormap that separates label values
)
for position, (pixels, colormap, heading) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(pixels, cmap=colormap)
    plt.title(heading)
    plt.axis("off")

plt.show()


## Dataset Test

Questo dataset verrà utilizzato per valutare l'accuratezza della segmentazione ottenuta dalla U-Net dopo il processo di addestramento.

In particolare, per testare le prestazioni della rete, abbiamo a disposizione 50 nuove immagini, ciascuna associata alla rispettiva label (maschera di riferimento).

Queste immagini, mai utilizzate durante l'addestramento, vengono inserite nella rete come input. Successivamente, confrontiamo le segmentazioni prodotte dalla rete con le labels fornite nel dataset, in modo da misurare quanto l'output della rete si avvicini alla segmentazione attesa. Questo confronto ci permette di valutare la capacità della rete di generalizzare su dati non visti.

In [None]:
# Directory holding the public test label masks
folder_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Testing/Public/labels"

# Tally the TIFF files in that directory; problems are reported, not raised
try:
    # Keep only the entries whose lower-cased name ends with a TIFF extension
    image_files = list(
        filter(
            lambda entry: entry.lower().endswith(('.tif', '.tiff')),
            os.listdir(folder_path),
        )
    )
    num_images = len(image_files)

    print(f"Number of images in the folder '{folder_path}': {num_images}")
except FileNotFoundError:
    print(f"The folder '{folder_path}' does not exist. Please check the path.")
except Exception as e:
    print(f"An error occurred: {e}")

In [None]:
import os

# Directory holding the public test images
folder_path = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Testing/Public/images"

# Tally the image files in that directory; problems are reported, not raised
try:
    # str.endswith accepts a tuple, so one call checks every extension
    image_files = [
        entry
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp'))
    ]
    num_images = len(image_files)

    print(f"Number of images in the folder '{folder_path}': {num_images}")
except FileNotFoundError:
    print(f"The folder '{folder_path}' does not exist. Please check the path.")
except Exception as e:
    print(f"An error occurred: {e}")


# Preprocessing Dataset Training
In questa sezione puliamo e prepariamo il dataset per il training della rete. In particolare, uniformiamo la dimensione delle immagini e le convertiamo in scala di grigi. Inoltre, ci assicuriamo che le maschere abbiano la stessa dimensione delle immagini.

In [None]:
from PIL import Image, UnidentifiedImageError
import os
import numpy as np
import tensorflow as tf

# Load grayscale images and pair each one with its label mask by file name
def load_grayscale_images_with_labels(image_dir, label_dir, target_size=(256, 256)):
    """
    Load images as grayscale arrays together with their binary label masks.

    An image ``<name>.<ext>`` is paired with the label file whose name without
    extension is ``<name>_label``. Files with an unsupported extension, images
    without a matching label, and pairs that fail to load are reported with a
    printed message and skipped.

    Args:
        image_dir (str): Path to the directory containing the images.
        label_dir (str): Path to the directory containing the label masks.
        target_size (tuple): ``(width, height)`` that every image and mask is
            resized to. Defaults to ``(256, 256)``.

    Returns:
        np.ndarray: Grayscale images as float32 in [0, 1], with a trailing
            channel axis.
        np.ndarray: Binary masks as uint8 (0 = background, 1 = any non-zero
            label value), with a trailing channel axis.
    """
    valid_extensions = ('.tiff', '.tif', '.png', '.jpg', '.jpeg', '.bmp')
    images = []
    labels = []
    print(f"Caricamento immagini dalla directory: {image_dir}")
    print(f"Caricamento etichette dalla directory: {label_dir}")

    # Index the label files by their name without extension
    label_files = {os.path.splitext(f)[0]: f for f in os.listdir(label_dir)}

    for filename in sorted(os.listdir(image_dir)):
        if not filename.lower().endswith(valid_extensions):
            print(f"File ignorato (estensione non valida): {filename}")
            continue

        base_name = os.path.splitext(filename)[0]
        label_filename = label_files.get(base_name + "_label")
        if not label_filename:
            print(f"Etichetta non trovata per l'immagine: {filename}")
            continue

        try:
            # Context managers close the underlying files as soon as the
            # pixel data has been read.
            with Image.open(os.path.join(image_dir, filename)) as raw_img:
                img = raw_img.convert('L').resize(target_size, Image.Resampling.LANCZOS)
            img = np.array(img, dtype=np.float32) / 255.0  # scale to [0, 1]

            with Image.open(os.path.join(label_dir, label_filename)) as raw_lbl:
                lbl_values = np.array(raw_lbl)

            # Binarise on "label value > 0". Image.convert('1') decides by
            # intensity (threshold/dither), so masks that store small integer
            # ids are not reliably turned into foreground.
            foreground = lbl_values > 0
            if foreground.ndim > 2:
                # Multi-channel mask: foreground if any channel is non-zero
                foreground = foreground.any(axis=-1)
            lbl = Image.fromarray(foreground.astype(np.uint8) * 255)
            # Nearest-neighbour keeps the mask strictly two-valued
            lbl = lbl.resize(target_size, Image.Resampling.NEAREST)
            lbl = (np.array(lbl) > 0).astype(np.uint8)
        except (IOError, UnidentifiedImageError) as e:
            print(f"Errore nel caricamento di immagine o etichetta per {filename}: {e}")
            continue

        # Append only once both files loaded, so the two lists stay aligned
        images.append(img[..., np.newaxis])
        labels.append(lbl[..., np.newaxis])
        print(f"Immagine ed etichetta caricate con successo: {filename}")

    return np.array(images), np.array(labels)

# Wrap the loaded image/label arrays in a tf.data.Dataset
def load_dataset(image_dir, label_dir):
    """
    Load images and labels from two directories and return them as a dataset.

    Args:
        image_dir (str): Path to the directory containing the images.
        label_dir (str): Path to the directory containing the labels.

    Returns:
        tf.data.Dataset: Dataset sliced from the (images, labels) arrays.
    """
    loaded = load_grayscale_images_with_labels(image_dir, label_dir)
    image_array, label_array = loaded
    image_count = len(image_array)
    label_count = len(label_array)
    print(f"Numero di immagini caricate: {image_count}")
    print(f"Numero di etichette caricate: {label_count}")
    return tf.data.Dataset.from_tensor_slices((image_array, label_array))

In [None]:
# Dataset locations, hard-coded for this notebook. All of them live under the
# /content/gdrive/ Drive mount point.
# Training split (images and label masks)
PERCORSO_TRAINING_IMMAGINI = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/images"
PERCORSO_TRAINING_LABELS = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Training/labels"
# Tuning split (images and label masks)
PERCORSO_TUNING_IMMAGINI = '/content/gdrive/MyDrive/NeurIPS22-CellSeg/Tuning/images'
PERCORSO_TUNING_LABELS = '/content/gdrive/MyDrive/NeurIPS22-CellSeg/Tuning/labels'

# Public test split (images and label masks)
PERCORSO_TEST_IMMAGINI = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Testing/Public/images"
PERCORSO_TEST_LABELS = "/content/gdrive/MyDrive/NeurIPS22-CellSeg/Testing/Public/labels"

In [None]:
# Load the training and tuning datasets as tf.data.Dataset objects
train_dataset = load_dataset(PERCORSO_TRAINING_IMMAGINI, PERCORSO_TRAINING_LABELS)
tuning_dataset = load_dataset(PERCORSO_TUNING_IMMAGINI, PERCORSO_TUNING_LABELS)

In [None]:
import matplotlib.pyplot as plt

# Show the first (image, mask) pair of a dataset
def plot_sample(dataset, title="Sample"):
    """
    Display the first image of a dataset next to its mask.

    Args:
        dataset (tf.data.Dataset): Dataset yielding (image, mask) pairs.
        title (str): Prefix used in the two subplot titles.
    """
    # take(1) limits the loop to the first element of the dataset
    for image, label in dataset.take(1):
        # squeeze() drops the size-1 axes so imshow receives a 2-D array
        panels = (
            (image.numpy().squeeze(), f"{title} - Immagine originale"),
            (label.numpy().squeeze(), f"{title} - Maschera"),
        )

        plt.figure(figsize=(10, 5))
        for slot, (pixels, heading) in enumerate(panels, start=1):
            plt.subplot(1, 2, slot)
            plt.imshow(pixels, cmap='gray')
            plt.title(heading)
            plt.axis('off')

        plt.show()


In [None]:
# Plot one sample from the training dataset
print("Campione dal dataset di training:")
plot_sample(train_dataset, title="Training")

# Plot one sample from the tuning dataset
print("Campione dal dataset di tuning:")
plot_sample(tuning_dataset, title="Tuning")


# Unet training

In [None]:
def _double_conv(x, filters):
    """Apply Conv3x3+ReLU -> BatchNorm -> Conv3x3+ReLU with `filters` channels."""
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    return layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)


def unet_multiclass_segmentation(input_size=(256, 256, 1)):
    """
    Build a U-Net with four down-sampling and four up-sampling stages.

    Despite its name, the network ends in a single-channel sigmoid layer, so
    it produces one per-pixel probability map (binary segmentation).

    Args:
        input_size (tuple): Input shape ``(height, width, channels)``. Height
            and width must be divisible by 16 (four 2x2 poolings), otherwise
            the skip connections cannot be concatenated.

    Returns:
        tensorflow.keras.Model: The uncompiled U-Net model.
    """
    inputs = layers.Input(input_size)

    # Encoder: keep each stage's output for the matching decoder stage
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        x = _double_conv(x, filters)
        skips.append(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Bottleneck
    x = _double_conv(x, 1024)

    # Decoder: upsample, concatenate the skip tensor (deepest first), convolve
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = layers.concatenate([x, skip])
        x = _double_conv(x, filters)

    # Single output layer for binary segmentation. The earlier duplicate
    # definition of `outputs` built a Conv2D layer that was never used.
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid', padding='same')(x)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Build the U-Net for 256x256 single-channel inputs
model = unet_multiclass_segmentation(input_size=(256, 256, 1))
# Adam with learning rate 1e-4; binary cross-entropy loss; accuracy as metric
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# Print the layer-by-layer summary of the model
model.summary()


In [None]:
import tensorflow as tf

# Train the model. No data augmentation is applied: the tensors returned by
# load_dataset are used as they are.
# The loader adds samples in sorted file-name order and Keras ignores
# fit(shuffle=...) for tf.data inputs, so shuffle explicitly. The buffer spans
# the whole dataset and is reshuffled every epoch (the tf.data default).
train_batches = (
    train_dataset
    .shuffle(buffer_size=max(int(train_dataset.cardinality()), 1))
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

history = model.fit(
    train_batches,
    validation_data=tuning_dataset.batch(32),
    epochs=8,
    verbose=1
)


In [None]:
import matplotlib.pyplot as plt

# One subplot per metric: (slot, train key, validation key, train label,
# validation label, subplot title, y-axis label)
curves = (
    (1, 'loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Training and Validation Loss', 'Loss'),
    (2, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Training and Validation Accuracy', 'Accuracy'),
)

plt.figure(figsize=(12, 5))
for slot, train_key, val_key, train_label, val_label, heading, y_label in curves:
    # Training and validation curves share the same axes
    plt.subplot(1, 2, slot)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(heading)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Save the trained model under the mounted Google Drive path. The message
# reports the path actually written; it previously said "locally" and named a
# different file.
model_path = "/content/gdrive/MyDrive/trained_unet_model_V1.keras"
model.save(model_path)
print(f"Model saved to '{model_path}'")


# TEST

In [None]:
# Load the public test dataset (images and label masks)
test_dataset = load_dataset(PERCORSO_TEST_IMMAGINI, PERCORSO_TEST_LABELS)

In [None]:
# Print the dataset object's string representation as a quick sanity check
print(test_dataset)

In [None]:
import tensorflow as tf

# Load the trained model from the .keras file on Google Drive (the same path
# the training section saves to)
loaded_model = tf.keras.models.load_model("/content/gdrive/MyDrive/trained_unet_model_V1.keras")


In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf

# Report the structure of the dataset elements
print("Dataset element spec:", test_dataset.element_spec)

# Inspect the first element only; take(1) yields at most one (image, mask) pair
for i, (image, mask) in enumerate(test_dataset.take(1)):
    print(f"Sample {i + 1}:")
    print("Image shape:", image.shape, "dtype:", image.dtype)
    print("Mask shape:", mask.shape, "dtype:", mask.dtype)

    # Image on the left, mask on the right
    plt.figure(figsize=(10, 5))
    for slot, (tensor, heading) in enumerate(((image, "Image"), (mask, "Mask")), start=1):
        plt.subplot(1, 2, slot)
        # squeeze() removes the channel dimension so imshow gets a 2-D array
        plt.imshow(tensor.numpy().squeeze(), cmap='gray')
        plt.title(heading)
        plt.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
import numpy as np

# Running totals over the whole test set
total_pixels = 0
correct_pixels = 0

# Evaluate in batches: one predict call per batch instead of one per image
for images, masks in test_dataset.batch(32):
    probabilities = loaded_model.predict(images, verbose=0)

    # The U-Net defined in this notebook ends in a sigmoid, so the prediction
    # is a probability map in [0, 1]. Casting it straight to uint8 truncates
    # every value below 1.0 to 0; threshold at 0.5 to get the binary mask.
    predicted_mask_binary = (probabilities > 0.5).astype("uint8")

    # Ground-truth masks hold 0/1 values; normalise them the same way
    true_mask_binary = (masks.numpy() > 0).astype("uint8")

    # Both arrays have the same shape, so the comparison is element-wise
    correct_pixels += int(np.sum(predicted_mask_binary == true_mask_binary))
    total_pixels += true_mask_binary.size

# Pixel-wise accuracy; guard against an empty test set (division by zero)
if total_pixels == 0:
    print("Test dataset is empty: pixel-wise accuracy is undefined.")
else:
    pixel_accuracy = correct_pixels / total_pixels
    print(f"Final Pixel-Wise Segmentation Accuracy: {pixel_accuracy:.4f}")


In [None]:
# For every test sample, show the input, the ground truth and the raw prediction
for i, (image, mask) in enumerate(test_dataset):
    print(f"Processing image {i + 1}...")

    # The model expects a batch, so add a leading batch axis before predicting
    image_batch = tf.expand_dims(image, 0)
    predicted_mask = loaded_model.predict(image_batch)  # raw model output

    # (pixels, extra imshow arguments, title, whether to add a colorbar)
    panels = (
        (image.numpy().squeeze(), {}, f"Original Image {i + 1}", False),
        (mask.numpy().squeeze(), {"vmin": 0, "vmax": 1}, f"Ground Truth Mask {i + 1}", False),
        (predicted_mask.squeeze(), {"vmin": 0, "vmax": 1}, f"Predicted Mask {i + 1} (Raw)", True),
    )

    plt.figure(figsize=(18, 6))
    for column, (pixels, value_range, heading, with_colorbar) in enumerate(panels, start=1):
        plt.subplot(1, 3, column)
        plt.imshow(pixels, cmap='gray', **value_range)
        if with_colorbar:
            plt.colorbar()
        plt.title(heading)
        plt.axis('off')

    plt.tight_layout()
    plt.show()
