In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
from PIL import Image
import numpy as np

def split_data_with_centers_custom(
    main_folder='/content/drive/MyDrive/CHALLENGE/PolypGen-EIM-24-25',
    output_folder='/content/drive/MyDrive/CHALLENGE/PolypGen-EIM-24-25_DATASET_INFERENCE',
):
    """Split the dataset under ``main_folder`` into TEST / VALID / TRAIN folders.

    Sequence data is read from
    ``<main_folder>/sequenceData/<seq>/images_<seq>`` and ``.../masks_<seq>``
    and written to ``<output_folder>/<SPLIT>/<seq>/{images,masks}``.
    Center data is read from ``<main_folder>/data_C<i>/images_C<i>`` and
    ``.../masks_C<i>`` (i = 1..6) and always goes to
    ``<output_folder>/TRAIN/data_C<i>/{images,masks}``.

    The mask of an image ``name.ext`` is expected to be ``name_mask.ext`` and
    is written under that same name.

    Args:
        main_folder: Root folder of the source dataset.
        output_folder: Root folder that receives the split dataset.

    Returns:
        None. Progress and per-split totals are printed to stdout; the totals
        count only pairs for which both image and mask were actually written.
    """
    test_sequences = ['seq2', 'seq12', 'seq13']
    valid_sequences = ['seq11', 'seq14', 'seq6']
    train_sequences = ['seq3', 'seq1', 'seq4', 'seq22', 'seq16', 'seq7', 'seq20', 'seq21', 'seq9', 'seq19', 'seq23', 'seq17', 'seq18', 'seq5', 'seq8', 'seq10', 'seq15']
    center_folders = [f'data_C{i}' for i in range(1, 7)]
    image_extensions = ('.jpg', '.jpeg', '.png')

    def copy_files(src_image_path, src_mask_path, dest_image_path, dest_mask_path):
        """Re-save one image/mask pair; return True only if both were written."""
        if not (os.path.exists(src_image_path) and os.path.exists(src_mask_path)):
            print(f"File non trovato: {src_image_path} o {src_mask_path}")
            return False

        # Create the destination folders only once we know there is something
        # to write, so missing pairs do not leave empty directories behind.
        os.makedirs(os.path.dirname(dest_image_path), exist_ok=True)
        os.makedirs(os.path.dirname(dest_mask_path), exist_ok=True)

        try:
            # NOTE(review): both files are decoded and re-encoded by PIL using
            # the format implied by the destination extension; for JPEG this is
            # a lossy round trip. Kept as-is to preserve the produced data.
            with Image.open(src_image_path) as img:
                img.save(dest_image_path)
            with Image.open(src_mask_path) as mask:
                mask.save(dest_mask_path)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole split.
            print(f"Errore nella conversione della maschera: {src_mask_path}. Dettagli: {e}")
            return False
        return True

    def copy_folder(image_folder, mask_folder, dest_root):
        """Copy every image/mask pair of one source folder; return how many succeeded."""
        copied = 0
        for image_name in os.listdir(image_folder):
            if not image_name.endswith(image_extensions):
                continue
            # Insert "_mask" before the extension only, instead of replacing
            # every occurrence of the extension text inside the file name.
            stem, ext = os.path.splitext(image_name)
            mask_name = f"{stem}_mask{ext}"

            succeeded = copy_files(
                os.path.join(image_folder, image_name),
                os.path.join(mask_folder, mask_name),
                os.path.join(dest_root, 'images', image_name),
                # Same name as the source mask for every supported extension.
                os.path.join(dest_root, 'masks', mask_name),
            )
            if succeeded:
                copied += 1
        return copied

    def process_sequences(sequences, split):
        """Copy all listed sequences into ``<output_folder>/<split>/<seq>``."""
        count_images = 0
        for seq in sequences:
            image_folder = os.path.join(main_folder, 'sequenceData', seq, f'images_{seq}')
            mask_folder = os.path.join(main_folder, 'sequenceData', seq, f'masks_{seq}')
            if not os.path.exists(image_folder) or not os.path.exists(mask_folder):
                print(f"Cartelle mancanti per {seq}.")
                continue
            count_images += copy_folder(
                image_folder, mask_folder, os.path.join(output_folder, split, seq)
            )
        print(f"{split}: Copiate {count_images} immagini dalle sequenze.")

    def add_center_images_to_train():
        """Copy all center folders into ``<output_folder>/TRAIN/<center>``."""
        count_images = 0
        for center in center_folders:
            center_id = center[-2:]  # 'data_C3' -> 'C3'
            image_folder = os.path.join(main_folder, center, f'images_{center_id}')
            mask_folder = os.path.join(main_folder, center, f'masks_{center_id}')
            if not os.path.exists(image_folder) or not os.path.exists(mask_folder):
                print(f"Cartelle mancanti per {center}.")
                continue
            count_images += copy_folder(
                image_folder, mask_folder, os.path.join(output_folder, 'TRAIN', center)
            )
        print(f"TRAIN: Aggiunte {count_images} immagini dai centri.")

    # Run the splits.
    process_sequences(test_sequences, 'TEST')
    process_sequences(valid_sequences, 'VALID')
    process_sequences(train_sequences, 'TRAIN')
    add_center_images_to_train()

    # NOTE(review): masks are re-saved under their original extension, not
    # converted to PNG as this message states. The text is left unchanged so
    # the cell output stays the same; confirm which behaviour is intended.
    print("Divisione dei dati completata con conversione delle maschere in formato PNG e organizzazione per centro e sequenza.")

# Run the split with the default source and destination folders.
split_data_with_centers_custom()


TEST: Copiate 563 immagini dalle sequenze.
VALID: Copiate 568 immagini dalle sequenze.
TRAIN: Copiate 1094 immagini dalle sequenze.
TRAIN: Aggiunte 1537 immagini dai centri.
Divisione dei dati completata con conversione delle maschere in formato PNG e organizzazione per centro e sequenza.
