In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper module)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
######################################
###### DIVISIONE RANDOM DATASET#######
######################################

# 563 immagini in TEST (15%): SEQUENZE 2,12,13 =563
# 568 immagini in VALID (15%): SEQUENZE 11,14,6 =568
# SEQUENZE RIMASTE: 18,5,8,10,15,3,1,4,22,16,7,20,21,9,19,23,17 ---> RIMANGONO 1094 (SEQUENZE) + 1537 (IMMAGINI CENTRI) = 2631 --> TRAIN (70%)


In [None]:
import os
from PIL import Image
import numpy as np

def split_data_with_centers(
    main_folder='/content/drive/MyDrive/CHALLENGE/PolypGen-EIM-24-25',
    output_folder='/content/drive/MyDrive/CHALLENGE/PolypGen-EIM-24-25_SPLIT_No_organize',
    target_size=(256, 256),
):
    """Split the dataset into TEST / VALID / TRAIN folders of resized PNG files.

    Whole sequences are assigned to a split (so frames of one sequence never
    end up in two splits); the per-centre images are all added to TRAIN.
    Every image is resized to ``target_size`` and every mask is converted to
    greyscale, binarised (pixel > 127 -> 1, else 0), resized and written as PNG.

    Args:
        main_folder: Root of the source dataset. Expected layout:
            ``sequenceData/<seq>/images_<seq>``, ``sequenceData/<seq>/masks_<seq>``,
            ``data_C<i>/images_C<i>`` and ``data_C<i>/masks_C<i>``.
        output_folder: Root of the generated ``<SPLIT>/images`` and
            ``<SPLIT>/masks`` folders.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        None. Progress and per-split counts are printed.
    """
    test_sequences = ['seq2', 'seq12', 'seq13']
    valid_sequences = ['seq11', 'seq14', 'seq6']
    train_sequences = ['seq3', 'seq1', 'seq4', 'seq22', 'seq16', 'seq7', 'seq20', 'seq21', 'seq9', 'seq19', 'seq23', 'seq17','seq18', 'seq5', 'seq8','seq10', 'seq15']
    center_ids = [f'C{i}' for i in range(1, 7)]
    image_extensions = ('.jpg', '.jpeg', '.png')

    def copy_files(src_image_path, src_mask_path, dest_image_path, dest_mask_path):
        """Write the resized image and the binarised, resized mask as PNG files.

        Destination paths get a ``.png`` extension regardless of the extension
        they are passed with. Returns True only if both files were written.
        """
        if not (os.path.exists(src_image_path) and os.path.exists(src_mask_path)):
            print(f"File non trovato: {src_image_path} o {src_mask_path}")
            return False

        # Swap only the real extension; a substring replace would also rewrite
        # "jpg" inside directory names and would leave ".jpeg" files untouched.
        dest_image_path = os.path.splitext(dest_image_path)[0] + '.png'
        dest_mask_path = os.path.splitext(dest_mask_path)[0] + '.png'

        try:
            # Transform both inputs before writing anything, so a broken mask
            # cannot leave an image without its mask in the output folder.
            with Image.open(src_image_path) as img:
                img_resized = img.resize(target_size)

            with Image.open(src_mask_path) as mask:
                mask_array = np.array(mask.convert('L'))  # greyscale
            binary_mask_array = np.where(mask_array > 127, 1, 0).astype(np.uint8)
            # NOTE(review): the mask is resized with Pillow's default resampling
            # filter; Image.NEAREST is the usual choice for label masks. Left
            # unchanged here so that generated masks stay identical.
            mask_resized = Image.fromarray(binary_mask_array).resize(target_size)

            os.makedirs(os.path.dirname(dest_image_path), exist_ok=True)
            os.makedirs(os.path.dirname(dest_mask_path), exist_ok=True)
            img_resized.save(dest_image_path, format='PNG')
            mask_resized.save(dest_mask_path, format='PNG')
        except Exception as e:
            # Best effort: report the failing pair and carry on with the rest.
            print(f"Errore nella conversione di immagine/maschera: {src_image_path}, {src_mask_path}. Dettagli: {e}")
            return False
        return True

    def copy_folder(image_folder, mask_folder, split):
        """Process every image of one folder into ``split``; return how many succeeded."""
        copied = 0
        # sorted() only makes the processing (and log) order reproducible.
        for image_name in sorted(os.listdir(image_folder)):
            if not image_name.endswith(image_extensions):
                continue
            stem, ext = os.path.splitext(image_name)
            # Masks are named "<image stem>_mask<same extension>".
            src_image_path = os.path.join(image_folder, image_name)
            src_mask_path = os.path.join(mask_folder, f'{stem}_mask{ext}')
            # NOTE(review): files are keyed by name only, so equal file names
            # coming from different folders would overwrite each other — confirm
            # names are unique across sequences and centres.
            dest_image_path = os.path.join(output_folder, split, 'images', stem + '.png')
            dest_mask_path = os.path.join(output_folder, split, 'masks', stem + '.png')
            # Count only pairs that were actually written.
            if copy_files(src_image_path, src_mask_path, dest_image_path, dest_mask_path):
                copied += 1
        return copied

    def process_sequences(sequences, split):
        """Copy all images of the given sequences into ``split``."""
        count_images = 0
        for seq in sequences:
            image_folder = os.path.join(main_folder, 'sequenceData', seq, f'images_{seq}')
            mask_folder = os.path.join(main_folder, 'sequenceData', seq, f'masks_{seq}')
            if not os.path.isdir(image_folder) or not os.path.isdir(mask_folder):
                print(f"Cartelle mancanti per {seq}.")
                continue
            count_images += copy_folder(image_folder, mask_folder, split)
        print(f"{split}: Copiate {count_images} immagini dalle sequenze.")

    def add_center_images_to_train():
        """Add the single-frame images of every centre to the TRAIN split."""
        count_images = 0
        for center_id in center_ids:
            center = f'data_{center_id}'
            image_folder = os.path.join(main_folder, center, f'images_{center_id}')
            mask_folder = os.path.join(main_folder, center, f'masks_{center_id}')
            if not os.path.isdir(image_folder) or not os.path.isdir(mask_folder):
                print(f"Cartelle mancanti per {center}.")
                continue
            count_images += copy_folder(image_folder, mask_folder, 'TRAIN')
        print(f"TRAIN: Aggiunte {count_images} immagini dai centri.")

    # Run every stage
    process_sequences(test_sequences, 'TEST')
    process_sequences(valid_sequences, 'VALID')
    process_sequences(train_sequences, 'TRAIN')
    add_center_images_to_train()

    print(f"Divisione dei dati completata con conversione delle maschere in formato PNG, binarizzazione e ridimensionamento a {target_size[0]}x{target_size[1]}.")

# Run the dataset split
split_data_with_centers()


TEST: Copiate 563 immagini dalle sequenze.
VALID: Copiate 568 immagini dalle sequenze.
TRAIN: Copiate 1094 immagini dalle sequenze.
TRAIN: Aggiunte 1537 immagini dai centri.
Divisione dei dati completata con conversione delle maschere in formato PNG, binarizzazione e ridimensionamento a 256x256.
