In [2]:
import cv2
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from PIL import Image
from functools import reduce
import warnings
def compose(*funcs):
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    else:
        raise ValueError('Composition of empty sequence not supported.')

def letterbox_image(image_pil, target_size_wh, padding_color):
    """
    Resizes an image to a target size while maintaining aspect ratio by adding padding.
    The input image (image_pil) is expected to be a PIL Image.
    The padding_color is an integer for grayscale images.
    """
    iw, ih = image_pil.size
    w_target, h_target = target_size_wh

    if iw == 0 or ih == 0: # Handle empty input image
        return Image.new('L', target_size_wh, padding_color)

    scale = min(w_target/iw, h_target/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    # Ensure new dimensions are at least 1 pixel if scaled down significantly
    nw = max(1, nw)
    nh = max(1, nh)

    resized_image = image_pil.resize((nw,nh), Image.BICUBIC)
    
    new_image = Image.new('L', target_size_wh, padding_color) # 'L' for grayscale
    new_image.paste(resized_image, ((w_target-nw)//2, (h_target-nh)//2))
    return new_image


def convert_to_grayscale(img_bgr):
    if len(img_bgr.shape) == 2: return img_bgr
    if img_bgr.shape[2] == 1: return img_bgr.reshape(img_bgr.shape[0], img_bgr.shape[1])
    # Specific weights for BGR to Grayscale conversion
    b, g, r = cv2.split(img_bgr)
    gray_img = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray_img.astype(np.uint8)

def apply_clahe(img_gray):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return clahe.apply(img_gray)

def apply_gaussian_blur(img_gray, kernel_size=(5,5)):
    return cv2.GaussianBlur(img_gray, kernel_size, 0)

def apply_median_filter(img_gray, kernel_size=5):
    return cv2.medianBlur(img_gray, kernel_size)

def cv2_to_pil_grayscale(img_cv2):
    return Image.fromarray(img_cv2, mode='L')

def pil_to_cv2_grayscale(img_pil):
    return np.array(img_pil)

def segment_fundus_and_create_mask(image_cv2_gray, image_name_for_debug=""):
    """
    Segmenta il fondo oculare e restituisce una maschera binaria e il bounding box del fondo.
    Restituisce: (mask, bounding_box) dove bounding_box è (x, y, w, h) o None.
    """
    # Parametri di tuning per la segmentazione
    blur_kernel_size_seg = (15, 15) 
    threshold_value = 30 
    morph_kernel_size_open = (15,15) # Kernel per MORPH_OPEN
    morph_kernel_size_close = (35,35) # Kernel più grande per MORPH_CLOSE per unire regioni
    
    blurred_for_seg = cv2.GaussianBlur(image_cv2_gray, blur_kernel_size_seg, 0)
    
    # Prova cv2.THRESH_OTSU se un valore fisso non è robusto
    # _, thresh_img = cv2.threshold(blurred_for_seg, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    _, thresh_img = cv2.threshold(blurred_for_seg, threshold_value, 255, cv2.THRESH_BINARY)

    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, morph_kernel_size_open)
    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, morph_kernel_size_close)
    
    thresh_img = cv2.morphologyEx(thresh_img, cv2.MORPH_OPEN, kernel_open, iterations=1)
    thresh_img = cv2.morphologyEx(thresh_img, cv2.MORPH_CLOSE, kernel_close, iterations=2)

    contours, _ = cv2.findContours(thresh_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    mask = np.zeros_like(image_cv2_gray)
    bounding_box = None

    if contours:
        contours = sorted(contours, key=cv2.contourArea, reverse=True)
        # Considera di filtrare contorni troppo piccoli rispetto all'area dell'immagine
        # min_area_ratio = 0.05 # Esempio: il contorno deve essere almeno il 5% dell'immagine
        # if cv2.contourArea(contours[0]) > image_cv2_gray.shape[0] * image_cv2_gray.shape[1] * min_area_ratio:
        fundus_contour = contours[0]
        hull = cv2.convexHull(fundus_contour)
        cv2.drawContours(mask, [hull], -1, (255), thickness=cv2.FILLED)
        bounding_box = cv2.boundingRect(hull) # Restituisce (x, y, w, h)
        # else:
        #    print(f"Attenzione: Contorno principale troppo piccolo per {image_name_for_debug}. L'immagine risultante potrebbe essere nera.")
    else:
        #print(f"Attenzione: Nessun contorno del fondo trovato per {image_name_for_debug}. L'immagine risultante potrebbe essere nera.")
        pass
    return mask, bounding_box

# --- Parametri Globali ---
FINAL_IMAGE_SIZE = (512, 512)
FUNDUS_TARGET_SCALE_FACTOR = 0.9 # Il fondo occuperà il 90% della dimensione maggiore dell'immagine finale
DEBUG_SAVE_INTERMEDIATE = False # Imposta a True per salvare immagini di debug
DEBUG_OUTPUT_DIR = '/home/jupyter-sdm/GENITO/LAVORO_COMPLETO/Dataset_resize/1_IDRiD_DEBUG/'
if DEBUG_SAVE_INTERMEDIATE:
    os.makedirs(DEBUG_OUTPUT_DIR, exist_ok=True)




In [3]:

warnings.filterwarnings("ignore", "Premature end of JPEG file")

# Percorsi file e cartelle
csv_path = '/home/jupyter-sdm/GENITO/DATASETS/EyePACS/retinopathy_solution.csv'  
image_dir = '/home/jupyter-sdm/GENITO/DATASETS/EyePACS/test7' 
final_output_dir = '/home/jupyter-sdm/GENITO/LAVORO_COMPLETO/Dataset_resize/7_EyePACS'

file_extension = ".jpeg"
column_class_name = 'level'
colum_image_name = 'image'

try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print(f"Errore: File CSV non trovato a {csv_path}")
    exit()

classes = df[column_class_name].unique()
for cls in classes:
    class_output_path = os.path.join(final_output_dir, str(cls))
    os.makedirs(class_output_path, exist_ok=True)

conta_errate = 0
conta_bbox_nulle = 0

for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing", ncols=100, ascii=True):
    image_name_no_ext = row[colum_image_name]
    image_name = image_name_no_ext + file_extension # Verrà aggiornato se si trova .jpeg
    image_class = str(row[column_class_name])
    src_path = os.path.join(image_dir, image_name_no_ext + file_extension) # Prova prima .jpg
    output_class_path = os.path.join(final_output_dir, image_class)

    if not os.path.exists(src_path):
        src_path_jpeg = os.path.join(image_dir, image_name_no_ext + ".jpeg")
        if os.path.exists(src_path_jpeg):
            src_path = src_path_jpeg
            image_name = image_name_no_ext + ".jpeg" # Aggiorna il nome file per l'output
        else:
            conta_errate += 1
            continue
        
    img_bgr = cv2.imread(src_path, cv2.IMREAD_COLOR)
    
    if img_bgr is None:
        conta_errate += 1
        continue
    
    try:
        # 1. Converti in Scala di Grigi
        img_cv2_gray = convert_to_grayscale(img_bgr)

        # 2. Segmenta il fondo, crea la maschera e ottieni il bounding box
        fundus_mask, fundus_bbox = segment_fundus_and_create_mask(img_cv2_gray.copy(), image_name)

        if DEBUG_SAVE_INTERMEDIATE:
            cv2.imwrite(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_0_gray.png"), img_cv2_gray)
            cv2.imwrite(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_1_mask.png"), fundus_mask)

        # 3. Applica CLAHE, Gaussian Blur, Median Filter all'immagine in scala di grigi
        img_clahe = apply_clahe(img_cv2_gray)
        img_gaussian_blurred = apply_gaussian_blur(img_clahe)
        img_median_filtered = apply_median_filter(img_gaussian_blurred)
        fully_processed_gray_data = img_median_filtered

        if DEBUG_SAVE_INTERMEDIATE:
             cv2.imwrite(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_2_fully_processed_gray.png"), fully_processed_gray_data)

        # 4. Applica la maschera per annerire lo sfondo dell'immagine processata
        masked_processed_fundus_cv2 = cv2.bitwise_and(fully_processed_gray_data, fully_processed_gray_data, mask=fundus_mask)

        if DEBUG_SAVE_INTERMEDIATE:
            cv2.imwrite(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_3_masked_fundus.png"), masked_processed_fundus_cv2)

        # --- Normalizzazione della Dimensione del Fondo ---
        if fundus_bbox:
            x, y, w_bbox, h_bbox = fundus_bbox
            if w_bbox > 0 and h_bbox > 0:
                # Ritaglia il fondo mascherato usando il bounding box
                cropped_fundus_cv2 = masked_processed_fundus_cv2[y:y+h_bbox, x:x+w_bbox]

                if DEBUG_SAVE_INTERMEDIATE:
                    cv2.imwrite(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_4_cropped_fundus.png"), cropped_fundus_cv2)

                # Calcola le nuove dimensioni per il fondo ritagliato
                target_max_dim_px = int(max(FINAL_IMAGE_SIZE) * FUNDUS_TARGET_SCALE_FACTOR)
                
                current_max_dim_bbox = max(w_bbox, h_bbox)
                scale_ratio = target_max_dim_px / current_max_dim_bbox if current_max_dim_bbox > 0 else 1
                
                new_w = int(w_bbox * scale_ratio)
                new_h = int(h_bbox * scale_ratio)
                
                # Assicura che le nuove dimensioni siano almeno 1x1
                new_w = max(1, new_w)
                new_h = max(1, new_h)

                interpolation = cv2.INTER_AREA if scale_ratio < 1 else cv2.INTER_CUBIC
                resized_cropped_fundus_cv2 = cv2.resize(cropped_fundus_cv2, (new_w, new_h), interpolation=interpolation)
                
                # Converti il fondo normalizzato e ridimensionato in PIL
                image_to_letterbox_pil = cv2_to_pil_grayscale(resized_cropped_fundus_cv2)

                if DEBUG_SAVE_INTERMEDIATE:
                    image_to_letterbox_pil.save(os.path.join(DEBUG_OUTPUT_DIR, f"{image_name_no_ext}_5_resized_cropped_fundus.png"))
            else:
                # Bounding box non valido (es. w o h = 0)
                image_to_letterbox_pil = Image.new('L', (1,1), 0) # Immagine placeholder piccola da letterboxare
                conta_bbox_nulle +=1
        else:
            # Nessun bounding box trovato (nessun contorno)
            image_to_letterbox_pil = Image.new('L', (1,1), 0) # Immagine placeholder
            conta_bbox_nulle +=1
            
        # 5. Letterbox: inserisce l'immagine (ora il fondo normalizzato) in un canvas 512x512
        #    Il colore di padding è 0 (nero) perché lo sfondo del fondo è già nero.
        letterboxed_img_pil = letterbox_image(image_to_letterbox_pil, FINAL_IMAGE_SIZE, padding_color=0) 
        
        # 6. Converti di nuovo in NumPy array (CV2) per il salvataggio
        final_img_to_save = pil_to_cv2_grayscale(letterboxed_img_pil)
        
        # 7. Salva l'immagine elaborata
        output_filename = os.path.splitext(image_name)[0] + '.png'
        output_path = os.path.join(output_class_path, output_filename)
        cv2.imwrite(output_path, final_img_to_save)
        
    except Exception as e:
        import traceback
        conta_errate += 1
        continue

print(f"Numero di immagini non lette o con errori di processing: {conta_errate}")
print(f"Numero di immagini con bounding box del fondo nullo o non valido: {conta_bbox_nulle}")
print("Processo di suddivisione e preprocessing completato!")

Processing:   2%|7                                            | 927/53576 [01:39<1:24:13, 10.42it/s]Premature end of JPEG file
Processing:   2%|7                                           | 945/53576 [02:11<12:06:09,  1.21it/s]Premature end of JPEG file
Processing:   2%|7                                           | 962/53576 [02:38<28:12:05,  1.93s/it]Premature end of JPEG file
Processing:   2%|7                                           | 964/53576 [02:43<33:33:44,  2.30s/it]Premature end of JPEG file
Processing:   2%|7                                           | 965/53576 [02:46<35:20:43,  2.42s/it]Premature end of JPEG file
Processing:   2%|7                                           | 966/53576 [02:48<36:55:30,  2.53s/it]Premature end of JPEG file
Processing:   2%|7                                           | 968/53576 [02:53<34:19:51,  2.35s/it]Premature end of JPEG file
Processing:   2%|7                                           | 970/53576 [02:57<33:27:21,  2.29s/it]Premature e

Numero di immagini non lette o con errori di processing: 50638
Numero di immagini con bounding box del fondo nullo o non valido: 3
Processo di suddivisione e preprocessing completato!



