In [1]:
import os
import cv2
import numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import csv

In [26]:
#Filtro
def apply_clahe_rgb(img, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Equalise local contrast with CLAHE on the lightness channel only.

    The image is converted to LAB, CLAHE is applied to the L channel, and the
    result is converted back, so the two colour channels (a, b) are passed
    through untouched.

    Note: despite the function name, the conversion codes used are
    ``COLOR_BGR2LAB`` / ``COLOR_LAB2BGR``, i.e. the input is treated as BGR
    (the channel order ``cv2.imread`` produces) and the output is BGR too.

    Args:
        img: 3-channel image in BGR channel order.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The contrast-enhanced image in BGR channel order.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness, a_chan, b_chan = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    lightness_eq = clahe.apply(lightness)
    lab_eq = cv2.merge((lightness_eq, a_chan, b_chan))
    return cv2.cvtColor(lab_eq, cv2.COLOR_LAB2BGR)

def process_image(args):
    """Load one image, apply CLAHE, downscale to 100x100 and save it as PNG.

    Takes a single tuple so it can be passed directly to ``Executor.map``.

    Args:
        args: ``(input_path, output_path)`` pair of path strings.

    Returns:
        ``(file_name, True)`` on success, or ``(file_name, False, message)``
        when the image could not be read, could not be written, or any step
        raised an exception.
    """
    input_path, output_path = args
    file_name = os.path.basename(input_path)
    try:
        img = cv2.imread(input_path)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            return (file_name, False, "No se pudo cargar la imagen")

        img_filtered = apply_clahe_rgb(img)

        img_resized = cv2.resize(img_filtered, (100, 100), interpolation=cv2.INTER_AREA)
        compression = [cv2.IMWRITE_PNG_COMPRESSION, 9]
        # cv2.imwrite also reports failure through its return value; without
        # this check an image that was never saved would count as a success.
        if not cv2.imwrite(output_path, img_resized, compression):
            return (file_name, False, "No se pudo guardar la imagen")

        return (file_name, True)

    except Exception as e:
        return (file_name, False, str(e))
    
def process_imgs_parallel(input_path, output_path, n_workers=None):
    """Preprocess every ``*.png`` under ``input_path`` using a thread pool.

    The directory layout below ``input_path`` is mirrored under
    ``output_path``; each file is handled by ``process_image``. A progress bar
    is shown while running and the success/failure totals are printed at the
    end.

    Args:
        input_path: Root directory searched recursively for ``*.png`` files.
        output_path: Root directory for the processed images (created if
            missing).
        n_workers: Thread count; when ``None`` it defaults to
            ``min(32, 4 * cpu_count)``.
    """
    output_root = Path(output_path)
    output_root.mkdir(parents=True, exist_ok=True)
    img_files = list(Path(input_path).rglob('*.png'))

    args_list = []
    created_dirs = set()
    for img_path in img_files:
        target_path = output_root / img_path.relative_to(input_path)
        # Create each destination directory once instead of once per image.
        if target_path.parent not in created_dirs:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            created_dirs.add(target_path.parent)
        args_list.append((str(img_path), str(target_path)))

    if n_workers is None:
        n_workers = min(32, (os.cpu_count() or 4) * 4)

    total = len(args_list)
    success = 0
    failed = 0
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        print(f"Procesando {total} imagenes")
        with tqdm(total=total, desc='Progreso', unit="img") as bar:
            for result in executor.map(process_image, args_list):
                if result[1]:
                    success += 1
                else:
                    failed += 1
                    # tqdm.write keeps the progress bar intact; the file name
                    # is included so the failure can be traced back.
                    tqdm.write(f"{result[0]}: {result[2]}")
                bar.update(1)
    print(f"Exito: {success}")
    print(f"Error: {failed}")
    

    

In [28]:
# Preprocess every PNG found under the dataset folder and write the results,
# with the same sub-folder layout, into 'Imgs'.
process_imgs_parallel('COVID-19_Radiography_Dataset', 'Imgs')

Procesando 21165 imagenes


Progreso: 100%|██████████| 21165/21165 [00:21<00:00, 978.82img/s] 

Exito: 21165
Error: 0





In [2]:
def imgs_to_csv(file_name, imgs_dir):
    """Flatten every ``*.png`` under ``imgs_dir`` into one row of a CSV file.

    The first row is a header ``target, pixel0, pixel1, ...`` sized from the
    first readable image. Each following row holds the label (the part of the
    file name before the first ``-``) followed by the flattened pixel values.
    Images that cannot be read are reported and skipped.

    Args:
        file_name: Path of the CSV file to create (overwritten if present).
        imgs_dir: Directory searched recursively for ``*.png`` files.
    """
    # Sort so the row order does not depend on filesystem enumeration order.
    img_files = sorted(Path(imgs_dir).rglob('*.png'))
    header_pending = True
    with open(file_name, 'w', newline='') as f:
        writer = csv.writer(f)

        for img_path in img_files:
            # Pass a plain str: not every OpenCV build accepts Path objects.
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of crashing on None.flatten().
                print(f"No se pudo cargar la imagen: {img_path}")
                continue
            img_row = img.flatten()

            if header_pending:
                headers = ['target'] + [f'pixel{j}' for j in range(len(img_row))]
                writer.writerow(headers)
                header_pending = False

            target = img_path.name.split('-')[0]
            writer.writerow([target] + img_row.tolist())
    print("Set de datos creado")


In [4]:
# Build the CSV dataset from the PNG images found under 'Imgs'.
imgs_to_csv('COVID-19_DF.csv', 'Imgs')

Set de datos creado
