# Preprocesamiento de imágenes en paralelo

### Librerías

In [None]:
import os
import numpy as np
import time
from PIL import Image
from pathlib import Path
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

### Parámetros

In [None]:
# Number of image paths sliced off per batch in the main loop.
BATCH_SIZE = 200
# Size passed to PIL's Image.resize for every image.
TARGET_SIZE = (224, 224)
# Per-channel values subtracted from / divided into the [0, 1]-scaled pixels.
# Presumably the commonly used ImageNet RGB statistics -- TODO confirm.
# NOTE(review): built from Python floats, so both arrays are float64.
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])
# NOTE(review): machine-specific absolute path; must be edited to run elsewhere.
INPUT_DIR = "/home/jiu/Documents/School IPN/Sexto Semester/Computo Paralelo/Segundo Dpto/Proyecto - Final de Dpto/ImageNet/ILSVRC/Data/CLS-LOC"
# Root for the generated .npy files, relative to the working directory.
OUTPUT_DIR = "output/"
# Create the output root up front; no error if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

### Función de preprocesamiento para una imagen

In [None]:
def preprocess_image(img_path):
    """Load one image and turn it into a normalized, channel-first array.

    The image is decoded as RGB, resized to ``TARGET_SIZE``, scaled to
    ``[0, 1]``, standardized per channel with ``MEAN`` / ``STD`` and
    transposed from (height, width, channel) to (channel, height, width).

    Args:
        img_path: Path of the image file to read.

    Returns:
        A ``(array, img_path)`` tuple where ``array`` is a float32 ndarray,
        or ``None`` when the image could not be processed. Failures are
        printed rather than raised so one bad file does not abort a batch.
    """
    try:
        # The context manager closes the underlying file deterministically
        # instead of leaving it to garbage collection in a long-lived worker.
        with Image.open(img_path) as img:
            resized = img.convert('RGB').resize(TARGET_SIZE)
        pixels = np.array(resized, dtype=np.float32) / 255.0
        # MEAN/STD are float64; using them directly would promote the result
        # to float64 and double the size of every array sent back and saved.
        mean = np.asarray(MEAN, dtype=np.float32)
        std = np.asarray(STD, dtype=np.float32)
        pixels = (pixels - mean) / std
        return (pixels.transpose(2, 0, 1), img_path)
    except Exception as e:
        # Deliberately broad: any unreadable or corrupt file is reported and skipped.
        print(f"Error con {img_path}: {e}")
        return None

### Versión paralela

In [None]:
def save_preprocessed_image(arrays_path_pairs):
    """Store each preprocessed array as a ``.npy`` file under ``OUTPUT_DIR``.

    The output tree mirrors the input tree: the location of every source
    image relative to ``INPUT_DIR`` is reused below ``OUTPUT_DIR``, with the
    file suffix replaced by ``.npy``.

    Args:
        arrays_path_pairs: Iterable of ``(array, source_path)`` pairs.
    """
    for array, source_path in arrays_path_pairs:
        relative_npy = Path(source_path).relative_to(INPUT_DIR).with_suffix('.npy')
        destination = Path(OUTPUT_DIR) / relative_npy
        # Make sure the mirrored sub-directory exists before writing.
        destination.parent.mkdir(parents=True, exist_ok=True)
        np.save(destination, array)

### Función para guardar

In [None]:
def save_preprocessed_image(arrays_path_pairs):
    """Persist preprocessed arrays to disk, one ``.npy`` file per image.

    Each source path is made relative to ``INPUT_DIR`` and re-rooted under
    ``OUTPUT_DIR`` with a ``.npy`` suffix; missing directories are created.

    Args:
        arrays_path_pairs: Iterable of ``(array, source_path)`` pairs.
    """
    for data, original in arrays_path_pairs:
        inside_input = Path(original).relative_to(INPUT_DIR)
        target = Path(OUTPUT_DIR) / inside_input.with_suffix('.npy')
        # Create the mirrored folder hierarchy on demand.
        os.makedirs(target.parent, exist_ok=True)
        np.save(target, data)

### Cargar rutas

In [None]:
def load_image_paths(directory):
    """Recursively collect every ``*.JPEG`` path below *directory*.

    Args:
        directory: Root directory (``str`` or ``Path``) to search.

    Returns:
        A list of ``pathlib.Path`` objects in sorted order. ``rglob`` yields
        entries in filesystem-dependent order, so sorting keeps the batch
        composition identical from one benchmark run to the next.
    """
    return sorted(Path(directory).rglob("*.JPEG"))

### Función principal

In [None]:

# Benchmark driver: preprocess every image in batches through a process pool
# and record the elapsed wall-clock time in `execution_time`.
if __name__ == "__main__":
    execution_time = 0
    i = 0
    # Collect every input path up front so batches can be sliced by index.
    image_paths = load_image_paths(INPUT_DIR)
    total_images = len(image_paths)
    # Timing starts after path discovery; pool start-up and shutdown fall
    # inside the measured interval.
    start_time = time.time()
    # NOTE(review): the pool is created with a single worker process;
    # presumably this number is changed by hand between runs -- confirm.
    with Pool(1) as pool:
        for i in range(0, total_images, BATCH_SIZE):
            batch_paths = image_paths[i:i + BATCH_SIZE]
            # imap_unordered yields results as workers finish, not in input order;
            # tqdm shows one progress bar per batch.
            for result in tqdm(pool.imap_unordered(preprocess_image, batch_paths), total=len(batch_paths), desc=f"Batch {i//BATCH_SIZE + 1}"):
                # Saving is currently disabled, so every result is discarded.
                # To write the arrays to disk, replace `pass` with:
                #if result is not None:
                #    save_preprocessed_image([result])
                pass
    final_time = time.time()
    # Elapsed seconds for the whole loop.
    # NOTE(review): the value is stored but never printed in this cell.
    execution_time = final_time - start_time 

# Medición y análisis comparativo

### Librerías

In [None]:
import time
import psutil
import multiprocessing
import matplotlib.pyplot as plt

### Función secuencial

In [None]:
def funcion_secuencial(imagenes, funcion_preprocesamiento):
    """Run the preprocessing function over every item, one at a time, and time it.

    Args:
        imagenes: Iterable of items (e.g. image paths) to process.
        funcion_preprocesamiento: Callable invoked once per item; its return
            value is discarded.

    Returns:
        Elapsed wall-clock time in seconds as a float.
    """
    # perf_counter is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring an interval.
    inicio = time.perf_counter()
    # The loop must iterate the parameter `imagenes`; the accented spelling
    # `imágenes` is a different identifier and raised NameError.
    for img in imagenes:
        funcion_preprocesamiento(img)
    return time.perf_counter() - inicio

### Función paralela

In [None]:
def funcion_paralela(imagenes, funcion_preprocesamiento, num_procesos):
    """Run the preprocessing function over every item in a process pool and time it.

    Args:
        imagenes: Iterable of items (e.g. image paths) to process.
        funcion_preprocesamiento: Picklable callable applied to each item by
            the worker processes; its results are discarded.
        num_procesos: Number of worker processes for the pool.

    Returns:
        Elapsed wall-clock time in seconds as a float. Pool start-up and
        shutdown are included in the measurement.
    """
    # perf_counter is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring an interval.
    inicio = time.perf_counter()
    with multiprocessing.Pool(num_procesos) as pool:
        # Must map over the parameter `imagenes`; the accented spelling
        # `imágenes` is a different identifier and raised NameError.
        pool.map(funcion_preprocesamiento, imagenes)
    return time.perf_counter() - inicio

### Tiempo y medición de CPU secuencial

In [None]:
# Time a one-at-a-time run of preprocess_image over `imagenes`.
# NOTE(review): `imagenes` is not assigned anywhere in the visible notebook;
# presumably it should be the list of image paths -- confirm.
tiempo_seq = funcion_secuencial(imagenes, preprocess_image)
# NOTE(review): CPU usage is sampled for `tiempo_seq` seconds only after the
# run above has already returned, so the samples do not cover the workload
# itself -- confirm whether sampling was meant to happen during the run.
uso_cpu_seq = medir_uso_cpu(tiempo_seq)

### Tiempo y medición de CPU en paralelo

In [None]:
# Time a pooled run of preprocess_image using one worker per reported CPU.
# NOTE(review): `imagenes` is not assigned anywhere in the visible notebook;
# presumably it should be the list of image paths -- confirm.
tiempo_par = funcion_paralela(imagenes, preprocess_image, num_procesos=multiprocessing.cpu_count())
# NOTE(review): CPU usage is sampled for `tiempo_par` seconds only after the
# pooled run has already returned, so the samples do not cover the workload
# itself -- confirm whether sampling was meant to happen during the run.
uso_cpu_par = medir_uso_cpu(tiempo_par)

### Medir uso de CPU

In [None]:
def medir_uso_cpu(duracion, intervalo=0.1):
    """Collect CPU-usage samples for roughly *duracion* seconds.

    ``psutil.cpu_percent`` is called ``int(duracion / intervalo)`` times,
    each call using *intervalo* as its measurement interval.

    Args:
        duracion: Total sampling time in seconds.
        intervalo: Length of each sampling interval in seconds.

    Returns:
        A list with one CPU percentage per sample; empty when *duracion* is
        shorter than *intervalo*.
    """
    num_muestras = int(duracion / intervalo)
    return [psutil.cpu_percent(interval=intervalo) for _ in range(num_muestras)]

### Speedup

In [None]:
# Speedup = sequential time divided by parallel time; values above 1 mean
# the pooled run was faster.
speedup = tiempo_seq / tiempo_par
print(f"Speedup: {speedup:.2f}")

### Visualización de resultados

In [None]:
# Create the 12x5 inch figure that the subplot calls in the following cells draw into.
plt.figure(figsize=(12, 5))

#### Uso de CPU

In [None]:
# Left panel of a 1x2 grid: the CPU-usage samples of both runs as line plots.
plt.subplot(1, 2, 1)
# The x axis is the sample index, not seconds; the two series may differ in length.
plt.plot(uso_cpu_seq, label="Secuencial")
plt.plot(uso_cpu_par, label="Paralelo")
plt.title("Uso de CPU durante ejecución")
plt.xlabel("Tiempo (intervalos)")
plt.ylabel("Uso de CPU (%)")
plt.legend()

#### Tiempo total

In [None]:
# Right panel of the 1x2 grid: bar chart comparing the two total run times.
plt.subplot(1, 2, 2)
plt.bar(["Secuencial", "Paralelo"], [tiempo_seq, tiempo_par])
plt.title("Comparación de tiempo total")
plt.ylabel("Tiempo (s)")

# Adjust subplot spacing, then render the figure.
plt.tight_layout()
plt.show()