In [1]:
# Configuración inicial: Conexión a MinIO
import boto3
import os
from dotenv import load_dotenv

load_dotenv()
access_key_id = os.getenv("ACCESS_KEY_ID")
secret_access_key = os.getenv("SECRET_ACCESS_KEY")
s3_endpoint = os.getenv("S3_API_ENDPOINT", "localhost:9000")

# Si el endpoint contiene "minio:" (nombre de servicio Docker), reemplazarlo por localhost
if s3_endpoint.startswith("minio:"):
    s3_endpoint = s3_endpoint.replace("minio:", "localhost:")

minio_url = "http://" + s3_endpoint
print(f"Conectando a MinIO en: {minio_url}")

minio_client = boto3.client(
    "s3",
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_access_key,
    endpoint_url=minio_url
)

minio_bucket = "training-preparation-zone"


Conectando a MinIO en: http://localhost:9000


In [2]:
# Cargar datasets locales
import pandas as pd
import json

# Cargar dataset_train.json
with open("./dataset_train.json", 'r') as f:
    df_train = pd.read_json(f)

# Cargar dataset_train_augmented.json
with open("./dataset_train_augmented.json", 'r') as f:
    df_train_augmented = pd.read_json(f)

print(f"Dataset train: {len(df_train)} entradas")
print(f"Dataset train augmented: {len(df_train_augmented)} entradas")
print(f"\nPrimeras filas del dataset train:")
print(df_train.head())


Dataset train: 664 entradas
Dataset train augmented: 1992 entradas

Primeras filas del dataset train:
                                               image  \
0  images/ISIC_0025899_rotated_-13_contrast_1.06.png   
1  images/ISIC_0026803_rotated_-2_contrast_1.19_f...   
2  images/ISIC_0026803_rotated_-2_contrast_1.19_f...   
3  images/ISIC_0029577_brightness_0.87_contrast_0...   
4                            images/ISIC_0031981.png   

                            text     score  
0  texts/actinic_keratosis_0.txt  9934.281  
1  texts/actinic_keratosis_1.txt  9933.505  
2  texts/actinic_keratosis_2.txt  9932.875  
3  texts/actinic_keratosis_3.txt  9935.860  
4  texts/actinic_keratosis_4.txt  9969.258  


In [3]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from torch.optim import AdamW
from tqdm import tqdm
import numpy as np

# Hiperparámetros
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_ID = "openai/clip-vit-base-patch32"
MODEL_LARGE_ID = "openai/clip-vit-large-patch14"
BATCH_SIZE = 8
LEARNING_RATE = 5e-6
EPOCHS = 3

print(f"Dispositivo: {DEVICE}")
print(f"Modelo base: {MODEL_ID}")
print(f"Modelo potente: {MODEL_LARGE_ID}")


  from .autonotebook import tqdm as notebook_tqdm


Dispositivo: cpu
Modelo base: openai/clip-vit-base-patch32
Modelo potente: openai/clip-vit-large-patch14


## Dataset Class


In [4]:
import io

class SkinLesionDataset(Dataset):
    def __init__(self, dataframe, processor, minio_client, bucket_name):
        self.df = dataframe
        self.processor = processor
        self.minio_client = minio_client
        self.bucket_name = bucket_name

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_key = self.df.iloc[idx]['image']
        txt_key = self.df.iloc[idx]['text']

        # Intentar cargar imagen del bucket principal
        try:
            img_response = self.minio_client.get_object(Bucket=self.bucket_name, Key=img_key)
            img_bytes = img_response['Body'].read()
        except Exception as e:
            # Si no está en el bucket principal, buscar en otros buckets
            img_bytes = None
            for bucket in ["augmentation-zone", "exploitation-zone"]:
                try:
                    img_response = self.minio_client.get_object(Bucket=bucket, Key=img_key)
                    img_bytes = img_response['Body'].read()
                    break
                except Exception:
                    continue
            
            if img_bytes is None:
                raise FileNotFoundError(f"Imagen no encontrada: {img_key}")
        
        image = Image.open(io.BytesIO(img_bytes)).convert("RGB")

        # Intentar cargar texto del bucket principal
        try:
            txt_response = self.minio_client.get_object(Bucket=self.bucket_name, Key=txt_key)
            description = txt_response['Body'].read().decode('utf-8').strip()
        except Exception as e:
            # Si no está en el bucket principal, buscar en otros buckets
            description = None
            for bucket in ["augmentation-zone", "exploitation-zone"]:
                try:
                    txt_response = self.minio_client.get_object(Bucket=bucket, Key=txt_key)
                    description = txt_response['Body'].read().decode('utf-8').strip()
                    break
                except Exception:
                    continue
            
            if description is None:
                raise FileNotFoundError(f"Texto no encontrado: {txt_key}")

        inputs = self.processor(
            text=[description], 
            images=image, 
            return_tensors="pt", 
            padding="max_length", 
            truncation=True
        )
        
        return {k: v.squeeze(0) for k, v in inputs.items()}


## TRAIN BASE MODEL CLIP


In [5]:
# Inicializar modelo CLIP base
print("Cargando modelo CLIP base...")
model_base = CLIPModel.from_pretrained(MODEL_ID).to(DEVICE)
processor_base = CLIPProcessor.from_pretrained(MODEL_ID)
print("Modelo CLIP base cargado")

# Crear dataset y dataloader con el dataset aumentado
print(f"\nCreando dataset con {len(df_train_augmented)} entradas...")
dataset_train = SkinLesionDataset(df_train_augmented, processor_base, minio_client, minio_bucket)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
optimizer = AdamW(model_base.parameters(), lr=LEARNING_RATE, weight_decay=0.1)

print(f"Dataset creado: {len(dataset_train)} muestras")
print(f"Dataloader creado: {len(dataloader_train)} batches")


Cargando modelo CLIP base...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Modelo CLIP base cargado

Creando dataset con 1992 entradas...
Dataset creado: 1992 muestras
Dataloader creado: 249 batches


In [8]:
# Entrenamiento del modelo CLIP base
loss_history = []
model_base.train()

print("Iniciando entrenamiento...")
for epoch in range(EPOCHS):
    pbar = tqdm(dataloader_train, desc=f"Epoch {epoch+1}/{EPOCHS}")
    epoch_loss = 0
    
    for batch in pbar:
        optimizer.zero_grad()
        batch = {k: v.to(DEVICE) for k, v in batch.items()}
        
        outputs = model_base(
            input_ids=batch['input_ids'],
            pixel_values=batch['pixel_values'],
            attention_mask=batch['attention_mask'],
            return_loss=True
        )
        
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        pbar.set_postfix({"loss": loss.item()})
    
    avg_loss = epoch_loss / len(dataloader_train)
    loss_history.append(avg_loss)
    print(f"Epoch {epoch+1} completada - Pérdida promedio: {avg_loss:.4f}")

print("\n Entrenamiento completado!")
print(f"Pérdidas por época: {loss_history}")


Iniciando entrenamiento...


Epoch 1/3:   0%|          | 0/249 [00:00<?, ?it/s]


FileNotFoundError: Imagen no encontrada: images/ISIC_0025899_brightness_1.12_flipped.png

## Cargar Modelo CLIP Potente para Comparación


In [None]:
# Cargar modelo CLIP potente (sin entrenar)
print("Cargando modelo CLIP potente...")
model_large = CLIPModel.from_pretrained(MODEL_LARGE_ID).to(DEVICE)
processor_large = CLIPProcessor.from_pretrained(MODEL_LARGE_ID)
print("✓ Modelo CLIP potente cargado")


In [None]:
# Función para evaluar modelos
@torch.no_grad()
def evaluate_model(model, processor, dataframe, minio_client, bucket_name, device, model_name):
    """Evalúa un modelo CLIP y retorna métricas"""
    model.eval()
    
    # Crear dataset y dataloader
    dataset = SkinLesionDataset(dataframe, processor, minio_client, bucket_name)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    all_image_embeds = []
    all_text_embeds = []
    
    print(f"Evaluando {model_name}...")
    for batch in tqdm(dataloader, desc=f"Procesando {model_name}"):
        batch = {k: v.to(device) for k, v in batch.items()}
        img_emb = model.get_image_features(pixel_values=batch['pixel_values'])
        txt_emb = model.get_text_features(
            input_ids=batch['input_ids'], 
            attention_mask=batch['attention_mask']
        )
        
        all_image_embeds.append(F.normalize(img_emb, dim=-1))
        all_text_embeds.append(F.normalize(txt_emb, dim=-1))
    
    image_embeds = torch.cat(all_image_embeds)
    text_embeds = torch.cat(all_text_embeds)
    
    # Text-to-Image Retrieval
    sim_matrix = text_embeds @ image_embeds.T
    num_queries = sim_matrix.size(0)
    ranks = []
    
    for i in range(num_queries):
        sorted_indices = torch.argsort(sim_matrix[i], descending=True)
        rank_idx = torch.where(sorted_indices == i)[0]
        if len(rank_idx) > 0:
            rank = rank_idx[0].item() + 1
        else:
            rank = len(sorted_indices) + 1
        ranks.append(rank)
    
    ranks = np.array(ranks)
    
    # Calcular métricas
    metrics = {
        "Recall@1": np.mean(ranks <= 1),
        "Recall@5": np.mean(ranks <= 5),
        "Recall@10": np.mean(ranks <= 10),
        "Mean Rank": np.mean(ranks),
        "Median Rank": np.median(ranks),
        "MRR": np.mean(1.0 / ranks),
        "NDCG": np.mean([1.0 / np.log2(r + 1) for r in ranks]),
    }
    
    return metrics


In [None]:
# Evaluar modelo base entrenado
print("=" * 60)
print("EVALUANDO MODELO BASE ENTRENADO")
print("=" * 60)
metrics_base = evaluate_model(
    model_base, processor_base, df_train, 
    minio_client, minio_bucket, DEVICE, "CLIP Base (Entrenado)"
)

print("\nMétricas del modelo base entrenado:")
for key, value in metrics_base.items():
    print(f"  {key}: {value:.4f}")


In [None]:
# Evaluar modelo potente (sin entrenar)
print("\n" + "=" * 60)
print("EVALUANDO MODELO CLIP POTENTE (SIN ENTRENAR)")
print("=" * 60)
metrics_large = evaluate_model(
    model_large, processor_large, df_train,
    minio_client, minio_bucket, DEVICE, "CLIP Large (Sin entrenar)"
)

print("\nMétricas del modelo CLIP potente:")
for key, value in metrics_large.items():
    print(f"  {key}: {value:.4f}")


## Comparación de Resultados


In [1]:
# Comparación de resultados
print("\n" + "=" * 60)
print("COMPARACIÓN DE MODELOS")
print("=" * 60)

comparison_df = pd.DataFrame({
    "Métrica": list(metrics_base.keys()),
    "CLIP Base (Entrenado)": [metrics_base[k] for k in metrics_base.keys()],
    "CLIP Large (Sin entrenar)": [metrics_large[k] for k in metrics_large.keys()],
})

# Calcular diferencia
comparison_df["Diferencia"] = comparison_df["CLIP Base (Entrenado)"] - comparison_df["CLIP Large (Sin entrenar)"]
comparison_df["Mejor"] = comparison_df.apply(
    lambda row: "Base" if row["Diferencia"] > 0 else "Large",
    axis=1
)

print("\nTabla comparativa:")
print(comparison_df.to_string(index=False))

# Resumen
print("\n" + "=" * 60)
print("RESUMEN")
print("=" * 60)
base_wins = (comparison_df["Diferencia"] > 0).sum()
large_wins = (comparison_df["Diferencia"] < 0).sum()

print(f"\nMétricas donde CLIP Base (Entrenado) es mejor: {base_wins}")
print(f"Métricas donde CLIP Large (Sin entrenar) es mejor: {large_wins}")

if base_wins > large_wins:
    print("\n✓ El modelo CLIP Base entrenado supera al modelo CLIP Large en la mayoría de métricas!")
elif large_wins > base_wins:
    print("\n⚠ El modelo CLIP Large sin entrenar supera al modelo base entrenado.")
else:
    print("\n≈ Los modelos tienen un rendimiento similar.")



COMPARACIÓN DE MODELOS


NameError: name 'pd' is not defined