# TSNE - IMAGE/TEXT

In [None]:
import os
import clip
import torch
from PIL import Image
import numpy as np
from sklearn.manifold import TSNE
import umap
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

In [None]:
# 1. Load the CLIP model (GPU when CUDA is available, otherwise CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device=device)

# 2. Image folder and the file extensions treated as images.
image_dir = "generated_images/500p_quadruplets_v1"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
image_filenames = [
    name for name in os.listdir(image_dir)
    if name.lower().endswith(image_extensions)
]

# Filename keyword -> group name, checked in this order; the first hit wins.
group_keywords = (
    ("older", "older"),
    ("middle", "middle-aged"),
    ("young", "young"),
    ("person", "person"),
)

labels = []  # filename stems, one per embedded image
descriptions = []  # text fed to the CLIP text encoder (same as the label)
groups = []  # group assigned to each embedded image

image_features_list = []  # flattened, L2-normalised image embeddings
text_features_list = []  # flattened, L2-normalised text embeddings

# 3. Embed every image together with the text derived from its filename.
for filename in image_filenames:
    image_path = os.path.join(image_dir, filename)

    # Open, preprocess and close the file in one step. Files that Pillow
    # cannot identify are reported and skipped.
    try:
        with Image.open(image_path) as image:
            image_input = preprocess(image).unsqueeze(0).to(device)
    except Image.UnidentifiedImageError:
        print(f"No se puede abrir o identificar la imagen: {filename}. Saltando.")
        continue

    with torch.no_grad():
        image_features = model.encode_image(image_input).cpu().numpy()

    # Scale the embedding to unit L2 norm before storing it.
    image_features = image_features / np.linalg.norm(image_features)
    image_features_list.append(image_features.flatten())

    # The label is the part of the filename before the first dot.
    label = filename.split(".")[0]
    labels.append(label)

    # The text prompt is the label itself.
    description = label
    descriptions.append(description)

    text_input = clip.tokenize([description]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_input).cpu().numpy()

    text_features = text_features / np.linalg.norm(text_features)
    text_features_list.append(text_features.flatten())

    # Files matching no keyword fall into the "unknown" group.
    label_lower = label.lower()
    group = next(
        (name for keyword, name in group_keywords if keyword in label_lower),
        "unknown",
    )
    groups.append(group)

# Count only the images that were actually embedded: skipped files must not
# inflate the value used later for the t-SNE perplexity and for the number of
# image/text pairs that get reported.
num_images = len(labels)

print("Verificación de la clasificación de grupos:")
for label, group in zip(labels, groups):
    print(f"Label: {label}, Group: {group}")
print("-" * 40)

image_features_array = np.array(image_features_list)
text_features_array = np.array(text_features_list)

# 4. 2-D projection of the image and text embeddings.
def _build_reducer(method):
    """Return an unfitted 2-D reducer for ``method`` ('umap' or 'tsne').

    Raises:
        ValueError: if ``method`` is neither 'umap' nor 'tsne'.
    """
    if method == "umap":
        return umap.UMAP(n_components=2, random_state=42)
    if method == "tsne":
        # Perplexity is capped at 30 and kept below the image count.
        tsne_perplexity = min(30, num_images - 1) if num_images > 1 else 1
        return TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
    raise ValueError("Método de reducción no válido. Debe ser 'umap' o 'tsne'.")


# Images
reduction_method = "tsne"
image_reducer = _build_reducer(reduction_method)
reduced_image_features = image_reducer.fit_transform(image_features_array)
image_title = (
    'UMAP Visualization of CLIP Image Embeddings'
    if reduction_method == "umap"
    else 't-SNE Visualization of CLIP Image Embeddings'
)

# Texts (the method can be chosen independently of the one used for images)
reduction_method = "tsne"
text_reducer = _build_reducer(reduction_method)
reduced_text_features = text_reducer.fit_transform(text_features_array)
text_title = (
    'UMAP Visualization of CLIP Text Embeddings'
    if reduction_method == "umap"
    else 't-SNE Visualization of CLIP Text Embeddings'
)


# 5. Cosine similarity between every image embedding and every text embedding.
similarity_matrix = cosine_similarity(image_features_array, text_features_array)

# Only the first few matched pairs are reported (at most 8).
num_comparisons = min(num_images, 8)

print("\nSimilitud Coseno:")
for pair_index in range(num_comparisons):
    # Diagonal entry: image i against the text built from its own filename.
    similarity = similarity_matrix[pair_index, pair_index]
    print(f"Similaridad coseno entre la imagen {labels[pair_index]} y su descripción: {similarity:.3f}")

# Reminder of how to read the values printed above.
for explanation_line in (
    "\nRango del coseno:",
    "El valor del coseno se mueve entre -1 y 1.",
    "-1 indica una similitud negativa perfecta (vectores opuestos).",
    "0 indica que no hay similitud (vectores ortogonales).",
    "1 indica una similitud positiva perfecta (vectores idénticos).",
):
    print(explanation_line)

# 6. 2-D scatter plots of the reduced IMAGE and TEXT embeddings.

# Marker and colour used for each group.
group_markers = {"older": "o", "middle-aged": "s", "young": "^", "person": "D"}
group_colors = {"older": 'blue', "middle-aged": 'green', "young": 'red', "person": 'purple'}

# The classifier tags files that match no keyword as "unknown", which has no
# entry in the dictionaries above. Such points get this fallback style instead
# of raising KeyError.
fallback_marker = "d"
fallback_color = 'gray'


def _plot_embeddings_by_group(points, title):
    """Scatter ``points`` (one 2-D row per entry of ``groups``) styled by group.

    Returns the figure, its axes and the list of legend handles.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    for index, point in enumerate(points):
        group = groups[index]
        ax.scatter(
            point[0],
            point[1],
            color=group_colors.get(group, fallback_color),
            marker=group_markers.get(group, fallback_marker),
            alpha=0.7,
        )

    # The legend is built from explicit proxy handles, so the scattered
    # points themselves need no labels.
    handles = [
        mlines.Line2D([0], [0], marker=group_markers.get(name, fallback_marker), color='w',
                      label=f'{name.capitalize()}', markerfacecolor=color, markersize=10)
        for name, color in group_colors.items()
    ]
    # Add a legend entry for the fallback style only when it is in use.
    if any(group not in group_colors for group in groups):
        handles.append(
            mlines.Line2D([0], [0], marker=fallback_marker, color='w',
                          label='Unknown', markerfacecolor=fallback_color, markersize=10)
        )

    ax.legend(handles=handles, title='Embedding Groups', loc='lower right')
    ax.set_title(title)
    ax.set_xlabel('t-SNE Dimension 1')
    ax.set_ylabel('t-SNE Dimension 2')
    # Coordinates carry no meaning on their own: hide the grid and the ticks.
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    return fig, ax, handles


fig_image, ax_image, handles_group_image = _plot_embeddings_by_group(reduced_image_features, image_title)
fig_text, ax_text, handles_group_text = _plot_embeddings_by_group(reduced_text_features, text_title)
plt.show()

# TSNE - BY GROUP

In [None]:
# Titles used by the per-group t-SNE figures.
image_title, text_title = (
    "t-SNE Visualization of CLIP Image Embeddings",
    "t-SNE Visualization of CLIP Text Embeddings",
)

# (group, marker, colour) triples; the two lookup tables are derived from
# them so both always list the same groups in the same order.
_group_styles = (
    ("older", "o", 'blue'),
    ("middle-aged", "s", 'green'),
    ("young", "^", 'red'),
    ("person", "D", 'purple'),
)
group_markers = {name: marker for name, marker, _color in _group_styles}
group_colors = {name: color for name, _marker, color in _group_styles}

# -------------------------------------------------------------
# Función para dibujar el plot t-SNE resaltando un grupo
def plot_tsne_highlighted(features, groups, labels, title, highlight_group, 
                          alpha_normal=0.7, alpha_other=0.1, legend_loc='lower right'):
    """
    Draw a scatter plot in which one group stands out from the others.

    Points whose group equals ``highlight_group`` are drawn with
    ``alpha_normal``; every other point is drawn with ``alpha_other``.
    Marker and colour come from the module-level ``group_markers`` and
    ``group_colors``. A group missing from those tables (for example
    "unknown") is drawn with a gray thin-diamond fallback instead of raising
    KeyError.

    Args:
        features: indexable of 2-D points (row i is the x, y of sample i).
        groups: group name of each sample, aligned with ``features``.
        labels: accepted for call compatibility; not used by the plot.
        title: base title; the highlighted group name is appended to it.
        highlight_group: name of the group to emphasise.
        alpha_normal: opacity of the highlighted group.
        alpha_other: opacity of all other points.
        legend_loc: matplotlib legend location.
    """
    fallback_marker, fallback_color = "d", "gray"
    fig, ax = plt.subplots(figsize=(12, 10))

    for index, point in enumerate(features):
        group = groups[index]
        ax.scatter(
            point[0],
            point[1],
            color=group_colors.get(group, fallback_color),
            marker=group_markers.get(group, fallback_marker),
            alpha=alpha_normal if group == highlight_group else alpha_other,
        )

    # The legend always lists every configured group, highlighted or not.
    handles = [
        mlines.Line2D([0], [0],
                      marker=group_markers.get(name, fallback_marker),
                      color='w',
                      label=name.capitalize(),
                      markerfacecolor=color,
                      markersize=10)
        for name, color in group_colors.items()
    ]
    # Add an entry for the fallback style only when some point uses it.
    if any(group not in group_colors for group in groups):
        handles.append(mlines.Line2D([0], [0],
                                     marker=fallback_marker,
                                     color='w',
                                     label='Unknown',
                                     markerfacecolor=fallback_color,
                                     markersize=10))
    ax.legend(handles=handles, title='Embedding Groups', loc=legend_loc)

    # Title and axes: hide the grid and the ticks.
    ax.set_title(f"{title} - Highlight: {highlight_group.capitalize()}")
    ax.set_xlabel("t-SNE Dimension 1")
    ax.set_ylabel("t-SNE Dimension 2")
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.show()

# -------------------------------------------------------------
# One highlighted figure per group: first every image-embedding figure,
# then every text-embedding figure.
for tsne_points, tsne_title in (
    (reduced_image_features, image_title),
    (reduced_text_features, text_title),
):
    for g in group_colors:
        plot_tsne_highlighted(tsne_points, groups, labels, tsne_title, highlight_group=g)

# UMAP

In [None]:
import os
import clip
import torch
from PIL import Image
import numpy as np
from sklearn.manifold import TSNE
import umap
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

In [None]:
# 1. Load the CLIP model (GPU when CUDA is available, otherwise CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device=device)

# 2. Image folder and the file extensions treated as images.
image_dir = "generated_images/500p_quadruplets_v1/"
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
image_filenames = [
    name for name in os.listdir(image_dir)
    if name.lower().endswith(image_extensions)
]

# Filename keyword -> group name, checked in this order; the first hit wins.
group_keywords = (
    ("older", "older"),
    ("middle", "middle-aged"),
    ("young", "young"),
    ("person", "person"),
)

labels = []  # filename stems, one per embedded image
descriptions = []  # text fed to the CLIP text encoder (same as the label)
groups = []  # group assigned to each embedded image

image_features_list = []  # flattened, L2-normalised image embeddings
text_features_list = []  # flattened, L2-normalised text embeddings

# 3. Embed every image together with the text derived from its filename.
for filename in image_filenames:
    image_path = os.path.join(image_dir, filename)

    # Open, preprocess and close the file in one step. Files that Pillow
    # cannot identify are reported and skipped.
    try:
        with Image.open(image_path) as image:
            image_input = preprocess(image).unsqueeze(0).to(device)
    except Image.UnidentifiedImageError:
        print(f"No se puede abrir o identificar la imagen: {filename}. Saltando.")
        continue

    with torch.no_grad():
        image_features = model.encode_image(image_input).cpu().numpy()

    # Scale the embedding to unit L2 norm before storing it.
    image_features = image_features / np.linalg.norm(image_features)
    image_features_list.append(image_features.flatten())

    # The label is the part of the filename before the first dot.
    label = filename.split(".")[0]
    labels.append(label)

    # The text prompt is the label itself.
    description = label
    descriptions.append(description)

    text_input = clip.tokenize([description]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_input).cpu().numpy()

    text_features = text_features / np.linalg.norm(text_features)
    text_features_list.append(text_features.flatten())

    # Files matching no keyword fall into the "unknown" group.
    label_lower = label.lower()
    group = next(
        (name for keyword, name in group_keywords if keyword in label_lower),
        "unknown",
    )
    groups.append(group)

# Count only the images that were actually embedded: skipped files must not
# inflate the value used later for the t-SNE perplexity and for the number of
# image/text pairs that get reported.
num_images = len(labels)

print("Verificación de la clasificación de grupos:")
for label, group in zip(labels, groups):
    print(f"Label: {label}, Group: {group}")
print("-" * 40)

image_features_array = np.array(image_features_list)
text_features_array = np.array(text_features_list)

# 4. 2-D projection of the image and text embeddings.
def _build_reducer(method):
    """Return an unfitted 2-D reducer for ``method`` ('umap' or 'tsne').

    Raises:
        ValueError: if ``method`` is neither 'umap' nor 'tsne'.
    """
    if method == "umap":
        return umap.UMAP(n_components=2, random_state=42)
    if method == "tsne":
        # Perplexity is capped at 30 and kept below the image count.
        tsne_perplexity = min(30, num_images - 1) if num_images > 1 else 1
        return TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
    raise ValueError("Método de reducción no válido. Debe ser 'umap' o 'tsne'.")


# Images
reduction_method = "umap"
image_reducer = _build_reducer(reduction_method)
reduced_image_features = image_reducer.fit_transform(image_features_array)
image_title = (
    'UMAP Visualization of CLIP Image Embeddings'
    if reduction_method == "umap"
    else 't-SNE Visualization of CLIP Image Embeddings'
)

# Texts (the method can be chosen independently of the one used for images)
reduction_method = "umap"
text_reducer = _build_reducer(reduction_method)
reduced_text_features = text_reducer.fit_transform(text_features_array)
text_title = (
    'UMAP Visualization of CLIP Text Embeddings'
    if reduction_method == "umap"
    else 't-SNE Visualization of CLIP Text Embeddings'
)

# 5. Cosine similarity between every image embedding and every text embedding.
similarity_matrix = cosine_similarity(image_features_array, text_features_array)

# Only the first few matched pairs are reported (at most 8).
num_comparisons = min(num_images, 8)

print("\nSimilitud Coseno:")
for pair_index in range(num_comparisons):
    # Diagonal entry: image i against the text built from its own filename.
    similarity = similarity_matrix[pair_index, pair_index]
    print(f"Similaridad coseno entre la imagen {labels[pair_index]} y su descripción: {similarity:.3f}")

# Reminder of how to read the values printed above.
for explanation_line in (
    "\nRango del coseno:",
    "El valor del coseno se mueve entre -1 y 1.",
    "-1 indica una similitud negativa perfecta (vectores opuestos).",
    "0 indica que no hay similitud (vectores ortogonales).",
    "1 indica una similitud positiva perfecta (vectores idénticos).",
):
    print(explanation_line)

# 6. 2-D scatter plots of the reduced IMAGE and TEXT embeddings.

# Marker and colour used for each group.
group_markers = {"older": "o", "middle-aged": "s", "young": "^", "person": "D"}
group_colors = {"older": 'blue', "middle-aged": 'green', "young": 'red', "person": 'purple'}

# The classifier tags files that match no keyword as "unknown", which has no
# entry in the dictionaries above. Such points get this fallback style instead
# of raising KeyError.
fallback_marker = "d"
fallback_color = 'gray'


def _plot_embeddings_by_group(points, title, kind, legend_loc):
    """Scatter ``points`` (one 2-D row per entry of ``groups``) styled by group.

    ``kind`` ('Image' or 'Text') is used in the legend labels and legend
    title. Returns the figure, its axes and the list of legend handles.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    for index, point in enumerate(points):
        group = groups[index]
        ax.scatter(
            point[0],
            point[1],
            color=group_colors.get(group, fallback_color),
            marker=group_markers.get(group, fallback_marker),
            alpha=0.7,
        )

    # The legend is built from explicit proxy handles, so the scattered
    # points themselves need no labels.
    handles = [
        plt.Line2D([0], [0], marker=group_markers.get(name, fallback_marker), color='w',
                   label=f'{kind} ({name})', markerfacecolor=color, markersize=10)
        for name, color in group_colors.items()
    ]
    # Add a legend entry for the fallback style only when it is in use.
    if any(group not in group_colors for group in groups):
        handles.append(
            plt.Line2D([0], [0], marker=fallback_marker, color='w',
                       label=f'{kind} (unknown)', markerfacecolor=fallback_color, markersize=10)
        )

    ax.legend(handles=handles, title=f'{kind} Embedding Groups', loc=legend_loc)
    ax.set_title(title)
    ax.set_xlabel('Dimension 1')
    ax.set_ylabel('Dimension 2')
    # Coordinates carry no meaning on their own: hide the grid and the ticks.
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    return fig, ax, handles


fig_image, ax_image, handles_group_image = _plot_embeddings_by_group(
    reduced_image_features, image_title, 'Image', 'lower left'
)
fig_text, ax_text, handles_group_text = _plot_embeddings_by_group(
    reduced_text_features, text_title, 'Text', 'lower right'
)
plt.show()

# UMAP - BY GROUP

In [None]:
# -------------------------------------------------------------
# Titles used by the per-group UMAP figures.
image_title, text_title = (
    "UMAP Visualization of CLIP Image Embeddings",
    "UMAP Visualization of CLIP Text Embeddings",
)

# group_markers and group_colors are not redefined here: the values already
# defined by the UMAP plotting cell are reused.

# -------------------------------------------------------------
# Función para dibujar un plot destacando un grupo en 'reduced_features'
def plot_features_highlighted(
    reduced_features,
    groups,
    labels,
    group_markers,
    group_colors,
    title,
    highlight_group,
    alpha_other=0.1
):
    """
    Draw a scatter plot in which one group stands out from the others.

    Points whose group equals ``highlight_group`` are drawn with alpha 0.7;
    every other point is drawn with ``alpha_other``. A group missing from
    ``group_markers`` / ``group_colors`` (for example "unknown") is drawn with
    a gray thin-diamond fallback instead of raising KeyError.

    Args:
        reduced_features: indexable of 2-D points (row i is the x, y of sample i).
        groups: group name of each sample, aligned with ``reduced_features``.
        labels: accepted for call compatibility; not used by the plot.
        group_markers: mapping group name -> matplotlib marker.
        group_colors: mapping group name -> matplotlib colour; its keys and
            order define the legend entries.
        title: base title; the highlighted group name is appended to it.
        highlight_group: name of the group to emphasise.
        alpha_other: opacity of the points outside the highlighted group.
    """
    # Imported locally so the function does not rely on an import made by a
    # different notebook section.
    from matplotlib.lines import Line2D

    fallback_marker, fallback_color = "d", "gray"
    fig, ax = plt.subplots(figsize=(12, 10))

    for index, point in enumerate(reduced_features):
        group = groups[index]
        ax.scatter(
            point[0],
            point[1],
            color=group_colors.get(group, fallback_color),
            marker=group_markers.get(group, fallback_marker),
            alpha=0.7 if group == highlight_group else alpha_other
        )

    # The legend always lists every configured group, highlighted or not.
    handles = [
        Line2D(
            [0], [0],
            marker=group_markers.get(g_name, fallback_marker),
            color='w',
            label=f'{g_name}',
            markerfacecolor=g_color,
            markersize=10
        )
        for g_name, g_color in group_colors.items()
    ]
    # Add an entry for the fallback style only when some point uses it.
    if any(group not in group_colors for group in groups):
        handles.append(
            Line2D(
                [0], [0],
                marker=fallback_marker,
                color='w',
                label='unknown',
                markerfacecolor=fallback_color,
                markersize=10
            )
        )
    ax.legend(handles=handles, title='Groups', loc='lower left')

    ax.set_title(f"{title} - Highlight: {highlight_group}")
    ax.set_xlabel('Dimension 1')
    ax.set_ylabel('Dimension 2')
    # Coordinates carry no meaning on their own: hide the grid and the ticks.
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.tight_layout()
    plt.show()

# -------------------------------------------------------------
# One highlighted figure per group: first every IMAGE-feature figure,
# then every TEXT-feature figure.
for umap_points, umap_title in (
    (reduced_image_features, image_title),
    (reduced_text_features, text_title),
):
    for g in group_colors:
        plot_features_highlighted(
            reduced_features=umap_points,
            groups=groups,
            labels=labels,
            group_markers=group_markers,
            group_colors=group_colors,
            title=umap_title,
            highlight_group=g,
            alpha_other=0.1
        )

# INTERACTIVE UMAP

In [None]:
import os
import clip
import torch
from PIL import Image
import numpy as np
import umap
import re
import plotly.graph_objects as go

# 1. Load CLIP ViT-B/32 on the GPU when CUDA is available, else on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model, preprocess = clip.load('ViT-B/32', device=device)

## IMAGE

In [None]:
image_dir = "generated_images/500p_quadruplets_v1/"

# Number of directory entries that carry a supported image extension.
num_images = sum(
    1
    for entry in os.listdir(image_dir)
    if entry.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))
)

# Per-image accumulators, filled in lockstep by the processing loop:
# filename stems, age groups, activities and image embeddings.
labels, groups, activities, image_features_list = [], [], [], []

def extract_activity(filename):
    """Return the activity encoded in an image filename, or "unknown".

    The filename is lower-cased and searched for ``<prefix>person_<activity>.``
    where ``<activity>`` is made of lowercase letters and underscores and is
    immediately followed by a dot. Underscores in the activity are replaced
    by spaces. The patterns are tried from most to least specific; the first
    one that matches wins.
    """
    lowered = filename.lower()

    # Age-specific prefixes first; the bare "person_" pattern is the catch-all.
    ordered_patterns = (
        r'middle-aged_person_([a-z_]+)\.',
        r'young_person_([a-z_]+)\.',
        r'older_person_([a-z_]+)\.',
        r'person_([a-z_]+)\.',
    )

    candidates = (re.search(pattern, lowered) for pattern in ordered_patterns)
    first_hit = next((hit for hit in candidates if hit), None)
    if first_hit is None:
        return "unknown"
    return first_hit.group(1).replace('_', ' ')

print("Procesando imágenes y extrayendo embeddings...")

# Filename substring -> age group, tested in this order; the first match wins.
age_group_rules = (
    ("middle-aged", "middle-aged"),
    ("middle_aged", "middle-aged"),
    ("older", "older"),
    ("middle", "middle-aged"),
    ("young", "young"),
    ("person", "person"),
)

for filename in os.listdir(image_dir):
    # Only files with a supported image extension are processed.
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
        continue

    image_path = os.path.join(image_dir, filename)

    # Files that Pillow cannot identify are reported and skipped.
    try:
        image = Image.open(image_path)
    except Image.UnidentifiedImageError:
        print(f"No se puede abrir o identificar la imagen: {filename}. Saltando.")
        continue

    # CLIP preprocessing, with a leading batch dimension added.
    image_input = preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image_input).cpu().numpy()

    # Scale the embedding to unit L2 norm before storing it.
    image_features = image_features / np.linalg.norm(image_features)
    image_features_list.append(image_features.flatten())

    # The label is the part of the filename before the first dot.
    label = filename.split(".")[0]
    labels.append(label)

    activity = extract_activity(filename)
    activities.append(activity)

    # The age group is decided on the full lower-cased filename; files that
    # match no rule are tagged "unknown".
    label_lower = filename.lower()
    group = next(
        (age_group for needle, age_group in age_group_rules if needle in label_lower),
        "unknown",
    )
    groups.append(group)

unique_activities = sorted(set(activities))
print(f"Total de imágenes procesadas: {len(labels)}")
print(f"Actividades detectadas: {len(unique_activities)}")
print(f"Ejemplos de actividades: {', '.join(unique_activities[:5])}")

# Colour of each age group. Defined BEFORE the statistics loop that iterates
# over it, so this cell neither fails with NameError in a fresh kernel nor
# silently picks up an older dictionary (without "unknown") left behind by a
# previous cell.
group_colors = {
    "older": 'blue',
    "middle-aged": 'green',
    "young": 'red',
    "person": 'purple',
    "unknown": 'gray'
}

print("\nEstadísticas por grupo de edad:")
for group_name in group_colors:
    group_indices = [i for i, g in enumerate(groups) if g == group_name]
    # Groups with no image are left out of the report.
    if not group_indices:
        continue
    group_activities = [activities[i] for i in group_indices]
    print(f"Grupo '{group_name}': {len(group_indices)} imágenes")
    print(f"  Ejemplos de actividades: {', '.join(sorted(set(group_activities))[:5])}")
    print(f"  Ejemplos de archivos: {', '.join([labels[i] for i in group_indices[:3]])}")

# Project the image embeddings to 2-D with UMAP (fixed seed).
print("Aplicando reducción dimensional con UMAP...")
image_features_array = np.array(image_features_list)
image_reducer = umap.UMAP(n_components=2, random_state=42)
reduced_image_features = image_reducer.fit_transform(image_features_array)

fig = go.Figure()

# Base layer: one always-visible trace per age group that has images.
base_traces = []
for group, group_color in group_colors.items():
    indices = [idx for idx, member_group in enumerate(groups) if member_group == group]
    if not indices:
        continue

    hover_text = [
        f"Label: {labels[idx]}<br>Group: {groups[idx]}<br>Activity: {activities[idx]}"
        for idx in indices
    ]
    trace = go.Scatter(
        x=reduced_image_features[indices, 0],
        y=reduced_image_features[indices, 1],
        mode='markers',
        marker=dict(size=10, color=group_color, opacity=0.7),
        text=hover_text,
        name=f"{group}",
        customdata=list(indices),  # original sample index of every point
        hoverinfo='text',
        visible=True
    )
    fig.add_trace(trace)
    base_traces.append(trace)

# Highlight layer: one initially hidden trace per activity, drawn on top of
# the base layer with larger, outlined yellow markers and the label as text.
highlight_traces = []
for activity in unique_activities:
    activity_indices = [idx for idx, act in enumerate(activities) if act == activity]
    if not activity_indices:
        continue

    highlight_marker = dict(
        size=15,
        color='yellow',
        opacity=1,
        line=dict(width=2, color='black')
    )
    trace = go.Scatter(
        x=reduced_image_features[activity_indices, 0],
        y=reduced_image_features[activity_indices, 1],
        mode='markers+text',
        marker=highlight_marker,
        text=[labels[idx] for idx in activity_indices],
        textposition="top center",
        name=f"Highlighted - {activity}",
        visible=False
    )
    fig.add_trace(trace)
    highlight_traces.append(trace)

# Dropdown entries. Every entry keeps the base layer visible; "All Activities"
# hides all highlight traces, each activity entry shows only its own.
n_base = len(base_traces)
n_highlight = len(highlight_traces)

buttons = [
    dict(
        args=[{'visible': [True] * n_base + [False] * n_highlight}],
        label="All Activities",
        method="update"
    )
]
for position, activity in enumerate(unique_activities):
    highlight_visibility = [slot == position for slot in range(n_highlight)]
    buttons.append(
        dict(
            args=[{'visible': [True] * n_base + highlight_visibility}],
            label=activity.capitalize(),
            method="update"
        )
    )

# Dropdown that switches between the activity highlight traces.
activity_menu = dict(
    type="dropdown",
    buttons=buttons,
    direction="down",
    pad={"r": 10, "t": 10},
    showactive=True,
    x=1.3,
    xanchor="right",
    y=1.01,
    yanchor="top"
)

# Caption placed next to the dropdown, in paper coordinates.
menu_caption = dict(
    text="<b>Select an activity:</b>",
    x=1.2,
    y=1.03,
    xref="paper",
    yref="paper",
    showarrow=False,
    xanchor="right"
)

# Menu, title, caption, canvas size and legend placement in a single call.
fig.update_layout(
    updatemenus=[activity_menu],
    title=dict(
        text="<b>Interactive visualization of CLIP (ViT-B/32) Image Embeddings by Activity</b>",
        x=0.4,
        xanchor="center"
    ),
    annotations=[menu_caption],
    height=1000,
    width=1400,
    margin=dict(t=100, r=200, b=100),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.08,
        xanchor="center",
        x=0.5
    )
)

# Tick labels are hidden on both axes; only the x axis keeps its zero line.
fig.update_xaxes(showticklabels=False, showgrid=True, zeroline=True)
fig.update_yaxes(showticklabels=False, showgrid=True, zeroline=False)
fig.show()

# Save a standalone HTML copy of the interactive figure.
fig.write_html("500p_image_embeddings_umap_activity_interactive_042225.html")

print("\nSe ha generado la visualización interactiva de Image Embeddings.")
print("Usa el menú desplegable para seleccionar una actividad específica.")

## TEXT

In [None]:
# image_dir = "generated_images/500p_quadruplets_v1" 
# num_images = 0
# for filename in os.listdir(image_dir):
#     if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
#         num_images += 1

# Accumulators for the text pass (five independent empty lists):
#   labels             - label derived from each file name
#   descriptions       - text description (prompt) for each label
#   groups             - age-group tag
#   activities         - activity name
#   text_features_list - text feature vectors
labels, descriptions, groups, activities, text_features_list = (
    [] for _ in range(5)
)

# def extract_activity(filename):
#     filename_lower = filename.lower()
    
#     middle_aged_pattern = r'middle-aged_person_([a-z_]+)\.'
#     match = re.search(middle_aged_pattern, filename_lower)
    
#     if match:
#         return match.group(1).replace('_', ' ')
    
#     group_patterns = {
#         'young_person_': r'young_person_([a-z_]+)\.',
#         'older_person_': r'older_person_([a-z_]+)\.',
#         'person_': r'person_([a-z_]+)\.'  # Para el grupo "person" sin edad
#     }
    
#     for pattern in group_patterns.values():
#         match = re.search(pattern, filename_lower)
#         if match:
#             return match.group(1).replace('_', ' ')
    
#     return "unknown"

# 3. Text processing: embed each image file's label with the CLIP text encoder.
print("Procesando textos y extrayendo embeddings...")

_IMAGE_EXTS = ('.png', '.jpg', '.jpeg', '.bmp')

# Ordered (substring, group) rules applied to the lower-cased file name;
# the first rule that matches decides the age group.
_AGE_GROUP_RULES = (
    ("middle-aged", "middle-aged"),
    ("middle_aged", "middle-aged"),
    ("older", "older"),
    ("middle", "middle-aged"),
    ("young", "young"),
    ("person", "person"),
)

for filename in os.listdir(image_dir):
    name_lower = filename.lower()
    if not name_lower.endswith(_IMAGE_EXTS):
        continue  # not an image file

    # Label: everything before the first dot of the file name.
    label = filename.split(".")[0]
    labels.append(label)

    # Activity as reported by extract_activity for this file name.
    activity = extract_activity(filename)
    activities.append(activity)

    # The label itself serves as the text description.
    description = f"{label}"
    descriptions.append(description)

    # Tokenize the description and encode it without tracking gradients.
    text_input = clip.tokenize([description]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_input).cpu().numpy()

    # Divide by the norm, then store the embedding as a flat vector.
    text_features = text_features / np.linalg.norm(text_features)
    text_features_list.append(text_features.flatten())

    # Age group: first matching rule, or "unknown" when none applies.
    group = next(
        (tag for needle, tag in _AGE_GROUP_RULES if needle in name_lower),
        "unknown",
    )
    groups.append(group)

# Summary of what the text pass collected.
unique_activities = sorted(set(activities))
print(f"Total de textos procesados: {len(labels)}")
print(f"Actividades detectadas: {len(unique_activities)}")
print(f"Ejemplos de actividades: {', '.join(unique_activities[:5])}")

# Marker colour for each age group. Defined before the statistics loop
# because that loop iterates over its keys; assigning it afterwards made
# this cell depend on a same-named variable left over from another cell.
group_colors = {
    "older": 'blue',
    "middle-aged": 'green',
    "young": 'red',
    "person": 'purple',
    "unknown": 'gray'
}

# Per-group statistics, in the key order of group_colors; groups with no
# members are skipped.
print("\nEstadísticas por grupo de edad:")
for group_name in group_colors:
    group_indices = [i for i, g in enumerate(groups) if g == group_name]
    if not group_indices:
        continue
    group_activities = [activities[i] for i in group_indices]
    print(f"Grupo '{group_name}': {len(group_indices)} textos")
    print(f"  Ejemplos de actividades: {', '.join(sorted(set(group_activities))[:5])}")
    print(f"  Ejemplos de archivos: {', '.join([labels[i] for i in group_indices[:3]])}")

# Reduce the text embeddings to 2 components with UMAP (fixed random_state).
print("Aplicando reducción dimensional con UMAP...")
text_features_array = np.array(text_features_list)
text_reducer = umap.UMAP(n_components=2, random_state=42)
reduced_text_features = text_reducer.fit_transform(text_features_array)

fig = go.Figure()

# One always-visible scatter trace per age group that has at least one
# member, added in the key order of group_colors.
base_traces = []
for group, color in group_colors.items():
    indices = [pos for pos, tag in enumerate(groups) if tag == group]
    if not indices:
        continue

    # Hover text shown for each point of this group.
    hover_text = [
        f"Label: {labels[i]}<br>Group: {groups[i]}<br>Activity: {activities[i]}"
        for i in indices
    ]

    trace = go.Scatter(
        x=reduced_text_features[indices, 0],
        y=reduced_text_features[indices, 1],
        mode='markers',
        marker={"size": 10, "color": color, "opacity": 0.7},
        text=hover_text,
        name=f"{group}",
        customdata=list(indices),  # positions of these points in the per-file lists
        hoverinfo='text',
        visible=True,
    )
    fig.add_trace(trace)
    base_traces.append(trace)

# One initially hidden overlay trace per activity: larger yellow markers
# with a black outline, each labelled with its file label.
highlight_marker = {
    "size": 15,
    "color": 'yellow',
    "opacity": 1,
    "line": {"width": 2, "color": 'black'},
}

highlight_traces = []
for activity in unique_activities:
    activity_indices = [idx for idx, act in enumerate(activities) if act == activity]
    if not activity_indices:
        continue  # no entries for this activity

    trace = go.Scatter(
        x=reduced_text_features[activity_indices, 0],
        y=reduced_text_features[activity_indices, 1],
        mode='markers+text',
        marker=highlight_marker,
        text=[labels[idx] for idx in activity_indices],
        textposition="top center",
        name=f"Highlighted - {activity}",
        visible=False,
    )
    fig.add_trace(trace)
    highlight_traces.append(trace)

n_base = len(base_traces)
n_highlight = len(highlight_traces)

# First entry: every base trace shown, every highlight trace hidden.
buttons = [
    dict(
        args=[{'visible': [True] * n_base + [False] * n_highlight}],
        label="All Activities",
        method="update",
    )
]

# One entry per activity: base traces stay visible and only the highlight
# trace at the same position as the activity is switched on.
# NOTE(review): this indexes highlight traces by the activity's position in
# unique_activities, so both sequences are expected to line up one-to-one.
for i, activity in enumerate(unique_activities):
    highlight_visibility = [False] * n_highlight
    highlight_visibility[i] = True

    buttons.append(
        dict(
            args=[{'visible': [True] * n_base + highlight_visibility}],
            label=activity.capitalize(),
            method="update",
        )
    )

# Dropdown menu fed by the per-activity buttons built above.
activity_dropdown = {
    "type": "dropdown",
    "buttons": buttons,
    "direction": "down",
    "pad": {"r": 10, "t": 10},
    "showactive": True,
    "x": 1.3,
    "xanchor": "right",
    "y": 1.01,
    "yanchor": "top",
}

# Caption for the dropdown, positioned in paper coordinates.
dropdown_caption = {
    "text": "<b>Select an activity:</b>",
    "x": 1.2,
    "y": 1.03,
    "xref": "paper",
    "yref": "paper",
    "showarrow": False,
    "xanchor": "right",
}

# Menu, title, caption, canvas size and legend are applied in a single call;
# each keyword targets a different layout property.
fig.update_layout(
    updatemenus=[activity_dropdown],
    title={
        "text": "<b>Interactive visualization of CLIP (ViT-B/32) Text Embeddings by Activity</b>",
        "x": 0.4,
        "xanchor": "center",
    },
    annotations=[dropdown_caption],
    height=1000,
    width=1400,
    margin={"t": 100, "r": 200, "b": 100},
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": -0.08,
        "xanchor": "center",
        "x": 0.5,
    },
)

# Tick labels, grid lines and zero lines are all turned off on both axes.
for update_axis in (fig.update_xaxes, fig.update_yaxes):
    update_axis(showticklabels=False, showgrid=False, zeroline=False)
fig.show()

# Save the interactive figure as an HTML file.
fig.write_html("500p_text_embeddings_umap_activity_interactive_042225.html")

print("\nSe ha generado la visualización interactiva de Text Embeddings.")
print("Usa el menú desplegable para seleccionar una actividad específica.")