# Coverage and Entropy Analysis by Participant

Este notebook permite visualizar la evolución del coverage o entropy a lo largo del tiempo del experimento.

## Características:
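- Eje X: Tiempo en minutos (nominal: se calcula a partir del orden de presentación de cada imagen, a 20 s por imagen, no de marcas de tiempo medidas)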
- Eje Y: Coverage o Entropy
- Cada imagen: 15s visualización + 5s intervalo = 20s total
- Scatterplot con matplotlib

In [None]:
!pip install matplotlib

In [None]:
!pip install seaborn

In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import entropy
import cv2
import json

# Global plotting defaults applied to every figure created in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names are, to my knowledge, only
# available in Matplotlib 3.6+ — confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
# Default colour cycle used by seaborn/matplotlib when no explicit colour is given.
sns.set_palette('husl')

In [31]:
# Load the gaze samples (CSV) and the HoloLens session metadata (JSON).
df = pd.read_csv('df_todo_con_clase_ratio.csv')

# Decode the JSON explicitly as UTF-8 so the file is read the same way on
# every platform instead of depending on the locale's default encoding.
with open('data_hololens.json', 'r', encoding='utf-8') as f:
    hololens_data = json.load(f)

# Quick sanity summary of what was loaded.
print(f'Total registros: {len(df)}')
print(f'Participantes: {df["participante"].nunique()}')
print(f'Imágenes: {df["ImageName"].nunique()}')

Total registros: 869600
Participantes: 30
Imágenes: 150


In [32]:
def calculate_coverage(heatmap, threshold_method='otsu'):
    """Return the percentage of heatmap cells that remain after binarization.

    The heatmap is min-max rescaled to the 0-255 range and truncated to
    ``uint8`` before thresholding.

    Args:
        heatmap: NumPy array of gaze counts, or None.
        threshold_method: ``'otsu'`` lets OpenCV pick the cut-off with its
            Otsu mode; any other value keeps the cells strictly above the
            80th percentile of the rescaled map.

    Returns:
        Share of cells above the threshold, expressed in percent. 0.0 when
        ``heatmap`` is None or has no elements.
    """
    if heatmap is None or heatmap.size == 0:
        return 0.0

    lowest = heatmap.min()
    highest = heatmap.max()
    # The 1e-10 term keeps the division defined when the heatmap is constant.
    scaled = ((heatmap - lowest) / (highest - lowest + 1e-10) * 255).astype(np.uint8)

    if threshold_method != 'otsu':
        cutoff = np.percentile(scaled, 80)
        mask = (scaled > cutoff).astype(np.uint8) * 255
    else:
        # cv2.threshold returns (threshold_used, binary_image); keep the image.
        mask = cv2.threshold(scaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    active_cells = np.sum(mask > 0)
    return (active_cells / mask.size) * 100

def calculate_entropy_from_heatmap(heatmap):
    """Return the Shannon entropy, in bits, of a heatmap read as a distribution.

    Every cell is offset by 1e-10 and the flattened map is normalized to sum
    to one before the entropy (base 2) is computed.

    Returns 0.0 when ``heatmap`` is None or has no elements.
    """
    if heatmap is None or heatmap.size == 0:
        return 0.0

    # The small offset keeps empty cells from yielding exact-zero probabilities.
    weights = heatmap.flatten() + 1e-10
    total = weights.sum()
    return entropy(weights / total, base=2)

def create_heatmap_from_points(points, image_width=800, image_height=600, grid_size=50):
    """Bin gaze points into a 2D histogram of counts.

    Args:
        points: Mapping or DataFrame indexable as ``points['pixelX']`` and
            ``points['pixelY']``.
        image_width: Upper bound of the X bin edges (lower bound is 0).
        image_height: Upper bound of the Y bin edges (lower bound is 0).
        grid_size: Number of evenly spaced bin *edges* per axis, so each axis
            ends up with ``grid_size - 1`` bins.

    Returns:
        Array of shape ``(grid_size - 1, grid_size - 1)`` with X along the
        first axis, as returned by ``np.histogram2d``.
    """
    edges_x = np.linspace(0, image_width, grid_size)
    edges_y = np.linspace(0, image_height, grid_size)

    # histogram2d returns (counts, x_edges, y_edges); only the counts are used.
    counts = np.histogram2d(
        points['pixelX'], points['pixelY'], bins=[edges_x, edges_y]
    )[0]
    return counts

In [33]:
def get_participant_image_sequence(participant_id, hololens_data,
                                   time_per_image=15, time_between_images=5):
    """Build the ordered image sequence of one participant with nominal start times.

    Args:
        participant_id: Identifier to look up. It matches an entry whose
            ``'id'`` is equal to it, or whose string form is equal to its
            string form (so ``2`` also matches ``"2"``).
        hololens_data: Dict with a ``'participants'`` list; each participant
            is a dict with ``'id'`` and an ``'images'`` list of dicts carrying
            ``'imageName'`` and optionally ``'score'``.
        time_per_image: Seconds each image is displayed.
        time_between_images: Seconds of pause after each image.

    Returns:
        A list of dicts with keys ``'ImageName'``, ``'time_minutes'``,
        ``'index'`` and ``'score'``, one per image in presentation order.
        ``'time_minutes'`` is derived from the image position only
        (``index * (time_per_image + time_between_images) / 60``), not from
        recorded timestamps. An empty list is returned when the participant
        is not found.
    """
    def _same_id(candidate):
        # NOTE(review): the JSON may store ids as strings while callers pass
        # ints; compare both ways so the lookup does not silently miss.
        return candidate == participant_id or str(candidate) == str(participant_id)

    participant_data = next(
        (p for p in hololens_data.get('participants', []) if _same_id(p.get('id'))),
        None,
    )
    if not participant_data:
        return []

    slot_seconds = time_per_image + time_between_images
    # `or []` tolerates an explicit null 'images' entry in the JSON.
    images = participant_data.get('images', []) or []

    return [
        {
            'ImageName': img.get('imageName'),
            'time_minutes': (idx * slot_seconds) / 60.0,
            'index': idx,
            'score': img.get('score', None),
        }
        for idx, img in enumerate(images)
    ]

In [34]:
def calculate_metrics_for_participant(participant_id, df, hololens_data, metric='coverage'):
    """Compute coverage or entropy for every image a participant saw.

    Args:
        participant_id: Value matched against ``df['participante']`` and used
            to look up the image sequence in ``hololens_data``.
        df: Gaze samples with at least the columns ``'participante'``,
            ``'ImageName'``, ``'pixelX'`` and ``'pixelY'``.
        hololens_data: Session metadata passed to
            ``get_participant_image_sequence``.
        metric: ``'coverage'`` selects ``calculate_coverage``; any other value
            selects ``calculate_entropy_from_heatmap``.

    Returns:
        A list with one dict per image in sequence order, with keys
        ``'ImageName'``, ``'time_minutes'``, ``'index'``, ``'score'``,
        ``'value'`` and ``'has_data'``. Images without gaze rows get
        ``value=0.0`` and ``has_data=False``. An empty list is returned when
        the participant has no image sequence.
    """
    image_sequence = get_participant_image_sequence(participant_id, hololens_data)

    if not image_sequence:
        return []

    # Filter the participant's rows once instead of re-scanning the whole
    # DataFrame for every image.
    participant_rows = df[df['participante'] == participant_id]

    if metric == 'coverage':
        metric_fn = calculate_coverage
    else:
        metric_fn = calculate_entropy_from_heatmap

    results = []
    for img_info in image_sequence:
        image_name = img_info['ImageName']
        image_data = participant_rows[participant_rows['ImageName'] == image_name]
        has_data = len(image_data) > 0

        if has_data:
            value = metric_fn(create_heatmap_from_points(image_data))
        else:
            value = 0.0

        results.append({
            'ImageName': image_name,
            'time_minutes': img_info['time_minutes'],
            'index': img_info['index'],
            'score': img_info['score'],
            'value': value,
            'has_data': has_data
        })

    return results

In [35]:
def plot_participant_metrics(participant_id, df, hololens_data, metric='coverage', figsize=(16, 6)):
    """Plot coverage or entropy over nominal experiment time for one participant.

    Draws a scatter plot (coloured by value) joined by a dashed line, adds a
    horizontal line at the mean, prints summary statistics and shows the
    figure.

    Args:
        participant_id: Participant to plot.
        df: Gaze samples DataFrame.
        hololens_data: Session metadata with the image sequence.
        metric: ``'coverage'`` or anything else for entropy.
        figsize: Matplotlib figure size in inches.

    Returns:
        DataFrame with one row per image (including images without gaze
        data), or None when the participant has no image sequence.
    """
    print(f'Calculando {metric} para participante {participant_id}...')
    results = calculate_metrics_for_participant(participant_id, df, hololens_data, metric)

    if not results:
        print(f'No hay datos para participante {participant_id}')
        return

    results_df = pd.DataFrame(results)
    results_with_data = results_df[results_df['has_data']]

    print(f'Total imágenes: {len(results_df)}, con datos: {len(results_with_data)}')

    # Without any gaze rows there is nothing to draw, and every statistic
    # would be NaN; return the table instead of an empty figure.
    if results_with_data.empty:
        print(f'No hay datos de gaze para participante {participant_id}')
        return results_df

    fig, ax = plt.subplots(figsize=figsize)

    scatter = ax.scatter(
        results_with_data['time_minutes'],
        results_with_data['value'],
        c=results_with_data['value'],
        cmap='Blues',
        s=80,
        alpha=0.7,
        edgecolors='white',
        linewidth=1.5
    )

    # Faint dashed line connecting consecutive images to show the trend.
    ax.plot(
        results_with_data['time_minutes'],
        results_with_data['value'],
        color='steelblue',
        alpha=0.4,
        linewidth=1.5,
        linestyle='--'
    )

    ax.set_xlabel('Tiempo (minutos)', fontsize=14, fontweight='bold')

    if metric == 'coverage':
        ax.set_ylabel('Coverage (%)', fontsize=14, fontweight='bold')
        title = f'Saliency Coverage - Participante {participant_id}'
    else:
        ax.set_ylabel('Entropy (bits)', fontsize=14, fontweight='bold')
        title = f'Saliency Entropy - Participante {participant_id}'

    ax.set_title(title, fontsize=16, fontweight='bold', pad=20)
    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

    cbar = plt.colorbar(scatter, ax=ax, pad=0.01)
    cbar.set_label(f'{metric.capitalize()}', fontsize=12, fontweight='bold')

    # Reference line at the mean of the plotted values.
    mean_value = results_with_data['value'].mean()
    ax.axhline(y=mean_value, color='red', linestyle='--', linewidth=2, alpha=0.6, label=f'Media: {mean_value:.2f}')
    ax.legend(loc='upper right', fontsize=12, framealpha=0.9)

    plt.tight_layout()

    print(f'\nEstadísticas:')
    print(f'Media: {mean_value:.2f}')
    print(f'Std: {results_with_data["value"].std():.2f}')
    print(f'Min: {results_with_data["value"].min():.2f}')
    print(f'Max: {results_with_data["value"].max():.2f}')

    plt.show()
    return results_df

## Ejemplos de Uso

In [36]:
# Change this value to inspect a different participant.
participant_id = 2

# Coverage over time for the selected participant.
results_coverage = plot_participant_metrics(
    participant_id, df, hololens_data, metric='coverage', figsize=(18, 6)
)

Calculando coverage para participante 2...
No hay datos para participante 2


In [37]:
# Entropy over time for the selected participant.
results_entropy = plot_participant_metrics(
    participant_id, df, hololens_data, metric='entropy', figsize=(18, 6)
)

Calculando entropy para participante 2...
No hay datos para participante 2


## Comparación entre Participantes

In [None]:
def compare_participants(participant_ids, df, hololens_data, metric='coverage', figsize=(18, 8)):
    """Overlay the chosen metric over time for several participants.

    Args:
        participant_ids: Iterable of participant identifiers; each one gets
            its own line labelled ``P<id>``.
        df: Gaze samples DataFrame.
        hololens_data: Session metadata with the image sequences.
        metric: ``'coverage'`` or anything else for entropy.
        figsize: Matplotlib figure size in inches.

    Participants without an image sequence or without any gaze data are
    reported and skipped instead of aborting the whole comparison.
    """
    fig, ax = plt.subplots(figsize=figsize)

    colors = plt.cm.tab10(np.linspace(0, 1, len(participant_ids)))

    for idx, participant_id in enumerate(participant_ids):
        results = calculate_metrics_for_participant(participant_id, df, hololens_data, metric)

        # An empty result list would build a DataFrame with no 'has_data'
        # column and raise KeyError on the filter below.
        if not results:
            print(f'No hay datos para participante {participant_id}')
            continue

        results_df = pd.DataFrame(results)
        results_with_data = results_df[results_df['has_data']]

        if results_with_data.empty:
            print(f'No hay datos para participante {participant_id}')
            continue

        ax.plot(
            results_with_data['time_minutes'],
            results_with_data['value'],
            color=colors[idx],
            marker='o',
            markersize=6,
            linewidth=2,
            alpha=0.7,
            label=f'P{participant_id}'
        )

    ax.set_xlabel('Tiempo (minutos)', fontsize=14, fontweight='bold')
    ax.set_ylabel(f'{metric.capitalize()}', fontsize=14, fontweight='bold')
    ax.set_title(f'Comparación de {metric.capitalize()}', fontsize=16, fontweight='bold', pad=20)
    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

    # Only draw a legend when at least one participant was actually plotted.
    if ax.get_legend_handles_labels()[0]:
        ax.legend(loc='best', fontsize=11, framealpha=0.9)

    plt.tight_layout()
    plt.show()

# Example: compare coverage across three participants.
compare_participants([2, 10, 16], df, hololens_data, metric='coverage')