# 05_error_viewer
Builds an HTML error gallery from misclassification shortlists.

**Inputs:**
- `results/svm_bioclip/analysis/top_misclassified_samples.csv`
- `results/svm_bioclip/analysis/top10_per_species.csv`

**Outputs:**
- `results/svm_bioclip/analysis/error_gallery.html`


In [None]:
# Notebook bootstrap: make the repository importable, then load the project paths.
import sys
from pathlib import Path
# The repository root is taken to be the parent of the current working
# directory (assumes the kernel is started from a first-level subfolder of
# the repo — TODO confirm).
repo_root = Path.cwd().resolve().parent
# Put the repo root at the front of sys.path (once) so the `src` import below
# can be resolved.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))
# Must come after the sys.path tweak above.
from src.utils.paths import load_paths
# NOTE(review): `paths` is not referenced by the gallery cell below as written;
# presumably it holds the project's configured directories — confirm.
paths = load_paths()


In [14]:
# === Block 11 — HTML Error Gallery for Misclassified Samples ===

from pathlib import Path
import pandas as pd
import html

# RESULTS_ROOT is normally left in the kernel namespace by an earlier block.
# When this notebook is run on a fresh kernel that name does not exist, so fall
# back to the layout documented in the notebook header
# (<repo>/results/svm_bioclip, with the repo root being the parent of the
# current working directory) instead of failing with a NameError.
# NOTE(review): if `load_paths()` exposes the canonical results directory,
# prefer that here — confirm against src.utils.paths.
if "RESULTS_ROOT" not in globals():
    RESULTS_ROOT = Path.cwd().resolve().parent / "results" / "svm_bioclip"
ANALYSIS_ROOT = RESULTS_ROOT / "analysis"
gallery_path = ANALYSIS_ROOT / "error_gallery.html"

# ---------------------------------------------------------------------
# 1) Load the shortlists we created in Block 10
# ---------------------------------------------------------------------

top_global_path = ANALYSIS_ROOT / "top_misclassified_samples.csv"
top_per_species_path = ANALYSIS_ROOT / "top10_per_species.csv"

# Fail early, and say exactly which input is missing, before any HTML is built.
missing_inputs = [
    csv_path
    for csv_path in (top_global_path, top_per_species_path)
    if not csv_path.exists()
]
if missing_inputs:
    raise FileNotFoundError(
        "Top misclassified CSVs not found. Run Block 10 first. Missing: "
        + ", ".join(str(csv_path) for csv_path in missing_inputs)
    )

top_global = pd.read_csv(top_global_path)
top_per_species = pd.read_csv(top_per_species_path)

print(f"[LOAD] Global top misclassified: {len(top_global)} rows")
print(f"[LOAD] Per-species top misclassified: {len(top_per_species)} rows")

# ---------------------------------------------------------------------
# 2) Helper to generate HTML for one sample row
# ---------------------------------------------------------------------

def make_image_tag(path, label):
    """Return an <img> tag for a local image path, or a placeholder if missing.

    Args:
        path: Filesystem path of the image (``str`` or ``Path``). ``None``,
            NaN and blank strings are treated as "no image".
        label: Short view name (e.g. ``"dorsal"``); used as the ``alt`` text
            and inside the placeholder message. It is HTML-escaped.

    Returns:
        An ``<img class="tick-image">`` tag whose ``src`` is a ``file://`` URI
        when ``path`` points at an existing regular file, otherwise a
        ``<div class="img-missing">`` placeholder.
    """
    placeholder = f'<div class="img-missing">No {html.escape(label)} image</div>'

    # Blank strings must be rejected explicitly: Path("") is Path("."), which
    # exists, so an empty value would otherwise yield an <img> pointing at the
    # current working directory.
    if pd.isna(path) or not str(path).strip():
        return placeholder

    image_file = Path(path)
    # is_file() (rather than exists()) also rejects directories.
    if not image_file.is_file():
        return placeholder

    # as_uri() builds a well-formed file:// URI: it percent-encodes spaces and
    # other special characters and handles Windows drive letters, which plain
    # string concatenation does not.
    src = image_file.resolve().as_uri()
    return f'<img src="{html.escape(src)}" alt="{html.escape(label)}" class="tick-image">'

def sample_card(row):
    """Return HTML for a single sample card with dorsal/ventral images and metadata.

    Args:
        row: A record supporting ``.get(key, default)`` (e.g. a pandas Series
            produced by ``DataFrame.iterrows()``). Keys read:
            ``sample_id``, ``species_true_dominant`` (falling back to
            ``species_true``), ``most_common_pred``, ``incorrect_rate``,
            ``n_incorrect``, ``n_rows``, ``sex_dominant``,
            ``life_stage_dominant``, ``attached_dominant``,
            ``dorsal_image_path`` and ``ventral_image_path``. Absent keys
            default to an empty string.

    Returns:
        An HTML fragment (``<div class="sample-card">``) in which every
        metadata value is HTML-escaped and missing values render as empty text.
    """

    def fmt(value):
        """Render a value for display: "" when missing, 3 decimals for floats."""
        if isinstance(value, float):
            return "" if pd.isna(value) else f"{value:.3f}"
        return "" if pd.isna(value) else str(value)

    def fmt_count(value):
        """Render a count; whole-number floats are shown without decimals."""
        # pandas stores an integer column as float once it contains a NaN, so
        # a count may arrive as 3.0 — show "3", not "3.000".
        if isinstance(value, float) and not pd.isna(value) and value.is_integer():
            return str(int(value))
        return fmt(value)

    # Check for a missing ID *before* stringifying; str(nan) would otherwise
    # put the literal text "nan" on the card.
    raw_sample_id = row.get("sample_id", "")
    sample_id = "" if pd.isna(raw_sample_id) else str(raw_sample_id)

    true_species = row.get("species_true_dominant", row.get("species_true", ""))
    most_common_pred = row.get("most_common_pred", "")
    incorrect_rate = row.get("incorrect_rate", "")
    n_incorrect = row.get("n_incorrect", "")
    n_rows = row.get("n_rows", "")
    sex = row.get("sex_dominant", "")
    life_stage = row.get("life_stage_dominant", "")
    attached = row.get("attached_dominant", "")

    dorsal_img = make_image_tag(row.get("dorsal_image_path", ""), "dorsal")
    ventral_img = make_image_tag(row.get("ventral_image_path", ""), "ventral")

    html_block = f"""
    <div class="sample-card">
      <div class="meta">
        <div><strong>Sample ID:</strong> {html.escape(sample_id)}</div>
        <div><strong>True species:</strong> {html.escape(fmt(true_species))}</div>
        <div><strong>Most common wrong prediction:</strong> {html.escape(fmt(most_common_pred))}</div>
        <div><strong>Incorrect:</strong> {html.escape(fmt_count(n_incorrect))} / {html.escape(fmt_count(n_rows))} 
             (rate = {html.escape(fmt(incorrect_rate))})</div>
        <div><strong>Sex:</strong> {html.escape(fmt(sex))}</div>
        <div><strong>Life stage:</strong> {html.escape(fmt(life_stage))}</div>
        <div><strong>Attached (fed status proxy):</strong> {html.escape(fmt(attached))}</div>
      </div>
      <div class="images">
        <div class="image-column">
          <div class="image-label">Dorsal</div>
          {dorsal_img}
        </div>
        <div class="image-column">
          <div class="image-label">Ventral</div>
          {ventral_img}
        </div>
      </div>
    </div>
    """
    return html_block

# ---------------------------------------------------------------------
# 3) Build the HTML document
# ---------------------------------------------------------------------

# --- Section 1: Global top misclassified ---
# One card per row of the global shortlist, in file order.
global_cards = "\n".join([sample_card(record) for _, record in top_global.iterrows()])
global_section = f"""
<section>
  <h2>Global Top Misclassified Samples</h2>
  <p>These are the {len(top_global)} hardest samples across all species, based on how often they are misclassified and how consistently.</p>
  {global_cards}
</section>
"""

# --- Section 2: Top misclassified per species ---
# dropna=False keeps rows whose species is missing; they are grouped under
# the "Unknown species" heading.
species_sections_html = []
for species_key, species_rows in top_per_species.groupby("species_true_dominant", dropna=False):
    if pd.isna(species_key):
        heading = "Unknown species"
    else:
        heading = species_key
    species_cards = "\n".join([sample_card(record) for _, record in species_rows.iterrows()])
    species_sections_html.append(f"""
    <section class="species-section">
      <h3>Top misclassified for species: {html.escape(str(heading))}</h3>
      {species_cards}
    </section>
    """)

per_species_inner = "\n".join(species_sections_html)
per_species_section = f"""
<section>
  <h2>Top Misclassified Samples Per Species</h2>
  <p>These sections show the hardest samples within each species, based on misclassification frequency and rate.</p>
  {per_species_inner}
</section>
"""

# Page order: global shortlist first, then the per-species breakdown.
sections = [global_section, per_species_section]

# --- Combine everything into a full HTML page ---
# `full_html` is a single f-string holding the whole page: an inline <style>
# sheet, a title and intro paragraph, then every entry of `sections`
# concatenated in order. Because this is an f-string, the CSS braces are
# doubled ({{ and }}) so they are emitted as literal { and }; the only
# interpolated expression is the "".join(sections) near the end.
full_html = f"""
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Tick Error Analysis Gallery</title>
  <style>
    body {{
      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
      margin: 20px;
      background-color: #111;
      color: #eee;
    }}
    h1, h2, h3 {{
      color: #ffcc66;
    }}
    .sample-card {{
      border: 1px solid #444;
      border-radius: 10px;
      padding: 12px;
      margin: 12px 0;
      background-color: #1b1b1b;
      display: flex;
      flex-direction: column;
      gap: 8px;
    }}
    .meta {{
      font-size: 0.9rem;
      line-height: 1.3;
    }}
    .images {{
      display: flex;
      flex-direction: row;
      gap: 16px;
      margin-top: 8px;
    }}
    .image-column {{
      flex: 1;
      text-align: center;
    }}
    .image-label {{
      margin-bottom: 4px;
      font-size: 0.85rem;
      color: #ccc;
    }}
    .tick-image {{
      max-width: 320px;
      max-height: 320px;
      border-radius: 8px;
      border: 1px solid #555;
      background-color: #000;
    }}
    .img-missing {{
      color: #888;
      font-size: 0.85rem;
      border: 1px dashed #555;
      border-radius: 8px;
      padding: 20px;
    }}
    section {{
      margin-bottom: 32px;
    }}
    .species-section {{
      margin-top: 24px;
      padding-top: 8px;
      border-top: 1px dashed #444;
    }}
  </style>
</head>
<body>
  <h1>Tick Misclassification Error Gallery</h1>
  <p>
    This gallery shows misclassified samples from the BioCLIP+SVM experiments.
    Each card shows dorsal and ventral images (if available), along with true label,
    most common wrong prediction, misclassification rate, and biological metadata
    such as sex, life stage, and attached status (fed vs not fed proxy).
  </p>
  {"".join(sections)}
</body>
</html>
"""

# ---------------------------------------------------------------------
# 4) Write HTML file
# ---------------------------------------------------------------------

# Persist the assembled page as UTF-8 text, then tell the user where it is.
with gallery_path.open("w", encoding="utf-8") as gallery_file:
    gallery_file.write(full_html)

for status_line in (
    f"\n✅ Error gallery written to: {gallery_path}",
    "Open this file in your browser (e.g., double-click it in Finder or `open` it from the terminal).",
):
    print(status_line)


[LOAD] Global top misclassified: 50 rows
[LOAD] Per-species top misclassified: 40 rows

✅ Error gallery written to: ../results/svm_bioclip/analysis/error_gallery.html
Open this file in your browser (e.g., double-click it in Finder or `open` it from the terminal).
