## TSV Schema
Things to include:
- index (int)
- image_path (str)
- question (str)
- options (JSON list of str)
- answer (str)
- (optional) hint, category, split

### What uBench Has
- image
- questions (contains the question, options, and answer)

In [8]:
%%capture 

import argparse
import json
import os
import re
import string

import pandas as pd
from datasets import load_dataset
from PIL import Image
from vlmeval.dataset.image_base import ImageBaseDataset

In [None]:
def _letters(n: int):
    assert 1 <= n <= 26, "This template supports up to 26 options."
    return list(string.ascii_uppercase[:n])  # ['A', ... 'Z'][:n]

def _parse_letter(text: str, num_opts: int) -> str:
    """Strict A..(A+num_opts-1) parser; returns 'INVALID' if no single-letter answer is found."""
    if not isinstance(text, str):
        return "INVALID"
    t = text.strip().upper()
    valid = set(_letters(num_opts))
    if len(t) == 1 and t in valid:
        return t
    # tolerate explanations like "Answer: H ..."
    for ch in reversed(t):
        if ch in valid:
            return ch
    return "INVALID"

class MicroBenchTSV(ImageBaseDataset):
    """
    Minimal TSV-backed MCQ dataset for VLMEvalKit.

    TSV columns (required):
      - index (int)
      - image_path (str)          # single path or multiple paths joined by ';'
      - question (str)
      - options (JSON list[str])  # e.g., ["None of the above", "Actin", ...]
      - answer (str)              # single letter "A".."Z"

    Optional:
      - hint (str)
      - category (str)
      - split (str)

    Notes:
    - If image_path contains multiple paths separated by ';', we will emit multiple image messages.
      Whitespace around each path is ignored.
    - Relative TSV and image paths are resolved against $LMUData (default: ~/LMUData).
    - We keep prompting very strict: model must output a single letter.
    """
    TYPE = 'MCQ'
    NAME = 'MICROBENCH_TSV'

    # data_file is a path RELATIVE to $LMUData (default: ~/LMUData)
    # Example: data_file='microbench/uBench_classification_10.tsv'
    def __init__(self, data_file: str, **kwargs):
        """Load the TSV into ``self.data`` and check the required columns.

        Args:
            data_file: Path to the TSV; absolute, or relative to $LMUData.
            **kwargs: Forwarded unchanged to ``ImageBaseDataset.__init__``.

        Raises:
            KeyError: If any required column is missing from the TSV.
        """
        # NOTE(review): assumes ImageBaseDataset.__init__ accepts ``data_file``
        # and exposes it as ``self.data_file`` -- confirm against the base class.
        super().__init__(data_file=data_file, **kwargs)
        tsv_path = self.data_file
        if not os.path.isabs(tsv_path):
            # The base class may already resolve the path; if it is still
            # relative, anchor it at the LMUData root ourselves.
            lmu_root = os.environ.get('LMUData', os.path.expanduser('~/LMUData'))
            tsv_path = os.path.join(lmu_root, self.data_file)
        self.data = pd.read_csv(tsv_path, sep='\t')

        required = ['index', 'image_path', 'question', 'options', 'answer']
        missing = [c for c in required if c not in self.data.columns]
        if missing:
            raise KeyError(f"TSV missing required columns: {missing}")

    def build_prompt(self, line):
        """Return VLMEvalKit multimodal messages: [{'type': 'image'|'text', 'value': ...}, ...]

        Args:
            line: Either an integer position into ``self.data`` or an already
                extracted row (anything indexable by column name).

        Returns:
            One ``image`` message per path in ``image_path`` followed by a
            single ``text`` message holding the question, lettered options and
            the answer-format instruction.
        """
        row = self.data.iloc[line] if isinstance(line, int) else line

        # Build image message(s). Strip each ';'-separated entry so that
        # "a.png; b.png" does not produce a path with a leading space.
        lmu_root = os.environ.get('LMUData', os.path.expanduser('~/LMUData'))
        stripped = (p.strip() for p in str(row['image_path']).split(';'))
        image_msgs = [
            # Keep absolute paths as-is; resolve relative ones against LMUData.
            dict(type='image', value=p if os.path.isabs(p) else os.path.join(lmu_root, p))
            for p in stripped
            if p
        ]

        # Options: stored as a JSON list in the TSV, but accept a real list too.
        opts = row['options']
        if isinstance(opts, str):
            opts = json.loads(opts)
        letters = _letters(len(opts))
        options_txt = "\n".join(f"{L}. {t}" for L, t in zip(letters, opts))

        # Optional hint for context (if present and not NaN)
        hint = str(row['hint']).strip() if 'hint' in self.data.columns and not pd.isna(row['hint']) else ""
        hint_txt = (hint + "\n") if hint else ""

        prompt = (
            f"{hint_txt}{row['question']}\n"
            f"Options:\n{options_txt}\n"
            f"Answer with a single letter ({letters[0]}–{letters[-1]}) only."
        )
        return [*image_msgs, dict(type='text', value=prompt)]

    def evaluate(self, eval_file, **kwargs):
        """
        Expect model predictions saved by VLMEvalKit under eval_file with at least:
        - prediction (raw model text output)
        - options (copied from TSV or rejoined) for dynamic parser range
        - answer (gold letter)
        We compute accuracy and invalid rate.

        Returns:
            ``{'accuracy': [float], 'invalid_rate': [float]}``. Accuracy is
            computed over all rows, so unparseable predictions count as wrong.
        """
        # NOTE(review): assumes the base class provides ``self.load`` for
        # reading the predictions file -- confirm against ImageBaseDataset.
        df = self.load(eval_file)

        # Make sure we can get num options per-row; fall back to TSV if missing
        if 'options' not in df.columns:
            # join with original TSV by 'index'
            df = df.merge(self.data[['index', 'options']], on='index', how='left', suffixes=('', '_tsv'))

        def _num_opts(opt_field):
            # Best-effort: malformed JSON (ValueError) or a non-sized value such
            # as NaN (TypeError) falls back to a 4-option question.
            try:
                return len(json.loads(opt_field)) if isinstance(opt_field, str) else len(opt_field)
            except (TypeError, ValueError):
                return 4

        num_opts = df['options'].map(_num_opts)

        pred = [
            _parse_letter(pred_text, n)
            for pred_text, n in zip(df['prediction'], num_opts)
        ]
        gold = [str(a).strip().upper() for a in df['answer']]

        import numpy as np
        pred = np.array(pred, dtype=object)
        gold = np.array(gold, dtype=object)
        valid_mask = pred != 'INVALID'

        acc = float((pred == gold).mean())
        invalid_rate = float((~valid_mask).mean())

        return {
            'accuracy': [acc],
            'invalid_rate': [invalid_rate]
        }