In [8]:
from smolagents import CodeAgent, LiteLLMModel, tool
import csv, statistics
from typing import List, Dict, Any

In [9]:
model = LiteLLMModel(
    model_id="ollama/qwen2.5-coder:7b",
    api_base="http://localhost:11434",
    system_prompt="You are a terse Python coding assistant. Use tools precisely."
)

In [10]:
@tool
def csv_feature_query(
    path: str,
    feature: str,
    mode: str = "stats",
    k: int = 1,
    sample: int = None,
    step_filter: str = "measurement"
) -> dict:
    """
    Query OCP/CA/CV measurements from a CSV log.

    Args:
        path (str): Full path to the CSV file.
        feature (str): One of 'OCP', 'CA', 'CV' (case-insensitive).
        mode (str): One of:
            - 'stats'  -> count, min, max, mean, median
            - 'top'    -> top-k by value (desc)
            - 'bottom' -> bottom-k by value (asc)
            - 'median' -> median value only
            - 'value'  -> value for a given sample (requires `sample`)
        k (int): How many rows to return for 'top'/'bottom'. Default 1.
        sample (int): Sample id for 'value' mode.
        step_filter (str): Only consider rows where step==this (e.g., 'measurement').

    Returns:
        dict: Structured result depending on mode.
    """
    feat = feature.strip().upper()

    # Load rows
    rows: List[Dict[str, Any]] = []
    with open(path, newline="", encoding="utf-8") as f:
        rdr = csv.DictReader(f)
        for r in rdr:
            if step_filter and r.get("step") != step_filter:
                continue
            rows.append(r)

    if not rows:
        return {"error": "No rows matched step_filter.", "count": 0}

    # Detect wide vs long schema
    wide_map = {"OCP": "value_ocp", "CA": "value_ca", "CV": "value_cv"}
    long_value_col = "value" if "value" in rows[0] and "feature" in rows[0] else None

    if feat not in {"OCP","CA","CV"}:
        raise ValueError("feature must be OCP, CA, or CV")

    # Extract (sample, value) pairs
    vals: List[Dict[str, Any]] = []
    for r in rows:
        try:
            if long_value_col:
                if (r.get("feature") or "").upper() != feat:
                    continue
                v_raw = r.get(long_value_col)
            else:
                v_raw = r.get(wide_map[feat])
            if v_raw is None or v_raw == "":
                continue
            v = float(v_raw)
            sid = r.get("sample")
            sid = int(sid) if sid not in (None, "",) else None
            vals.append({"sample": sid, "value": v, "row": r})
        except (ValueError, TypeError):
            continue

    if not vals:
        return {"feature": feat, "count": 0, "message": "No numeric values."}

    # Helpers
    values_only = [x["value"] for x in vals]
    def topk(n):  return sorted(vals, key=lambda d: d["value"], reverse=True)[:max(0,n)]
    def botk(n):  return sorted(vals, key=lambda d: d["value"])[:max(0,n)]

    # Modes
    if mode == "stats":
        return {
            "feature": feat,
            "count": len(values_only),
            "min": min(values_only),
            "max": max(values_only),
            "mean": sum(values_only)/len(values_only),
            "median": statistics.median(values_only),
        }

    if mode == "top":
        out = [{"sample": d["sample"], "value": d["value"]} for d in topk(k)]
        return {"feature": feat, "k": k, "results": out}

    if mode == "bottom":
        out = [{"sample": d["sample"], "value": d["value"]} for d in botk(k)]
        return {"feature": feat, "k": k, "results": out}

    if mode == "median":
        return {"feature": feat, "median": statistics.median(values_only)}

    if mode == "value":
        if sample is None:
            raise ValueError("`sample` is required for mode='value'.")
        match = [d for d in vals if d["sample"] == int(sample)]
        if not match:
            return {"feature": feat, "sample": sample, "found": False}
        # If multiple, return the last
        d = match[-1]
        return {"feature": feat, "sample": sample, "value": d["value"], "found": True}

    raise ValueError("Invalid mode. Use 'stats'|'top'|'bottom'|'median'|'value'.")


In [11]:

agent = CodeAgent(
    model=model,
    tools=[csv_feature_stats],
    add_base_tools=False,
)

In [16]:
path = "C:\Users\chekim\Workspace\llm-driven-robotics\experiment_logs\20250924104104.csv"

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (1957205916.py, line 1)

In [None]:
response = agent.run(
    r"Call csv_feature_query with the given path, feature=OCP, mode=top, k=2"
)
print(response)