In [1]:
import pandas as pd

In [2]:
# Location of the CSV that feeds the rest of the notebook.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
data_path = "/home/vmadmin/intent/src/test/gemini-3-flash-preview.csv"
# Read the whole file into a DataFrame using pandas' default CSV parsing options.
df = pd.read_csv(data_path)

In [None]:
import ast
import json
import re

# --- Scalar fields pulled straight out of a trace blob -----------------------
# Each pattern exposes the wanted value as capture group 1.
RESPONSE_ID_RE = re.compile(r"response_id='([^']+)'")
CREATE_TIME_RE = re.compile(r"create_time=([^ ,)]+)")
MODEL_VERSION_RE = re.compile(r"model_version='([^']+)'")
FUNCTION_CALL_RE = re.compile(r"function_call=FunctionCall\([^)]*?name='([^']+)'", flags=re.DOTALL)

# --- Usage-metadata section --------------------------------------------------
# Literal markers that open and close the section inside a blob.
USAGE_START = "usage_metadata=GenerateContentResponseUsageMetadata("
USAGE_END = ") automatic_function_calling_history"
# Counter names looked up (as `<name>=<digits>`) between the two markers.
TOKEN_KEYS = ["candidates_token_count", "prompt_token_count", "thoughts_token_count", "total_token_count"]

# --- Policy status fallback --------------------------------------------------
# Case-insensitive `<key> : value` / `<key> = value` patterns, tried in this order.
POLICY_STATUS_PATTERNS = [
    re.compile(pattern_source, re.IGNORECASE)
    for pattern_source in (
        r"status\s*[:=]\s*\"?([A-Za-z0-9_\- ]+)\"?",
        r"code\s*[:=]\s*\"?([A-Za-z0-9_\- ]+)\"?",
        r"policyStatus\s*[:=]\s*\"?([A-Za-z0-9_\- ]+)\"?",
    )
]


def _maybe_match(pattern: re.Pattern, text: str):
    match = pattern.search(text)
    return match.group(1) if match else None


def parse_usage_block(blob: str) -> tuple[str | None, dict]:
    """Locate the usage-metadata section of a trace blob and read its token counters.

    Args:
        blob: Trace text that may contain a section delimited by ``USAGE_START``
            and ``USAGE_END``.

    Returns:
        A ``(usage_block, token_data)`` pair. ``usage_block`` is the text from
        ``USAGE_START`` up to (not including) ``USAGE_END`` with a closing ``)``
        appended, or ``None`` when either marker is missing. ``token_data`` maps
        every name in ``TOKEN_KEYS`` to its integer value, or to ``None`` when the
        counter is absent or not written as digits.
    """
    start = blob.find(USAGE_START)
    # Only look for the closing marker once the opening one is known to exist.
    end = blob.find(USAGE_END, start) if start != -1 else -1
    if start == -1 or end == -1:
        return None, {key: None for key in TOKEN_KEYS}

    # USAGE_END begins with the section's own ")", so re-append it to the slice.
    usage_block = blob[start:end] + ")"
    inner = blob[start + len(USAGE_START) : end]

    token_data = {}
    for key in TOKEN_KEYS:
        # The leading \b keeps a longer field name that merely ends with ``key``
        # (e.g. ``tool_use_prompt_token_count`` vs ``prompt_token_count``) from
        # being mistaken for the counter itself.
        token_match = re.search(rf"\b{re.escape(key)}=([0-9]+)", inner)
        token_data[key] = int(token_match.group(1)) if token_match else None
    return usage_block, token_data


def parse_blob(blob: str) -> dict:
    """Extract response metadata and token counters from one trace blob.

    Args:
        blob: Trace text to scan. Non-string input is tolerated.

    Returns:
        An empty dict when ``blob`` is not a string. Otherwise a dict with the keys
        ``response_id``, ``create_time``, ``model_version`` and
        ``function_call_name`` (each the first regex capture as a string, or
        ``None`` when not found) plus one entry per name in ``TOKEN_KEYS`` as
        returned by ``parse_usage_block``.
    """
    if not isinstance(blob, str):
        return {}
    # Only the counters are used here; the raw usage text is deliberately dropped.
    _, token_data = parse_usage_block(blob)
    result = {
        "response_id": _maybe_match(RESPONSE_ID_RE, blob),
        "create_time": _maybe_match(CREATE_TIME_RE, blob),
        "model_version": _maybe_match(MODEL_VERSION_RE, blob),
        "function_call_name": _maybe_match(FUNCTION_CALL_RE, blob),
    }
    result.update(token_data)
    return result


def extract_policy_status(policy_blob: str | None):
    if not isinstance(policy_blob, str):
        return None
    normalized = policy_blob.strip()
    if not normalized:
        return None
    if normalized[0] == normalized[-1] == '"':
        normalized = normalized[1:-1]
    normalized = normalized.replace('""', '"')

    parsed = None
    for parser in (ast.literal_eval, json.loads):
        try:
            parsed = parser(normalized)
            break
        except Exception:
            continue

    if isinstance(parsed, dict):
        for key in ("status", "code", "policyStatus"):
            value = parsed.get(key)
            if value not in (None, ""):
                return value

    for pattern in POLICY_STATUS_PATTERNS:
        match = pattern.search(normalized)
        if match:
            candidate = match.group(1).strip()
            if candidate.isdigit():
                return int(candidate)
            return candidate
    return None


def _iter_metadata_records(frame):
    """Yield one metadata dict per usable trace blob found in ``frame``.

    For every row, the ``intent_processing`` and ``type_definition`` cells are
    parsed with ``parse_blob``; blank cells and blobs that yield no truthy field
    are skipped. Each yielded dict is tagged with the row's intent, the source
    column name and the row's policy status.
    """
    for _, source_row in frame.iterrows():
        # The policy status is per row and shared by both blob columns.
        row_status = extract_policy_status(source_row.get("policy"))
        for kind in ("intent_processing", "type_definition"):
            raw = source_row.get(kind)
            if not (isinstance(raw, str) and raw.strip()):
                continue
            fields = parse_blob(raw)
            if any(fields.values()):
                fields["intent"] = source_row.get("intent")
                fields["response_kind"] = kind
                fields["policy_status"] = row_status
                yield fields


records = list(_iter_metadata_records(df))
metadata_df = pd.DataFrame.from_records(records)

# Known columns go first, in this order; anything else keeps its relative order after them.
preferred_order = [
    "intent",
    "response_kind",
    "policy_status",
    "response_id",
    "function_call_name",
    "create_time",
    "model_version",
]
preferred_order.extend(TOKEN_KEYS)

intersection = [name for name in preferred_order if name in metadata_df.columns]
remaining = [name for name in metadata_df.columns if name not in intersection]
metadata_df = metadata_df.reindex(columns=intersection + remaining)
metadata_df

Unnamed: 0,intent,response_kind,response_id,function_call_name,create_time,model_version,candidates_token_count,prompt_token_count,thoughts_token_count,total_token_count
0,Create a slice to support video journalists tr...,intent_processing,Clp2aZSFA4Lrz7IP-fi1mQs,create_session,,gemini-3-flash-preview,83,1538,2008,3629
1,Create a slice to support video journalists tr...,type_definition,DVp2acqeH_jUz7IPuYqw0Qs,,,gemini-3-flash-preview,3,282,451,736
2,Provision a slice for a university campus even...,intent_processing,XVp2aZLgI4zAqtsPrtrc4Q0,create_session,,gemini-3-flash-preview,183,1834,3174,5191
3,Provision a slice for a university campus even...,type_definition,YFp2acHdH5qqqtsPme272AY,,,gemini-3-flash-preview,3,278,323,604
4,Set up a slice for a shopping mall to improve ...,intent_processing,s1p2acHuGsiez7IPydjKsQ8,create_session,,gemini-3-flash-preview,284,2127,3773,6184
...,...,...,...,...,...,...,...,...,...,...
138,Create a slice for synchronized robotic weldin...,intent_processing,1YJ2af2_LtXQz7IP4uyo6A8,create_session,,gemini-3-flash-preview,2517,27770,3816,34103
139,Create a slice for synchronized robotic weldin...,type_definition,2IJ2acjsEsDVz7IPle7n6QQ,,,gemini-3-flash-preview,4,259,291,554
140,Provision a slice for smart greenhouse climate...,intent_processing,PIN2acb3KKugz7IPqYrG6AQ,create_session,,gemini-3-flash-preview,1681,28048,4742,34471
141,Establish a slice for public safety siren moni...,intent_processing,pIN2abP9EI7oz7IPgaKj6A4,create_session,,gemini-3-flash-preview,1744,28327,3161,33232


In [4]:
# Persist the extracted metadata table as CSV: header row included, DataFrame index omitted.
# NOTE(review): absolute, machine-specific output path — confirm the target directory exists.
metadata_df.to_csv("/home/vmadmin/intent/src/results/gemini-3-flash-preview.csv", index=False, header=True)