### Extraindo metadados das respostas da OpenAI

In [1]:
import json
import pandas as pd
import re
from pathlib import Path

# Directory that is searched for the openai*.csv input files.
DATA_DIR = Path("/home/vmadmin/intent/src/test")
# File that is used in preference to any other openai*.csv in DATA_DIR.
PREFERRED_FILE = DATA_DIR.joinpath("openai-gpt-5.csv")


def resolve_data_path():
    """Choose the CSV file that the notebook should load.

    ``PREFERRED_FILE`` is returned when it exists. Otherwise the first entry,
    in sorted order, of the ``openai*.csv`` files found in ``DATA_DIR`` is
    returned.

    Raises:
        FileNotFoundError: if the preferred file is missing and no
            ``openai*.csv`` file exists in ``DATA_DIR``.
    """
    if not PREFERRED_FILE.exists():
        matches = sorted(DATA_DIR.glob("openai*.csv"))
        if matches:
            return matches[0]
        raise FileNotFoundError("Nenhum arquivo openai*.csv encontrado na pasta src/test.")
    return PREFERRED_FILE


def load_dataset():
    """Read the resolved CSV file and tag every row with its origin.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with a ``data_source``
        column holding the name of the file that was read.
    """
    source_path = resolve_data_path()
    return pd.read_csv(source_path).assign(data_source=source_path.name)


def _literal_value(raw: str, cast=None):
    if raw is None:
        return None
    raw = raw.strip()
    if raw in {"None", "null"}:
        return None
    return cast(raw) if cast else raw


def parse_reasoning_fields(block: str) -> dict:
    if not block:
        return {}
    return {
        "reasoning_effort": _literal_value(
            re.search(r"effort='([^']*)'", block, re.S).group(1) if re.search(r"effort='([^']*)'", block, re.S) else None
        ),
        "reasoning_generate_summary": _literal_value(
            re.search(r"generate_summary=([^,\)]+)", block, re.S).group(1) if re.search(r"generate_summary=([^,\)]+)", block, re.S) else None
        ),
        "reasoning_summary": _literal_value(
            re.search(r"summary=([^,\)]+)", block, re.S).group(1) if re.search(r"summary=([^,\)]+)", block, re.S) else None
        ),
    }


def parse_usage_fields(block: str) -> dict:
    """Read the token counters from a ``ResponseUsage(...)`` repr string.

    Args:
        block: Text of the usage repr; may be ``None`` or empty.

    Returns:
        ``{}`` when ``block`` is falsy. Otherwise a dict with the five
        ``usage_*`` keys, each an ``int`` or ``None`` when the counter is not
        found in ``block``.
    """
    if not block:
        return {}

    def _count(pattern: str):
        # First match of the counter, converted to int; None when absent.
        hit = re.search(pattern, block, re.S)
        return int(hit.group(1)) if hit else None

    return {
        "usage_input_tokens": _count(r"input_tokens=([0-9]+)"),
        "usage_output_tokens": _count(r"output_tokens=([0-9]+)"),
        "usage_total_tokens": _count(r"total_tokens=([0-9]+)"),
        "usage_input_tokens_cached": _count(
            r"input_tokens_details=InputTokensDetails\(cached_tokens=([0-9]+)\)"
        ),
        "usage_output_tokens_reasoning": _count(
            r"output_tokens_details=OutputTokensDetails\(reasoning_tokens=([0-9]+)\)"
        ),
    }


def parse_policy_status(policy_column_value: str) -> str:
    """Return the ``status`` entry of a JSON-encoded policy cell.

    Args:
        policy_column_value: Raw cell value; expected to be a JSON object
            serialised as a string.

    Returns:
        The value stored under ``"status"``, or ``None`` when the cell is not
        a string, is not valid JSON, is valid JSON but not an object, or has
        no ``"status"`` key.
    """
    if not isinstance(policy_column_value, str):
        return None
    try:
        payload = json.loads(policy_column_value)
    except json.JSONDecodeError:
        return None
    # Valid JSON can also be a list, number, string or null; those have no
    # .get() and would otherwise raise AttributeError.
    if not isinstance(payload, dict):
        return None
    return payload.get("status")


def parse_function_call_name(payload: str) -> str:
    """Find the name of the first function tool call in a response repr.

    Args:
        payload: Text of the response repr; non-string values are accepted.

    Returns:
        The text captured from the first ``name='...'`` that follows
        ``ResponseFunctionToolCall(``, or ``None`` when ``payload`` is not a
        string or contains no such call.
    """
    if isinstance(payload, str):
        found = re.search(r"ResponseFunctionToolCall\(.*?name='([^']+)'", payload, re.S)
        if found:
            return found.group(1)
    return None


def parse_response(payload: str):
    """Turn the textual repr of a response object into a flat dict.

    Args:
        payload: Cell value; only strings that start with ``Response`` are
            parsed.

    Returns:
        ``None`` for any other input. Otherwise a dict with ``response_id``,
        ``created_at``, ``completed_at``, ``model`` and
        ``function_call_name``, followed by whatever
        ``parse_reasoning_fields`` and ``parse_usage_fields`` return for the
        reasoning and usage sub-blocks.
    """
    if not (isinstance(payload, str) and payload.startswith("Response")):
        return None

    def _grab(pattern: str, cast=None):
        # First capture group of the pattern, cast or stripped; None if absent.
        found = re.search(pattern, payload, re.S)
        if found is None:
            return None
        captured = found.group(1)
        if cast:
            return cast(captured)
        return captured.strip()

    return {
        "response_id": _grab(r"Response\(id='([^']+)'"),
        "created_at": _grab(r"created_at=([0-9.]+)", float),
        "completed_at": _grab(r"completed_at=([0-9.]+)", float),
        "model": _grab(r"model='([^']+)'"),
        "function_call_name": parse_function_call_name(payload),
        # The lookaheads cut each sub-block at the field that follows it.
        **parse_reasoning_fields(_grab(r"reasoning=(Reasoning\(.*?\))(?=,\s+safety)")),
        **parse_usage_fields(_grab(r"usage=(ResponseUsage\(.*?\))(?=,\s+user)")),
    }


# Columns of the final table, in display order.
OUTPUT_COLUMNS = [
    "intent",
    "response_kind",
    "response_id",
    "function_call_name",
    "model",
    "created_at",
    "completed_at",
    "reasoning_effort",
    "reasoning_generate_summary",
    "reasoning_summary",
    "usage_input_tokens",
    "usage_input_tokens_cached",
    "usage_output_tokens",
    "usage_output_tokens_reasoning",
    "usage_total_tokens",
    "policy_status",
]

raw_df = load_dataset()
response_columns = ["intent_processing", "type_definition"]

# One record per (row, response column) whose cell could be parsed.
records = []
for column in response_columns:
    for intent, payload, policy in zip(raw_df["intent"], raw_df[column], raw_df["policy"]):
        parsed = parse_response(payload)
        if parsed:
            parsed.update(
                {
                    "intent": intent,
                    "response_kind": column,
                    "policy_status": parse_policy_status(policy),
                }
            )
            records.append(parsed)

# reindex guarantees that every expected column exists, even when no payload
# could be parsed or none of them carried a reasoning/usage block; plain
# column selection would raise KeyError in those cases.
responses_df = pd.DataFrame(records).reindex(columns=OUTPUT_COLUMNS)
for col in ["created_at", "completed_at"]:
    # The parsed timestamps are epoch seconds.
    responses_df[col] = pd.to_datetime(responses_df[col], unit="s")

responses_df = responses_df.sort_values(["intent", "response_kind"]).reset_index(drop=True)
responses_df

Unnamed: 0,intent,response_kind,response_id,function_call_name,model,created_at,completed_at,reasoning_effort,reasoning_generate_summary,reasoning_summary,usage_input_tokens,usage_input_tokens_cached,usage_output_tokens,usage_output_tokens_reasoning,usage_total_tokens,policy_status
0,Create a slice for a harbor area supporting lo...,intent_processing,resp_05acdaa3867988e200697068a4e14481938c0c58f...,create_session,gpt-5-2025-08-07,2026-01-21 05:48:20,2026-01-21 05:48:31,medium,,,7934,3840,526,448,8460,Policy created successfully
1,Create a slice for a harbor area supporting lo...,type_definition,resp_0d12fa960388e7fe00697068aff8148193b6fe216...,,gpt-5-2025-08-07,2026-01-21 05:48:32,2026-01-21 05:48:38,medium,,,259,0,265,256,524,Policy created successfully
2,Create a slice for a mobile esports tournament...,intent_processing,resp_0b8539556b89149d0069706c4e8a1c819380b3d37...,create_session,gpt-5-2025-08-07,2026-01-21 06:03:58,2026-01-21 06:04:08,medium,,,32552,14848,539,448,33091,Policy created successfully
3,Create a slice for a mobile esports tournament...,type_definition,resp_0aff91df2c19490a0069706c5935d08196910d35e...,,gpt-5-2025-08-07,2026-01-21 06:04:09,2026-01-21 06:04:12,medium,,,262,0,137,128,399,Policy created successfully
4,Create a slice for a retail analytics platform...,intent_processing,resp_08561c0ecf355a2c00697069024dfc81939e5b890...,create_session,gpt-5-2025-08-07,2026-01-21 05:49:54,2026-01-21 05:50:06,medium,,,10015,4608,731,640,10746,Policy created successfully
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Provision a slice for urban telepresence kiosk...,type_definition,resp_0d348749e8f1f0b30069706d81d9b881938f9ddd9...,,gpt-5-2025-08-07,2026-01-21 06:09:05,2026-01-21 06:09:08,medium,,,257,0,137,128,394,Policy created successfully
196,Set up a slice for a shopping mall to improve ...,intent_processing,resp_0ff2d668f360ea6d00697067b4db248195909ac3c...,create_session,gpt-5-2025-08-07,2026-01-21 05:44:20,2026-01-21 05:44:36,medium,,,2081,1152,923,832,3004,Policy created successfully
197,Set up a slice for a shopping mall to improve ...,type_definition,resp_0fca1f422c34023000697067c507a881949196ec1...,,gpt-5-2025-08-07,2026-01-21 05:44:37,2026-01-21 05:44:41,medium,,,272,0,201,192,473,Policy created successfully
198,Set up a slice for remote training sessions us...,intent_processing,resp_058435f5ba2548d700697068916d408195bc942ef...,create_session,gpt-5-2025-08-07,2026-01-21 05:48:01,2026-01-21 05:48:13,medium,,,7396,3584,667,576,8063,Policy created successfully


In [None]:
# Load the Anthropic CSV that lives in the same DATA_DIR as the OpenAI files.
anthropic_path = DATA_DIR.joinpath("anthropic-sonnet-4-5.csv")

if anthropic_path.exists():
    anthropic_df = pd.read_csv(anthropic_path)
else:
    raise FileNotFoundError(f"Arquivo não encontrado: {anthropic_path}")

anthropic_df

In [None]:
# Write the parsed table to disk. The results directory is created first
# because to_csv fails when the target directory does not exist.
results_path = Path("/home/vmadmin/intent/src/results/gpt-5.csv")
results_path.parent.mkdir(parents=True, exist_ok=True)
responses_df.to_csv(results_path, index=False, header=True)