In [None]:
import os
import json
import re
import pandas as pd
from datetime import datetime
from typing import Dict, Any, List, Optional

# Substring that a trace's `metadata.model_id` must contain to be exported;
# it is interpolated into the Opik trace search filter string below.
TARGET_MODEL_ID_SUBSTRING = "gemma3:4b"

# The Opik SDK is a hard requirement for this script: when it cannot be
# imported, print an install hint and exit.
try:
    import opik
    from opik import Opik
except ImportError:
    print("Opik SDK not found. Please install it: pip install opik")
    exit()

def parse_model_info(model_name: str, tags: Optional[List[str]]) -> Dict[str, str]:
    """
    Parses model ID, display name, size, and question type from a name and tags.
    Adjust this function based on your actual model naming conventions.

    Args:
        model_name: Name to parse. It is returned unchanged as both
            ``model_id`` and ``model_display_name``.
        tags: Tags attached to the run. ``None`` is treated as "no tags".

    Returns:
        A dict with the keys ``model_id``, ``model_display_name``,
        ``model_size`` and ``question_type``. ``model_size`` and
        ``question_type`` fall back to ``"N/A"`` when they cannot be derived.
    """
    # A run without tags may hand us None; membership tests on None would
    # raise TypeError, so normalise to an empty list first.
    tags = tags or []

    # The size is the numeric part of a ":<number>b" suffix, e.g. ":4b" -> "4B".
    match = re.search(r':([\d\.]+)b', model_name)
    model_size = match.group(1) + "B" if match else "N/A"

    # "multiple_choice" takes precedence when both tags are present.
    if "multiple_choice" in tags:
        question_type = "multiple_choice"
    elif "open_answer" in tags:
        question_type = "open_answer"
    else:
        question_type = "N/A"

    return {
        "model_id": model_name,
        "model_display_name": model_name,
        "model_size": model_size,
        "question_type": question_type,
    }

def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def get_opik_flat_data_for_csv(
    project_name: str = "LLMmark_determinism",
    max_results: int = 1000,
) -> List[Dict[str, Any]]:
    """
    Fetches detailed trace and span data from Opik and flattens it for CSV export.

    Only traces whose ``metadata.model_id`` contains
    ``TARGET_MODEL_ID_SUBSTRING`` are requested. Each dictionary in the
    returned list represents a single span merged with its parent trace's
    metadata; traces without spans are skipped.

    Args:
        project_name: Opik project to read traces and spans from.
        max_results: Upper bound passed to the trace search.

    Returns:
        One flat dict per span, or an empty list when no trace matched.
    """
    # Trace metadata keys that already have a dedicated column (or are
    # intentionally dropped) and must not be repeated as "trace_meta_*".
    reserved_meta_keys = frozenset({
        "language", "prompting_tech", "num_runs_per_question",
        "model_source", "temperature", "top_p", "exercise",
        "prompt_tech", "question_type", "comments", "model_id",
        "model_display_name", "top-p", "run_name",
    })

    client = Opik()
    flat_data: List[Dict[str, Any]] = []

    print(f"Fetching traces from project: {project_name}...")

    traces = client.search_traces(
        project_name=project_name,
        filter_string=f'metadata.model_id contains "{TARGET_MODEL_ID_SUBSTRING}"',
        max_results=max_results,
    )

    if not traces:
        print(f"No traces found in project '{project_name}'. Please check the project name and your Opik configuration.")
        return []

    total_traces = len(traces)
    for i, trace in enumerate(traces, start=1):
        print(f"Processing trace {i}/{total_traces}: {trace.name} ({trace.id})")

        trace_content = client.get_trace_content(trace.id)
        spans = client.search_spans(project_name=project_name, trace_id=trace.id)

        if not spans:
            print(f"  No spans found for trace {trace.id}. Skipping.")
            continue

        # Tags and metadata may be absent on a trace; normalise them so the
        # lookups below cannot fail on None.
        tags = trace.tags or []
        trace_metadata = _as_dict(trace_content.metadata)

        model_info = parse_model_info(trace.name, tags)

        model_source = "N/A"
        if "local" in tags:
            model_source = "local"
        elif "online" in tags:
            model_source = "online"

        trace_flat_metadata = {
            "trace_id": trace.id,
            "run_name": trace.name,
            "model_display_name": model_info["model_display_name"],
            "language": trace_metadata.get("language", "en"),
            "prompting_tech": trace_metadata.get("prompting_tech", "N/A"),
            "num_runs_per_question": trace_metadata.get("num_runs_per_question", 1),
            "model_source": model_source,
            "temperature": trace_metadata.get("temperature", "N/A"),
            # NOTE(review): top_p is hard-coded rather than read from the
            # trace metadata ("top_p" / "top-p" are dropped below) - confirm
            # this matches how the runs were actually configured.
            "top_p": 0.1,
            "exercise": trace_metadata.get("exercise", "N/A"),
            "question_type": model_info["question_type"],
            **{
                f"trace_meta_{key}": value
                for key, value in trace_metadata.items()
                if key not in reserved_meta_keys
            },
        }

        # Process each span and combine with trace-level metadata
        for span in spans:
            output_is_dict = isinstance(span.output, dict)
            span_output = _as_dict(span.output)
            span_metadata = _as_dict(span.metadata)

            if isinstance(span.input, dict):
                span_input_question = span.input.get("question", str(span.input))
            else:
                span_input_question = str(span.input)

            # Non-dict outputs are exported as their string form for both
            # the parsed and the raw answer columns.
            if output_is_dict:
                span_output_answer = span_output.get("answer", str(span.output))
            else:
                span_output_answer = str(span.output)
            span_output_raw_answer = span_output.get("raw_answer", span_output_answer)

            span_data_row = {
                "span_id": span.id,
                "span_name": span.name,
                # NOTE(review): the source key is labelled in seconds while
                # the column name says ms; the value is passed through
                # unconverted to keep the existing CSV schema.
                "response_time_ms": span_output.get("response_time (s)", "N/A"),
                "span_input_question": span_input_question,
                "span_output_answer": span_output_answer,
                "span_output_raw_answer": span_output_raw_answer,
                "span_correct_answer": span_metadata.get("correct_answer", "PLACEHOLDER_CORRECT_ANSWER"),
                "question_file": span_metadata.get("question_file", "N/A"),
            }

            # Trace-level keys win on a name clash, as they are merged last.
            flat_data.append({**span_data_row, **trace_flat_metadata})

    return flat_data

# Export driver: pull the flattened span rows for one Opik project and write
# them to a CSV file, reporting a short summary on stdout.
my_project_name = "LLMmark_determinism"

all_flat_data = list(get_opik_flat_data_for_csv(project_name=my_project_name))

if not all_flat_data:
    print("\nNo data extracted to create a CSV file.")
else:
    df = pd.DataFrame(all_flat_data)

    # Destination is resolved relative to the current working directory; the
    # directory is created on demand.
    output_filename = "opik_determinism_data.csv"
    output_dir = '../../../data/determinism'
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_filename)

    df.to_csv(output_path, index=False, encoding='utf-8')

    print(f"\nSuccessfully extracted data and saved to {output_path}")
    print("DataFrame head:")
    print(df.head())
    print(f"\nDataFrame shape: {df.shape}")

Fetching traces from project: LLMmark_determinism...
Processing trace 1/22: run_022_gemma3:4b (0197ac8a-3214-7e13-8f27-903669ebe657)
Processing trace 2/22: run_021_gemma3:4b (0197ac6b-ee77-7c4d-9b00-e26903d0972f)
Processing trace 3/22: run_020_gemma3:4b (0197ac13-5ead-7983-89c4-848884a0a638)
Processing trace 4/22: run_019_gemma3:4b (0197abf5-33af-73f1-be94-745b090979ec)
Processing trace 5/22: run_018_gemma3:4b (0197ab9e-bb3f-70b2-8d80-c5d54967ff91)
Processing trace 6/22: run_017_gemma3:4b (0197ab7e-bc2e-7ae9-b8ed-c3dea5c573bb)
Processing trace 7/22: run_016_gemma3:4b (0197ab28-87c0-78af-880d-0da76d879b38)
Processing trace 8/22: run_015_gemma3:4b (0197ab03-26c6-7f62-ae59-21e06ba7501d)
Processing trace 9/22: run_014_gemma3:4b (0197aaa2-4a01-7eef-a7de-f1089b297d2d)
Processing trace 10/22: run_013_gemma3:4b (0197aa8d-f3a0-74c1-8297-5f56524dd6eb)
Processing trace 11/22: run_012_gemma3:4b (0197aa10-6b4b-75f9-b254-819978f0d479)
Processing trace 12/22: run_011_gemma3:4b (0197aa0f-7476-7e74-8c8