In [None]:
import json
import re

def parse_results_file(file_path):
    """
    Print the workplace tasks and the generated query for each entry of a JSONL results file.

    Every non-blank line is expected to hold a JSON object with a 'messages' list of
    dicts carrying 'role' and 'content'. For each entry that has messages:

    * the task bullets are taken from the first 'user' message (the '-' lines that
      follow the "Consider workplace tasks that ... performs such as (but not
      limited to):" phrase, minus known template instructions), and
    * the query is the text inside <question>...</question> in the first
      'assistant' message.

    Malformed input is handled per line so that one bad entry never stops the run:
    blank lines are ignored, invalid JSON and non-object lines are reported and
    skipped, and missing or non-text message content is treated as empty.

    Args:
        file_path: Path to the .jsonl results file.

    Returns:
        None. Everything is written to stdout. A missing file or any other
        unexpected error is reported as a printed message rather than raised.
    """
    print(f"Processing: {file_path}\n" + "="*80 + "\n")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                # Blank lines (e.g. a trailing newline) are not entries.
                if not line.strip():
                    continue

                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    print(f"Skipping line {i+1}: Invalid JSON")
                    continue

                if not isinstance(entry, dict):
                    print(f"Skipping line {i+1}: Expected a JSON object")
                    continue

                messages = entry.get('messages') or []
                if not messages:
                    continue

                tasks_text = _extract_tasks(_first_message_content(messages, 'user'))
                query_text = _extract_query(_first_message_content(messages, 'assistant'))

                # Entry numbers are 1-based line numbers in the file.
                print(f"--- Entry {i+1} ---")
                print(f"📋 TASKS PROVIDED:\n{tasks_text}\n")
                print(f"❓ GENERATED QUERY:\n{query_text}")
                print("-" * 40 + "\n")

    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except Exception as e:
        # Best-effort notebook helper: report I/O or decoding problems instead of raising.
        print(f"An error occurred: {e}")


# Phrase that introduces the task list inside the user prompt.
_TASKS_START_RE = re.compile(
    r"Consider workplace tasks that .*? performs such as \(but not limited to\):",
    re.IGNORECASE,
)

# The task list ends at the next line starting with '#' or at the next blank line.
_TASKS_END_RE = re.compile(r"(?:\n\s*#|\n\s*\n)")

# Body of the <question> element in the assistant response.
_QUESTION_RE = re.compile(r"<question>(.*?)</question>", re.IGNORECASE | re.DOTALL)

# Static template instructions that must never be reported as tasks.
_IGNORED_TASK_PHRASES = (
    "The scenario should be detailed and authentic",
    "representing genuine use cases",
    "Think about how different tools",
)


def _first_message_content(messages, role):
    """Return the text content of the first message with `role`, or "" if unavailable."""
    for message in messages:
        if isinstance(message, dict) and message.get('role') == role:
            content = message.get('content')
            # Content may be None (tool-call turns) or structured; only text is searchable.
            return content if isinstance(content, str) else ""
    return ""


def _extract_tasks(user_content):
    """Return the newline-joined '-' task bullets from a user prompt, or "No tasks found"."""
    start_match = _TASKS_START_RE.search(user_content)
    if not start_match:
        return "No tasks found"

    # Drop leading whitespace so a blank line between the intro phrase and the
    # first bullet is not mistaken for the end of the list.
    content_after = user_content[start_match.end():].lstrip()
    if content_after.startswith('#'):
        # The next section header follows immediately: there is no task list.
        return "No tasks found"

    end_match = _TASKS_END_RE.search(content_after)
    raw_tasks = content_after[:end_match.start()] if end_match else content_after

    # Keep only bullet lines that are not static template instructions.
    cleaned_lines = [
        task_line
        for task_line in (raw_line.strip() for raw_line in raw_tasks.split('\n'))
        if task_line.startswith('-')
        and not any(phrase in task_line for phrase in _IGNORED_TASK_PHRASES)
    ]
    return "\n".join(cleaned_lines) if cleaned_lines else "No tasks found"


def _extract_query(assistant_content):
    """Return the stripped <question> body of an assistant response, or "No query found"."""
    query_match = _QUESTION_RE.search(assistant_content)
    return query_match.group(1).strip() if query_match else "No query found"

# ==========================================
# USAGE
# ==========================================
# Relative path of the JSONL results file to summarise; update it to point at
# your own results file.
# NOTE(review): the saved output of this cell shows a "../data/" prefix that this
# path does not have, so the output looks like it came from an earlier run —
# confirm the path and re-run the cell.
file_path = "ToolUse_onet_10_2tool_1769389186/ToolUse_s2q_onet_10_2tool_1769389186_kimi-k2-thinking_results.jsonl" 

# Prints the extracted tasks and generated query for each entry to the cell output.
parse_results_file(file_path)

Processing: ../data/ToolUse_onet_10_2tool_1769389186/ToolUse_s2q_onet_10_2tool_1769389186_kimi-k2-thinking_results.jsonl

--- Entry 1 ---
üìã TASKS PROVIDED:
- Search electronic sources, such as databases or repositories, or manual sources for information.

‚ùì GENERATED QUERY:
I'm a Document Management Specialist handling an urgent benefits inquiry from a remote employee in the EMEA region about Q1 2024 enrollment deadlines. Our organization maintains separate documentation systems for HR, Legal, and Compliance departments, but not all contain current benefits information—some have outdated policy documents. I need to first identify which repositories are available and determine which ones are relevant and current, then search specifically for 'remote employee benefits enrollment EMEA Q1 2024' across only those identified sources. How can I efficiently accomplish this two-step process to ensure I'm providing accurate, up-to-date information?
----------------------------------------

In [2]:
import json
import os

def print_trajectories(file_path, num_entries=5):
    """
    Read a JSONL file and print the agent trajectories in a readable format.

    Each non-blank line is expected to be a JSON object with an optional
    'metadata' dict ('row_id', 'prompt_id') and a 'messages' list. Messages are
    printed in order, grouped by role: system/user text, assistant reasoning,
    assistant tool calls (both the 'function_call' and the 'tool_calls' layout),
    assistant text, and tool/function outputs (pretty-printed when they are JSON).

    Blank lines are ignored and invalid JSON lines are reported and skipped, so a
    single bad line does not stop the listing.

    Args:
        file_path (str): Path to the .jsonl file.
        num_entries (int | None): Number of leading lines of the file to process.
            None processes the whole file; 0 processes nothing. Skipped lines
            still count towards this limit.

    Returns:
        None. Everything is written to stdout; a missing file is reported as a
        printed message.
    """
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return

    print(f"📂 Reading from: {file_path}\n")

    with open(file_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            # Compare against None explicitly so that 0 means "none", not "all".
            if num_entries is not None and i >= num_entries:
                break

            if not line.strip():
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Skipping line {i+1}: Invalid JSON")
                continue

            if not isinstance(data, dict):
                print(f"Skipping line {i+1}: Expected a JSON object")
                continue

            metadata = data.get('metadata')
            if not isinstance(metadata, dict):
                metadata = {}
            prompt_id = metadata.get('prompt_id', 'Unknown ID')
            # Fall back to the zero-based line index when no row id was recorded.
            row_id = metadata.get('row_id', i)

            print(f"{'='*80}")
            print(f"🆔 ROW: {row_id} | PROMPT ID: {prompt_id}")
            print(f"{'='*80}")

            for msg in data.get('messages') or []:
                if isinstance(msg, dict):
                    _print_message(msg)

            print("\n" + "-"*80 + "\n")


def _print_tool_call(func):
    """Print the name and arguments of one assistant tool call."""
    print("\n🛠️  [ASSISTANT - CALLING TOOL]:")
    print(f"   Name: {func.get('name')}")
    print(f"   Args: {func.get('arguments')}")


def _print_message(msg):
    """Print a single chat message, formatted according to its role."""
    role = str(msg.get('role') or 'unknown').upper()
    content = msg.get('content', '')

    # --- 1. SYSTEM / USER ---
    if role in ('SYSTEM', 'USER'):
        print(f"\n👤 [{role}]:")
        print(f"   {content}")

    # --- 2. ASSISTANT (reasoning, tool calls, text) ---
    elif role == 'ASSISTANT':
        reasoning = msg.get('reasoning_content')
        if reasoning:
            print("\n🧠 [ASSISTANT - THOUGHT]:")
            print(f"   {reasoning}")

        # Either key may be present with a null value, so test the value, not the key.
        function_call = msg.get('function_call')
        tool_calls = msg.get('tool_calls')
        if isinstance(function_call, dict):
            # Old single-call layout.
            _print_tool_call(function_call)
        elif isinstance(tool_calls, list):
            # New multi-call layout: each item wraps the call under 'function'.
            for tool in tool_calls:
                func = tool.get('function') if isinstance(tool, dict) else None
                _print_tool_call(func if isinstance(func, dict) else {})

        if content:
            print("\n🤖 [ASSISTANT]:")
            print(f"   {content}")

    # --- 3. TOOL OUTPUTS (function results) ---
    elif role in ('FUNCTION', 'TOOL'):
        name = msg.get('name', 'Unknown Tool')
        print(f"\n📦 [TOOL OUTPUT - {name}]:")
        try:
            # Pretty-print when the output is a JSON document.
            print(json.dumps(json.loads(content), indent=2))
        except (TypeError, ValueError):
            # Not JSON text (or not text at all): show it as-is.
            print(f"   {content}")

# --- USAGE ---
# Relative path of the JSONL trajectory file to display; replace it with your own file.
file_name = "ToolUse_onet_10_2tool_1769389186/processed/quality_checked/ToolUse_s2q_onet_10_2tool_1769389186_kimi-k2-thinking_3sanitized_qced_kimi-k2-thinking_2prepared_kimi-k2-thinking_high_pfc_results.jsonl" 

# Limit the listing to the first three entries to keep the cell output short.
print_trajectories(file_name, num_entries=3)

üìÇ Reading from: ToolUse_onet_10_2tool_1769389186/processed/quality_checked/ToolUse_s2q_onet_10_2tool_1769389186_kimi-k2-thinking_3sanitized_qced_kimi-k2-thinking_2prepared_kimi-k2-thinking_high_pfc_results.jsonl

üÜî ROW: 0 | PROMPT ID: 00000000

üë§ [SYSTEM]:
   # Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "update_knowledge_document_source", "description": "\n    Update the knowledge document source URL with custom config JSON file\n    \n    Args:\n        knowledge_document_path: Path to the JSON config file\n    \n    Returns:\n        str: Confirmation message\n    ", "parameters": {"properties": {"knowledge_document_path": {"description": "", "title": "Knowledge Document Path", "type": "string"}}, "required": ["knowledge_document_path"], "title": "update_knowledge_document_source_Args", "type": "object", "additionalProper