In [11]:
import json

def extract_assistant_shadow_text(data):
    """
    Extracts text, tool-call details and token usage from a JSON structure containing
    messages, then merges them into a single JSONL row (string).

    Specifically:
    1. Gathers all `text` content from items of messages where role="user".
    2. Gathers all `text` content from items of messages where role="assistant" and name="Shadow".
    3. Gathers plugin_name, function_name and metadata["arguments"] from items of messages
       where role="tool" and name="Shadow". An item is kept only when all three are truthy.
    4. Gathers metadata["usage"] exactly once for each role="assistant", name="Shadow"
       message whose finish_reason is "tool_calls".

    Args:
        data (dict): The JSON structure containing a "messages" list.

    Returns:
        str: A single JSONL row: one JSON object with the keys "user", "assistant",
            "tool_call" and "usage", each holding a list.
    """

    user_texts = []
    assistant_shadow_texts = []
    tool_shadow_data = []
    token_data = []

    # `or []` / `or {}` also cover keys that are present but null in the JSON,
    # which `.get(key, default)` alone would pass through as None.
    for message in data.get("messages") or []:
        role = message.get("role")
        name = message.get("name", "")
        items = message.get("items") or []

        if role == "user":
            user_texts.extend(item["text"] for item in items if item.get("text"))

        elif role == "assistant" and name == "Shadow":
            assistant_shadow_texts.extend(
                item["text"] for item in items if item.get("text")
            )
            # Usage lives on the message, not on its items, so it is recorded once per
            # message. Recording it inside the item loop would duplicate it for every
            # item and drop it entirely for a message without items.
            if message.get("finish_reason", "") == "tool_calls":
                usage_info = (message.get("metadata") or {}).get("usage", {})
                token_data.append(usage_info)

        elif role == "tool" and name == "Shadow":
            for item in items:
                arguments = (item.get("metadata") or {}).get("arguments")
                function_name = item.get("function_name")
                plugin_name = item.get("plugin_name")
                # Skip incomplete entries: all three fields are required.
                if arguments and function_name and plugin_name:
                    tool_shadow_data.append({
                        "plugin_name": plugin_name,
                        "function_name": function_name,
                        "arguments": arguments
                    })

    # Combine the four lists into a single JSON object
    combined_data = {
        "user": user_texts,
        "assistant": assistant_shadow_texts,
        "tool_call": tool_shadow_data,
        "usage": token_data,
    }

    # json.dumps without `indent` emits the object on one line, i.e. one JSONL row
    return json.dumps(combined_data)

In [12]:
import json

# Read the JSON file. The encoding is passed explicitly because open() otherwise
# falls back to the platform's locale encoding, which can mis-decode non-ASCII text.
with open('../messages.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)

# Flatten the conversation into a single JSONL row and show it
jsonl_row = extract_assistant_shadow_text(json_data)
print(jsonl_row)

{"user": ["Hii shadow.  I have an upcomiing sales call and we are in the discovery phase of the sale.  The call is with Truein.  Can you help me construct a call agenda for this first sales call."], "assistant": ["Creating a structured agenda for your discovery call with Truein can help you effectively guide the conversation, ensuring you gather essential insights while also delivering value to your contact. Here's a suggested agenda tailored for a discovery call with Truein:\n\n### Sales Call Agenda with Truein\n\n1. **Introduction (5 minutes)**\n   - Briefly introduce yourself and your company.\n   - Recap the purpose of the call and what you hope to achieve.\n   - Express enthusiasm and gratitude for the opportunity to speak with them.\n\n2. **Understanding Truein's Background (10 minutes)**\n   - Ask about Truein's current operations, particularly how they manage time and attendance.\n   - Explore their primary customer base and any specific industry challenges they face.\n   - Und