In [1]:
import json
import os
import random
from datetime import datetime, timezone

# Function to convert a timestamp to the required format
def timestamp_to_str(timestamp):
    """Format a POSIX timestamp as a short UTC date-time string.

    Args:
        timestamp: Seconds since the Unix epoch (int or float), or ``None``.

    Returns:
        The UTC time formatted as ``'%y-%m-%d %H:%M'``, or the literal
        string ``'None'`` when ``timestamp`` is ``None``.
    """
    if timestamp is None:
        return 'None'
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware conversion yields the same UTC wall-clock fields.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%y-%m-%d %H:%M')

In [2]:
# Load the exported conversations. JSON text is UTF-8, so the encoding is set
# explicitly instead of relying on the platform's default codec.
with open('../data/conversations.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# A conversation counts as "small" when its mapping holds fewer than 5 nodes
small_conversations = [
    conversation for conversation in data
    if len(conversation['mapping']) < 5
]

# Print the number of small conversations and total conversations
print('Number of small conversations: ', len(small_conversations))
print('Total conversations: ', len(data))
print('-' * 50)

# Select up to 3 random small conversations. Capping k avoids the ValueError
# random.sample raises when the population is smaller than the sample size.
random_conversations = random.sample(
    small_conversations, k=min(3, len(small_conversations))
)

# Print the titles of the selected conversations
for conversation in random_conversations:
    print(conversation['title'])

Number of small conversations:  119
Total conversations:  628
--------------------------------------------------
Create ASCII 'A' Middle
Equipment leasing practice
NLP Skills and Tasks


In [3]:
def _describe_fork_point(mapping, node_id):
    """Return the 'id | role | content type | created' summary for a fork node."""
    msg = mapping[node_id].get('message', None)
    role = msg['author']['role'] if msg and msg.get('author') else 'unknown'
    # Without a content entry the message itself (possibly None) is shown instead
    content_type = msg['content']['content_type'] if msg and msg.get('content') else msg
    # Use a real None, not the string 'None': timestamp_to_str only special-cases
    # None, and a string would make the datetime conversion raise TypeError.
    created = msg['create_time'] if msg else None
    return f"{node_id} | {role} | {content_type} | {timestamp_to_str(created)}"


def print_tree(mapping, node_id, level=0, fork_point=None):
    """Recursively print the subtree rooted at ``node_id``, one node per line.

    Each line shows the author role, created/updated times and node id,
    indented four spaces per tree level. Nodes without a message are labelled
    ``'root'`` and the raw node dict is printed as well. While a fork point is
    active, its summary is repeated under every descendant.

    Args:
        mapping: Dict of node id -> node dict; each node has a ``'children'``
            list and may carry a ``'message'`` dict.
        node_id: Id of the node to print.
        level: Current depth, used only for indentation.
        fork_point: Id of the closest ancestor with more than one child, or
            ``None`` when no such ancestor has been seen.
    """
    indent = '    ' * level
    node = mapping[node_id]
    message = node.get('message', None)

    # Get role: message-less nodes are 'root', author-less messages 'unknown'
    if not message:
        role = 'root'
    elif message.get('author'):
        role = message['author']['role']
    else:
        role = 'unknown'

    # Print node details
    created = message['create_time'] if message else None
    updated = message['update_time'] if message else None
    print(f"{indent}{role} | {timestamp_to_str(created)} | {timestamp_to_str(updated)}: {node_id}")

    if role == 'root':
        print(f"{indent}{message if message else node}")

    # Print fork point details
    if fork_point:
        print(f"{indent}Fork Point is: {_describe_fork_point(mapping, fork_point)}")

    # If node has multiple children, it becomes the fork point for its subtree
    if len(node['children']) > 1:
        print(f"{indent}Updated Fork Point to: {_describe_fork_point(mapping, node_id)}")
        fork_point = node_id

    # Recursively call print_tree on each child
    for child_id in node['children']:
        print_tree(mapping, child_id, level + 1, fork_point)

In [4]:

def _node_author_role(node_data):
    """Return the author role of a node's message, or None when it has none."""
    message = node_data.get('message')
    if not message:
        return None
    author = message.get('author')
    return author.get('role') if author else None


def examine_small_conversations(conversations):
    """Print the root/system-node layout of each conversation and its tree.

    For every conversation the title is printed, then the nodes whose message
    author role is ``'system'`` and the nodes without a parent are collected.
    Conversations lacking either kind are skipped. When the first system node
    is the first child of the first root node, the subtree under each child of
    that system node is printed with ``print_tree``; otherwise the whole
    mapping is printed for inspection and the conversation is skipped.

    Args:
        conversations: Iterable of conversation dicts providing ``'title'``
            and ``'mapping'`` (node id -> node dict).
    """
    for convo in conversations:
        print(f"Conversation: {convo['title']}")
        print('-' * 100)

        mapping = convo['mapping']
        system_message_nodes = [
            node_id for node_id, node_data in mapping.items()
            if _node_author_role(node_data) == 'system'
        ]
        root_nodes = [
            node_id for node_id, node_data in mapping.items()
            if not node_data.get('parent')
        ]

        if not system_message_nodes or not root_nodes:
            continue

        system_id = system_message_nodes[0]
        root_id = root_nodes[0]

        print(f"Number of system message nodes: {len(system_message_nodes)} | {mapping[system_id]}")
        print(f"Number of root nodes: {len(root_nodes)} | {mapping[root_id]}")
        print('=' * 100)

        root_children = mapping[root_id]['children']
        print(f"Root node children: {root_children}")
        print(f"System message node id: {system_id}")

        # A childless root cannot lead to the system node; guarding the index
        # routes that case to the mismatch branch instead of an IndexError.
        if root_children and system_id == root_children[0]:
            print(f"First edge: (Root: {root_id}) --> (System: {system_id})")
        else:
            print(f"First edge: (Root: {root_id}) -X-> (System: {system_id})")
            print(mapping)
            continue

        # Print tree format for each child of the system node
        for child_id in mapping[system_id]['children']:
            print_tree(mapping, child_id)

        print('=' * 100)
        print('\n')


# Examine the first three entries of small_conversations, in list order
examine_small_conversations(small_conversations[:3])


Conversation: Python's Data Serpent
----------------------------------------------------------------------------------------------------
Number of system message nodes: 1 | {'id': '6e53fb2e-3129-4643-9686-a8068e0a17a2', 'message': {'id': '6e53fb2e-3129-4643-9686-a8068e0a17a2', 'author': {'role': 'system', 'name': None, 'metadata': {}}, 'create_time': 1689127254.916375, 'update_time': None, 'content': {'content_type': 'text', 'parts': ['']}, 'status': 'finished_successfully', 'end_turn': True, 'weight': 1.0, 'metadata': {}, 'recipient': 'all'}, 'parent': 'aaa123b7-deec-411a-a943-961fcf81b338', 'children': ['aaa2b441-f0ed-49a2-b7ab-cf2b2d144531']}
Number of root nodes: 1 | {'id': 'aaa123b7-deec-411a-a943-961fcf81b338', 'message': None, 'parent': None, 'children': ['6e53fb2e-3129-4643-9686-a8068e0a17a2']}
Root node children: ['6e53fb2e-3129-4643-9686-a8068e0a17a2']
System message node id: 6e53fb2e-3129-4643-9686-a8068e0a17a2
First edge: (Root: aaa123b7-deec-411a-a943-961fcf81b338) --> (Sy

In [5]:
def print_conversation_structure(convo):
    """Print a shallow outline of a single conversation.

    Emits a banner with the title and id, the first parentless node as the
    root, the first node whose message author role is ``'system'`` (with its
    creation time) and that node's direct children. Returns right after the
    banner when no root node is found.

    Args:
        convo: Conversation dict with ``'title'``, ``'id'`` and ``'mapping'``.
    """
    banner = "=" * 40
    print(banner)
    print(f"Conversation: {convo['title']} | {convo['id']}")
    print(banner)

    nodes = convo['mapping']

    # First node that has no parent
    root_node_id = None
    for candidate_id, candidate in nodes.items():
        if not candidate['parent']:
            root_node_id = candidate_id
            break
    if not root_node_id:
        return

    # First node carrying a system-authored message
    system_message_node_id = None
    for candidate_id, candidate in nodes.items():
        candidate_message = candidate['message']
        if candidate_message and candidate_message['author']['role'] == 'system':
            system_message_node_id = candidate_id
            break

    print(f"Root Node: {root_node_id}")
    if system_message_node_id:
        system_node = nodes[system_message_node_id]
        system_time = timestamp_to_str(system_node['message']['create_time'])
        print(f"System Node: {system_message_node_id} ({system_time})")

        # Only the system node's direct children are listed; deeper levels
        # of the tree are intentionally left out of this output.
        for child_id in system_node['children']:
            child_message = nodes[child_id]['message']
            child_time = timestamp_to_str(child_message['create_time'])
            role = child_message['author']['role']
            print(f"  └── {role.capitalize()}: {child_id} ({child_time})")
    print(banner)
    print()