In [None]:
import json
import gzip
import os

def chunk_tree(input_file, output_dir, max_nodes_per_chunk=100000):
    """Split a gzipped JSON tree into gzipped chunk files of bounded size.

    The input is read fully into memory and walked depth-first. Every node
    receives a sequential integer ``id`` in pre-order (the root is 1) and is
    copied with the keys ``id``, ``value``, ``prob``, ``total_prob``,
    ``depth``, ``response_distribution`` and ``next``.

    A chunk is a subtree that holds at most ``max_nodes_per_chunk`` full
    nodes. Once a chunk is full, each further child that would have been
    added to it becomes the root of a new chunk instead, and the parent's
    ``next`` entry holds a stub ``{'id': <node id>, 'chunk': <chunk index>}``
    pointing at the file that contains the child. Chunk ``i`` is written to
    ``<output_dir>/tree-<i>.json.gz``; chunk 0 always contains the tree root.

    Args:
        input_file: Path to a gzip-compressed JSON file holding the root
            node. Each node must have ``value`` and ``prob`` keys; children
            are read from an optional ``next`` mapping.
        output_dir: Directory for the chunk files; created if missing.
        max_nodes_per_chunk: Maximum number of full nodes stored in a single
            chunk file. Values below 1 are treated as 1, because a chunk
            must at least hold its own root.

    Raises:
        KeyError: If a node lacks ``value`` or ``prob``.
    """
    os.makedirs(output_dir, exist_ok=True)

    with gzip.open(input_file, 'rt', encoding='utf-8') as f:
        tree = json.load(f)

    node_limit = max(1, max_nodes_per_chunk)
    last_id = 0
    chunks_started = 0

    def save_chunk(chunk, index):
        filename = f'tree-{index}.json.gz'
        with gzip.open(os.path.join(output_dir, filename), 'wt', encoding='utf-8') as f:
            json.dump(chunk, f)

    def build_chunk(root):
        """Build and save the chunk rooted at ``root``; return its stub."""
        nonlocal chunks_started
        # Reserve the index before descending so nested chunks get later
        # indices and the root chunk is always index 0.
        index = chunks_started
        chunks_started += 1
        nodes_in_chunk = 0

        def convert(node):
            nonlocal last_id, nodes_in_chunk
            last_id += 1
            nodes_in_chunk += 1

            processed_node = {
                'id': last_id,
                'value': node['value'],
                'prob': node['prob'],
                'total_prob': node.get('total_prob'),
                'depth': node.get('depth'),
                'response_distribution': node.get('response_distribution'),
                'next': {},
            }

            # `or {}` also covers an explicit JSON null for "next".
            for key, child in (node.get('next') or {}).items():
                if nodes_in_chunk >= node_limit:
                    # This chunk is full: the child's whole subtree goes
                    # into its own chunk and only a stub is kept here.
                    processed_node['next'][key] = build_chunk(child)
                else:
                    processed_node['next'][key] = convert(child)

            return processed_node

        chunk_root = convert(root)
        # Written only once the subtree is complete, so the file is never
        # a partially built chunk.
        save_chunk(chunk_root, index)
        return {'id': chunk_root['id'], 'chunk': index}

    build_chunk(tree)

    print(f"Tree chunked and saved in {output_dir}")

# Example usage: chunk the gzipped tree under static/ into static/chunked_tree,
# using the default per-chunk node limit.
chunk_tree(
    input_file='static/tree.json.gz',
    output_dir='static/chunked_tree',
)