In [3]:
import json

def _coerce_option_to_list(raw_option):
    """Decode a JSON-encoded ``option`` string and return it as a list.

    :param raw_option: String expected to contain a JSON document
    :return: The decoded value if it is a list, the decoded value wrapped in
        a one-element list otherwise, or ``[]`` if the string is not valid JSON
    """
    try:
        parsed_option = json.loads(raw_option)
    except json.JSONDecodeError:
        # Unparseable text is replaced by an empty list; the original string
        # is NOT preserved in the output.
        return []
    return parsed_option if isinstance(parsed_option, list) else [parsed_option]


def convert_option_to_array(input_file, output_file):
    """Rewrite string-valued ``option`` fields of a JSON file as lists.

    The input is loaded, every JSON object in the top-level collection whose
    ``option`` value is a string has that string decoded as JSON and stored
    back as a list, and the result is written to ``output_file`` with a
    two-space indent. ``option`` values that are not strings (including
    missing ones) are left untouched, as are entries that are not JSON
    objects. A confirmation line is printed when the output has been written.

    :param input_file: Path to the input JSON file (read as UTF-8)
    :param output_file: Path of the JSON file to write (written as UTF-8)
    :return: None
    """
    # Explicit UTF-8: JSON is UTF-8 by convention, while open() without an
    # encoding falls back to the platform's locale codec.
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for item in data:
        # Only JSON objects can carry an 'option' key; skip anything else
        # instead of failing on a missing .get method.
        if not isinstance(item, dict):
            continue
        option = item.get('option')
        if isinstance(option, str):
            item['option'] = _coerce_option_to_list(option)

    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=2)

    print(f"Converted JSON saved to {output_file}")

# Example usage: run the conversion on data/labeling/mrr-graph.json.
# The result is written to a separate path, so the input file is not overwritten.
input_file = 'data/labeling/mrr-graph.json'
output_file = 'data/labeling/mrr-graph-output.json'


# Prints a confirmation line naming the output file once it has been written.
convert_option_to_array(input_file, output_file)

Converted JSON saved to data/labeling/mrr-graph-output.json


In [4]:
import json
import os

def split_json_file(input_file, output_dir='data/labeling/llm', chunk_size=10):
    """
    Split a JSON array file into numbered files of at most ``chunk_size`` elements.

    Chunks are written in order to ``llm_1.json``, ``llm_2.json``, ... inside
    ``output_dir``; the last file holds whatever elements remain. An empty
    input array produces no files. One progress line is printed per file.

    :param input_file: Path to the input JSON file (read as UTF-8)
    :param output_dir: Directory to save split files (default: 'data/labeling/llm');
        created if it does not exist
    :param chunk_size: Maximum number of elements per output file (default: 10);
        must be at least 1
    :raises ValueError: If ``chunk_size`` is less than 1, or if the top-level
        JSON value is not an array
    """
    # Validate before touching the filesystem: a negative step would make
    # range() yield nothing (silently writing no files) and zero would only
    # fail later with an unrelated-looking range() error.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Explicit UTF-8 so the result does not depend on the platform's locale codec
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Ensure the input is a list
    if not isinstance(data, list):
        raise ValueError("Input JSON must be an array/list")

    # Walk the list in steps of chunk_size; file numbering starts at 1
    for chunk_number, start in enumerate(range(0, len(data), chunk_size), start=1):
        # Slicing past the end is safe, so the final chunk may be shorter
        chunk = data[start:start + chunk_size]

        output_file = os.path.join(output_dir, f'llm_{chunk_number}.json')

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(chunk, f, indent=2)

        print(f"Created {output_file} with {len(chunk)} elements")


# Split data/labeling/mrr-llm.json with the function's defaults:
# chunks of up to 10 elements written as llm_<n>.json under data/labeling/llm.
split_json_file('data/labeling/mrr-llm.json')

Created data/labeling/llm/llm_1.json with 10 elements
Created data/labeling/llm/llm_2.json with 10 elements
Created data/labeling/llm/llm_3.json with 10 elements
Created data/labeling/llm/llm_4.json with 10 elements
Created data/labeling/llm/llm_5.json with 10 elements
Created data/labeling/llm/llm_6.json with 10 elements
Created data/labeling/llm/llm_7.json with 10 elements
Created data/labeling/llm/llm_8.json with 10 elements
Created data/labeling/llm/llm_9.json with 10 elements
Created data/labeling/llm/llm_10.json with 10 elements
Created data/labeling/llm/llm_11.json with 10 elements
Created data/labeling/llm/llm_12.json with 10 elements
Created data/labeling/llm/llm_13.json with 10 elements
Created data/labeling/llm/llm_14.json with 10 elements
Created data/labeling/llm/llm_15.json with 10 elements
Created data/labeling/llm/llm_16.json with 10 elements
Created data/labeling/llm/llm_17.json with 10 elements
Created data/labeling/llm/llm_18.json with 10 elements
Created data/labeli