In [None]:
import datetime

In [17]:
input_folder = '/Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files'  # Update this path
output_folder = '/Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/combined_json_files'

print(datetime.datetime.now())

In [18]:
import json
import os

def combine_judgment_fields(input_file, output_file):
    try:
        # Read the JSON file
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading file {input_file}: {e}")
        return

    # Ensure data is a list of dictionaries
    if isinstance(data, dict):
        # If the data is a dict, wrap it in a list
        data = [data]
    elif not isinstance(data, list):
        print(f"Error: Expected a list of records in file {input_file}, but got {type(data).__name__}")
        return

    # Create a new list to store the modified data
    modified_data = []

    for record in data:
        if not isinstance(record, dict):
            print(f"Error: Expected a dictionary for each record, but got {type(record).__name__}")
            continue

        try:
            # Create the judgment_fulltext by concatenating judgment_fulltext_p1, judgment_fulltext_p2, etc.
            judgment_fulltext = ""
            nopages = int(record.get("judgment_nopage", 0))
            for i in range(1, nopages + 1):
                key = f"judgment_fulltext_p{i}"
                if key in record:
                    judgment_fulltext += record[key]
                else:
                    print(f"Warning: Key '{key}' not found in record from file {input_file}")

            # Create a new dictionary with the required fields
            new_record = {
                "case_id": record.get("case_id"),
                "basis_law": record.get("basis_law"),
                "red_case_no": record.get("red_case_no"),
                "category": record.get("category"),
                "summary": record.get("summary"),
                "judgment_fulltext": judgment_fulltext,
            }

            # Add the new record to the modified data list
            modified_data.append(new_record)
        except Exception as e:
            print(f"Error processing record in file {input_file}: {e}")

    # Write the modified data to the output JSON file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(modified_data, f, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Error writing to file {output_file}: {e}")

def process_folder(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if not os.path.exists(input_folder):
        print(f"Error: Input folder '{input_folder}' does not exist.")
        return

    for filename in os.listdir(input_folder):
        if filename.endswith(".json"):
            input_file = os.path.join(input_folder, filename)
            output_file = os.path.join(output_folder, filename)
            print(f"Processing file: {input_file}")
            combine_judgment_fields(input_file, output_file)
        else:
            print(f"Skipping non-JSON file: {filename}")

# Usage example
#input_folder = "input_folder"
#output_folder = "output_folder"
process_folder(input_folder, output_folder)

print(datetime.datetime.now())

Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/7426 -7427_2557.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/5055_2559.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/6450_2558.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/6101_2564.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/7581_2561.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/5400_2560.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/8155_2555.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/5211_2559.json
Processing file: /Users/ekkalukw/data-src/kdai-llm-final-20241007/case csv/output_json_files/5178_2563.json
