In [13]:
import json
from collections import Counter

## Disambiguation 


### Converting first output to JSONL

In [8]:
def parse_custom_json(file_path):
    """Parse a file that holds several JSON values written one after another.

    The values may be directly adjacent (``{...}{...}``), separated by
    whitespace/newlines, or separated by commas. Each value is decoded in
    turn with ``json.JSONDecoder.raw_decode`` rather than by rewriting the
    text, so string values that happen to contain ``}{`` are left untouched.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with one entry per top-level JSON value found, in file order.
        An empty (or whitespace-only) file yields an empty list.

    Raises:
        json.JSONDecodeError: If the content at some position is not valid JSON.
    """
    with open(file_path, 'r') as f:
        content = f.read()

    # Characters allowed between consecutive top-level values.
    separators = " \t\r\n,"

    decoder = json.JSONDecoder()
    parsed = []
    pos = 0
    end = len(content)

    while True:
        # raw_decode does not skip leading whitespace, so advance past any
        # separator characters ourselves before decoding the next value.
        while pos < end and content[pos] in separators:
            pos += 1
        if pos >= end:
            break

        value, pos = decoder.raw_decode(content, pos)
        parsed.append(value)

    return parsed

def write_jsonl_from_parsed_list(parsed_list, output_path):
    """Write each item of ``parsed_list`` as one JSON document per line.

    Args:
        parsed_list: Iterable of JSON-serialisable values.
        output_path: Destination file; it is created or overwritten.
    """
    with open(output_path, 'w') as out:
        # One serialised record per line, each terminated by a newline.
        out.writelines(json.dumps(record) + "\n" for record in parsed_list)

In [None]:
# Path of the raw results file that is converted to JSONL below.
result_file_path = './disambiguation_results.json' 

# Load the raw results file into a list of parsed JSON values
parsed_results = parse_custom_json(result_file_path)

# Write to clean JSONL format (one JSON value per line)
write_jsonl_from_parsed_list(parsed_results, "disambiguation_results.jsonl")

### Analysis

In [12]:
def count_verdicts(jsonl_path):
    """Tally the ``"verdict"`` field across the records of a JSONL file.

    Each non-blank line is parsed as JSON. Only the value under the record's
    first key is inspected: if it is ``None`` the line is skipped, otherwise
    its ``"verdict"`` entry (``None`` when the key is absent) is counted.
    Any line that fails to parse or inspect is reported on stdout and skipped.

    Args:
        jsonl_path: Path of the JSONL file to read.

    Returns:
        A ``Counter`` mapping each verdict to its number of occurrences.
    """
    tally = Counter()

    with open(jsonl_path, 'r') as handle:
        for raw in handle:
            if raw.strip() == "":
                continue
            try:
                record = json.loads(raw)
                first_pair = next(iter(record.items()))
                payload = first_pair[1]

                if payload is None:
                    continue
                tally[payload.get("verdict")] += 1
            except Exception as err:
                # Best-effort: report the offending line and keep going.
                print(f"Error parsing line:\n{raw[:100]}...\n{err}")

    return tally

In [21]:
# Tally the verdicts stored in the JSONL file and report the share of 'Unclear'.
result_file_path = './disambiguation_results.jsonl'
verdicts = count_verdicts(result_file_path)

print("Verdict counts:")
for verdict, count in verdicts.items():
    print(f"  {verdict}: {count}")

# Total number of counted verdicts across all categories.
total = sum(verdicts.values())

print('--------')
print(f"Total processed conflicts: {total}")

# Guard the division: an empty file (or one where every line was skipped)
# leaves total at 0 and would otherwise raise ZeroDivisionError.
if total:
    print(f"Percentage of 'Unclear': {verdicts['Unclear'] / total * 100:.2f}%")
else:
    print("Percentage of 'Unclear': n/a (no verdicts found)")

Verdict counts:
  Unclear: 5
  No: 28
  Yes: 93
--------
Total processed conflicts: 126
Percentage of 'Unclear': 3.97%
