In [1]:
import json, random, pathlib, pandas as pd

DATA = pathlib.Path('HARP.jsonl')
records = [json.loads(l) for l in DATA.open()]

l6 = pd.read_csv('../data/harp_level6.csv')
l6

Unnamed: 0,id,question,solution,answer
0,H6-001,"For $\{1, 2, 3, \ldots, n\}$ and each of its n...","Let $S$ be a non- empty subset of $\{1,2,3,4,5...",$448$
1,H6-002,"In the adjoining figure, two circles with radi...","Firstly, notice that if we reflect $R$ over $P...",$130$
2,H6-003,The adjoining figure shows two intersecting ch...,[asy] size(10cm); import olympiad; pair O = (0...,$175$
3,H6-004,Find the value of $10\cot(\cot^{-1}3+\cot^{-1}...,We know that $\tan(\arctan(x)) = x$ so we can ...,$15$
4,H6-005,What is the largest even integer that cannot b...,Take an even positive integer $x$. $x$ is eith...,$38$
...,...,...,...,...
192,H6-193,Let $ABCD$ be a tetrahedron such that $AB=CD= ...,"Notice that \(41=4^2+5^2\), \(89=5^2+8^2\), an...",$104$
193,H6-194,Let $\mathcal{B}$ be the set of rectangular bo...,"Observe that the ""worst"" possible box is one o...",$721$
194,H6-195,Let $\omega\neq 1$ be a 13th root of unity. Fi...,\[\prod_{k=0}^{12} \left(2- 2\omega^k + \omega...,$321$
195,H6-196,Let \(b\ge 2\) be an integer. Call a positive ...,We write the base-$b$ two-digit integer as $\l...,$211$


In [3]:
import json

def analyze_results(results_file_path):
    """
    Analyzes a results.json file to find problems with exactly five complete solutions.

    A solution is considered "complete" if its 'finish_reason' is not 'length'.

    Args:
        results_file_path (str): The path to the results.json file.

    Returns:
        tuple: A tuple containing:
            - set: A set of problem_ids for problems that meet the criteria.
            - int: The count of such problems.
    """
    target_problem_ids = set()
    count_target_problems = 0

    try:
        with open(results_file_path, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: The file '{results_file_path}' was not found.")
        return target_problem_ids, 0
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from the file '{results_file_path}'.")
        return target_problem_ids, 0
    except Exception as e:
        print(f"An unexpected error occurred while reading the file: {e}")
        return target_problem_ids, 0

    if "results" not in data or not isinstance(data["results"], list):
        print("Error: JSON structure is not as expected. Missing 'results' list.")
        return target_problem_ids, 0

    for problem in data["results"]:
        if not isinstance(problem, dict):
            # Skip if problem is not a dictionary
            continue

        problem_id = problem.get("problem_id")
        solutions = problem.get("iterations")

        if problem_id is None or not isinstance(solutions, list):
            continue

        if len(solutions) == 5:
            count_target_problems += 1
            target_problem_ids.add(problem_id)
        else:
            print(f"Skipping problem {problem_id} with {len(solutions)} solutions")
                
    return target_problem_ids, count_target_problems

file_path = "/sailhome/jshen3/research_projects/reasoning-distillation/results/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6_20250505_004152/results.json"
file_path2 = "/sailhome/jshen3/research_projects/reasoning-distillation/results/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6_20250505_171231/results.json"
file_path3 = "/sailhome/jshen3/research_projects/reasoning-distillation/results/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6_20250508_183631/results.json"

print(f"Analyzing file: {file_path}")
problem_ids_set, num_problems = analyze_results(file_path)
problem_ids_set2, num_problems2 = analyze_results(file_path2)
problem_ids_set3, num_problems3 = analyze_results(file_path3)
# problem_ids_set5, num_problems5 = analyze_results(file_path5)
if num_problems > 0:
    print(f"\nFound {num_problems, num_problems2, num_problems3} problems with exactly 5 complete solutions.")
    print(f"Total problems: {num_problems + num_problems2 + num_problems3}")
    print("Problem IDs:")
    # Print a few example IDs if the set is large, or all if it's small
    ids_to_print = list(problem_ids_set)
    ids_to_print2 = list(problem_ids_set2)
    ids_to_print3 = list(problem_ids_set3)
    if len(ids_to_print) > 10:
        print(ids_to_print[:10], "...")
        print(ids_to_print2[:10], "...")
        print(ids_to_print3[:10], "...")
    else:
        for pid in ids_to_print:
            print(pid)
elif problem_ids_set is not None: # Only print if analyze_results didn't fail early
    print("No problems found matching the criteria.")


Analyzing file: /sailhome/jshen3/research_projects/reasoning-distillation/results/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6/harp_deepseek_qwen_14b_summ_base_sum_4iter_l6_20250505_004152/results.json

Found (153, 34, 9) problems with exactly 5 complete solutions.
Total problems: 196
Problem IDs:
['H6-085', 'H6-093', 'H6-164', 'H6-006', 'H6-138', 'H6-087', 'H6-142', 'H6-127', 'H6-043', 'H6-046'] ...
['H6-161', 'H6-190', 'H6-184', 'H6-172', 'H6-182', 'H6-191', 'H6-153', 'H6-176', 'H6-189', 'H6-166'] ...
['H6-140', 'H6-167', 'H6-163', 'H6-165', 'H6-168', 'H6-193', 'H6-152', 'H6-129', 'H6-146'] ...


In [5]:
completed_problems = set(list(problem_ids_set) + list(problem_ids_set2) + list(problem_ids_set3))
problems_to_complete = set(l6['id'].tolist()) - completed_problems
print("Number of problems to complete: ", len(problems_to_complete))
print(problems_to_complete)

Number of problems to complete:  1
{'H6-141'}
