# Script to annotate AoPS problems with a subject

In [1]:
import sys
from pathlib import Path

# Root of the repo's `src` directory -- replace with your own basedir path for the repo
BASEDIR = Path("/workspaces/HARP/", "src")

# Put BASEDIR at the front of the import search path
# (presumably so that the `eval.utils` import below resolves -- confirm against the repo layout)
sys.path.insert(0, str(BASEDIR))

In [2]:
from __future__ import annotations

import copy
import json
import math
import os
import pickle
import pprint
import re
import time
import traceback
from collections import Counter
from typing import Any

import numpy as np
from IPython.display import Markdown, clear_output, display
from tqdm.auto import tqdm

from eval.utils import read_jsonl, get_uid

In [3]:
# File that subject labels are written to -- replace with your own filename
OUTPATH = BASEDIR.joinpath("data/raw/aops_subject_labels_test.jsonl")

In [4]:
# Subject options offered to the labeller. The position in this list is the
# number typed at the prompt. Examples of content that belongs to each subject:
#   prealgebra               - arithmetic, speed/distance, unit conversion, simple equations
#   algebra                  - arithmetic/geometric sequences, systems of equations, logs, Vieta's
#   counting_and_probability - stars and bars, probability, game theory
#   geometry                 - areas, triangles, circles, polygons
#   number_theory            - digit sums, modulo/remainders, prime numbers
#   precalculus              - trig, complex numbers, matrices
#   calculus                 - limits, continuity
SUBJECTS = [
    "prealgebra",
    "algebra",
    "counting_and_probability",
    "geometry",
    "number_theory",
    "precalculus",
    "calculus",
    "other",
]

In [5]:
# Read the problem records from the aops_wiki_deduped file, then show how many were loaded
dataset = read_jsonl(BASEDIR.joinpath("data/processed/aops_wiki_deduped.jsonl"))
len(dataset)

5427

In [6]:
# Collect the uids found in earlier label files so those problems are not shown again
existing_label_files = [
    OUTPATH,
    # Add any other files below
]
already_labelled_uids = set()
for fname in existing_label_files:
    # A label file that has not been created yet simply contributes nothing
    if os.path.exists(fname):
        existing = read_jsonl(fname)
        # Every record in a label file is expected to carry a "uid" key
        already_labelled_uids.update(record["uid"] for record in existing)

# Keep only the problems whose uid has no label yet, then show how many are left
remaining_dataset = [
    candidate
    for candidate in dataset
    if get_uid(candidate) not in already_labelled_uids
]
len(remaining_dataset)

5427

In [7]:
def _prompt_subject_index(num_subjects):
    """Ask for a subject number until a valid one is given; return None on ``q``."""
    while True:
        raw = input("Subject (input a number, or q to quit)> ").strip()
        if raw == "q":
            return None
        try:
            index = int(raw)
        except ValueError:
            # A typo must not abort the whole labelling session: ask again.
            print(f"Invalid input {raw!r}: enter a number, or q to quit.")
            continue
        # Reject negatives explicitly; otherwise e.g. -1 would silently pick the last subject.
        if 0 <= index < num_subjects:
            return index
        print(f"Subject number must be between 0 and {num_subjects - 1}.")


def label(problem):
    """Interactively collect a subject label and notes for a single problem.

    Prints the problem's uid and URL, renders the problem statement (plus the
    answer choices when present) and its first solution as Markdown, lists the
    entries of ``SUBJECTS`` with their indices, and then prompts the user for a
    subject number and free-form notes. An invalid subject number is reported
    and asked for again instead of raising.

    Args:
        problem: Mapping that provides the keys ``"url"``, ``"choices"``,
            ``"problem"`` and ``"solution_1"``. When ``"choices"`` is truthy it
            is iterated and indexed by its own keys (choice label -> choice text).

    Returns:
        A dict ``{"uid": ..., "subject": ..., "notes": ...}``, or ``None`` if
        the user entered ``q`` at either prompt.
    """
    uid = get_uid(problem)
    print("UID:", uid)
    print("URL:", problem["url"])

    choices = problem["choices"]
    if choices:
        choice_text = "\n\n".join(f"{c}. {choices[c]}" for c in choices)
        text = "### Problem\n\n{}\n\n{}\n\n### Solution\n\n{}".format(
            problem["problem"], choice_text, problem["solution_1"]
        )
    else:
        text = "### Problem\n\n{}\n\n### Solution\n\n{}".format(
            problem["problem"], problem["solution_1"]
        )

    # Convert \( \) and \[ \] LaTeX delimiters into the $ / $$ forms before rendering as Markdown.
    text = (
        text.replace("\\(", "$")
        .replace("\\)", "$")
        .replace("\\[", "$$")
        .replace("\\]", "$$")
    )
    display(Markdown(text))

    for i, subj in enumerate(SUBJECTS):
        print(f"({i}): {subj}")

    subject_num = _prompt_subject_index(len(SUBJECTS))
    if subject_num is None:
        return None
    subject = SUBJECTS[subject_num]

    notes = input(
        "Any notes, or q to quit? (standard prefixes are 'need choices', 'parse problem', 'parse solution', etc)"
    )
    # Tolerate stray whitespace around the quit command; the notes themselves are stored as typed.
    if notes.strip() == "q":
        return None

    return {"uid": uid, "subject": subject, "notes": notes}

def loop(out_path=OUTPATH, left=0, right=None):
    """Run an interactive labelling session over ``remaining_dataset[left:right]``.

    Each problem is passed to ``label``; the returned record is appended to
    ``out_path`` as one JSON line and the cell output is cleared before the
    next problem is shown. The session stops when ``label`` returns ``None``
    (the user quit) or when the slice is exhausted.

    Args:
        out_path: File that label records are appended to.
        left: Index into ``remaining_dataset`` of the first problem to show.
        right: Index into ``remaining_dataset`` one past the last problem to
            show. ``None`` (the default) means "to the end of
            ``remaining_dataset`` as it is when ``loop`` is called".
    """
    # Slicing with right=None runs to the end of the *current* remaining_dataset.
    # A default of len(remaining_dataset) would be frozen when this cell is run
    # and go stale if remaining_dataset is recomputed afterwards.
    for i, problem in enumerate(remaining_dataset[left:right], left):
        res = label(problem)

        if res is None:
            print(f"Quitting at index {i}")
            break

        # Write each label immediately so that nothing is lost if the session is interrupted.
        with open(out_path, "a") as f:
            f.write(json.dumps(res) + "\n")
        clear_output()

In [8]:
# Start labelling at the first entry of remaining_dataset; each label is appended to OUTPATH
loop(left=0)

UID: 1950/AHSME/1
URL: https://artofproblemsolving.com/wiki/index.php/1950_AHSME_Problems/Problem_1


### Problem

If $64$ is divided into three parts proportional to $2$, $4$, and $6$, the smallest part is:

A. $5\frac{1}{3}$

B. $11$

C. $10\frac{2}{3}$

D. $5$

E. $\text{None of these answers}$

### Solution

If the three numbers are in proportion to $2:4:6$, then they should also be in proportion to $1:2:3$. This implies that the three numbers can be expressed as $x$, $2x$, and $3x$. Add these values together to get: 
$$x+2x+3x=6x=64$$
Divide each side by 6 and get that 
$$x=\frac{64}{6}=\frac{32}{3}=10 \frac{2}{3}$$
which is $\boxed{\textbf{(C)}}$.

(0): prealgebra
(1): algebra
(2): counting_and_probability
(3): geometry
(4): number_theory
(5): precalculus
(6): calculus
(7): other


Subject (input a number, or q to quit)>  q


Quitting at index 0
