In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [29]:
from copy import deepcopy
from dataclasses import dataclass
import deepcompare
import json
import os
from pathlib import Path
from pprint import pprint
import re
import requests
import sys
from typing import Optional

if '..' not in sys.path: sys.path.append('..')

from sagi.config.main import get_main_config, MainConfig

In [26]:
# Load the main configuration and pretty-print its dumped fields so the
# directory paths used by the cells below are visible.
cfg = get_main_config()
pprint(cfg.model_dump())

{'data': {'arc_v1_data_dir_path': PosixPath('/Users/misha/data/arc-v1'),
          'arc_v1_repo_dir_path': PosixPath('/Users/misha/prog/arc-agi'),
          'arc_v2_data_dir_path': PosixPath('/Users/misha/data/arc-v2'),
          'arc_v2_repo_dir_path': PosixPath('/Users/misha/prog/arc-agi-2'),
          'root_dir_path': PosixPath('/Users/misha/data')}}


In [13]:
def read_repo_data(repo_dir_path: Path) -> dict:
    """Load the per-task JSON files of a repository checkout.

    Reads ``<repo_dir_path>/data/training`` and
    ``<repo_dir_path>/data/evaluation``.

    Args:
        repo_dir_path: Root directory that contains ``data/training`` and
            ``data/evaluation``.

    Returns:
        ``{split: {file_stem: parsed_json}}`` with the two keys
        ``'training'`` and ``'evaluation'``.

    Raises:
        FileNotFoundError: If one of the split directories does not exist.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    def read_files(dpath: Path) -> dict:
        """Parse every regular ``*.json`` file directly inside ``dpath``."""
        data = {}
        # iterdir() (rather than glob) keeps the FileNotFoundError for a
        # missing directory; sorting makes the load order deterministic.
        for fpath in sorted(dpath.iterdir()):
            # Skip anything that is not a JSON file (OS metadata such as
            # .DS_Store, subdirectories, ...) instead of crashing on it.
            if fpath.suffix.lower() != '.json' or not fpath.is_file():
                continue
            # JSON is UTF-8 by specification; do not depend on the locale.
            data[fpath.stem] = json.loads(fpath.read_text(encoding='utf-8'))
        return data

    data = {}
    for split in ['training', 'evaluation']:
        data[split] = read_files(repo_dir_path / 'data' / split)
    return data

# Load both splits from the v1 and v2 repository directories named in cfg.
repo_data_v1 = read_repo_data(cfg.data.arc_v1_repo_dir_path)
repo_data_v2 = read_repo_data(cfg.data.arc_v2_repo_dir_path)

In [18]:
def read_down_data(data_dir_path: Path) -> dict:
    """Load the ``arc-agi_*.json`` files found directly in a directory.

    Args:
        data_dir_path: Directory to scan (not recursive).

    Returns:
        ``{name: parsed_json}`` where ``name`` is the file stem with the
        ``'arc-agi_'`` prefix removed, e.g. ``'training_challenges'`` for
        ``arc-agi_training_challenges.json``.

    Raises:
        FileNotFoundError: If ``data_dir_path`` does not exist.
        json.JSONDecodeError: If a matching file does not contain valid JSON.
    """
    prefix = 'arc-agi_'
    data = {}
    # Sorted for a deterministic load order; iterdir() raises on a missing dir.
    for fpath in sorted(data_dir_path.iterdir()):
        if not fpath.name.startswith(prefix):
            continue
        # Only regular JSON files are data; skip archives, notes, directories.
        if fpath.suffix.lower() != '.json' or not fpath.is_file():
            continue
        key = fpath.stem[len(prefix):]  # remove 'arc-agi_' prefix
        # JSON is UTF-8 by specification; do not depend on the locale.
        data[key] = json.loads(fpath.read_text(encoding='utf-8'))
    return data

# Load the 'arc-agi_*' files from the v1 and v2 data directories named in cfg.
down_data_v1 = read_down_data(cfg.data.arc_v1_data_dir_path)
down_data_v2 = read_down_data(cfg.data.arc_v2_data_dir_path)

In [41]:
def compare(repo_data: dict, down_data: dict, split: str) -> None:
    """Check that one split holds the same tasks in both data sources.

    For every task, the entries of ``down_data[f'{split}_solutions']`` are
    attached as ``'output'`` to the matching ``'test'`` entries of the
    ``down_data[f'{split}_challenges']`` task, a top-level ``'name'`` field
    is dropped on both sides, and the result is compared against the
    ``repo_data[split]`` task with ``deepcompare.compare(..., strict=True)``.

    Nothing is printed when all tasks match. On the first difference the
    split, the task id and both task structures are printed and the scan
    stops. The inputs are not modified.

    Args:
        repo_data: ``{split: {task_id: task}}``.
        down_data: Mapping with the keys ``f'{split}_challenges'`` and
            ``f'{split}_solutions'``, each ``{task_id: ...}``.
        split: Split name used to select the entries above.

    Raises:
        KeyError: If a required key is missing from either input.
        AssertionError: If the three task-id sets are not identical.
    """
    repo = repo_data[split]
    down_challenges = down_data[f'{split}_challenges']
    down_solutions = down_data[f'{split}_solutions']
    assert repo.keys() == down_challenges.keys() == down_solutions.keys(), (
        f'{split}: task ids differ between sources '
        f'(repo={len(repo)}, challenges={len(down_challenges)}, '
        f'solutions={len(down_solutions)})'
    )
    for key in repo.keys():
        # Work on copies: 'name' is removed and 'output' is added below, and
        # the caller's data must stay untouched.
        repo_challenge = deepcopy(repo[key])
        down_challenge = deepcopy(down_challenges[key])
        down_solution = down_solutions[key]  # only read, no copy needed

        tests = down_challenge['test']
        if len(tests) != len(down_solution):
            # Without this check extra solutions would be ignored silently
            # and missing ones would surface as a bare IndexError.
            print(f'[{split}] task {key!r}: {len(tests)} test inputs '
                  f'but {len(down_solution)} solutions')
            break
        for test, solution in zip(tests, down_solution):
            test['output'] = solution

        # 'name' is dropped from both sides before comparing.
        repo_challenge.pop('name', None)
        down_challenge.pop('name', None)

        if not deepcompare.compare(repo_challenge, down_challenge, strict=True):
            # Identify the offending task before dumping both versions.
            print(f'[{split}] mismatch in task {key!r}')
            pprint(repo_challenge)
            pprint(down_challenge)
            break

In [43]:
# Check both v1 splits. compare() prints only when it finds a difference,
# so an empty cell output means no mismatch was reported.
compare(repo_data_v1, down_data_v1, 'training')
compare(repo_data_v1, down_data_v1, 'evaluation')

In [45]:
# Check the v2 training split.
compare(repo_data_v2, down_data_v2, 'training')
# NOTE(review): the v2 evaluation check below is disabled and the reason is
# not recorded here — confirm, then re-enable it or document why it is skipped.
# compare(repo_data_v2, down_data_v2, 'evaluation')