<a href="https://www.kaggle.com/code/ryancardwell/uberorcav2-1?scriptVersionId=271380006" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# UberOrca v2 — Bi‑Hemispheric Hybrid ARC Solver
Kaggle‑friendly, dependency‑light pipeline with retrieval, tiny per‑task adaptation, DSL search, and robust submission writing.

In [1]:

import os, sys, json, time, math, hashlib, random, itertools
from pathlib import Path
from collections import deque, Counter
import numpy as np
try:
    import torch, torch.nn as nn, torch.nn.functional as F
    TORCH_OK=True
except Exception:
    TORCH_OK=False

# Seed the stdlib and NumPy generators so runs are repeatable.
random.seed(42)
np.random.seed(42)

# Global knobs shared by the solver stages. BEAM_WIDTH, BEAM_DEPTH,
# MAX_PERIOD and TIME_BUDGET_S are not read by any code in this notebook.
CFG = {
    "BEAM_WIDTH": 10,
    "BEAM_DEPTH": 2,
    "MAX_PERIOD": 8,
    "VETO_THRESH": 0.12,
    "FALLBACK_COLOR": 0,
    "IMAML_STEPS": 5,
    "IMAML_LR": 0.15,
    "IMAML_HIDDEN": 24,
    "RETR_TOPK": 3,
    "RETR_MIN_SIM": 0.35,
    "DSL_BRANCH": 6,
    "DSL_MAX_DEPTH": 2,
    "TIME_BUDGET_S": 36000,
}
print("[INFO] CFG loaded, TORCH_OK =", TORCH_OK)


[INFO] CFG loaded, TORCH_OK = True


In [2]:

# Directory holding on-disk artifacts; it is created as soon as this cell runs.
ROOT = Path("./artifacts_localhub")
ROOT.mkdir(parents=True, exist_ok=True)

# One JSON file per memory scope, e.g. "mid" -> ROOT / "mid_mem.json".
MEM = {scope: ROOT / f"{scope}_mem.json" for scope in ("short", "mid", "long")}
def _mem_load(path):
    try:
        with open(path,"r") as f: return json.load(f)
    except Exception: return {}
def _mem_save(path, obj):
    tmp=str(path)+".tmp"
    with open(tmp,"w") as f: json.dump(obj,f)
    os.replace(tmp, path)
def mem_get(scope, key, default=None):
    """Return the value stored under *key* in the *scope* memory file.

    *scope* must be a key of ``MEM``; *default* is returned when the key is
    not present in the loaded mapping.
    """
    return _mem_load(MEM[scope]).get(key, default)
def mem_put(scope, key, val):
    """Store *val* under *key* in the *scope* memory file.

    The whole file is loaded, updated in memory and written back, so each
    call rewrites the complete scope file.
    """
    store = _mem_load(MEM[scope])
    store[key] = val
    _mem_save(MEM[scope], store)


In [3]:

def clamp_color(x):
    """Coerce *x* to a colour index in ``0..9``.

    Values that cannot be converted with ``int()`` and integers outside
    ``0..9`` both map to ``0``. Only conversion errors are swallowed;
    ``KeyboardInterrupt`` / ``SystemExit`` propagate.
    """
    try:
        value = int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None/containers; ValueError: bad strings, NaN;
        # OverflowError: infinities.
        return 0
    return value if 0 <= value <= 9 else 0
def grid_size(G):
    """Return ``(height, width)`` of a grid, each at least 1.

    Height is the number of rows; width is the length of the longest
    non-empty row. An empty or ``None`` grid reports ``(1, 1)``.
    """
    # Treat None like an empty grid instead of crashing while iterating it.
    rows = G if G else []
    height = len(rows) if rows else 1
    width = max((len(row) for row in rows if row), default=1)
    return max(1, height), max(1, width)
def validate_grid(G):
    """Return a rectangular copy of *G* with every cell clamped to ``0..9``.

    The width is the one reported by ``grid_size``. Empty rows become rows
    of zeros, and rows shorter than that width are right-padded with zeros
    so the result is never ragged. An empty grid yields ``[[0]]``.
    """
    _, width = grid_size(G)
    rows = G if G else [[0] * width]
    cleaned = []
    for row in rows:
        cells = [clamp_color(x) for x in row] if row else []
        # Pad short (or empty) rows so every row has the same width.
        cells.extend([0] * (width - len(cells)))
        cleaned.append(cells)
    return cleaned
def force_shape(G, H, W, fill=0):
    """Crop or pad *G* to exactly ``H x W`` (each at least 1).

    Cells taken from *G* are clamped with ``clamp_color``. Missing cells
    and missing/empty rows are filled with *fill*, which is clamped once up
    front so that every cell of the result is a valid colour regardless of
    where the fill was applied. A ``None`` grid is treated as empty.
    """
    H, W = max(1, int(H)), max(1, int(W))
    fill = clamp_color(fill)
    rows = G if G else []
    out = []
    for r in range(H):
        src = rows[r] if r < len(rows) else None
        if src:
            n = len(src)
            out.append([clamp_color(src[c]) if c < n else fill for c in range(W)])
        else:
            out.append([fill] * W)
    return out
def grid_score(A, B):
    """Return the fraction of cells of *A* that *B* matches.

    When the two grids report different sizes, *B* is first cropped/padded
    (with zeros) to the size of *A*. The result lies in ``[0, 1]``.
    """
    rows, cols = grid_size(A)
    if grid_size(B) != (rows, cols):
        B = force_shape(B, rows, cols, fill=0)
    matches = 0
    for r in range(rows):
        for c in range(cols):
            if A[r][c] == B[r][c]:
                matches += 1
    return matches / max(1, rows * cols)


In [4]:

def emb_grid(G):
    """Embed a grid as a 14-dimensional feature vector.

    The first 10 entries are the normalised colour histogram (colours are
    clamped to ``0..9``); the last 4 are height/30, width/30, mean/9 and
    variance/81 of the raw cell values.
    """
    height, width = grid_size(G)
    colours = [clamp_color(v) for row in G for v in row]
    histogram = (np.bincount(colours, minlength=10) / max(1, len(colours))).astype(float)
    cells = np.array(G, dtype=float)
    if cells.size:
        mean, variance = cells.mean(), cells.var()
    else:
        mean, variance = 0.0, 0.0
    moments = np.array([height / 30.0, width / 30.0, mean / 9.0, variance / 81.0], dtype=float)
    return np.hstack([histogram, moments])
def cosine(a, b):
    """Return the cosine similarity of two vectors as a Python float.

    If either vector has zero norm the similarity is defined as ``0.0``.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a != 0 and norm_b != 0:
        return float(np.dot(a, b) / (norm_a * norm_b))
    return 0.0
def build_retr_db(TRAIN):
    """Build (or load from the "mid" memory) the retrieval database.

    Each entry is ``(input_embedding, output_embedding, pair)`` for one
    training pair of *TRAIN*, with embeddings as NumPy arrays.

    The database is cached on disk under the key ``"retr_db"``. A
    fingerprint of the training pairs is stored next to it under
    ``"retr_db_sig"``; the cache is only reused when the fingerprint of the
    *TRAIN* passed in matches, so a cache written for a different dataset
    (or one written before fingerprints existed) is rebuilt, not reused.
    """
    # Fingerprint only what the database is derived from: the train pairs.
    payload = json.dumps(
        [[tid, task.get("train", [])] for tid, task in TRAIN.items()],
        sort_keys=True,
    )
    sig = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    if mem_get("mid", "retr_db_sig", None) == sig:
        cached = mem_get("mid", "retr_db", None)
        if cached is not None:
            return [(np.array(ei), np.array(eo), p) for (ei, eo, p) in cached]

    db = []
    for task in TRAIN.values():
        for pair in task.get("train", []):
            # Embeddings are stored as plain lists so the db is JSON-serialisable.
            ei = emb_grid(pair["input"]).tolist()
            eo = emb_grid(pair["output"]).tolist()
            db.append((ei, eo, pair))
    mem_put("mid", "retr_db", db)
    mem_put("mid", "retr_db_sig", sig)
    return [(np.array(ei), np.array(eo), p) for (ei, eo, p) in db]
def retr_query(db, test_in, topk=3, min_sim=0.35):
    """Return up to *topk* stored pairs whose input resembles *test_in*.

    Similarity is the cosine between the embedding of *test_in* and each
    stored input embedding; entries scoring below *min_sim* are dropped and
    the rest are returned best-first.
    """
    query = emb_grid(test_in)
    scored = [(cosine(query, in_emb), pair) for in_emb, _out_emb, pair in db]
    kept = [item for item in scored if item[0] >= min_sim]
    ranked = sorted(kept, key=lambda item: item[0], reverse=True)
    return [pair for _sim, pair in ranked[:topk]]


In [5]:

def flip_h(G):
    """Mirror each row left-to-right; returns a new list of new row lists."""
    return [[*reversed(row)] for row in G]
def flip_v(G):
    """Reverse the row order; the row objects themselves are reused, not copied."""
    return [*reversed(G)]
def rotate90(G):
    """Rotate the grid a quarter turn clockwise.

    Output row ``c`` is input column ``c`` read from the bottom row up.
    """
    rows, cols = grid_size(G)
    return [[G[r][c] for r in reversed(range(rows))] for c in range(cols)]
def estimate_period(G, axis=0, max_period=8):
    """Return the smallest period along one axis, or ``None``.

    With ``axis == 0`` the grid is tested for repetition down the rows
    (row ``r`` equals row ``r % p``); any other *axis* tests repetition
    across the columns. Periods from 1 up to ``min(extent, max_period)``
    are tried in increasing order.
    """
    rows, cols = grid_size(G)
    vertical = axis == 0
    limit = min(rows if vertical else cols, max_period)
    for period in range(1, limit + 1):
        if vertical:
            repeats = all(
                G[r][c] == G[r % period][c] for r in range(rows) for c in range(cols)
            )
        else:
            repeats = all(
                G[r][c] == G[r][c % period] for r in range(rows) for c in range(cols)
            )
        if repeats:
            return period
    return None
def periodic_tile(G, Ht=None, Wt=None, max_period=8, hint=None):
    """Tile the top-left motif of *G* over an ``Ht x Wt`` canvas.

    The motif size defaults to the periods found by ``estimate_period``
    (falling back to ``min(extent, 4)`` when none is found). A *hint* dict
    may override it through its ``"h"`` / ``"w"`` entries, which are clipped
    to the grid size. A falsy *Ht* / *Wt* means "same as the input grid".
    """
    src_h, src_w = grid_size(G)
    out_h = Ht or src_h
    out_w = Wt or src_w
    period_v = estimate_period(G, 0, max_period) or min(src_h, 4)
    period_h = estimate_period(G, 1, max_period) or min(src_w, 4)
    if hint:
        period_v = max(1, min(int(hint.get("h", period_v)), src_h))
        period_h = max(1, min(int(hint.get("w", period_h)), src_w))
    motif = [row[:period_h] for row in G[:period_v]]
    if not motif or not motif[0]:
        motif = [[0]]
    return [
        [motif[r % period_v][c % period_h] for c in range(out_w)]
        for r in range(out_h)
    ]
# Ordered table of (name, grid -> grid) primitives used by the DSL search.
# The order matters: the search only considers the first `branch` entries.
DSL_OPS = [
    ("id", lambda grid: grid),
    ("flip_h", flip_h),
    ("flip_v", flip_v),
    ("rot90", rotate90),
    # Re-tile the grid at its own size using the detected period.
    ("tile", lambda grid: periodic_tile(grid, *grid_size(grid), max_period=8)),
    # Same, but with the motif size forced to 2x2 / 3x3 (clipped to the grid).
    (
        "tile22",
        lambda grid: periodic_tile(
            grid, *grid_size(grid), max_period=8, hint={"h": 2, "w": 2}
        ),
    ),
    (
        "tile33",
        lambda grid: periodic_tile(
            grid, *grid_size(grid), max_period=8, hint={"h": 3, "w": 3}
        ),
    ),
]
def dsl_enumerate(initial, target_like, depth=2, branch=6):
    """Beam-search compositions of ``DSL_OPS`` that best match *target_like*.

    Starting from the validated *initial* grid, each of *depth* rounds
    applies the first *branch* ops to every grid in the beam, scores the
    results with ``grid_score`` against *target_like*, and keeps the best
    ``max(1, branch)`` candidates.

    Returns the best ``(score, grid, program)`` tuple of the final beam,
    where *program* is the list of op names applied. The seed entry carries
    score ``0.0`` and is what is returned when ``depth`` is 0 or no op ever
    succeeds.
    """
    beam = [(0.0, validate_grid(initial), [])]
    for _ in range(depth):
        candidates = []
        for _score, grid, prog in beam:
            for name, op in DSL_OPS[:branch]:
                try:
                    new_grid = validate_grid(op(grid))
                    score = grid_score(new_grid, target_like)
                except Exception:
                    # An op that cannot handle this grid is simply skipped;
                    # KeyboardInterrupt/SystemExit are allowed to propagate.
                    continue
                candidates.append((score, new_grid, prog + [name]))
        if not candidates:
            # Every op failed: keep the previous beam rather than emptying it
            # (max() on an empty beam would raise ValueError).
            break
        candidates.sort(reverse=True, key=lambda item: item[0])
        beam = candidates[:max(1, branch)]
    return max(beam, key=lambda item: item[0])


In [6]:

def imaml_adapt_np(train_pairs, test_in):
    """Recolour *test_in* using the dominant colours of the training pairs.

    The most frequent colour over all training inputs is replaced, in a copy
    of *test_in*, by the most frequent colour over all training outputs.
    *test_in* itself is returned unchanged when there are no pairs or when
    the inputs or outputs contain no cells.
    """
    from collections import Counter

    if not train_pairs:
        return test_in
    grid = np.array(test_in, dtype=int)
    seen_in = [v for pair in train_pairs for row in pair["input"] for v in row]
    seen_out = [v for pair in train_pairs for row in pair["output"] for v in row]
    if not (seen_in and seen_out):
        return test_in
    (src_colour, _), = Counter(seen_in).most_common(1)
    (dst_colour, _), = Counter(seen_out).most_common(1)
    grid[grid == src_colour] = dst_colour
    return grid.tolist()

if TORCH_OK:
    class MicroHead(nn.Module):
        """Per-pixel colour classifier: two 1x1 convolutions with a ReLU between.

        Input and output both have 10 channels (one per colour); *h* is the
        hidden channel count. Being 1x1, it accepts any spatial size.
        """

        def __init__(self, h=24):
            super().__init__()
            self.conv1 = nn.Conv2d(10, h, 1)
            self.conv2 = nn.Conv2d(h, 10, 1)

        def forward(self, x):
            return self.conv2(F.relu(self.conv1(x)))

    def one_hot_grid(G):
        """Return a ``[10, H, W]`` one-hot tensor of the grid's colours.

        The grid is first normalised with ``force_shape`` so out-of-range
        colours are clamped and short rows are zero-filled, which keeps the
        channel index valid for every cell.
        """
        H, W = grid_size(G)
        idx = torch.tensor(force_shape(G, H, W, fill=0), dtype=torch.long)
        # one_hot yields [H, W, 10]; move the colour axis to the front.
        return F.one_hot(idx, num_classes=10).permute(2, 0, 1).to(torch.get_default_dtype())

    def imaml_adapt_torch(train_pairs, test_in, steps=5, lr=0.15, hidden=24):
        """Fit a fresh ``MicroHead`` on the pairs and apply it to *test_in*.

        Each pair's output is cropped/padded to its input's size. Pairs are
        kept as separate batches because they may differ in size (stacking
        them into one tensor would fail); the loss is the cross-entropy
        averaged over all cells of all pairs, which equals the batched mean
        when every pair has the same size.

        Returns *test_in* unchanged when there are no pairs, otherwise the
        per-cell argmax prediction as a list of lists.

        NOTE: the head's initial weights come from torch's global RNG, which
        this notebook does not seed, so results may vary between runs.
        """
        if not train_pairs:
            return test_in
        head = MicroHead(h=hidden)
        opt = torch.optim.SGD(head.parameters(), lr=lr)
        head.train()

        batches = []
        n_cells = 0
        for p in train_pairs:
            A, B = p["input"], p["output"]
            Ha, Wa = grid_size(A)
            x = one_hot_grid(A).unsqueeze(0)  # [1, 10, Ha, Wa]
            # force_shape also clamps colours, keeping targets inside 0..9.
            y = torch.tensor(force_shape(B, Ha, Wa, fill=0), dtype=torch.long).unsqueeze(0)
            batches.append((x, y))
            n_cells += Ha * Wa

        for _ in range(max(1, steps)):
            opt.zero_grad()
            total = sum(F.cross_entropy(head(x), y, reduction="sum") for x, y in batches)
            loss = total / n_cells
            loss.backward()
            opt.step()

        head.eval()
        with torch.no_grad():
            out = head(one_hot_grid(test_in).unsqueeze(0)).argmax(1)[0].tolist()
        return out
else:
    def imaml_adapt_torch(train_pairs, test_in, **kw):
        """Torch-free stand-in: ignores *kw* and defers to ``imaml_adapt_np``."""
        return imaml_adapt_np(train_pairs, test_in)


In [7]:

def smart_veto(G, thresh=0.12, fallback=0):
    """Replace a low-variety grid with a blank grid of the same size.

    A quality score is computed as ``0.75 * (1 - dominance) + 0.25 * variety``
    where *dominance* is the share of the most common colour in ``0..9`` and
    *variety* is the number of distinct cell values divided by 10. Grids
    scoring below *thresh* are replaced by a grid filled with *fallback*;
    otherwise *G* is returned as-is. A single-colour grid scores 0.025.
    """
    H, W = grid_size(G)
    cells = [v for row in G for v in row]
    top_count = max((cells.count(colour) for colour in range(10)), default=0)
    dominance = top_count / max(1, len(cells))
    variety = len(set(cells)) / 10.0
    quality = (1.0 - dominance) * 0.75 + 0.25 * variety
    if quality < thresh:
        return [[fallback] * W for _ in range(H)]
    return G

def left_induction(train_pairs, test_in, cfg=CFG):
    """Predict an output for *test_in* by per-task adaptation.

    Tries ``imaml_adapt_torch`` with the ``IMAML_*`` settings from *cfg*;
    if that raises for any reason (including a missing cfg key), falls back
    to ``imaml_adapt_np``.
    """
    try:
        knobs = dict(
            steps=cfg["IMAML_STEPS"],
            lr=cfg["IMAML_LR"],
            hidden=cfg["IMAML_HIDDEN"],
        )
        return imaml_adapt_torch(train_pairs, test_in, **knobs)
    except Exception:
        return imaml_adapt_np(train_pairs, test_in)

def right_search(test_in, like, cfg=CFG):
    """Run the DSL beam search from *test_in* towards *like*.

    Depth and branching come from ``cfg["DSL_MAX_DEPTH"]`` and
    ``cfg["DSL_BRANCH"]``. Returns ``(grid, program, score)`` — note the
    order differs from the ``(score, grid, program)`` of ``dsl_enumerate``.
    """
    best = dsl_enumerate(
        initial=test_in,
        target_like=like,
        depth=cfg["DSL_MAX_DEPTH"],
        branch=cfg["DSL_BRANCH"],
    )
    score, grid, prog = best
    return grid, prog, score

def hybrid_solver(task, retr_db=None, cfg=CFG):
    """Produce one output grid for the first test input of *task*.

    Steps: gather the task's train pairs (plus retrieved pairs when
    *retr_db* is given), get one candidate from ``left_induction`` and one
    from ``right_search``, keep the better-scoring one, force it to the test
    input's shape and pass it through ``smart_veto``.

    NOTE(review): the reference grid ``like`` is built from the test input
    itself, so both candidates are ranked by how closely they resemble the
    input, and only ``task["test"][0]`` is ever solved.
    """
    tests = task.get("test") or [{"input": [[0]]}]
    test_in = tests[0]["input"]
    like = force_shape(test_in, *grid_size(test_in))

    pairs = list(task.get("train", []))
    if retr_db:
        pairs += retr_query(
            retr_db, test_in, topk=cfg["RETR_TOPK"], min_sim=cfg["RETR_MIN_SIM"]
        )

    left = left_induction(pairs, test_in, cfg)
    right, _prog, _score = right_search(test_in, like, cfg)

    left_fit = grid_score(left, like)
    right_fit = grid_score(right, like)
    chosen = left if left_fit >= right_fit else right  # ties go to the left candidate

    chosen = force_shape(validate_grid(chosen), *grid_size(like))
    return smart_veto(chosen, thresh=cfg["VETO_THRESH"], fallback=cfg["FALLBACK_COLOR"])


In [8]:

def load_arc_dict(path):
    """Load and return the JSON document stored at *path*.

    The file is decoded as UTF-8 (the JSON interchange encoding) rather than
    with the platform's locale default. Raises ``OSError`` if the file cannot
    be opened and ``ValueError`` if it is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def normalize_tasks(raw):
    """Reduce every task to just its ``"train"`` and ``"test"`` lists.

    Missing entries default to an empty list; any other keys are dropped.
    The lists are the same objects as in *raw*, not copies.
    """
    return {
        tid: {"train": obj.get("train", []), "test": obj.get("test", [])}
        for tid, obj in raw.items()
    }
def resolve_paths(root="/kaggle/input/arc-prize-2025"):
    """Map short dataset names to the JSON file paths under *root*.

    Nothing is checked on disk; the paths are only constructed.
    """
    base = Path(root)
    filenames = (
        ("train_ch", "arc-agi_training_challenges.json"),
        ("train_sol", "arc-agi_training_solutions.json"),
        ("eval_ch", "arc-agi_evaluation_challenges.json"),
        ("eval_sol", "arc-agi_evaluation_solutions.json"),
        ("test_ch", "arc-agi_test_challenges.json"),
        ("sample", "sample_submission.json"),
    )
    return {key: base / fname for key, fname in filenames}
def load_all(root="/kaggle/input/arc-prize-2025"):
    """Load the training, evaluation and test challenge files under *root*.

    Returns ``(TRAIN, EVAL, TEST)``, each normalised by ``normalize_tasks``.
    The solution files are not read.
    """
    paths = resolve_paths(root)
    TRAIN, EVAL, TEST = (
        normalize_tasks(load_arc_dict(paths[key]))
        for key in ("train_ch", "eval_ch", "test_ch")
    )
    return TRAIN, EVAL, TEST


In [9]:

def canonical_ids(D):
    """Return the keys of *D* as a sorted list."""
    ids = list(D.keys())
    ids.sort()
    return ids
def write_submission_list(sub_list, path="submission.json"):
    """Write *sub_list* as compact JSON to *path* and return *path*.

    The data is written to ``<path>.tmp``, flushed and fsynced, then moved
    over *path* with ``os.replace`` so the target is never half-written.
    *path* may be a ``str`` or an ``os.PathLike`` such as ``pathlib.Path``.
    If anything fails, the temporary file is removed and the error re-raised.
    """
    # os.fspath lets Path objects work; plain `path + ".tmp"` would raise.
    tmp = os.fspath(path) + ".tmp"
    try:
        # ensure_ascii=False may emit non-ASCII text, so pin the encoding.
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(sub_list, f, ensure_ascii=False, separators=(",", ":"))
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a stale partial temp file behind.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
    return path
def build_and_save_submission(TEST, solver_fn, path="submission.json"):
    """Solve every task in *TEST* and write the predictions to *path*.

    Tasks are processed in sorted-id order. ``solver_fn(task)`` must return
    a grid; its result is normalised with ``validate_grid``/``force_shape``.
    If the solver or the normalisation raises, the failure is reported on
    stderr and an all-zero grid the size of the task's first test input
    (or 1x1 if that cannot be determined) is used instead, so one bad task
    never aborts the run.

    Returns ``(path, order, sub_list)`` where *sub_list* holds one
    ``{"output": grid}`` dict per task, aligned with *order*.

    NOTE(review): the file is a flat list of ``{"output": grid}`` entries
    with no task ids — confirm this matches the format the scorer expects.
    """
    order = canonical_ids(TEST)
    preds = {}
    for i, tid in enumerate(order, 1):
        task = TEST[tid]
        try:
            out = solver_fn(task)
            grid = force_shape(validate_grid(out), *grid_size(out))
        except Exception as exc:
            print(f"[WARN] solver failed on {tid}: {exc!r}", file=sys.stderr)
            try:
                like = (task.get("test") or [{"input": [[0]]}])[0]["input"]
                H, W = grid_size(like)
            except Exception:
                # Malformed task: fall back to the smallest valid grid.
                H, W = 1, 1
            grid = [[0] * W for _ in range(H)]
        preds[tid] = grid
        if i % 24 == 0:
            print(f"[RUN] {i}/{len(order)}", file=sys.stderr)
    sub_list = [{"output": preds[tid]} for tid in order]
    path = write_submission_list(sub_list, path)
    print(f"[SUBMISSION] wrote {path} ({len(sub_list)} items)")
    return path, order, sub_list


In [10]:

def run_e2e(root="/kaggle/input/arc-prize-2025", out_path="submission.json"):
    """Run the full pipeline: load data, solve the test tasks, write and check.

    Builds the retrieval database from the training tasks, solves every test
    task with ``hybrid_solver``, writes the submission to *out_path* and then
    verifies that each written grid is non-empty with all cells in ``0..9``.
    The check stops at the first invalid grid.

    Returns ``(path, ok)`` where *ok* is the result of that check.
    """
    TRAIN, EVAL, TEST = load_all(root)
    retr_db = build_retr_db(TRAIN)

    def solver_with_retr(task):
        # Bind the retrieval database so the writer can call solver(task).
        return hybrid_solver(task, retr_db=retr_db)

    path, order, sub = build_and_save_submission(TEST, solver_with_retr, path=out_path)

    ok = True
    for tid, entry in zip(order, sub):
        grid = entry["output"]
        H, W = grid_size(grid)
        cells = [v for row in grid for v in row]
        valid = H > 0 and W > 0 and all(0 <= int(v) <= 9 for v in cells)
        if not valid:
            ok = False
            print("[CHECK] invalid grid for", tid)
            break
    print("[CHECK] submission validity:", ok)
    return path, ok

# Banner shown once this cell has run; it only tells the user how to start
# the pipeline and does not run anything itself.
print("[INFO] Cells ready. Call run_e2e('/kaggle/input/arc-prize-2025','submission.json')")


[INFO] Cells ready. Call run_e2e('/kaggle/input/arc-prize-2025','submission.json')
