In [7]:
import json
from collections import defaultdict

# JSON Lines results file; each record is read for its "difficulty",
# "length" and "judge" fields.
path = "mamba-codestral-7b__ctx131072__rep1.jsonl"

# (difficulty, length) -> number of records seen / number judged correct.
counts = defaultdict(int)
correct = defaultdict(int)

with open(path, "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. an extra newline at the end of the file);
        # json.loads would raise JSONDecodeError on them.
        if not line.strip():
            continue
        x = json.loads(line)
        key = (x["difficulty"], x["length"])
        counts[key] += 1
        # Identity check on purpose: only a JSON `true` counts as correct,
        # not truthy values such as 1 or a non-empty string.
        if x["judge"] is True:
            correct[key] += 1


def _accuracy(n_correct, n_total):
    """Return n_correct / n_total, or NaN when n_total is zero."""
    return n_correct / n_total if n_total else float("nan")


# print table (only these fixed buckets are listed; records with any other
# difficulty/length value still contribute to the overall figure below)
for diff in ["easy", "hard"]:
    for length in ["short", "medium", "long"]:
        k = (diff, length)
        acc = _accuracy(correct[k], counts[k])
        print(f"difficulty={diff:4s} length={length:6s} "
              f"acc={acc:.4f} ({correct[k]}/{counts[k]})")

# overall -- goes through the same NaN guard as the per-bucket rows so an
# empty results file prints "nan (0/0)" instead of raising ZeroDivisionError
total = sum(counts.values())
total_correct = sum(correct.values())
print(f"\nOVERALL acc={_accuracy(total_correct, total):.4f} ({total_correct}/{total})")


difficulty=easy length=short  acc=0.2154 (14/65)
difficulty=easy length=medium acc=0.2115 (22/104)
difficulty=easy length=long   acc=0.1897 (11/58)
difficulty=hard length=short  acc=0.2652 (35/132)
difficulty=hard length=medium acc=0.2349 (35/149)
difficulty=hard length=long   acc=0.2500 (19/76)

OVERALL acc=0.2329 (136/584)


In [8]:
# Parsed records in file order, and how many times each "_id" value occurs.
# Uses `path`, `json` and `defaultdict` from the cell that defines them.
items = []
id_counts = defaultdict(int)

# First pass: read and count _id occurrences
with open(path, "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines; json.loads would raise JSONDecodeError on them.
        if not line.strip():
            continue
        obj = json.loads(line)
        items.append(obj)
        if "_id" in obj:
            id_counts[obj["_id"]] += 1

# Second pass: print the _id of every record whose _id occurs more than once.
# This prints one line per occurrence, so a repeated id appears as many times
# as it occurs in the file. Records with a missing or falsy _id are skipped.
for obj in items:
    _id = obj.get("_id")
    if _id and id_counts[_id] > 1:
        print(_id)

66fcffd9bb02136c067c94c5
66f36490821e116aacb2cc22
66f94f9ebb02136c067c4fde
6723a1ccbb02136c067d70b3
66ebed525a08c7b9b35e1cb4
670aac92bb02136c067d218a
671b3cabbb02136c067d5252
66fa208bbb02136c067c5fc1
66ec0c4c821e116aacb1994a
66fcffd9bb02136c067c94c5
66f36490821e116aacb2cc22
66f94f9ebb02136c067c4fde
6723a1ccbb02136c067d70b3
66ebed525a08c7b9b35e1cb4
670aac92bb02136c067d218a
66fcffd9bb02136c067c94c5
66f36490821e116aacb2cc22
66f94f9ebb02136c067c4fde
6723a1ccbb02136c067d70b3
66ebed525a08c7b9b35e1cb4
670aac92bb02136c067d218a
671b3cabbb02136c067d5252
66fa208bbb02136c067c5fc1
66ec0c4c821e116aacb1994a
66f920d8bb02136c067c4b81
66ec56dd821e116aacb1cd0e
6725dc28bb02136c067d8555
66ebd55f5a08c7b9b35e0698
670cf1c0bb02136c067d26e5
66ec3fa7821e116aacb1c75d
66ed875e821e116aacb2023e
66f8c9febb02136c067c4511
67239fd9bb02136c067d6ff7
66fcffd9bb02136c067c94c5
66f36490821e116aacb2cc22
66f94f9ebb02136c067c4fde
6723a1ccbb02136c067d70b3
66ebed525a08c7b9b35e1cb4
670aac92bb02136c067d218a
671b3cabbb02136c067d5252
