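# RotateHDR

Build pages of (subject, HDR) pairs in which no subject repeats within a page and HDR usage is balanced, then export the pages to `rotateHDR.csv` for MTurk.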

In [14]:
from collections import Counter
import random

def paginate_many_to_many(
    subjects,
    hdrs,
    chunk_size,
    repeats_per_pair=1,
    seed=None
):
    """
    Build pages of (subject, hdr) tuples from the full Cartesian pool.

    Every (subject, hdr) pair is placed on exactly one page (``repeats_per_pair``
    times in total). Pages are filled greedily, so the trailing pages may hold
    fewer than ``chunk_size`` items once the remaining pairs share subjects.

    Constraints:
      - No subject repeats within a page.
      - Minimize HDR repetition within each page; balance HDR usage globally.

    Args:
      subjects: list of subject ids (e.g., [1..20])
      hdrs: list of hdr names
      chunk_size: number of items per page (must be <= len(subjects))
      repeats_per_pair: how many times to include each (subject, hdr) pair in the total pool
      seed: int, for reproducibility. When given, a private generator seeded with
        this value is used, so the global ``random`` state is left untouched.
        When None, the module-level ``random`` generator is used.

    Returns:
      pages: list[list[tuple]] where each inner list is a page of (subject, hdr)

    Raises:
      AssertionError: if chunk_size < 1 or chunk_size > len(subjects).
    """
    assert chunk_size >= 1, "chunk_size must be >= 1"
    assert chunk_size <= len(subjects), "chunk_size cannot exceed #subjects (no subject repeat per page)"

    # A private Random(seed) produces the same sequence as random.seed(seed)
    # followed by module-level calls, but does not clobber the caller's RNG.
    rng = random.Random(seed) if seed is not None else random

    # 1) Build the many-to-many pool (Cartesian product) with optional repeats.
    pool = [
        (s, h)
        for _ in range(repeats_per_pair)
        for s in subjects
        for h in hdrs
    ]

    # Shuffle to avoid bias
    rng.shuffle(pool)

    pages = []
    global_hdr_counts = Counter()

    # 2) Greedily fill pages under constraints
    while pool:
        page = []
        page_subjects = set()
        page_hdr_counts = Counter()

        # Fill up to chunk_size items
        while pool and len(page) < chunk_size:
            # Score every pair whose subject is not already on the page:
            #  - Prefer HDR not used in this page (lower page_hdr_counts)
            #  - Then lower global usage (global_hdr_counts)
            #  - Coin flip between equally scored candidates
            best_i = None
            best_key = None
            for i, (s, h) in enumerate(pool):
                if s in page_subjects:
                    continue
                key = (page_hdr_counts[h], global_hdr_counts[h])
                if best_key is None or key < best_key or (key == best_key and rng.random() < 0.5):
                    best_key = key
                    best_i = i

            if best_i is None:
                break  # every remaining pair's subject is already on this page

            # Place the chosen pair
            s, h = pool.pop(best_i)
            page.append((s, h))
            page_subjects.add(s)
            page_hdr_counts[h] += 1
            global_hdr_counts[h] += 1

        if not page:
            # Safety: nothing could be placed (only possible if the asserts above
            # were disabled), so stop instead of looping forever.
            break
        pages.append(page)

    return pages

# -----------------------
# Example usage:
if __name__ == "__main__":
    import pandas as pd

    # Single source of truth for the page size: used both to build the pages
    # and to decide whether the trailing pages can be merged.
    CHUNK_SIZE = 10

    sj_id = list(range(1, 21))
    hdr_set = "064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K".split("#")

    pages = paginate_many_to_many(
        subjects=sj_id,
        hdrs=hdr_set,
        chunk_size=CHUNK_SIZE,  # items per page
        repeats_per_pair=1,     # each (subject, hdr) appears once in the full pool
        seed=42
    )

    # Merge the last two pages if their combined size still fits in one page.
    # Guarded so that a run producing fewer than two pages does not raise IndexError.
    # NOTE(review): the two pages were built independently, so the merged page
    # may contain the same subject twice — confirm this is acceptable.
    if len(pages) >= 2 and len(pages[-2]) + len(pages[-1]) <= CHUNK_SIZE:
        last_page = pages.pop()
        pages[-1].extend(last_page)

    # Print a compact summary
    for pi, page in enumerate(pages, 1):
        hdrs_in_page = [h for _, h in page]
        print(f"Page {pi} (n={len(page)}):", page)
        print("  HDR counts in page:", dict(Counter(hdrs_in_page)))
        print("-" * 60)

    # Save to CSV for MTurk: one row per page, with the page's subjects and
    # HDRs each joined by '#' in matching order.
    out = {'sj_name': [], 'hdr_name': []}
    for page in pages:
        out['sj_name'].append("#".join(f"pair{s}" for s, _ in page))
        out['hdr_name'].append("#".join(h for _, h in page))
    pd.DataFrame(out).to_csv('./rotateHDR.csv', index=False, columns=['sj_name', 'hdr_name'])

Page 1 (n=10): [(14, '125_hdrmaps_com_free_2K'), (16, '117_hdrmaps_com_free_2K'), (6, '064_hdrmaps_com_free_2K'), (5, '117_hdrmaps_com_free_2K'), (3, '125_hdrmaps_com_free_2K'), (18, '064_hdrmaps_com_free_2K'), (1, '125_hdrmaps_com_free_2K'), (19, '117_hdrmaps_com_free_2K'), (20, '064_hdrmaps_com_free_2K'), (15, '125_hdrmaps_com_free_2K')]
  HDR counts in page: {'125_hdrmaps_com_free_2K': 4, '117_hdrmaps_com_free_2K': 3, '064_hdrmaps_com_free_2K': 3}
------------------------------------------------------------
Page 2 (n=10): [(6, '117_hdrmaps_com_free_2K'), (3, '064_hdrmaps_com_free_2K'), (12, '125_hdrmaps_com_free_2K'), (10, '064_hdrmaps_com_free_2K'), (1, '117_hdrmaps_com_free_2K'), (16, '125_hdrmaps_com_free_2K'), (2, '117_hdrmaps_com_free_2K'), (15, '064_hdrmaps_com_free_2K'), (5, '125_hdrmaps_com_free_2K'), (18, '117_hdrmaps_com_free_2K')]
  HDR counts in page: {'117_hdrmaps_com_free_2K': 4, '064_hdrmaps_com_free_2K': 3, '125_hdrmaps_com_free_2K': 3}
------------------------------

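# Sanity check

Reload `rotateHDR.csv` and verify that no subject is paired with the same HDR more than once across all pages.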

In [18]:
import pandas as pd
# Reload the exported pages and collect, per subject, every HDR assigned to it.
df = pd.read_csv('./rotateHDR.csv')
sj_dict = {}
for row_idx, record in df.iterrows():
    print(row_idx, record['sj_name'], record['hdr_name'])
    subject_names = record['sj_name'].split('#')
    hdr_names = record['hdr_name'].split('#')
    # The two '#'-joined columns are positionally aligned: entry k of one
    # belongs with entry k of the other.
    for pos, subject in enumerate(subject_names):
        assigned_hdr = hdr_names[pos]
        sj_dict.setdefault(subject, []).append(assigned_hdr)

# A subject must not be listed with the same HDR more than once.
for subject, assigned in sj_dict.items():
    print(subject, assigned)
    has_no_repeats = len(set(assigned)) == len(assigned)
    assert has_no_repeats, f"Subject {subject} has repeated HDRs: {assigned}"

0 pair14#pair16#pair6#pair5#pair3#pair18#pair1#pair19#pair20#pair15 125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K
1 pair6#pair3#pair12#pair10#pair1#pair16#pair2#pair15#pair5#pair18 117_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K
2 pair5#pair3#pair10#pair14#pair13#pair11#pair7#pair12#pair8#pair17 064_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K#117_hdrmaps_com_free_2K#125_hdrmaps_com_free_2K#064_hdrmaps_com_free_2K
3 pair18#pair15#pair1#pair9#pair20#pair4#pair14#pair17#pair2#pair10 125_hdrma