# Prompt Engineering Bingo Randomizer

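This notebook creates multiple randomized versions of a DOCX bingo sheet by shuffling the cell texts of its table. The set of texts stays the same; only their positions inside the table change. Note that each shuffled cell is rewritten as plain text (via python-docx's `cell.text`), so formatting inside individual cells may not be carried over. The notebook also writes CSV summaries so you can inspect the randomized layouts.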

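Place your original DOCX (e.g. "Prompt Engineering Bingo.docx") in the repository root. The notebook looks for `Prompt Engineering Bingo.docx`, then `Prompt.Engineering.Bingo.docx`, and finally falls back to the included `test.docx`. If none of these files exist, it stops with a `FileNotFoundError`.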

## Setup: imports, configuration, and output directory

- Configure input file, output directory, number of versions, and random seed.
- The seed ensures reproducibility.
- If your file is named differently, adjust `candidate_paths` or set `input_path` directly.

In [1]:
from pathlib import Path
import random
import pandas as pd
from docx import Document

# Configure paths and parameters
root = Path('.')
candidate_paths = [
    root / 'Prompt Engineering Bingo.docx',
    root / 'Prompt.Engineering.Bingo.docx',
    root / 'test.docx',
]
input_path = next((p for p in candidate_paths if p.exists()), None)
if input_path is None:
    raise FileNotFoundError(
        "No input DOCX found. Place 'Prompt Engineering Bingo.docx' in the repository root or use 'test.docx'."
    )

outdir = Path('randomized_bingo')
outdir.mkdir(exist_ok=True)

n_versions = 20  # number of randomized outputs
seed = 42        # change this for different reproducible shuffles
all_tables = False  # set True to randomize all tables; False = only largest table

# Save a small config snapshot
pd.DataFrame([
    {
        'input_path': str(input_path),
        'outdir': str(outdir),
        'n_versions': n_versions,
        'seed': seed,
        'all_tables': all_tables,
    }
]).to_csv(outdir / 'config.csv', index=False)

## Helper functions

- Extract texts from a table.
- Assign texts back to a table.
- Select the largest table in a document.
- Enumerate (row, col) pairs for cells for CSV summaries.

In [2]:
def table_cell_texts(table):
    return [cell.text for row in table.rows for cell in row.cells]

def assign_texts_to_table(table, texts):
    cells = [cell for row in table.rows for cell in row.cells]
    if len(cells) != len(texts):
        raise ValueError(
            f"Number of texts ({len(texts)}) does not match number of cells ({len(cells)})."
        )
    for cell, txt in zip(cells, texts):
        cell.text = txt or ''

def index_of_largest_table(doc):
    if not doc.tables:
        return None
    sizes = [sum(1 for _ in (c for r in t.rows for c in r.cells)) for t in doc.tables]
    return max(range(len(sizes)), key=lambda i: sizes[i])

def row_col_pairs(table):
    pairs = []
    for r_idx, row in enumerate(table.rows):
        for c_idx, _cell in enumerate(row.cells):
            pairs.append((r_idx, c_idx))
    return pairs

## Inspect the input document and save a summary

- Count tables and their sizes (number of cells).
- Identify the largest table and save the original texts for reference.
- Results are saved as CSV in the output directory for inspection.

In [3]:
doc = Document(str(input_path))
n_tables = len(doc.tables)

sizes = [sum(1 for _ in (c for r in t.rows for c in r.cells)) for t in doc.tables] if n_tables > 0 else []
summary_df = pd.DataFrame({
    'table_index': list(range(n_tables)),
    'n_cells': sizes,
})
summary_df.to_csv(outdir / 'tables_summary.csv', index=False)

if n_tables > 0:
    # Save original texts from the largest table for auditing
    largest_idx = index_of_largest_table(doc)
    largest_table = doc.tables[largest_idx]
    orig_pairs = row_col_pairs(largest_table)
    orig_texts = table_cell_texts(largest_table)
    orig_df = pd.DataFrame({
        'row': [p[0] for p in orig_pairs],
        'col': [p[1] for p in orig_pairs],
        'text': orig_texts,
    })
    orig_df.to_csv(outdir / 'original_texts.csv', index=False)

    # Determine which table indices to randomize
    table_indices = list(range(n_tables)) if all_tables else [largest_idx]
else:
    # No tables found: write an empty originals file and do not randomize
    pd.DataFrame(columns=['row', 'col', 'text']).to_csv(outdir / 'original_texts.csv', index=False)
    table_indices = []

pd.DataFrame({'randomized_table_indices': table_indices}).to_csv(outdir / 'randomized_tables.csv', index=False)

## Randomize and export multiple versions

- For each version, shuffle the texts within the selected table(s).
- Save each randomized DOCX to the output directory.
- Save a CSV per version describing the shuffled layout (row, col, text).

In [4]:
rng = random.Random(seed)
base = input_path.stem
saved_files = []

for i in range(1, n_versions + 1):
    d = Document(str(input_path))
    per_table_layouts = []
    for idx in table_indices:
        t = d.tables[idx]
        texts = table_cell_texts(t)
        shuffled = texts[:]
        rng.shuffle(shuffled)
        assign_texts_to_table(t, shuffled)

        pairs = row_col_pairs(t)
        layout_df = pd.DataFrame({
            'version': i,
            'table_index': idx,
            'row': [p[0] for p in pairs],
            'col': [p[1] for p in pairs],
            'text': shuffled,
        })
        layout_df.to_csv(outdir / f'layout_{i:02d}_table_{idx}.csv', index=False)
        per_table_layouts.append(layout_df)

    out_path = outdir / f'{base}_randomized_{i:02d}.docx'
    d.save(out_path)
    saved_files.append(str(out_path))

pd.DataFrame({'file': saved_files}).to_csv(outdir / 'randomized_files.csv', index=False)

## Notes

- To use a different input file, change `candidate_paths` or set `input_path` directly.
- To randomize all tables, set `all_tables = True` in the setup cell.
- To get different random layouts, change the `seed` value or set it to `None` for non-deterministic shuffles.
- Outputs written:
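  - DOCX files in `randomized_bingo/` (all CSV files listed below are written to the same directory)
  - `config.csv` with the settings used for the run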
  - `tables_summary.csv` with table sizes
  - `original_texts.csv` with the original texts of the largest table
  - `randomized_tables.csv` showing which tables were randomized
  - `layout_XX_table_T.csv` for each version/table with row/col/text
  - `randomized_files.csv` listing all created DOCX files