In [1]:
# ---               ---
# --- Repo settings ---
# ---               ---
REPO_URL = "https://github.com/cecat/breakout-scheduler.git"
BRANCH   = "main"
REPO_DIR = "/content/breakout-scheduler"

# --- Inputs: public Google Sheets (CSV export) ---
# Working Groups:
WG_SHEET_URL  = "https://docs.google.com/spreadsheets/d/1TkcC0WdcQzhvJSfLllqxNTuagQfbIE4tvfXvoVRlNbo/edit?usp=sharing"
# BOFs:
BOF_SHEET_URL = "https://docs.google.com/spreadsheets/d/1zZJMWT8BlVvDzvyYHE16BYDXjtB-8DaNWh1u_QlnuuU/edit?resourcekey=&gid=1791423820#gid=1791423820"

# --- TEST INPUTS (comment out above assignments before uncommenting these) ---
# --- These files will illustrate behavior when the requests oversubscribe the available slots. ---
# Working Groups:
# WG_SHEET_URL  = "https://docs.google.com/spreadsheets/d/1_E-J2qkQ7qXxGjUzYems3c3nItQS2mPDrmmAvUxt1hg/edit?usp=sharing"
# BOFs:
# BOF_SHEET_URL = "https://docs.google.com/spreadsheets/d/1dLqDGCisflVqSApQ6Y9EECBiAbPn9Z1KMpTUzqQIimc/edit?usp=share_link"

# Optional: override gid if needed (leave None to auto-detect from URL; default is "0")
WG_GID  = None
BOF_GID = None

# --- Runtime paths ---
DATA_DIR = "/content/breakout_inputs"
OUT_DIR  = "/content/breakout_outputs"

# --- Scheduler options ---
PERMUTATIONS      = 5
SCHEDULE_BASENAME = "schedule.csv"   # scheduler.py will number outputs if PERMUTATIONS > 1
CONFIG_PATH       = None             # None => use repo's default config.yaml


In [2]:
# ---               ---
# --- Clone Github  ---
# ---               ---
import os, subprocess

if not os.path.isdir(REPO_DIR):
    subprocess.run(["git", "clone", "--depth", "1", "--branch", BRANCH, REPO_URL, REPO_DIR], check=True)

%cd {REPO_DIR}

# Make sure code is the latest origin/main each runtime
subprocess.run(["git", "fetch", "origin", BRANCH], check=True)
subprocess.run(["git", "reset", "--hard", f"origin/{BRANCH}"], check=True)

print("Repo @", subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().strip())


/content/breakout-scheduler
Repo @ 34c7760


In [None]:
Download the WG and BOF form response CSVs

**Note:** This notebook expects *Google Sheets* URLs (from Google Forms response sheets or test sheets). Do **not** use Google Drive file links like `drive.google.com/file/d/...`.


In [3]:
import os, re
from typing import Optional
from urllib.request import urlretrieve

os.makedirs(DATA_DIR, exist_ok=True)

def sheet_to_csv_url(sheet_url: str, gid: Optional[str] = None) -> str:
    """
    Convert a *Google Sheets* URL to a direct CSV export URL.

    Supported inputs:
      - https://docs.google.com/spreadsheets/d/<spreadsheet_id>/...#gid=<tab_gid>

    Not supported (and will raise a clear error):
      - Google Drive file links like https://drive.google.com/file/d/<file_id>/view
        If you have a CSV file, open/import it in Google Sheets and use the resulting Sheets URL.
    """
    if "docs.google.com/spreadsheets" not in sheet_url:
        raise ValueError(
            "Please provide a Google *Sheets* URL (docs.google.com/spreadsheets/...), "
            "not a Google Drive file link (drive.google.com/file/d/...).\n"
            "If your test data is a CSV file, open it with Google Sheets (or import it into a Sheet) "
            "and use that Sheet's share URL instead."
        )

    m = re.search(r"/spreadsheets/d/([a-zA-Z0-9-_]+)", sheet_url)
    if not m:
        raise ValueError("Couldn't find /spreadsheets/d/<spreadsheet_id>/ in the Google Sheet URL.")
    sheet_id = m.group(1)

    if gid is None:
        m2 = re.search(r"[#&?]gid=([0-9]+)", sheet_url)
        gid = m2.group(1) if m2 else "0"

    return f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&gid={gid}"

def download_sheet_csv(sheet_url: str, out_path: str, gid: Optional[str] = None) -> str:
    csv_url = sheet_to_csv_url(sheet_url, gid=gid)
    urlretrieve(csv_url, out_path)

    # Guardrail: if sharing is wrong, Google often returns HTML instead of CSV
    with open(out_path, "rb") as f:
        head = f.read(200).lower()
    if b"<html" in head or b"<!doctype html" in head:
        raise RuntimeError(
            "Download did not look like CSV (got HTML). "
            "Check the sheet sharing is 'Anyone with the link can view'.\n"
            f"URL was: {csv_url}"
        )
    return csv_url

WG_CSV  = os.path.join(DATA_DIR, "working_groups.csv")
BOF_CSV = os.path.join(DATA_DIR, "bofs.csv")

wg_export  = download_sheet_csv(WG_SHEET_URL,  WG_CSV,  gid=WG_GID)
bof_export = download_sheet_csv(BOF_SHEET_URL, BOF_CSV, gid=BOF_GID)

print("WG CSV saved to:", WG_CSV)
print("BOF CSV saved to:", BOF_CSV)
print("WG export URL:", wg_export)
print("BOF export URL:", bof_export)


WG CSV saved to: /content/breakout_inputs/working_groups.csv
BOF CSV saved to: /content/breakout_inputs/bofs.csv
WG export URL: https://docs.google.com/spreadsheets/d/1TkcC0WdcQzhvJSfLllqxNTuagQfbIE4tvfXvoVRlNbo/gviz/tq?tqx=out:csv&gid=0
BOF export URL: https://docs.google.com/spreadsheets/d/1zZJMWT8BlVvDzvyYHE16BYDXjtB-8DaNWh1u_QlnuuU/gviz/tq?tqx=out:csv&gid=1791423820


Run the Scheduler

In [4]:
import os, glob, subprocess, shlex, sys, time

os.makedirs(OUT_DIR, exist_ok=True)

# Clean out old schedules so we don't accidentally summarize stale files
for old in glob.glob(os.path.join(OUT_DIR, "schedule*.csv")):
    try:
        os.remove(old)
    except OSError:
        pass

schedule_path = os.path.join(OUT_DIR, SCHEDULE_BASENAME)

cmd = ["python", "scheduler.py", "-w", WG_CSV, "-b", BOF_CSV, "-s", schedule_path, "-p", str(PERMUTATIONS)]
if CONFIG_PATH:
    cmd += ["-c", CONFIG_PATH]

print("Running:", shlex.join(cmd))

# Capture output so oversubscription messages are always visible in Colab
t0 = time.time()
result = subprocess.run(cmd, text=True, capture_output=True)
dt = time.time() - t0

if result.stdout:
    print(result.stdout)
if result.stderr:
    print(result.stderr, file=sys.stderr)

print(f"(scheduler.py exit code: {result.returncode}, elapsed: {dt:.2f}s)")

schedule_files = sorted(glob.glob(os.path.join(OUT_DIR, "schedule*.csv")))
if schedule_files:
    print("Generated schedules:")
    for f in schedule_files:
        print("  ", f)
else:
    print("No schedule files written.")


Running: python scheduler.py -w /content/breakout_inputs/working_groups.csv -b /content/breakout_inputs/bofs.csv -s /content/breakout_outputs/schedule.csv -p 5
Generated schedules:
   /content/breakout_outputs/schedule1.csv
   /content/breakout_outputs/schedule2.csv
   /content/breakout_outputs/schedule3.csv
   /content/breakout_outputs/schedule4.csv
   /content/breakout_outputs/schedule5.csv


Run a summary report

In [5]:
import subprocess, os, sys
import pandas as pd

if not schedule_files:
    print("No schedules to summarize (likely oversubscription or early exit).")
else:
    print("\nSummary report:\n")
    result = subprocess.run(["python", "schedule_summary.py", *schedule_files], text=True, capture_output=True)
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    for f in schedule_files:
        print("\n===", os.path.basename(f), "===")
        display(pd.read_csv(f))



Summary report:


=== schedule1.csv ===


Unnamed: 0,Room 1,Room 2,Room 3,Room 4,Room 5,Room 6,Room 7,Room 8
0,,,,,Fictional Group,Random Working Group,,
1,,,,,,Random Working Group,,
2,,,,,,Random Working Group,,
3,,,,,,,,
4,,,,,,,,



=== schedule2.csv ===


Unnamed: 0,Room 1,Room 2,Room 3,Room 4,Room 5,Room 6,Room 7,Room 8
0,,Fictional Group,,,,,,Random Working Group
1,,,,,,,,Random Working Group
2,,,,,,,,Random Working Group
3,,,,,,,,
4,,,,,,,,



=== schedule3.csv ===


Unnamed: 0,Room 1,Room 2,Room 3,Room 4,Room 5,Room 6,Room 7,Room 8
0,,,,Fictional Group,Random Working Group,,,
1,,,,,Random Working Group,,,
2,,,,,Random Working Group,,,
3,,,,,,,,
4,,,,,,,,



=== schedule4.csv ===


Unnamed: 0,Room 1,Room 2,Room 3,Room 4,Room 5,Room 6,Room 7,Room 8
0,,,,Fictional Group,,,Random Working Group,
1,,,,,,,Random Working Group,
2,,,,,,,Random Working Group,
3,,,,,,,,
4,,,,,,,,



=== schedule5.csv ===


Unnamed: 0,Room 1,Room 2,Room 3,Room 4,Room 5,Room 6,Room 7,Room 8
0,Fictional Group,,,Random Working Group,,,,
1,,,,Random Working Group,,,,
2,,,,Random Working Group,,,,
3,,,,,,,,
4,,,,,,,,
