# Internship Placement Selection System
Allocates internship placements across configurable shifts (`SHIFT_ORDER`) using ranked preferences, per-shift seat caps, and sex requirements. Run cells from top to bottom.

**Files expected**
- **Students CSV** (`student_path`): columns in order `student_name, student_id, sex, shift, rank 1..rank x`; `sex` in `{male,female}`, `shift` in `{1..SHIFT_ORDER}`, each rank cell is a site code.
- **Drugstores CSV** (`drugstore_path`): columns in order `code, branch, sex_require1, seat1, ..., sex_require{SHIFT_ORDER}, seat{SHIFT_ORDER}`; `sex_require{i}` in `{male,female,both}`; seats are integers per shift.
- **Output dir** (`output_path`): an existing directory where the output CSVs are saved (it must already exist; it is not created automatically).

**What the notebook does**
1) Configure run parameters: number of ranks (`x`), shifts (`SHIFT_ORDER`), input paths, output dir, optional RNG seed for reproducible lotteries.
2) Validate column order (case/space tolerant) and normalize sex/shift/site values; rename rank columns if needed.
3) Drop duplicate site codes within a student’s ranks (keep first occurrence).
4) Allocate per shift, then per rank; when a site has more eligible candidates than remaining seats for a given shift and rank, run a local random lottery among those candidates; enforce sex requirements and available seats.
5) Save results to `<timestamp>_output.csv` with `student_name, student_id, rank_result, result` (original row order).
6) Verification cell checks counts, rank alignment, site existence, shift validity, capacity and sex constraints, duplicate ranks, and choices for unavailable sites; also writes remaining seats + assigned counts to `<timestamp>_remaining_seats.csv`.
7) Result analysis cell (Cell 6) prints selection metrics and renders bar charts for assignments by rank, by shift, and top sites (up to 10).
8) Preview cell (Cell 5) displays `output_df` if it has already been computed.

**Selection rules (core)**
- Process shifts in order `1 → SHIFT_ORDER`, and within each shift process ranks `1 → x`.
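- Skip a choice when the student fails the site's sex requirement, the site has no seats left in that shift, the site code is not in the drugstore list, or the student belongs to a different shift.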
- Unassigned after all ranks: `rank_result = 0`, `result = "Not selected"`.
- Tie-breaks only within the oversubscribed site+shift+rank group using RNG.

In [None]:
# =======================
# Cell 0 - Import library
# =======================

import os
from datetime import datetime
import random
import numpy as np
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

In [None]:
# =======================
# Cell 1 - Configuration
# =======================

# Number of ranked choices per student (must match your CSV headers: rank 1 ... rank x).
# Used by the validation, de-duplication, allocation and verification cells.
x = 5

# Number of shifts; the allocation loop processes shift 1 up to and including this value.
SHIFT_ORDER = 2

# Students CSV path (columns: student_name, student_id, sex, shift, rank 1 ... rank x)
student_path = "student_selection.csv"
# Early hint only: this prints True/False and does not stop the notebook if the file is missing.
print(f"Found Student CSV path?: {os.path.exists(student_path)}")

# Drugstores CSV path (columns: code, branch, sex_require1, seat1 ... sex_require{SHIFT_ORDER}, seat{SHIFT_ORDER})
drugstore_path = "drugstore_path.csv"
# Same early hint for the drugstores file.
print(f"Found Drugstores CSV path?: {os.path.exists(drugstore_path)}")

# Directory to save the output CSVs; it must already exist (checked by _check_paths).
output_path = "./output"

# Set a seed for reproducible random tie-breaks (None for non-deterministic)
random_seed = None


In [None]:
# Display student dataframe
# Raw preview exactly as pandas parses the file; no validation or normalization has run yet.
student_df = pd.read_csv(student_path)
student_df

In [None]:
# Display drugstore dataframe
# Raw preview exactly as pandas parses the file; no validation or normalization has run yet.
drugstore_df = pd.read_csv(drugstore_path)
drugstore_df

In [None]:
# =======================
# Cell 2 - Helpers
# =======================

# Seed the global `random` module and NumPy's legacy global generator when a seed is configured.
# NOTE(review): the allocation cell draws its lottery from its own
# np.random.default_rng(random_seed), so this global seeding does not appear to
# influence the tie-breaks — confirm before relying on it.
if random_seed is not None:
    random.seed(random_seed)
    np.random.seed(random_seed)


def _normalize_sex(v):
    if isinstance(v, str):
        s = v.strip().lower()
        if s in {"male","m"}: return "male"
        if s in {"female","f"}: return "female"
        if s in {"both","any"}: return "both"
    return v

def _normalize_shift(v):
    if pd.isna(v): return ""
    return str(v).strip()

def _check_paths(student_path, drugstore_path, output_path):
    if not os.path.isfile(student_path):
        raise FileNotFoundError(f"students CSV not found: {student_path}")
    if not os.path.isfile(drugstore_path):
        raise FileNotFoundError(f"drugstores CSV not found: {drugstore_path}")
    if not os.path.isdir(output_path):
        raise NotADirectoryError(f"output_path is not a directory: {output_path}")

def _normalize_column_name(col):
    """Normalize column names: remove spaces, convert to lowercase"""
    return col.lower().replace(" ", "")

def _validate_student_columns(df, x):
    """Check that the students frame starts with the expected columns, in order.

    The leading columns must be student_name, student_id, sex, shift and then
    rank 1 .. rank x. Names are compared after _normalize_column_name, so case
    and spaces are ignored. Extra trailing columns are not inspected.

    Raises:
        ValueError: if the leading columns differ from the expected ones.
    """
    expected = ["student_name", "student_id", "sex", "shift"]
    expected += [f"rank {n}" for n in range(1, x + 1)]
    actual = list(df.columns)[:len(expected)]

    wanted = list(map(_normalize_column_name, expected))
    found = list(map(_normalize_column_name, actual))
    if found == wanted:
        return

    raise ValueError(
        "Students CSV must start with columns: "
        + ", ".join(expected)
        + f"\nFound: {actual}"
    )

def _validate_drugstore_columns(df, shift_order):
    """Check that the drugstores frame starts with the expected columns, in order.

    The leading columns must be code, branch and then, for each shift
    1 .. shift_order, the pair sex_require{i}, seat{i}. Names are compared
    after _normalize_column_name, so case and spaces are ignored. Extra
    trailing columns are not inspected.

    Raises:
        ValueError: if the leading columns differ from the expected ones.
    """
    expected = ["code", "branch"]
    for shift_no in range(1, shift_order + 1):
        expected += [f"sex_require{shift_no}", f"seat{shift_no}"]
    actual = list(df.columns)[:len(expected)]

    wanted = list(map(_normalize_column_name, expected))
    found = list(map(_normalize_column_name, actual))
    if found == wanted:
        return

    raise ValueError(
        "Drugstores CSV must start with columns: "
        + ", ".join(expected)
        + f"\nFound: {actual}"
    )

def _sex_allowed(req, student_sex):
    if req in (None, "both"): return True
    if req == "male": return student_sex == "male"
    if req == "female": return student_sex == "female"
    return False


In [None]:
# =======================
# Cell 3 — Load Data & Run Selection
# =======================

# 1) Validate paths and load.
# Everything is read as text (dtype=str) so site codes and ids keep their exact
# CSV spelling. Without it, a numeric code column that contains a blank cell is
# parsed as float and "101" turns into "101.0", which never matches the
# drugstore code "101". Seats are parsed later by safe_int, which accepts text.
_check_paths(student_path, drugstore_path, output_path)
students_raw = pd.read_csv(student_path, dtype=str)
drugstores_raw = pd.read_csv(drugstore_path, dtype=str)

# 2) Validate schemas (leading columns, in order; case/space tolerant)
_validate_student_columns(students_raw, x)
_validate_drugstore_columns(drugstores_raw, SHIFT_ORDER)

# 3) Rename the validated leading columns to their canonical names.
# Validation accepts any case/spacing for every expected column, so all of
# them (not just the rank columns) are renamed by position; otherwise a header
# such as "Sex" would raise KeyError below and "Seat1" would silently read as
# zero seats. Trailing extra columns keep their original names.
students = students_raw.copy()
drugstores = drugstores_raw.copy()

rank_cols = [f"rank {i}" for i in range(1, x + 1)]
student_cols = ["student_name", "student_id", "sex", "shift"] + rank_cols
drugstore_cols = ["code", "branch"]
for i in range(1, SHIFT_ORDER + 1):
    drugstore_cols += [f"sex_require{i}", f"seat{i}"]

students.columns = student_cols + list(students.columns[len(student_cols):])
drugstores.columns = drugstore_cols + list(drugstores.columns[len(drugstore_cols):])

# 4) Normalize data
students["sex"] = students["sex"].apply(_normalize_sex)
students["shift"] = students["shift"].apply(_normalize_shift)
for col in rank_cols:
    # Blank cells become the text "nan" here; step 5 turns them into "".
    students[col] = students[col].astype(str).str.strip()

drugstores["code"] = drugstores["code"].astype(str).str.strip()
for i in range(1, SHIFT_ORDER + 1):
    req_col = f"sex_require{i}"
    drugstores[req_col] = drugstores[req_col].apply(_normalize_sex)

# 5) Drop duplicate ranks per student (keep earliest, blank later duplicates)
dup_removed = 0
for idx, row in students.iterrows():
    seen = set()
    for col in rank_cols:
        val = row[col]
        val = val.strip() if isinstance(val, str) else ""
        if not val or val.lower() == "nan":
            # Empty / missing choice: store a real empty string.
            students.at[idx, col] = ""
            continue
        if val in seen:
            students.at[idx, col] = ""
            dup_removed += 1
        else:
            seen.add(val)
print(f"Deduplicated ranks: removed {dup_removed} later duplicates (kept first occurrence per student)")

def safe_int(val):
    """Convert *val* to an int, returning 0 whenever that is not possible.

    Missing values, blank strings and "-" give 0. Numeric strings go through
    float first, so "2.0" -> 2 and "2.7" -> 2. Any conversion error gives 0.
    """
    if pd.isna(val):
        return 0

    if not isinstance(val, str):
        try:
            return int(val)
        except Exception:
            return 0

    text = val.strip()
    if text in ("", "-"):
        return 0
    try:
        return int(float(text))
    except Exception:
        return 0

# 6) Build capacity & sex_require maps (using STRING keys for shifts consistently)
# Both maps are keyed site code -> {"1": ..., ..., str(SHIFT_ORDER): ...}.
seats_available = defaultdict(lambda: {str(i): 0 for i in range(1, SHIFT_ORDER + 1)})
sex_requirement = defaultdict(lambda: {str(i): None for i in range(1, SHIFT_ORDER + 1)})

for _, row in drugstores.iterrows():
    code = row["code"]
    for shift_num in range(1, SHIFT_ORDER + 1):
        shift_key = str(shift_num)  # Always use string keys
        seat_col = f"seat{shift_num}"
        req_col = f"sex_require{shift_num}"
        # Plain assignment: if a code appears on several rows, the last row wins.
        seats_available[code][shift_key] = safe_int(row.get(seat_col, 0))
        sex_requirement[code][shift_key] = row.get(req_col)

# Only codes that appear in the drugstores file are valid choices.
valid_sites = set(seats_available.keys())

# 7) Initialize result dataframe
# Every student starts unassigned; rank_result == 0 is the "not yet assigned" marker below.
output_df = students[["student_name", "student_id"]].copy()
output_df["rank_result"] = 0
output_df["result"] = "Not selected"

# 8) RNG for tie-breaks (only inside oversubscribed site/shift/rank)
rng = np.random.default_rng(random_seed) if random_seed is not None else np.random.default_rng()

# 9) Run allocation per shift, per rank, with local lotteries
for shift_num_int in range(1, SHIFT_ORDER + 1):
    shift_key = str(shift_num_int)

    for rank_num in range(1, x + 1):
        # Gather eligible, unassigned students for this shift + rank, bucketed by site
        candidates_by_site = defaultdict(list)

        for student_idx, student in students.iterrows():
            # Skip already assigned
            if output_df.at[student_idx, "rank_result"] != 0:
                continue

            # Only students whose own shift is the one being processed take part.
            student_shift_val = safe_int(student.get("shift", 0))
            if student_shift_val != shift_num_int:
                continue

            rank_col = f"rank {rank_num}"
            site_code = student.get(rank_col, "")
            site_code = site_code.strip() if isinstance(site_code, str) else ""
            # Blank choice at this rank, or a code that is not in the drugstores file.
            if not site_code:
                continue
            if site_code not in valid_sites:
                continue

            req = sex_requirement.get(site_code, {}).get(shift_key, None)
            if not _sex_allowed(req, student.get("sex")):
                continue

            # Site already full (or never open) in this shift.
            if seats_available[site_code][shift_key] <= 0:
                continue

            candidates_by_site[site_code].append(student_idx)

        # Resolve each site independently for this shift/rank
        for site_code, idxs in candidates_by_site.items():
            seats_left = seats_available[site_code][shift_key]
            if seats_left <= 0:
                continue

            if len(idxs) <= seats_left:
                # Enough seats for everyone who asked: no lottery needed.
                chosen_idxs = idxs
            else:
                # Local lottery within this site/shift/rank
                chosen_idxs = rng.choice(idxs, size=seats_left, replace=False)

            for chosen_idx in chosen_idxs:
                # int(...) converts the NumPy integers produced by rng.choice
                # back to plain ints before they are used as row labels.
                output_df.at[int(chosen_idx), "rank_result"] = rank_num
                output_df.at[int(chosen_idx), "result"] = site_code

            seats_available[site_code][shift_key] -= len(chosen_idxs)

# 10) Generate timestamp and save
# `ts` is reused by the verification cell to name the remaining-seats CSV.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
output_csv = os.path.join(output_path, f"{ts}_output.csv")
output_df.to_csv(output_csv, index=False)
print(f"\n✓ Selection complete. Output saved to: {output_csv}")
print(f"Summary:")
print(f"  Total students: {len(output_df)}")
print(f"  Selected: {(output_df['rank_result'] > 0).sum()}")
print(f"  Not selected: {(output_df['rank_result'] == 0).sum()}")


In [None]:
# =======================
# Cell 4 - Verification and update seats cell
# =======================

# Opening banner for the verification report printed by this cell.
print("=== Verification Start ===")

def _norm_code(s):
    return "" if pd.isna(s) else str(s).strip()

def _norm_shift(s):
    return "" if pd.isna(s) else str(s).strip()

def _norm_sex(s):
    if pd.isna(s): return None
    s = str(s).strip().lower()
    if s in {"male","m"}: return "male"
    if s in {"female","f"}: return "female"
    if s in {"both","any"}: return "both"
    return s

def _sex_allowed(requirement, student_sex):
    """Return True when ``student_sex`` satisfies ``requirement``.

    Both arguments are normalized with _norm_sex first. A missing requirement
    or "both" accepts everyone; otherwise the two normalized values must match.

    Note: this shares its name with the helper defined in the Helpers cell and
    replaces it in the notebook namespace once this cell has run.
    """
    required, actual = _norm_sex(requirement), _norm_sex(student_sex)
    return required is None or required == "both" or required == actual

# Collected problem descriptions; reported in the final summary.
issues = []

# Work on normalized copies so the checks never mutate the allocation data.
_students = students.assign(
    shift=students["shift"].map(_norm_shift),
    sex=students["sex"].map(_norm_sex),
)
_rank_cols_present = [
    name for name in (f"rank {n}" for n in range(1, x + 1))
    if name in _students.columns
]
for rank_name in _rank_cols_present:
    _students[rank_name] = _students[rank_name].map(_norm_code)

_output = output_df.assign(result=output_df["result"].map(_norm_code))

_drug = drugstores.assign(code=drugstores["code"].map(_norm_code))
_req_cols_present = [
    name for name in (f"sex_require{n}" for n in range(1, SHIFT_ORDER + 1))
    if name in _drug.columns
]
for req_name in _req_cols_present:
    _drug[req_name] = _drug[req_name].map(_norm_sex)

# Every site code that exists in the drugstores file.
valid_codes = set(_drug["code"])

# ---- 1) Counts & IDs ----
print("\n[1] Counts & IDs")
n_students, n_output = len(_students), len(_output)
if n_output != n_students:
    issues.append(f"Row count mismatch: students={n_students}, output={n_output}")
print(f"- Row counts: students={n_students}, output={n_output}")

# Rows whose student_id occurs more than once (all occurrences are kept).
_dup_student_mask = _students.duplicated(subset=["student_id"], keep=False)
dup_students = _students[_dup_student_mask]
if dup_students.empty:
    print("- No duplicate student_id in students")
else:
    issues.append(f"Duplicate student_id in students: {dup_students['student_id'].tolist()}")
    print("- Duplicate student_id in students:")
    display(dup_students)

_dup_output_mask = _output.duplicated(subset=["student_id"], keep=False)
dup_output = _output[_dup_output_mask]
if dup_output.empty:
    print("- No duplicate student_id in output")
else:
    issues.append(f"Duplicate student_id in output: {dup_output['student_id'].tolist()}")
    print("- Duplicate student_id in output:")
    display(dup_output)

# IDs present on one side only.
_student_ids = set(_students["student_id"])
_output_ids = set(_output["student_id"])
missing_in_output = _student_ids - _output_ids
missing_in_students = _output_ids - _student_ids
if missing_in_output:
    issues.append(f"Students missing in output: {len(missing_in_output)}")
    print(f"- Missing in output: {len(missing_in_output)}")
if missing_in_students:
    issues.append(f"Unexpected student_id in output: {len(missing_in_students)}")
    print(f"- Unexpected IDs in output: {len(missing_in_students)}")

# ---- 2) Rank Correctness & in-any-rank ----
print("\n[2] Rank Correctness (and whether result is in any chosen ranks)")
bad_rank_alignment = []
not_in_any_rank = []
invalid_site_codes = []
invalid_shifts = []

# Output row N is compared with student row N (same position in both frames).
for pos in range(len(_output)):
    out_row = _output.iloc[pos]
    stu_row = _students.iloc[pos]
    rr, res = out_row["rank_result"], out_row["result"]
    sid, sname = out_row["student_id"], out_row["student_name"]
    sft = stu_row["shift"]

    rank_in_range = (not pd.isna(rr)) and 0 <= int(rr) <= x
    if not rank_in_range:
        issues.append(f"Invalid rank_result for {sid}: {rr}")
        continue
    rr = int(rr)

    # Check shift validity
    sft_int = safe_int(sft)
    if not 1 <= sft_int <= SHIFT_ORDER:
        invalid_shifts.append((sid, sname, sft, res, rr))

    names_a_site = bool(res) and res.lower() != "not selected"

    # Check site validity
    if rr > 0 and names_a_site and res not in valid_codes:
        invalid_site_codes.append((sid, sname, res))

    # Unassigned rows must not carry a site; nothing else to check for them.
    if rr == 0:
        if names_a_site:
            issues.append(f"{sid} has rank_result=0 but result='{res}'")
        continue

    # The assigned site must be the code written at the assigned rank.
    code_at_rank = _norm_code(stu_row.get(f"rank {rr}", ""))
    if code_at_rank != res:
        bad_rank_alignment.append((sid, sname, rr, code_at_rank, res))

    # ...and must appear somewhere among the student's non-blank choices.
    raw_choices = {stu_row.get(f"rank {k}", "") for k in range(1, x + 1)}
    chosen_set = {_norm_code(c) for c in raw_choices if str(c).strip() != ""}
    if res not in chosen_set:
        not_in_any_rank.append((sid, sname, res, sorted(chosen_set)))

# Each report lists at most 20 entries and then a count of the remainder.
if bad_rank_alignment:
    print("- Assigned differs from code at assigned rank:")
    for sid, sname, rr, expc, gotc in bad_rank_alignment[:20]:
        print(f"  {sid} ({sname}) rank {rr}: expected {expc}, got {gotc}")
    if len(bad_rank_alignment) > 20:
        print(f"  ... ({len(bad_rank_alignment)-20} more)")
    issues.append(f"{len(bad_rank_alignment)} mismatches between assigned rank and code.")

if not_in_any_rank:
    print("- Assigned site not present in any of the student's ranks:")
    for sid, sname, res, chosen in not_in_any_rank[:20]:
        print(f"  {sid} ({sname}): assigned={res}, ranks={chosen}")
    if len(not_in_any_rank) > 20:
        print(f"  ... ({len(not_in_any_rank)-20} more)")
    issues.append(f"{len(not_in_any_rank)} assigned sites not found in any chosen rank.")

if invalid_site_codes:
    print("- Assigned site not in drugstore list:")
    for sid, sname, res in invalid_site_codes[:20]:
        print(f"  {sid} ({sname}): assigned={res}")
    if len(invalid_site_codes) > 20:
        print(f"  ... ({len(invalid_site_codes)-20} more)")
    issues.append(f"{len(invalid_site_codes)} assignments to non-existent sites.")

if invalid_shifts:
    print("- Invalid shift values detected:")
    for sid, sname, sft, res, rr in invalid_shifts[:20]:
        print(f"  {sid} ({sname}): shift={sft}, rank={rr}, assigned={res}")
    if len(invalid_shifts) > 20:
        print(f"  ... ({len(invalid_shifts)-20} more)")
    issues.append(f"{len(invalid_shifts)} rows with invalid shift values.")

# ---- Build capacities & assigned_counts (used by 3 & 4) ----
# Shift keys are the strings "1" .. str(SHIFT_ORDER).
shift_keys = {str(i) for i in range(1, SHIFT_ORDER + 1)}
capacities = defaultdict(lambda: dict.fromkeys(shift_keys, 0))
sexreq_map = defaultdict(lambda: dict.fromkeys(shift_keys, None))

_drug_columns = _drug.columns
for _, r in _drug.iterrows():
    code = r["code"]
    for sh in shift_keys:
        seat_col, req_col = f"seat{sh}", f"sex_require{sh}"
        # Seats accumulate across rows sharing a code; the requirement is overwritten.
        capacities[code][sh] += safe_int(r[seat_col]) if seat_col in _drug_columns else 0
        sexreq_map[code][sh] = r[req_col] if req_col in _drug_columns else None

# Count assignments per site and shift; rows with an unrecognized shift are not counted.
assigned_counts = defaultdict(lambda: dict.fromkeys(shift_keys, 0))
_rank_results = _output["rank_result"]
_results = _output["result"]
_shifts = _students["shift"]
for i in range(len(_output)):
    rr = int(_rank_results.iat[i])
    res = _results.iat[i]
    is_assigned = rr > 0 and bool(res) and res.lower() != "not selected"
    if not is_assigned:
        continue
    sft = _shifts.iat[i]
    if sft in shift_keys:
        assigned_counts[res][sft] += 1

# ---- 3) Remaining seats by site -> CSV ----
print("\n[3] Remaining Seats by Site (CSV)")

# Wide format matching drugstore_path structure, with seat{sh} replaced by remaining seats and assigned{sh} added
remaining_wide = _drug.copy()
# Sort shifts numerically: a plain string sort would put "10" before "2" and
# append the assigned{n} columns out of order once SHIFT_ORDER reaches 10.
for sh in sorted(shift_keys, key=int):
    seat_col = f"seat{sh}"
    assign_col = f"assigned{sh}"
    # .map runs immediately, so each lambda sees the current `sh`.
    remaining_wide[assign_col] = remaining_wide["code"].map(lambda code: assigned_counts[code][sh])
    remaining_wide[seat_col] = remaining_wide["code"].map(lambda code: capacities[code][sh] - assigned_counts[code][sh])

# `ts` comes from the selection cell, so both CSVs of one run share a timestamp.
remaining_path = os.path.join(output_path, f"{ts}_remaining_seats.csv")
remaining_wide.to_csv(remaining_path, index=False)
print(f"- Remaining/assigned seats saved to: {remaining_path}")

# ---- 4) Capacity violations ----
print("\n[4] Capacity Violations")
# One entry per (site, shift) where more students were assigned than seats exist.
viol = []
for code, cap in capacities.items():
    site_counts = assigned_counts[code]
    for sh in shift_keys:
        used, limit = site_counts[sh], cap[sh]
        if used > limit:
            viol.append((code, sh, used, limit))

if not viol:
    print("- No capacity violations")
    if dup_removed > 0:
        print(f"  due to removed {dup_removed} duplicated rank(s) before allocation.")
else:
    print("- Over-capacity detected:")
    for code, sh, used, capv in viol:
        print(f"  site={code}, shift={sh}: assigned={used}, capacity={capv}")
    issues.append(f"{len(viol)} capacity violations.")

# ---- 5) Duplicate ranks within a student ----
print("\n[5] Duplicate Ranks Detected (in a single student's preference list)")
dup_rank_msgs = []
for pos in range(len(_students)):
    stu_row = _students.iloc[pos]
    sid = stu_row["student_id"]
    sname = stu_row["student_name"]

    # Map each non-blank site code to the rank positions where it appears.
    ranks_by_code = defaultdict(list)
    for k in range(1, x + 1):
        choice = _norm_code(stu_row.get(f"rank {k}", ""))
        if choice:
            ranks_by_code[choice].append(k)

    dup_rank_msgs.extend(
        (sid, sname, code, ks)
        for code, ks in ranks_by_code.items()
        if len(ks) > 1
    )

if not dup_rank_msgs:
    print("- No duplicate ranks within students")
else:
    print("- Found duplicate codes within a student's ranks:")
    for sid, sname, code, ks in dup_rank_msgs[:30]:
        print(f"  {sid}: code={code} appears in ranks {ks}")
    if len(dup_rank_msgs) > 30:
        print(f"  ... ({len(dup_rank_msgs)-30} more)")
    issues.append(f"{len(dup_rank_msgs)} students have duplicate site codes across ranks.")

# ---- 6) Sex requirement compliance ----
print("\n[6] Sex Requirement Violations")
sex_viol = []
for pos in range(len(_output)):
    out_row = _output.iloc[pos]
    rr = int(out_row["rank_result"])
    res = out_row["result"]
    has_assignment = rr != 0 and bool(res) and res.lower() != "not selected"
    if not has_assignment:
        continue

    stu_row = _students.iloc[pos]
    sft, sx = stu_row["shift"], stu_row["sex"]
    # An unrecognized shift has no requirement to check against.
    req = sexreq_map[res][sft] if sft in shift_keys else None
    if _sex_allowed(req, sx):
        continue
    sex_viol.append((
        stu_row["student_id"],
        stu_row["student_name"],
        sft, res, sx, req
    ))

if not sex_viol:
    print("- No sex requirement violations")
else:
    print("- Sex requirement mismatches:")
    for sid, sname, sft, res, sx, req in sex_viol[:30]:
        print(f"  {sid} ({sname}) shift={sft}, site={res}, student_sex={sx}, required={req}")
    if len(sex_viol) > 30:
        print(f"  ... ({len(sex_viol)-30} more)")
    issues.append(f"{len(sex_viol)} sex requirement violations.")

# ---- 7) Students who chose unavailable sites ----
print("\n[7] Chosen Sites NOT Available in Student's Shift")
unavailable_choices = []

# Build the code -> drugstore row lookup once instead of re-normalizing and
# re-filtering the whole drugstore frame for every single student choice.
# setdefault keeps the first row for a code, like taking the first filtered match.
first_row_by_code = {}
for _, store_row in drugstores.iterrows():
    first_row_by_code.setdefault(str(store_row["code"]).strip(), store_row)

for pos in range(len(students)):
    student_row = students.iloc[pos]
    student_id = student_row["student_id"]
    student_shift_int = safe_int(student_row["shift"])
    # Students with an out-of-range shift are reported by check [2]; skip them here.
    if student_shift_int < 1 or student_shift_int > SHIFT_ORDER:
        continue

    seat_col = f"seat{student_shift_int}"
    has_seat_col = seat_col in drugstores.columns

    for rank_num in range(1, x + 1):
        rank_col = f"rank {rank_num}"
        site_code = str(student_row[rank_col]).strip()

        # Blank choice.
        if not site_code or site_code.lower() == "nan":
            continue

        # Unknown site codes are not this check's concern.
        site_row = first_row_by_code.get(site_code)
        if site_row is None:
            continue

        capacity = site_row[seat_col] if has_seat_col else None

        # A missing or non-positive seat count means the site is closed in this shift.
        if pd.isna(capacity) or safe_int(capacity) <= 0:
            unavailable_choices.append((student_id, site_code, rank_num, student_shift_int))

if unavailable_choices:
    print("- Students who chose sites NOT open in their shift:")
    for sid, site, rank, shift in unavailable_choices[:30]:
        print(f"  {sid}: Rank {rank}={site} (not available in shift {shift})")
    if len(unavailable_choices) > 30:
        print(f"  ... ({len(unavailable_choices)-30} more)")
    issues.append(f"{len(unavailable_choices)} choices for unavailable sites.")
else:
    print("- No unavailable choices")

# ---- Final summary ----
# Lists every collected issue, or confirms that none were found.
print("\n=== Summary ===")
if not issues:
    print("All verification checks passed.")
else:
    print("Verification found issues:")
    for problem in issues:
        print(" -", problem)

print("=== Verification End ===")


In [None]:
# =======================
# Cell 5 - Preview df
# =======================
# Show the allocation result table if the selection cell has already produced it.
try:
    display(output_df)
except NameError:
    # output_df is not defined yet (display itself is also only defined inside IPython/Jupyter).
    print("Run previous cells first.")


In [None]:
# =======================
# Cell 6 - Result analysis cell
# =======================
# Summarize the allocation (by rank, by shift, top sites, unassigned students)
# and draw three bar charts. Any failure is reported as a one-line message.
try:
    if 'output_df' not in globals() or 'students' not in globals():
        raise NameError("Run the selection cells first to create output_df and students.")

    # Basic metrics
    selected_mask = output_df["rank_result"] > 0
    selection_rate = float(selected_mask.mean())
    rank_counts = output_df.loc[selected_mask, "rank_result"].value_counts().sort_index()
    # Show every rank 1..x, including ranks nobody was assigned at.
    rank_counts = rank_counts.reindex(range(1, x + 1), fill_value=0)

    # Shift-level summaries
    student_shift = students["shift"].apply(safe_int)
    assigned_shift = output_df[selected_mask].assign(shift=student_shift[selected_mask.index]).groupby("shift").size()
    # Named shift_numbers (ints) so this cell does not rebind the verification
    # cell's `shift_keys`, which is a set of strings.
    shift_numbers = list(range(1, SHIFT_ORDER + 1))
    assigned_shift = assigned_shift.reindex(shift_numbers, fill_value=0)
    total_by_shift = student_shift.value_counts().reindex(shift_numbers, fill_value=0)
    # Shifts without students become NaN in the denominator and get a 0.0 fill
    # rate; .where keeps the result a float Series rather than object dtype.
    fill_rate = (assigned_shift / total_by_shift.where(total_by_shift > 0)).fillna(0.0)

    # Top sites by assignments
    top_sites = output_df.loc[selected_mask, "result"].value_counts().head(10)

    # Unassigned list (with shift and up to the first three choices for context)
    unassigned_df = output_df.loc[~selected_mask, ["student_name", "student_id"]].copy()
    if not unassigned_df.empty:
        unassigned_df["shift"] = student_shift.loc[unassigned_df.index]
        for rn in range(1, min(3, x) + 1):
            col = f"rank {rn}"
            if col in students.columns:
                unassigned_df[col] = students.loc[unassigned_df.index, col]

    print("=== Result Summary ===")
    print(f"Students: {len(output_df)}")
    print(f"Selected: {selected_mask.sum()} ({selection_rate:.1%})")
    print("\nAssignments by rank:")
    display(rank_counts.rename_axis("rank").reset_index(name="count"))
    print("\nAssignments by shift:")
    display(pd.DataFrame({"shift": shift_numbers, "assigned": assigned_shift.values, "students": total_by_shift.values, "fill_rate": fill_rate.values}))
    print("\nTop sites by assignments:")
    display(top_sites.rename_axis("site").reset_index(name="count"))

    if unassigned_df.empty:
        print("\nNo unassigned students.")
    else:
        print(f"\nUnassigned students: {len(unassigned_df)} student(s).")
        display(unassigned_df)

    # Visuals
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    axes[0].bar(rank_counts.index, rank_counts.values, color="#4c72b0")
    axes[0].set_title("Assignments by Rank")
    axes[0].set_xlabel("Rank")
    axes[0].set_ylabel("Count")
    axes[0].xaxis.set_major_locator(MaxNLocator(integer=True))

    axes[1].bar(shift_numbers, assigned_shift.values, color="#55a868")
    axes[1].set_title("Assignments by Shift")
    axes[1].set_xlabel("Shift")
    axes[1].set_ylabel("Count")
    axes[1].xaxis.set_major_locator(MaxNLocator(integer=True))

    axes[2].bar(top_sites.index.astype(str), top_sites.values, color="#c44e52")
    axes[2].set_title("Top Sites (up to 10)")
    axes[2].set_xlabel("Site code")
    axes[2].set_ylabel("Count")
    axes[2].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()
except Exception as exc:
    # Best-effort cell: report the failure instead of interrupting the notebook.
    print(f"Analysis cell failed: {exc}")