In [1]:
from pathlib import Path

# Directory that holds every checkpoint referenced below.
_WEIGHTS_DIR = Path("/home/hep/an1522/dark_tridents_wspace/outputs/weights")

# Model key -> checkpoint (.pwf) file. Only the keys listed here can be used
# when building occlusion tasks; the commented-out entries are checkpoints
# that are currently disabled.
WEIGHTS = {
    "mpid": _WEIGHTS_DIR / "DM-CNN_model_20260116-10_22_PM_epoch_4_batch_id_1961_labels_2_title_0.001_AG_GN_LM_TRAINING_step_9821.pwf",
    # "resnet18_bn": _WEIGHTS_DIR / "resnet18_bn" / "resnet18_bn_model_20260123-05_21_AM_epoch_4_batch_id_1961_labels_2_step_9821.pwf",
    # "resnet18_gn": _WEIGHTS_DIR / "resnet18_gn" / "resnet18_gn_model_20260123-12_02_AM_epoch_4_batch_id_1961_labels_2_step_9821.pwf",
    # "resnet34_bn": _WEIGHTS_DIR / "resnet34_bn" / "resnet34_bn_model_20260123-05_38_AM_epoch_4_batch_id_1961_labels_2_step_9821.pwf",
    "resnet34_gn": _WEIGHTS_DIR / "resnet34_gn" / "resnet34_gn_model_20260123-12_20_AM_epoch_4_batch_id_1961_labels_2_step_9821.pwf",
}

In [2]:
import pandas as pd
import re
from pathlib import Path

# Workspace root; every derived output path below hangs off this directory.
PROJECT = Path("/home/hep/an1522/dark_tridents_wspace")
# Directory scanned for the to_occlude__*.csv selection files.
SEL_DIR = PROJECT.joinpath("outputs", "inference", "_occlusion_selections")
# Combined task table written at the end of this cell.
TASKS_OUT = SEL_DIR.joinpath("occlusion_tasks.csv")

# Parent directory of the per-dataset LArCV ROOT file folders.
LARCV_BASE = Path("/vols/sbn/uboone/darkTridents/data/larcv_files")

def dataset_to_larcv_dir(dataset: str) -> Path:
    """Return the directory under LARCV_BASE for a dataset name.

    The name must contain at least one underscore (e.g. "run1_samples" or
    "run3_signal"); it is split at the first underscore and re-joined, so the
    directory name equals the dataset name.

    Raises:
        ValueError: if ``dataset`` contains no underscore (unpacking fails).
    """
    pieces = dataset.split("_", 1)
    # Two-way unpacking doubles as the "has an underscore" check.
    run, kind = pieces
    return LARCV_BASE.joinpath("_".join((run, kind)))

def scores_to_root(scores_file: str) -> str:
    """Turn a scores CSV file name into the matching ROOT file name.

    A trailing "_scores.csv" is replaced by ".root", e.g.
    run1_NuMI_nu_overlay_larcv_cropped_scores.csv
    -> run1_NuMI_nu_overlay_larcv_cropped.root.
    Names without that suffix are returned unchanged.
    """
    suffix_pattern = re.compile(r"_scores\.csv$")
    return suffix_pattern.sub(".root", scores_file)

def folder_to_model(folder: str) -> str:
    """Pick the weight key that belongs to an inference folder name.

    The folder name is searched for each known model marker; the first
    marker found is returned. Anything else is treated as the baseline
    DM-CNN and mapped to "mpid".

    Note: only "resnet34_gn" is active. Folders for the disabled models
    (resnet18_bn, resnet18_gn, resnet34_bn) therefore also resolve to
    "mpid" until their markers are added back to the tuple below.
    """
    # Disabled markers: "resnet18_bn", "resnet18_gn", "resnet34_bn"
    known_markers = ("resnet34_gn",)
    for marker in known_markers:
        if marker in folder:
            return marker
    # No marker matched: baseline dm-cnn folder names.
    return "mpid"

def outdir_for(tag: str, dataset: str, folder: str) -> Path:
    """Build the occlusion output directory for one task.

    The layout is PROJECT/outputs/occlusion/<tag>/<dataset>/<folder>.
    The directory is only computed here, not created.
    """
    occlusion_root = PROJECT.joinpath("outputs", "occlusion")
    return occlusion_root.joinpath(tag, dataset, folder)

# Build the occlusion task table: one row per selected event, combining the
# event selection (to_occlude__*.csv) with the weight file, the LArCV ROOT
# file and the output directory that belong to it.

# Collect all to_occlude files
to_occlude_files = sorted(SEL_DIR.glob("to_occlude__*.csv"))
print("Found", len(to_occlude_files), "to_occlude files")

# Columns every selection file must provide; a file missing any of them is
# skipped as a whole instead of failing half-way through its rows.
REQUIRED_COLS = ("entry_number", "n_pixels", "scores_file")

# Fixed output schema, so the CSV keeps its header even when no task is found
# (the downstream cell selects columns by name).
TASK_COLS = [
    "dataset", "folder", "model_key", "weight_file", "scores_file",
    "root_file", "entry_number", "n_pixels", "out_dir", "tag",
]

rows = []
for f in to_occlude_files:
    # filename format: to_occlude__{dataset}__{folder}.csv
    parts = f.stem.split("__")
    if len(parts) < 3:
        print("[skip] unexpected file name", f.name)
        continue
    dataset = parts[1]
    folder = "__".join(parts[2:])  # in case folder name had __

    df = pd.read_csv(f)
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        print("[skip] missing column(s)", missing, "in", f.name)
        continue

    # Per-file invariants: the model, its weights and the LArCV directory
    # depend only on the file name, not on the individual rows.
    model_key = folder_to_model(folder)
    wpath = WEIGHTS[model_key]
    larcv_dir = dataset_to_larcv_dir(dataset)

    for r in df.to_dict("records"):
        scores_file = r["scores_file"]
        root_path = larcv_dir / scores_to_root(scores_file)

        # pick_reason is optional; a missing column or an empty cell (read
        # back as NaN) falls back to "picked" rather than the literal "nan",
        # because the tag becomes a directory name.
        reason = r.get("pick_reason", "picked")
        tag = "picked" if pd.isna(reason) else str(reason)

        rows.append({
            "dataset": dataset,
            "folder": folder,
            "model_key": model_key,
            "weight_file": str(wpath),
            "scores_file": scores_file,
            "root_file": str(root_path),
            "entry_number": int(r["entry_number"]),
            "n_pixels": int(r["n_pixels"]),
            "out_dir": str(outdir_for(tag, dataset, folder)),
            "tag": tag,
        })

tasks = pd.DataFrame(rows, columns=TASK_COLS)
print("Tasks:", len(tasks))
display(tasks.head())

tasks.to_csv(TASKS_OUT, index=False)
print("Wrote:", TASKS_OUT)

Found 8 to_occlude files
Tasks: 298


Unnamed: 0,dataset,folder,model_key,weight_file,scores_file,root_file,entry_number,n_pixels,out_dir,tag
0,run1_samples,mpid,mpid,/home/hep/an1522/dark_tridents_wspace/outputs/...,run1_NuMI_dirt_larcv_cropped_scores.csv,/vols/sbn/uboone/darkTridents/data/larcv_files...,495,54,/home/hep/an1522/dark_tridents_wspace/outputs/...,A_high_tail+C_weird_lowpix
1,run1_samples,mpid,mpid,/home/hep/an1522/dark_tridents_wspace/outputs/...,run1_NuMI_dirt_larcv_cropped_scores.csv,/vols/sbn/uboone/darkTridents/data/larcv_files...,952,331,/home/hep/an1522/dark_tridents_wspace/outputs/...,A_high_tail
2,run1_samples,mpid,mpid,/home/hep/an1522/dark_tridents_wspace/outputs/...,run1_NuMI_dirt_larcv_cropped_scores.csv,/vols/sbn/uboone/darkTridents/data/larcv_files...,439,1732,/home/hep/an1522/dark_tridents_wspace/outputs/...,A_high_tail
3,run1_samples,mpid,mpid,/home/hep/an1522/dark_tridents_wspace/outputs/...,run1_NuMI_dirt_larcv_cropped_scores.csv,/vols/sbn/uboone/darkTridents/data/larcv_files...,3245,79,/home/hep/an1522/dark_tridents_wspace/outputs/...,A_high_tail
4,run1_samples,mpid,mpid,/home/hep/an1522/dark_tridents_wspace/outputs/...,run1_NuMI_dirt_larcv_cropped_scores.csv,/vols/sbn/uboone/darkTridents/data/larcv_files...,414,729,/home/hep/an1522/dark_tridents_wspace/outputs/...,B_border_0p5


Wrote: /home/hep/an1522/dark_tridents_wspace/outputs/inference/_occlusion_selections/occlusion_tasks.csv


In [3]:
import pandas as pd

# Convert the task table (CSV) into a plain-text list: one task per line,
# fields separated by a single space, in the column order given by `cols`.
csv_in  = "/home/hep/an1522/dark_tridents_wspace/outputs/inference/_occlusion_selections/occlusion_tasks.csv"
list_out = "/home/hep/an1522/dark_tridents_wspace/outputs/inference/_occlusion_selections/occlusion_tasks.list"

df = pd.read_csv(csv_in)

cols = ["root_file","weight_file","out_dir","entry_number","n_pixels","tag"]
df = df[cols]

# Format and validate every line before touching the output file, so a bad
# row cannot leave a truncated or half-written list behind.
lines = []
for row_no, r in enumerate(df.itertuples(index=False)):
    fields = [
        str(r.root_file),
        str(r.weight_file),
        str(r.out_dir),
        str(int(r.entry_number)),
        str(int(r.n_pixels)),
        str(r.tag),
    ]
    # The format has no quoting: an empty field or one containing whitespace
    # would change the number of fields on the line.
    bad = [v for v in fields if len(v.split()) != 1]
    if bad:
        raise ValueError(
            f"row {row_no} of {csv_in}: field(s) {bad!r} are empty or contain "
            "whitespace and cannot be written to a space-separated list"
        )
    lines.append(" ".join(fields))

with open(list_out, "w") as f:
    f.writelines(line + "\n" for line in lines)

print("wrote", list_out, "rows=", len(df))

wrote /home/hep/an1522/dark_tridents_wspace/outputs/inference/_occlusion_selections/occlusion_tasks.list rows= 298
