In [5]:
!pip install huggingface_hub
!huggingface-cli login --token "your_token"
!pip install accelerate
!pip install pandas transformers
!pip install hf_transfer
!pip install matplotlib

In [19]:
import os
import json
import shutil
import subprocess
from datetime import datetime

# === CONFIG =========================================
# How the evaluation script is invoked.
SCRIPT: str = "main.py"              # script launched once per dataset/model pair
DATASET_FLAG: str = "--dataset"      # CLI flag placed before the dataset path
MODEL_FLAG: str = "--model_name"     # CLI flag placed before the model identifier
NUM_BINS: int = 10                   # forwarded to the script as --num_bins

# Where per-run folders and the master log are written.
OUTDIR: str = "output_folder"

# Toggles.
PLOT_RELIABILITY: bool = True        # when True, --plot_reliability is appended
USE_HF_FAST: bool = False            # sets HF_HUB_ENABLE_HF_TRANSFER to "1" or "0"
# ====================================================

# Short label -> model identifier passed to the script after MODEL_FLAG.
# The label is used in run-folder names and logs; insertion order is the
# order in which models are run for each dataset.
models = {
    "Deepseek-R1-8B":  "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "Falcon3-7B-base": "tiiuae/Falcon3-7B-Base",
    "GPT-J-6B":        "EleutherAI/gpt-j-6b",
    "Gemma-2-9B":      "google/gemma-2-9b",
    "LLAMA-3.1-8B":    "meta-llama/Llama-3.1-8B",
    "Qwen2.5-7B":      "Qwen/Qwen2.5-7B",
}

# Dataset files to evaluate, in run order. Paths are resolved relative to the
# current working directory when each job starts.
datasets = [
    "genderLex.csv",
    "genderLex_person.csv",
    "genderLex_someone.csv",
    "winobias.csv",
    "winogender.csv",
]

def safe_name(s: str) -> str:
    """Return *s* with every character that is not alphanumeric, ``-`` or ``_``
    replaced by ``_``.

    The result has the same length as the input.
    """
    pieces = []
    for ch in s:
        if ch.isalnum() or ch in ("-", "_"):
            pieces.append(ch)
        else:
            pieces.append("_")
    return "".join(pieces)

def log(line: str, fh):
    """Write *line* to *fh* prefixed with ``[YYYY-MM-DD HH:MM:SS]`` and flush.

    Args:
        line: Message text; a newline is appended.
        fh: Writable text handle providing ``write`` and ``flush``.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    fh.write(f"[{stamp}] {line}\n")
    # Flush right away so the entry is pushed out of the handle's buffer.
    fh.flush()

def _clean_names(dataset_abs: str, model_name: str):
    """Return the ``(dataset, model)`` name fragments used to locate the
    script's own output folder and reliability plot.

    The dataset fragment is the file's basename with every ``.csv`` removed;
    the model fragment is the text after the last ``/`` of *model_name*.
    In both, ``-`` and ``:`` are replaced by ``_``.

    NOTE(review): this presumably mirrors the naming used inside the launched
    script — confirm against that script before changing it.
    """
    to_underscore = str.maketrans("-:", "__")
    dataset_part = os.path.basename(dataset_abs).replace(".csv", "")
    model_part = model_name.split("/")[-1]
    return dataset_part.translate(to_underscore), model_part.translate(to_underscore)

def run_job(dataset_path: str, model_key: str, model_name: str, master_fh):
    """Run SCRIPT once for one dataset/model pair and collect its artifacts.

    A run directory ``OUTDIR/<dataset>_<model_key>`` is created. The child
    process is started with ``python`` from a ``_scratch`` sub-directory, with
    its stdout and stderr redirected to files in the run directory. Afterwards
    a reliability plot found in the scratch ``results_<dataset>`` folder is
    moved into the run directory, that folder is deleted, and ``meta.json`` is
    written. A non-zero exit code or a launch failure is logged, not raised.

    Args:
        dataset_path: Path to the dataset file (made absolute before use).
        model_key: Short label used in the run tag and in log lines.
        model_name: Value passed to the script after MODEL_FLAG.
        master_fh: Open, writable text handle for the sweep-wide log.

    Raises:
        FileNotFoundError: If SCRIPT or the dataset file does not exist.
    """
    script_abs = os.path.abspath(SCRIPT)
    if not os.path.exists(script_abs):
        raise FileNotFoundError(f"Script not found: {script_abs}")
    dataset_abs = os.path.abspath(dataset_path)
    if not os.path.exists(dataset_abs):
        raise FileNotFoundError(f"Dataset not found: {dataset_abs}")

    dataset_stem, _ext = os.path.splitext(os.path.basename(dataset_abs))
    tag = "_".join((safe_name(dataset_stem), safe_name(model_key)))

    run_dir = os.path.join(os.path.abspath(OUTDIR), tag)
    os.makedirs(run_dir, exist_ok=True)

    # The child runs from this scratch directory so that any folder it creates
    # relative to its CWD ends up under the run directory.
    scratch_dir = os.path.join(run_dir, "_scratch")
    os.makedirs(scratch_dir, exist_ok=True)

    def in_run_dir(filename: str) -> str:
        """Absolute path of *filename* inside this run's directory."""
        return os.path.join(run_dir, filename)

    # Absolute output locations handed to (or captured from) the child.
    output_csv = in_run_dir(f"results_{tag}.csv")
    summary_txt = in_run_dir(f"summary_{tag}.txt")
    bins_file = in_run_dir(f"ece_bins_{tag}.txt")
    stdout_path = in_run_dir("stdout.txt")
    stderr_path = in_run_dir("stderr.txt")
    runlog_path = in_run_dir("run.log")
    meta_path = in_run_dir("meta.json")

    # Script and dataset are absolute, so the scratch CWD does not affect them.
    command = ["python", script_abs, DATASET_FLAG, dataset_abs, MODEL_FLAG, model_name]
    command.extend(["--output_file", output_csv])
    command.extend(["--summary_file", summary_txt])
    command.extend(["--bin_output_file", bins_file])
    command.extend(["--num_bins", str(NUM_BINS)])
    command.append("--use_actual_labels")
    if PLOT_RELIABILITY:
        command.append("--plot_reliability")

    env = dict(os.environ)
    env.update(
        HF_HUB_ENABLE_HF_TRANSFER="1" if USE_HF_FAST else "0",
        # Points the history file at the null device; presumably to keep the
        # child from writing IPython history — confirm.
        IPYTHON_HISTORY_FILE=os.devnull,
        # Non-interactive backend so figures can be saved without a display.
        MPLBACKEND="Agg",
    )

    # run.log is appended across re-runs; stdout/stderr are overwritten.
    with open(runlog_path, "a", encoding="utf-8") as rlog, \
         open(stdout_path, "w", encoding="utf-8") as out_fh, \
         open(stderr_path, "w", encoding="utf-8") as err_fh:

        log(f"START run | dataset={dataset_abs} | model_key={model_key} | model_name={model_name}", rlog)
        log(f"Dir: {run_dir}", rlog)
        log(f"CWD: {scratch_dir}", rlog)
        cmd_text = " ".join(command)
        log(f"Command: {cmd_text}", rlog)
        log(f"RUN {tag} -> {cmd_text}", master_fh)

        try:
            completed = subprocess.run(
                command,
                cwd=scratch_dir,
                env=env,
                stdout=out_fh,
                stderr=err_fh,
                text=True,
                check=False,
            )
        except Exception as exc:
            # Launch failure: record a sentinel code and carry on to cleanup.
            rc = -999
            log(f"EXCEPTION: {exc!r}", rlog)
        else:
            rc = completed.returncode

        # Rescue the reliability plot (if any) from the folder the script is
        # expected to create under its CWD, then delete that folder.
        ds_clean, md_clean = _clean_names(dataset_abs, model_name)
        auto_folder = os.path.join(scratch_dir, f"results_{ds_clean}")
        rel_name = f"reliability_{ds_clean}_{md_clean}.png"
        plot_src = os.path.join(auto_folder, rel_name)
        plot_dst = os.path.join(run_dir, rel_name)
        if os.path.isfile(plot_src):
            try:
                shutil.move(plot_src, plot_dst)
                log(f"MOVED reliability plot -> {plot_dst}", rlog)
            except Exception as exc:
                log(f"WARN moving reliability plot: {exc!r}", rlog)

        if os.path.isdir(auto_folder):
            try:
                shutil.rmtree(auto_folder)
                log(f"REMOVED stray folder: {auto_folder}", rlog)
            except Exception as exc:
                log(f"WARN removing {auto_folder}: {exc!r}", rlog)

        # Best effort: rmdir only succeeds when the scratch directory is empty.
        try:
            os.rmdir(scratch_dir)
        except OSError:
            pass

        # Machine-readable record of what was run and where the outputs are.
        outputs = {
            "per_item_csv": output_csv,
            "summary_txt": summary_txt,
            "ece_bins_txt": bins_file,
            "stdout": stdout_path,
            "stderr": stderr_path,
            "runlog": runlog_path,
        }
        meta = {
            "timestamp": datetime.now().isoformat(timespec="seconds"),
            "returncode": rc,
            "command": command,
            "dataset": dataset_abs,
            "model_key": model_key,
            "model_name": model_name,
            "outputs": outputs,
        }
        with open(meta_path, "w", encoding="utf-8") as meta_fh:
            json.dump(meta, meta_fh, indent=2)

        if rc == 0:
            status = "OK"
        else:
            status = f"FAIL (returncode={rc})"
        log(f"STATUS: {status}", rlog)
        log("END run", rlog)
        log(f"END {tag} (rc={rc})", master_fh)

def main():
    """Run every dataset/model combination in sequence.

    Progress is appended to ``OUTDIR/run_all.log``. Datasets form the outer
    loop and models the inner loop. An exception raised by a job propagates
    and stops the remaining jobs.
    """
    os.makedirs(OUTDIR, exist_ok=True)
    master_log = os.path.join(os.path.abspath(OUTDIR), "run_all.log")
    rule = "=" * 60

    with open(master_log, "a", encoding="utf-8") as mfh:
        log(rule, mfh)
        log("RUN_ALL start", mfh)

        jobs = [
            (dataset, model_key, model_name)
            for dataset in datasets
            for model_key, model_name in models.items()
        ]
        for dataset, model_key, model_name in jobs:
            run_job(dataset, model_key, model_name, mfh)

        log("RUN_ALL end", mfh)
        log(rule, mfh)

# Entry point: start the full sweep when this module is executed directly.
if __name__ == "__main__":
    main()