# 🏁 SpectraMind V50 — Kaggle Submission & Leaderboard Playbook (Notebook 11)

**Goal.** Package, validate, and (optionally) upload a **Kaggle submission** for the NeurIPS Ariel Data Challenge.  
This notebook is **CLI-first** and provides **DRY-RUN** safety if the `kaggle` CLI isn't available.

**What this notebook does**
1. Pre-flight: detect Kaggle CLI, capture env/git info, set run paths  
2. Locate/validate the submission artifacts (CSV/ZIP) produced by `spectramind submit`  
3. Create a **submission bundle** + README/model card and a **manifest** with hashes  
4. (Optional) **Kaggle upload** via CLI/API — with **DRY-RUN fallback**  
5. Record **leaderboard metadata** and a submission log usable in CI and postmortems  
6. Mermaid sketch of the submission workflow


In [None]:
# ░░ Pre-flight ░░
import os, sys, json, shutil, subprocess, datetime, pathlib, hashlib

# Timestamped identity for this run. An aware UTC "now" is used because
# datetime.utcnow() is deprecated since Python 3.12; the formatted value is
# unchanged (the trailing "Z" is a literal in the format string).
RUN_TS = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
RUN_ID = f"kaggle_submit_{RUN_TS}"

# Output layout: <ROOT_OUT>/<RUN_ID>/{logs,package}
ROOT_OUT = "/mnt/data/kaggle_submission"
ARTIFACTS = os.path.join(ROOT_OUT, RUN_ID)
LOGS = os.path.join(ARTIFACTS, "logs")
PKG = os.path.join(ARTIFACTS, "package")
for p in (ROOT_OUT, ARTIFACTS, LOGS, PKG):
    # exist_ok keeps re-running the cell harmless.
    os.makedirs(p, exist_ok=True)

def which(cmd: str) -> bool:
    """Return True when *cmd* can be resolved by ``shutil.which``."""
    resolved = shutil.which(cmd)
    return resolved is not None


# Availability flags for the external command-line tools.
KAGGLE_PRESENT = which("kaggle")
CLI_PRESENT = which("spectramind")

def git_cmd(args):
    """Run ``git <args>`` and return its stripped stdout, or None on failure.

    Best-effort by design: a missing git binary, a non-zero exit status, or
    exceeding the 5-second timeout all yield None instead of raising.

    Args:
        args: Iterable of argument strings placed after ``git``.

    Returns:
        The command's standard output with surrounding whitespace removed,
        or None if the command could not be run successfully.
    """
    try:
        proc = subprocess.run(
            ["git", *args],
            stdout=subprocess.PIPE,
            # Keep stderr out of the captured value so git warnings cannot
            # leak into the returned string.
            stderr=subprocess.DEVNULL,
            timeout=5,
            check=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: git not installed / not executable.
        # SubprocessError: non-zero exit (CalledProcessError) or timeout.
        return None
    # Replace undecodable bytes rather than failing on odd path encodings.
    return proc.stdout.decode(errors="replace").strip()

# Git provenance; each entry is None when the corresponding git call fails.
git_info = {
    "commit": git_cmd(["rev-parse", "HEAD"]),
    "branch": git_cmd(["rev-parse", "--abbrev-ref", "HEAD"]),
    "status": git_cmd(["status", "--porcelain"]),
}

# Snapshot of the execution environment for this run.
env = {
    "python": " ".join(sys.version.split("\n")),
    "platform": sys.platform,
    "kaggle_present": KAGGLE_PRESENT,
    "spectramind_present": CLI_PRESENT,
    "run_id": RUN_ID,
    "paths": dict(artifacts=ARTIFACTS, logs=LOGS, package=PKG),
    "git": git_info,
}

# Serialize once; the same text is persisted and echoed.
env_text = json.dumps(env, indent=2)
with open(os.path.join(ARTIFACTS, "env.json"), "w") as f:
    f.write(env_text)

print("=== Pre-flight ===")
print(env_text)


## Locate submission artifact (CSV/ZIP)

In [None]:
import glob, os, json, pathlib

# Heuristic search for a previously produced submission file
# (e.g., one written by `spectramind submit`).
candidate_globs = [
    "/mnt/data/**/submission*.csv",
    "/mnt/data/**/submission*.zip",
    "/mnt/data/**/submission_bundle*.zip",
]

# Every regular file matching any pattern; the set removes paths that are
# matched by more than one pattern.
matches = {
    hit
    for pattern in candidate_globs
    for hit in glob.glob(pattern, recursive=True)
    if os.path.isfile(hit)
}

# Most recently modified first; the path breaks ties between equal mtimes.
found = sorted(matches, key=lambda p: (os.path.getmtime(p), p), reverse=True)
print("Found candidate submissions:", json.dumps(found[:10], indent=2))

# The newest candidate wins; None when nothing matched.
SUBMISSION_FILE = next(iter(found), None)
print("Chosen submission file:", SUBMISSION_FILE)


## Validate & stage submission

In [None]:
import shutil, os, csv, zipfile, json, hashlib

def sha256_of_file(path, chunk=1024*1024):
    """Return the hex SHA-256 digest of the file at *path*, or None on any error.

    The file is read in pieces of *chunk* bytes rather than all at once.
    """
    try:
        digest = hashlib.sha256()
        with open(path, "rb") as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for piece in iter(lambda: stream.read(chunk), b""):
                digest.update(piece)
    except Exception:
        # Best-effort: callers treat None as "hash unavailable".
        return None
    return digest.hexdigest()

def _inspect_submission(path):
    """Run minimal sanity checks on a candidate submission file.

    A CSV must yield a header and at least one further row; a ZIP must open
    and list at least one member. Nothing deeper is verified.

    Returns:
        ``(valid, msg)``: a bool verdict and a human-readable summary.
    """
    if not (path and os.path.isfile(path)):
        return False, "No submission file found."

    ext = os.path.splitext(path)[1].lower()

    if ext == ".csv":
        try:
            with open(path, newline="") as fh:
                rows = csv.reader(fh)
                header = next(rows, None)
                first = next(rows, None)
        except Exception as exc:
            return False, f"CSV read error: {exc}"
        summary = f"CSV header={header} first_row={first[:3] if first else None}"
        return header is not None and first is not None, summary

    if ext == ".zip":
        try:
            with zipfile.ZipFile(path, "r") as archive:
                members = archive.namelist()
        except Exception as exc:
            return False, f"ZIP read error: {exc}"
        return len(members) > 0, f"ZIP contains: {members[:5]}..."

    return False, f"Unsupported extension: {ext}"


valid, msg = _inspect_submission(SUBMISSION_FILE)
print("Valid?", valid, "|", msg)

# Only a validated file is copied into the package directory.
STAGED = None
if not valid:
    print("Skipping stage; invalid or missing submission file.")
else:
    STAGED = os.path.join(PKG, os.path.basename(SUBMISSION_FILE))
    shutil.copy2(SUBMISSION_FILE, STAGED)
    print("Staged:", STAGED, "SHA256:", sha256_of_file(STAGED))


## Write README/model card & manifest

In [None]:
# Human-readable description of the package. The two trailing spaces after
# the Run ID are a Markdown hard line break and are intentional.
readme = f"""# SpectraMind V50 — Kaggle Submission Package

**Run ID:** {RUN_ID}  
**Timestamp (UTC):** {RUN_TS}

This package was generated by *Notebook 11 — Kaggle Submission & Leaderboard Playbook*.

## Contents
- `{os.path.basename(STAGED) if STAGED else 'MISSING'}` — submission artifact
- `manifest.json` — provenance (git, hashes)
- `notes.md` — optional notes

## Reproducibility
- Code commit: {env['git']['commit']}
- Branch: {env['git']['branch']}
- Python: {env['python']}

This package is designed to be CI-friendly and traceable.
"""

# The README contains non-ASCII characters (em dashes), so the encoding is
# pinned to UTF-8 instead of relying on the platform default.
with open(os.path.join(PKG, "README.md"), "w", encoding="utf-8") as f:
    f.write(readme)

# Machine-readable provenance: run identity, git state and artifact hash.
manifest = {
    "run_id": RUN_ID,
    "timestamp_utc": RUN_TS,
    "git": env.get("git"),
    "submission_file": STAGED,
    "submission_sha256": sha256_of_file(STAGED) if STAGED else None,
    "kaggle_cli_present": KAGGLE_PRESENT,
}
with open(os.path.join(PKG, "manifest.json"), "w", encoding="utf-8") as f:
    json.dump(manifest, f, indent=2)

# Placeholder for free-form notes.
with open(os.path.join(PKG, "notes.md"), "w", encoding="utf-8") as f:
    f.write("Add experiment notes or leaderboard observations here.\n")

print("Wrote README, manifest, notes into", PKG)


## (Optional) Upload to Kaggle — DRY-RUN safe

In [None]:
import subprocess, shlex, os, json

def run_cmd(cmd_list, log_name):
    """Run *cmd_list* and capture its output, or only log it in DRY-RUN mode.

    When KAGGLE_PRESENT is false nothing is executed; the shell-quoted
    command line is written to the log instead.

    Args:
        cmd_list: Command and arguments as a list of strings.
        log_name: Base name of the files written under LOGS:
            ``<log_name>.log`` (stdout) and ``<log_name>.err`` (stderr).

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. DRY-RUN returns
        ``(0, message, "")``; any exception while launching the process or
        writing the logs returns ``(99, "", str(exception))``.
    """
    log_path = os.path.join(LOGS, f"{log_name}.log")
    err_path = os.path.join(LOGS, f"{log_name}.err")
    if not KAGGLE_PRESENT:
        msg = f"[DRY-RUN] Would execute: {' '.join(shlex.quote(c) for c in cmd_list)}\n"
        # Context managers close the handles deterministically.
        with open(log_path, "w") as log_file:
            log_file.write(msg)
        with open(err_path, "w") as err_file:
            err_file.write("")
        return 0, msg, ""
    try:
        proc = subprocess.run(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out = proc.stdout or b""
        err = proc.stderr or b""
        with open(log_path, "wb") as log_file:
            log_file.write(out)
        with open(err_path, "wb") as err_file:
            err_file.write(err)
        # errors="replace" so undecodable output cannot mask the real
        # return code behind the generic 99 below.
        return proc.returncode, out.decode(errors="replace"), err.decode(errors="replace")
    except Exception as e:
        # Failures are reported to the caller as a sentinel return code.
        return 99, "", str(e)

# NOTE: adjust competition slug if needed
COMPETITION = "ariel-data-challenge-2025"

# Values reported when the upload step is skipped.
rc, out, err = 0, "", ""

have_staged_file = bool(STAGED) and os.path.isfile(STAGED)
if not have_staged_file:
    print("[Skip] No staged submission file to upload.")
else:
    # Kaggle expects: kaggle competitions submit -c <comp> -f <file> -m "<message>"
    msg = f"SpectraMind V50 auto-submit {RUN_ID}"
    cmd = [
        "kaggle", "competitions", "submit",
        "-c", COMPETITION,
        "-f", STAGED,
        "-m", msg,
    ]
    rc, out, err = run_cmd(cmd, log_name="kaggle_submit")
    print("Submit rc:", rc)
    # Only the first 300 characters of each stream are echoed.
    print("stdout (truncated):", out[:300])
    print("stderr (truncated):", err[:300])


## Record submission log & leaderboard metadata stub

In [None]:
# Summary of this run's submission attempt, stored in the run's artifact directory.
submission_log_path = os.path.join(ARTIFACTS, "submission_log.json")

log = {
    "run_id": RUN_ID,
    "ts_utc": RUN_TS,
    "kaggle_present": KAGGLE_PRESENT,
    "submitted_file": None if not STAGED else os.path.basename(STAGED),
    # rc exists only if the upload cell has been executed.
    "submit_rc": rc if 'rc' in locals() else None,
}

with open(submission_log_path, "w") as fh:
    fh.write(json.dumps(log, indent=2))
print("Saved submission log:", submission_log_path)


## Browse produced artifacts

In [None]:
import os

def tree(path, prefix=""):
    """Return the lines of a box-drawing directory listing rooted at *path*.

    Entries are sorted by name. Directories are expanded recursively, with
    *prefix* extended so nested entries line up under their parent.
    """
    names = sorted(os.listdir(path))
    last_index = len(names) - 1
    rendered = []
    for index, entry in enumerate(names):
        is_last = index == last_index
        branch = "└── " if is_last else "├── "
        rendered.append(prefix + branch + entry)
        child = os.path.join(path, entry)
        if os.path.isdir(child):
            # Below the last entry the vertical guide is dropped.
            indent = "    " if is_last else "│   "
            rendered += tree(child, prefix + indent)
    return rendered

# Show the contents of the package directory.
print("PKG TREE:", PKG)
pkg_lines = tree(PKG)
print("\n".join(pkg_lines))


## Submission flow (Mermaid)

```mermaid
flowchart LR
  A[Find submission CSV/ZIP] --> B[Validate structure]
  B --> C[Stage into /package]
  C --> D[Write README + manifest]
  D --> E{Kaggle CLI available?}
  E -- Yes --> F[Upload via kaggle competitions submit]
  E -- No --> G[DRY-RUN: log command]
  F --> H[Submission log + LB notes]
  G --> H
```


## Next steps
- Ensure your **Kaggle API token** is configured at `~/.kaggle/kaggle.json` and readable only by you (e.g. `chmod 600 ~/.kaggle/kaggle.json`).
- Verify the **competition slug** (default: `ariel-data-challenge-2025`) before uploading.
- Use this notebook in **CI** after `10_full_pipeline_reproducibility_and_ci.ipynb` to automate packaging and submission.
- Track submissions & scores in a lightweight CSV or use MLflow/Sheets for team visibility.

> Tip: read Kaggle’s platform guide for notebook/CLI usage and submission rules.
