# NumerAI Colab Training Runner

This notebook is intentionally thin: it captures user inputs, optionally injects auth, runs bootstrap, and starts `src/train_colab.py`.

- Edit code on GitHub (or locally), not in Colab.
- Use Colab as a disposable execution environment only.


In [None]:
# What this cell does: captures user-facing configuration and optional GitHub auth.
import os
import urllib.parse
from pathlib import Path

REPO_URL = os.getenv("REPO_URL", "https://github.com/<your-org-or-user>/Numerai-Re.git")
REPO_REF = os.getenv("REPO_REF", "")  # optional: branch/tag/commit SHA for reproducibility
REPO_DIR = Path(os.getenv("REPO_DIR", "/content/Numerai-Re"))

# Only these hosts may ever receive the GitHub token.
GITHUB_HOSTS = ("github.com", "www.github.com")

try:
    from google.colab import userdata
except Exception:
    # Best-effort: when the Colab secret store cannot be imported, auth injection is skipped.
    userdata = None


def read_optional_secret(store, name):
    """Return the secret `name` from `store`, or None when it is unavailable.

    `store` is any object exposing `get(name)` (here: `google.colab.userdata`)
    or None. A lookup that raises (for example because the secret is missing or
    notebook access was not granted) is treated as "no secret" so that auth
    stays optional instead of aborting the cell.
    """
    if store is None:
        return None
    try:
        value = store.get(name)
    except Exception as exc:
        # Report only the error type; never echo secret material.
        print(f"{name} secret unavailable ({type(exc).__name__}); continuing without GitHub auth.")
        return None
    if not value:
        return None
    # Strip stray whitespace/newlines that commonly sneak in when pasting a token.
    return str(value).strip() or None


def inject_github_token(url, token):
    """Return `url` with `token` added as userinfo, or `url` unchanged.

    The token is only added when the URL is https, its host is exactly one of
    GITHUB_HOSTS, and it does not already carry credentials. Checking the parsed
    host (rather than a substring of the URL) prevents sending the token to an
    unrelated server whose URL merely contains "github.com".
    """
    if not token:
        return url
    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        return url  # unparseable URL: leave it for the bootstrap cell to reject
    if parts.scheme != "https" or parts.hostname not in GITHUB_HOSTS:
        return url
    if "@" in parts.netloc:
        return url  # credentials already present; do not override them
    return parts._replace(netloc=f"{token}@{parts.netloc}").geturl()


def mask_credentials(url):
    """Return `url` with any userinfo (user, password or token) replaced by `***`.

    Only the authority part is inspected, so an "@" inside the path is left
    alone, and credentials are hidden regardless of the URL scheme.
    """
    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        return "***"  # cannot tell where credentials are; print nothing sensitive
    if "@" not in parts.netloc:
        return url
    host = parts.netloc.rpartition("@")[2]
    return parts._replace(netloc=f"***@{host}").geturl()


REPO_URL = inject_github_token(REPO_URL, read_optional_secret(userdata, "GH_TOKEN"))

# Export for the bootstrap cell and the bootstrap script, which read these from the environment.
os.environ["REPO_URL"] = REPO_URL
os.environ["REPO_REF"] = REPO_REF
os.environ["REPO_DIR"] = str(REPO_DIR)

masked_repo_url = mask_credentials(REPO_URL)

print(f"REPO_URL={masked_repo_url}")
print(f"REPO_REF={REPO_REF or '(default branch)'}")
print(f"REPO_DIR={REPO_DIR}")


In [None]:
# What this cell does: fetches and executes bootstrap; bootstrap owns clone/update/install/prereq checks.
# It reads REPO_URL / REPO_REF from the environment, downloads scripts/colab_bootstrap.sh for that
# repo and ref from raw.githubusercontent.com, runs it with bash, and fails if the script fails.
import os
import stat
import tempfile
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path

BOOTSTRAP_FETCH_TIMEOUT_S = 30  # avoid hanging the cell forever on a stalled connection

repo_url = os.environ['REPO_URL']
repo_ref = os.environ.get('REPO_REF', '').strip()

parsed = urllib.parse.urlparse(repo_url)
# Compare the parsed host exactly; a substring test would accept e.g. "github.com.example.org".
if parsed.scheme != 'https' or parsed.hostname not in ('github.com', 'www.github.com'):
    raise ValueError('REPO_URL must be an https GitHub URL so Colab can fetch scripts/colab_bootstrap.sh')

# Ignore empty segments so leading/trailing/double slashes do not count as owner or repo.
segments = [segment for segment in parsed.path.split('/') if segment]
if len(segments) < 2:
    raise ValueError('REPO_URL must include <owner>/<repo>.git')
owner, repo = segments[0], segments[1].removesuffix('.git')
if not repo:
    raise ValueError('REPO_URL must include <owner>/<repo>.git')

owner_repo = f"{owner}/{repo}"
# With no REPO_REF the script is fetched from 'main'.
ref_for_script = repo_ref or 'main'
script_url = f"https://raw.githubusercontent.com/{owner_repo}/{ref_for_script}/scripts/colab_bootstrap.sh"

req = urllib.request.Request(script_url)
# Credentials may be embedded as "<token>@", "<user>:<token>@" or the legacy
# "<token>:x-oauth-basic@" form; pick whichever part is the actual token.
if parsed.password and parsed.password != 'x-oauth-basic':
    token = parsed.password
else:
    token = parsed.username
if token:
    req.add_header('Authorization', f'Bearer {token}')

try:
    with urllib.request.urlopen(req, timeout=BOOTSTRAP_FETCH_TIMEOUT_S) as resp:
        script_body = resp.read().decode('utf-8')
except urllib.error.HTTPError as exc:
    # script_url never contains the token, so it is safe to show in the message.
    raise RuntimeError(
        f"Could not fetch {script_url} (HTTP {exc.code}). "
        "Check REPO_URL and REPO_REF; for a private repository make the GH_TOKEN secret available."
    ) from exc

if not script_body.strip():
    raise RuntimeError(f"Bootstrap script fetched from {script_url} is empty")

tmp_dir = Path(tempfile.mkdtemp(prefix='numerai_bootstrap_'))
bootstrap_path = tmp_dir / 'colab_bootstrap.sh'
# Write with the same encoding used to decode the download.
bootstrap_path.write_text(script_body, encoding='utf-8')
bootstrap_path.chmod(bootstrap_path.stat().st_mode | stat.S_IXUSR)

print(f"Running bootstrap from: {script_url}")
shell = get_ipython()
shell.system(f"bash '{bootstrap_path}'")
# IPython records the status of the last system call in `_exit_code`; raising here
# stops "Run all" so training does not start on a half-prepared environment.
bootstrap_exit_code = shell.user_ns.get('_exit_code', 0)
if bootstrap_exit_code:
    raise RuntimeError(
        f"Bootstrap failed with exit code {bootstrap_exit_code}; fix the error above before starting training."
    )


In [None]:
# What this cell does: starts training from the prepared repo directory.
# `$REPO_DIR` is expanded by IPython from the notebook variable REPO_DIR, which is
# assigned in the configuration cell, so that cell must have run in this kernel first.
%cd $REPO_DIR
# The script path is relative to the directory selected by %cd above.
# NOTE(review): presumably the bootstrap cell has already cloned the repo into REPO_DIR — confirm.
!python src/train_colab.py
