# GRAFT - Graph Reduction via Adaptive Feature & Sample Trimming

- GRAPE
- LUNAR
- T2G-FORMER

## Requirements and Environment

### PyTorch Geometric

In [None]:
import sys, subprocess
import torch

# Work out which PyG wheel index matches the installed torch build.
torch_ver = torch.__version__.partition('+')[0]      # e.g. '2.4.0'
cuda_ver = torch.version.cuda                        # e.g. '12.1'; None on CPU-only builds
if cuda_ver:
    suffix = "cu" + cuda_ver.replace('.', '')
else:
    suffix = 'cpu'
wheel_idx = "https://data.pyg.org/whl/torch-" + torch_ver + "+" + suffix + ".html"

print(f"偵測到 torch={torch_ver}, cuda={cuda_ver or 'CPU'}")
print("對應的輪子索引：", wheel_idx)

# Shared pip prefix: always run pip through the current interpreter.
pip_install = [sys.executable, "-m", "pip", "install", "-q"]

# Binary extension packages required by PyG, resolved against the wheel index.
pkgs = ["pyg-lib", "torch-scatter", "torch-sparse", "torch-cluster", "torch-spline-conv"]
subprocess.check_call(pip_install + pkgs + ["-f", wheel_idx])
# The main package.
subprocess.check_call(pip_install + ["torch_geometric"])

print("✅ PyTorch Geometric 安裝完成。若出現 404/找不到輪子，請看下方『失敗時的固定版本方案』。")


偵測到 torch=2.8.0, cuda=12.6
對應的輪子索引： https://data.pyg.org/whl/torch-2.8.0+cu126.html
✅ PyTorch Geometric 安裝完成。若出現 404/找不到輪子，請看下方『失敗時的固定版本方案』。


### My version


In [None]:
# ==== Colab 環境快照（精簡版）====
# 產出：
#  - ENVIRONMENT_MIN.md
#  - environment-min.json
#  - requirements-core.txt   # 只含核心套件的釘選清單
import os, sys, json, platform, subprocess
from datetime import datetime
try:
    from importlib import metadata as importlib_metadata  # py3.8+
except Exception:
    import importlib_metadata                          # type: ignore
try:
    from zoneinfo import ZoneInfo
except Exception:
    ZoneInfo = None

# Directory that receives the snapshot files written below; created if missing.
PROJECT_ROOT = "/content/GRAFT"  # adjust to your project root
os.makedirs(PROJECT_ROOT, exist_ok=True)

# --- 小工具 ---
def sh(cmd: str):
    """Run *cmd* through the shell and return its combined stdout/stderr, stripped.

    A non-zero exit status does not raise: the captured output is returned
    with an ``"ERR: "`` prefix instead.
    """
    proc = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    captured = proc.stdout.strip()
    if proc.returncode != 0:
        return f"ERR: {captured}"
    return captured

def nvidia_smi():
    """Return the raw ``nvidia-smi`` CSV output (name, driver version, total memory).

    Returns None when the command printed nothing or ``sh`` reported an error.
    """
    fields = "name,driver_version,memory.total"
    out = sh(f"nvidia-smi --query-gpu={fields} --format=csv,noheader,nounits")
    if out and not out.startswith("ERR:"):
        return out
    return None

def cuda_toolkit_version():
    """Best-effort description of the installed CUDA toolkit.

    Sources are tried in order: the last line of ``nvcc --version``, then
    ``/usr/local/cuda/version.json``, then ``/usr/local/cuda/version.txt``.

    Returns:
        The string from the first source that works, or None when none of
        them is available or readable.
    """
    # Run nvcc on its own and pick the last line here. With
    # `nvcc --version | tail -n1` the pipeline's exit status is tail's, so a
    # missing nvcc was not detected and the shell's "not found" message was
    # returned as if it were the version.
    out = sh("nvcc --version")
    if out and not out.startswith("ERR:"):
        return out.splitlines()[-1]
    vjson = "/usr/local/cuda/version.json"
    if os.path.exists(vjson):
        try:
            with open(vjson, "r") as f:
                j = json.load(f)
            return f"CUDA Toolkit (version.json): {j.get('cuda',{}).get('version','unknown')}"
        except (OSError, ValueError, AttributeError):
            # Unreadable, malformed or unexpectedly shaped file: try the next source.
            pass
    vtxt = "/usr/local/cuda/version.txt"
    if os.path.exists(vtxt):
        try:
            # Context manager so the handle is closed (it was left open before).
            with open(vtxt) as f:
                return "CUDA Toolkit: " + f.read().strip()
        except (OSError, ValueError):
            pass
    return None

def git_info(path: str):
    """Describe the git checkout located at *path*.

    Returns a dict with ``path``, ``remote`` (URL of ``origin``), ``branch``
    and ``commit``, or None when *path* is not a directory or
    ``git rev-parse HEAD`` fails there. ``remote`` and ``branch`` hold whatever
    ``sh`` returned, so they can carry an ``"ERR: ..."`` text.
    """
    if not os.path.isdir(path):
        return None
    git = f'git -C "{path}"'
    queries = ("rev-parse HEAD", "remote get-url origin", "rev-parse --abbrev-ref HEAD")
    commit, origin_url, branch_name = [sh(f"{git} {query}") for query in queries]
    if commit.startswith("ERR:"):
        return None
    return {"path": path, "remote": origin_url, "branch": branch_name, "commit": commit}

def dist_version(dist_name: str):
    """Look up the installed version of distribution *dist_name*; None if the lookup fails."""
    found = None
    try:
        found = importlib_metadata.version(dist_name)
    except Exception:
        # Best-effort probe: any lookup failure is reported as "not installed".
        pass
    return found

# --- Only the "core" packages are collected ---
# key: (display name, distribution name as used for pip install)
CORE_DISTS = {
    # PyTorch and companions
    "torch":                 ("torch", "torch"),
    "torchvision":           ("torchvision", "torchvision"),
    "torchaudio":            ("torchaudio", "torchaudio"),
    # PyG family (note: the distribution names use dashes)
    "torch_geometric":       ("torch-geometric", "torch-geometric"),
    "torch_scatter":         ("torch-scatter", "torch-scatter"),
    "torch_sparse":          ("torch-sparse", "torch-sparse"),
    "torch_cluster":         ("torch-cluster", "torch-cluster"),
    "torch_spline_conv":     ("torch-spline-conv", "torch-spline-conv"),
    # Scientific computing
    "numpy":                 ("numpy", "numpy"),
    "pandas":                ("pandas", "pandas"),
    "scipy":                 ("scipy", "scipy"),
    "scikit-learn":          ("scikit-learn", "scikit-learn"),  # imported as sklearn
    # Other commonly used packages
    "networkx":              ("networkx", "networkx"),
    "tqdm":                  ("tqdm", "tqdm"),
    "pyyaml":                ("PyYAML", "PyYAML"),              # imported as yaml
    "hydra-core":            ("hydra-core", "hydra-core"),      # imported as hydra
    "einops":                ("einops", "einops"),
    "matplotlib":            ("matplotlib", "matplotlib"),
}

# --- Version info (compact) ---
# Local import: the top of this cell only imports `datetime` from the module.
from datetime import timezone

py_ver   = sys.version.split()[0]
pip_ver  = sh("pip --version")
os_name  = sh("sed -n 's/^PRETTY_NAME=//p' /etc/os-release | tr -d '\"'")
uname    = platform.uname()
tz       = "Asia/Taipei"
# datetime.utcnow() is deprecated (Python 3.12+ emits a DeprecationWarning).
# Take an aware UTC timestamp and render it in the same "YYYY-MM-DDTHH:MM:SSZ" form.
now_utc  = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
# Local time comes from zoneinfo when available, otherwise from the `date` command.
now_loc  = (datetime.now(ZoneInfo(tz)).isoformat(timespec="seconds") if ZoneInfo
            else sh("TZ=Asia/Taipei date -Iseconds"))

gpu      = nvidia_smi()
cuda_tool= cuda_toolkit_version()

# Torch CUDA status; stays None when torch is missing or any probe raises.
torch_cuda = None
try:
    import torch

    _cuda_build = torch.version.cuda
    _cuda_ok = torch.cuda.is_available()
    _cudnn_ver = torch.backends.cudnn.version()
    _n_devices = torch.cuda.device_count() if _cuda_ok else 0
    # Assigned in one step so a failing probe never leaves a partial dict behind.
    torch_cuda = dict([
        ("torch.version.cuda", _cuda_build),
        ("torch.cuda.is_available", bool(_cuda_ok)),
        ("torch.backends.cudnn.version", str(_cudnn_ver)),
        ("device_count", _n_devices),
    ])
except Exception:
    pass

# Third-party repos: probe the usual checkout locations of the three baselines.
_repo_names = ("GRAPE", "LUNAR", "t2g-former")
third_party_candidates = (
    [f"{PROJECT_ROOT}/third_party/{repo}" for repo in _repo_names]
    + [f"/content/{repo}" for repo in _repo_names]
)
git_third_party = {}
for repo_dir in third_party_candidates:
    repo_info = git_info(repo_dir)
    if not repo_info:
        continue
    # Keyed by lower-cased directory name, so a later candidate with the
    # same name replaces an earlier one.
    git_third_party[os.path.basename(repo_dir).lower()] = repo_info

# Installed versions of the core packages, keyed by display name.
# Packages whose version lookup returns nothing are left out.
_probed = ((shown, dist_version(dist)) for shown, dist in CORE_DISTS.values())
core_versions = {shown: found for shown, found in _probed if found}

# --- Write the JSON snapshot (compact) ---
env_min = {
    "timestamp_utc": now_utc,
    "timestamp_local_Asia_Taipei": now_loc,
    "python": py_ver,
    "pip": pip_ver,
    "os": os_name,
    "uname": {"system": uname.system, "release": uname.release, "machine": uname.machine},
    "gpu": gpu,
    "cuda_toolkit": cuda_tool,
    "torch_cuda": torch_cuda,
    "core_packages": core_versions,          # core packages only
    "third_party_git": git_third_party,      # listed only when found
}
json_path = os.path.join(PROJECT_ROOT, "environment-min.json")
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of depending on the platform default.
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(env_min, f, indent=2, ensure_ascii=False)

# --- Write requirements-core.txt (pins for the core packages only) ---
req_path = os.path.join(PROJECT_ROOT, "requirements-core.txt")
req_lines = []
for name, ver in core_versions.items():
    req_lines.append(f"{name}=={ver}")
with open(req_path, "w") as f:
    # One pin per line, each newline-terminated; no pins gives an empty file.
    f.writelines(line + "\n" for line in req_lines)

# --- 寫 Markdown（精簡）---
def md_table(d: dict):
    """Render *d* as a two-column Markdown table with rows sorted by key.

    An empty (or otherwise falsy) mapping yields the placeholder ``"_N/A_"``.
    """
    if not d:
        return "_N/A_"
    header = ["| 套件 | 版本 |", "|---|---|"]
    body = [f"| {key} | {d[key]} |" for key in sorted(d)]
    return "\n".join(header + body)

# Short pip version for the report. `pip --version` prints "pip <version> from ...";
# anything else (for example an "ERR: ..." text returned by sh) is shown as N/A
# rather than an arbitrary token, and a one-word reply no longer raises IndexError.
_pip_parts = pip_ver.split() if isinstance(pip_ver, str) else []
pip_short = _pip_parts[1] if len(_pip_parts) > 1 and _pip_parts[0] == "pip" else "N/A"

md = f"""# Environment Snapshot — Lite

- **Captured (UTC)**: {now_utc}
- **Captured (Asia/Taipei)**: {now_loc}
- **OS**: {os_name}
- **Python**: {py_ver}
- **Pip**: {pip_short}

## GPU / CUDA
- GPU: {gpu or "N/A"}
- CUDA Toolkit: {cuda_tool or "N/A"}
- Torch CUDA: {torch_cuda or "N/A"}

## Core packages (pinned)
{md_table(core_versions)}

## Third-party repos
"""  # noqa

# One bullet per detected third-party checkout, or a single N/A bullet.
if git_third_party:
    for name, info in git_third_party.items():
        md += f"- **{name}**: {info['remote']} @ `{info['commit']}` (branch: {info['branch']})\n"
else:
    md += "- N/A\n"

md += """

---

- Core requirements: [`requirements-core.txt`](./requirements-core.txt)
- Machine-readable snapshot: [`environment-min.json`](./environment-min.json)
"""

md_path = os.path.join(PROJECT_ROOT, "ENVIRONMENT_MIN.md")
# The report contains non-ASCII characters, so the encoding is set explicitly.
with open(md_path, "w", encoding="utf-8") as f:
    f.write(md)

print("Written:")
print(" -", md_path)
print(" -", json_path)
print(" -", req_path)


  now_utc  = datetime.utcnow().isoformat(timespec="seconds") + "Z"


Written:
 - /content/GRAFT/ENVIRONMENT_MIN.md
 - /content/GRAFT/environment-min.json
 - /content/GRAFT/requirements-core.txt


## Baseline - GRAPE

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab helper).
from google.colab import drive
drive.mount('/content/drive')

### Sync colab from drive

In [None]:
# Install rsync, which the Drive sync cell below uses.
!apt-get -qq update && apt-get -qq install -y rsync

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


- 請先載入以下至 `/content`
  - `pipelines/`
  - `GRAPE/`
  - `LUNAR/`
  - `T2G-FORMER/`

In [None]:
%%bash
# Sync the source trees from Google Drive into /content.
# Strict mode: stop at the first failing command or unset variable.
set -euo pipefail

REPO_NAME="GRAPE"
# The only place the Drive location is configured: replace {path to} with your folder.
GRAFT_DRIVE="{path to}/GRAFT"
DRIVE_BASE="${GRAFT_DRIVE}/third_party"
REPO_DRIVE="${DRIVE_BASE}/${REPO_NAME}"
REPO_RAM="/content/${REPO_NAME}"
echo "REPO_DRIVE=${REPO_DRIVE:-<EMPTY>}"
echo "REPO_RAM=${REPO_RAM:-<EMPTY>}"

# Guard: paths must be neither empty nor the filesystem root.
for p in "${REPO_DRIVE:-}" "${REPO_RAM:-}"; do
  if [ -z "$p" ]; then echo "ERROR: empty path"; exit 1; fi
  if [ "$p" = "/" ]; then echo "ERROR: path is /"; exit 1; fi
done

# Guard: every source must exist before any --delete sync is started.
for src in "$REPO_DRIVE" "$GRAFT_DRIVE/pipelines" "$DRIVE_BASE/LUNAR" "$DRIVE_BASE/T2G-FORMER"; do
  [ -d "$src" ] || { echo "ERROR: source directory not found: $src"; exit 1; }
done

# Copy the sources from Drive to /content.
# .git and earlier test/output folders are excluded to keep the working copy clean.
rsync -a --delete \
  --exclude=".git/" \
  --exclude="uci/test/" \
  --exclude="mc/test/" \
  "$REPO_DRIVE/" "$REPO_RAM/"
rsync -a --delete \
  --exclude=".git/" \
  --exclude="uci/test/" \
  --exclude="mc/test/" \
  "$GRAFT_DRIVE/pipelines" "/content"
rsync -a --delete \
  --exclude=".git/" \
  "$DRIVE_BASE/LUNAR" "/content"
rsync -a --delete \
  --exclude=".git/" \
  --exclude='results/' \
  "$DRIVE_BASE/T2G-FORMER" "/content"

cd "$REPO_RAM"
# `|| true`: head may close the pipe early, which must not fail the cell under pipefail.
ls -la | head || true

### baseline

In [None]:
# LUNAR → GRAPE
!pip install -q faiss-cpu     # LUNAR
!pip install -q category-encoders # T2G
!pip install -q tomli       # T2G
!pip install -q tomli-w      # T2G

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m71.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.3/242.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Start clean: remove GRAPE's uci test directory, the pipeline artifacts and the logs.
!rm -rf /content/GRAPE/uci/test
!rm -rf /content/grapt_artifacts
!rm -rf /content/logs

- UCI: `concrete, energy, housing, kin8nm, naval, power, protein, wine, yacht`

#### Baseline (GRAPE only)

In [None]:
%%bash
# Baseline: GRAPE alone on the protein dataset, seeds 0-2; one log file per seed.
LOG_DIR=/content/logs
mkdir -p "$LOG_DIR"

# Patch both GRAPE entry scripts so an existing log directory is tolerated.
for script in train_mdi.py train_y.py; do
  sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "/content/GRAPE/$script"
done

for s in 0 1 2; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules grape --order grape \
    --auto_prep \
    --grape.task both --grape.inject_artifact_flags \
    > "$LOG_DIR/run_grape_seed${s}.log" 2>&1
done


===== SEED = 0 =====
===== SEED = 1 =====
===== SEED = 2 =====


#### LUNAR → GRAPE

In [None]:
# Optional LUNAR flag (add it to the run_pipeline.py call in the next cell):
#   --lunar.keep 0.70

In [None]:
%%bash
# LUNAR -> GRAPE on the protein dataset, seeds 0-2.
export GRAFT_LUNAR_DIR=/content/LUNAR   # read by the pipeline run below
GRAPE_DIR=/content/GRAPE
mkdir -p /content/logs

# Patch GRAPE so an existing log directory is tolerated.
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "$GRAPE_DIR/train_mdi.py"
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "$GRAPE_DIR/train_y.py"

for s in 0 1 2; do
  log="/content/logs/run_luner_grape_seed${s}.log"
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root "$GRAPE_DIR" \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules lunar,grape --order 'lunar>grape' \
    --auto_prep \
    > "$log" 2>&1
done


===== SEED = 0 =====
===== SEED = 1 =====
===== SEED = 2 =====


#### T2G → GRAPE

In [None]:
%%bash
# T2G -> GRAPE on the protein dataset, seeds 0-2.
export GRAFT_LUNAR_DIR=/content/LUNAR
GRAPE_DIR=/content/GRAPE
LOG_DIR=/content/logs
mkdir -p "$LOG_DIR"

# Patch GRAPE so an existing log directory is tolerated.
for script in train_mdi.py train_y.py; do
  sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "$GRAPE_DIR/$script"
done

for s in 0 1 2; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root "$GRAPE_DIR" \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules t2gexp,t2g,grape --order 't2gexp>t2g>grape' \
    --auto_prep --mask_op AND \
    --t2gexp.t2g_repo /content/T2G-FORMER \
    --t2gexp.epochs 50 --t2gexp.train_on overlay \
    --t2g.keep_cols_ratio 0.90 \
    > "$LOG_DIR/run_t2g_grape_seed${s}.log" 2>&1
done


===== SEED = 0 =====
===== SEED = 1 =====
===== SEED = 2 =====


#### LUNAR → T2G → GRAPE

In [None]:
%%bash
# LUNAR -> T2G -> GRAPE on the protein dataset, seeds 0-2 (strict mode: abort on first failure).
set -euo pipefail
export GRAFT_LUNAR_DIR=/content/LUNAR
LOG_DIR=/content/logs
mkdir -p "$LOG_DIR"

# Small patch to the GRAPE scripts: tolerate an existing log directory.
for script in train_mdi.py train_y.py; do
  sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "/content/GRAPE/$script"
done

for s in 0 1 2; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules lunar,t2gexp,t2g,grape --order 'lunar>t2gexp>t2g>grape' \
    --auto_prep --mask_op AND \
    --t2gexp.t2g_repo /content/T2G-FORMER \
    --t2gexp.epochs 50 --t2gexp.train_on overlay \
    --t2g.keep_cols_ratio 0.9 \
    > "$LOG_DIR/run_lunar_t2g_grape_seed${s}.log" 2>&1
done


#### T2G → LUNAR → GRAPE

In [None]:
%%bash
# T2G -> LUNAR -> GRAPE on the protein dataset, seeds 0-2.
export GRAFT_LUNAR_DIR=/content/LUNAR
T2G_REPO=/content/T2G-FORMER
mkdir -p /content/logs

# Patch GRAPE so an existing log directory is tolerated.
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

seeds=(0 1 2)
for s in "${seeds[@]}"; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules lunar,t2gexp,t2g,grape --order 't2gexp>t2g>lunar>grape' \
    --auto_prep --mask_op AND \
    --t2gexp.t2g_repo "$T2G_REPO" \
    --t2gexp.epochs 50 --t2gexp.train_on overlay \
    --t2g.keep_cols_ratio 0.9 \
    > "/content/logs/run_t2g_luner_grape_seed${s}.log" 2>&1
done


===== SEED = 0 =====
===== SEED = 1 =====
===== SEED = 2 =====


#### Random Drop → GRAPE

In [None]:
%%bash
# Adjust the paths below to your environment.
# (%%bash has to be the first line of the cell, so this note sits below it.)
# Random column drop -> GRAPE on the protein dataset, seeds 0-2.
mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

export GRAFT_LUNAR_DIR=/content/LUNAR

for s in 0 1 2; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset protein --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules random,grape --order 'random>grape' \
    --auto_prep \
    --mask_op AND \
    --random.drop_rows 0.0 \
    --random.drop_cols 0.10 \
    --random.seed "$s" \
    > "/content/logs/run_random_grape_seed${s}.log" 2>&1
done


===== SEED = 0 =====
===== SEED = 1 =====
===== SEED = 2 =====


#### 評估指標

In [None]:
from pathlib import Path
import json

# Report the metrics stored for the housing / seed0 "impute" run.
base = Path("/content/grapt_artifacts/baseline/housing/seed0/impute")
mj = base / "metrics.json"
_raw = mj.read_text() if mj.exists() else ""
if _raw.strip() not in ("", "{}"):
    m = json.loads(_raw)
    # Typical keys: m["test"]["mae"], m["valid"]["mae"] (actual names depend on the training script)
    print(json.dumps(m, indent=2, ensure_ascii=False))
else:
    # metrics.json is missing or empty: read the curves from result.pkl instead
    import joblib
    pkl = joblib.load(base / "result.pkl")
    curves = pkl.get("curves", {})

    def last(a):
        """Last element of a list (None for an empty list); non-lists are returned unchanged."""
        if not isinstance(a, list):
            return a
        return a[-1] if a else None

    for _label, _key in (
        ("valid_mae(last):", "valid_l1"),
        ("test_mae(last): ", "test_l1"),
        ("valid_rmse(last):", "valid_rmse"),
        ("test_rmse(last): ", "test_rmse"),
    ):
        print(_label, last(curves.get(_key)))


valid_mae(last): None
test_mae(last):  0.15573932230472565
valid_rmse(last): None
test_rmse(last):  0.2481225791808255


In [None]:
from pathlib import Path
import json

# Report the metrics stored for the housing / seed0 "label" run.
base = Path("/content/grapt_artifacts/baseline/housing/seed0/label")
mj = base / "metrics.json"
_has_metrics = False
if mj.exists():
    _has_metrics = mj.read_text().strip() not in ("", "{}")

if _has_metrics:
    m = json.loads(mj.read_text())
    # Typical keys: m["test"]["mae"], m["valid"]["mae"] (actual names depend on the training script)
    print(json.dumps(m, indent=2, ensure_ascii=False))
else:
    # metrics.json is missing or empty: read the curves from result.pkl instead
    import joblib
    pkl = joblib.load(base / "result.pkl")
    curves = pkl.get("curves", {})

    def last(a):
        """Last element of a list (None for an empty list); non-lists are returned unchanged."""
        if isinstance(a, list):
            return a[-1] if a else None
        return a

    _get = curves.get
    print("valid_mae(last):", last(_get("valid_l1")))
    print("test_mae(last): ", last(_get("test_l1")))
    print("valid_rmse(last):", last(_get("valid_rmse")))
    print("test_rmse(last): ", last(_get("test_rmse")))


valid_mae(last): None
test_mae(last):  3.235844373703003
valid_rmse(last): None
test_rmse(last):  4.796092413768343


## Ablation

#### T2G → LUNAR → GRAPE

##### 0.95

In [None]:
%%bash
# Ablation: T2G -> LUNAR -> GRAPE on wine (seed 0) with --lunar.keep 0.95.
# Strict mode is enabled BEFORE the run: a failed pipeline must abort the cell
# instead of letting a stale seed0 directory be renamed and backed up.
set -euo pipefail

mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

export GRAFT_LUNAR_DIR=/content/LUNAR

echo "===== LUNAR_KEEP = 0.95 ====="
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset wine --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules lunar,t2gexp,t2g,grape --order 't2gexp>t2g>lunar>grape' \
  --auto_prep --mask_op AND \
  --t2gexp.t2g_repo /content/T2G-FORMER \
  --t2gexp.epochs 50 --t2gexp.train_on overlay \
  --t2g.keep_cols_ratio 0.9 \
  --lunar.keep 0.95 \
  > "/content/logs/run_t2g_luner_grape_keep095.log" 2>&1

# Rename the run directory after the keep ratio.
src="/content/grapt_artifacts/baseline/wine/seed0"
dst="/content/grapt_artifacts/baseline/wine/keep095"

[ -d "$src" ] || { echo "找不到 $src"; exit 1; }
if [ -e "$dst" ]; then echo "目標已存在：$dst"; exit 1; fi

mv "$src" "$dst"

# Back up all baseline artifacts to Drive.
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

if [ -d "$ART_SRC" ]; then
  du -sh "$ART_SRC"          # show the size
  mkdir -p "$ART_DST"        # rsync does not create missing parent directories
  # -a keeps times/permissions; without --delete, files already at the destination are kept.
  rsync -a "$ART_SRC/" "$ART_DST/"
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  echo "來源資料夾不存在：$ART_SRC"
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
fi

===== LUNAR_KEEP = 0.95 =====
21M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


##### 0.90

In [None]:
%%bash
# Ablation: T2G -> LUNAR -> GRAPE on wine (seed 0) with --lunar.keep 0.90.
# Strict mode is enabled BEFORE the run: a failed pipeline must abort the cell
# instead of letting a stale seed0 directory be renamed and backed up.
set -euo pipefail

mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

export GRAFT_LUNAR_DIR=/content/LUNAR

echo "===== LUNAR_KEEP = 0.90 ====="
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset wine --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules lunar,t2gexp,t2g,grape --order 't2gexp>t2g>lunar>grape' \
  --auto_prep --mask_op AND \
  --t2gexp.t2g_repo /content/T2G-FORMER \
  --t2gexp.epochs 50 --t2gexp.train_on overlay \
  --t2g.keep_cols_ratio 0.9 \
  --lunar.keep 0.90 \
  > "/content/logs/run_t2g_luner_grape_keep090.log" 2>&1

# Rename the run directory after the keep ratio.
src="/content/grapt_artifacts/baseline/wine/seed0"
dst="/content/grapt_artifacts/baseline/wine/keep090"

[ -d "$src" ] || { echo "找不到 $src"; exit 1; }
if [ -e "$dst" ]; then echo "目標已存在：$dst"; exit 1; fi

mv "$src" "$dst"

# Back up all baseline artifacts to Drive.
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

if [ -d "$ART_SRC" ]; then
  du -sh "$ART_SRC"          # show the size
  mkdir -p "$ART_DST"        # rsync does not create missing parent directories
  # -a keeps times/permissions; without --delete, files already at the destination are kept.
  rsync -a "$ART_SRC/" "$ART_DST/"
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  echo "來源資料夾不存在：$ART_SRC"
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
fi

===== LUNAR_KEEP = 0.90 =====
23M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


##### 0.80

In [None]:
%%bash
# Ablation: T2G -> LUNAR -> GRAPE on energy (seed 0) with --lunar.keep 0.80.
# Strict mode is enabled BEFORE the run: a failed pipeline must abort the cell
# instead of letting a stale seed0 directory be renamed and backed up.
# NOTE(review): this cell uses the energy dataset while the other keep-ratio cells use wine — confirm.
set -euo pipefail

mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

export GRAFT_LUNAR_DIR=/content/LUNAR

echo "===== LUNAR_KEEP = 0.80 ====="
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset energy --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules lunar,t2gexp,t2g,grape --order 't2gexp>t2g>lunar>grape' \
  --auto_prep --mask_op AND \
  --t2gexp.t2g_repo /content/T2G-FORMER \
  --t2gexp.epochs 50 --t2gexp.train_on overlay \
  --t2g.keep_cols_ratio 0.9 \
  --lunar.keep 0.80 \
  > "/content/logs/run_t2g_luner_grape_keep080.log" 2>&1

# Rename the run directory after the keep ratio.
src="/content/grapt_artifacts/baseline/energy/seed0"
dst="/content/grapt_artifacts/baseline/energy/keep080"

[ -d "$src" ] || { echo "找不到 $src"; exit 1; }
if [ -e "$dst" ]; then echo "目標已存在：$dst"; exit 1; fi

mv "$src" "$dst"

# Back up all baseline artifacts to Drive.
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

if [ -d "$ART_SRC" ]; then
  du -sh "$ART_SRC"          # show the size
  mkdir -p "$ART_DST"        # rsync does not create missing parent directories
  # -a keeps times/permissions; without --delete, files already at the destination are kept.
  rsync -a "$ART_SRC/" "$ART_DST/"
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  echo "來源資料夾不存在：$ART_SRC"
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
fi

===== LUNAR_KEEP = 0.80 =====
16M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


##### 0.70

In [None]:
%%bash
# Ablation: T2G -> LUNAR -> GRAPE on wine (seed 0) with --lunar.keep 0.70.
# Strict mode is enabled BEFORE the run: a failed pipeline must abort the cell
# instead of letting a stale seed0 directory be renamed and backed up.
set -euo pipefail

mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

export GRAFT_LUNAR_DIR=/content/LUNAR

echo "===== LUNAR_KEEP = 0.70 ====="
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset wine --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules lunar,t2gexp,t2g,grape --order 't2gexp>t2g>lunar>grape' \
  --auto_prep --mask_op AND \
  --t2gexp.t2g_repo /content/T2G-FORMER \
  --t2gexp.epochs 50 --t2gexp.train_on overlay \
  --t2g.keep_cols_ratio 0.9 \
  --lunar.keep 0.70 \
  > "/content/logs/run_t2g_luner_grape_keep070.log" 2>&1

# Rename the run directory after the keep ratio.
src="/content/grapt_artifacts/baseline/wine/seed0"
dst="/content/grapt_artifacts/baseline/wine/keep070"

[ -d "$src" ] || { echo "找不到 $src"; exit 1; }
if [ -e "$dst" ]; then echo "目標已存在：$dst"; exit 1; fi

mv "$src" "$dst"

# Back up all baseline artifacts to Drive.
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

if [ -d "$ART_SRC" ]; then
  du -sh "$ART_SRC"          # show the size
  mkdir -p "$ART_DST"        # rsync does not create missing parent directories
  # -a keeps times/permissions; without --delete, files already at the destination are kept.
  rsync -a "$ART_SRC/" "$ART_DST/"
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  echo "來源資料夾不存在：$ART_SRC"
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
fi

===== LUNAR_KEEP = 0.70 =====
25M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


## save

In [None]:
%%bash
# Rename the protein results directory; refuses to run when the source is
# missing or the target name is already taken.
set -euo pipefail
src="/content/grapt_artifacts/baseline/protein"
dst="/content/grapt_artifacts/baseline/protein_t2g_lunar_grape"

if [ ! -d "$src" ]; then
  echo "找不到 $src"
  exit 1
fi
if [ -e "$dst" ]; then
  echo "目標已存在：$dst"
  exit 1
fi

mv "$src" "$dst"


In [None]:
%%bash
# Back up the baseline artifacts to Google Drive.
# Strict mode: if du/mkdir/rsync fails the cell stops, so the success message
# is only printed after a sync that actually worked.
set -euo pipefail

# Paths
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

if [ -d "$ART_SRC" ]; then
  du -sh "$ART_SRC"          # show the size
  mkdir -p "$ART_DST"        # rsync does not create missing parent directories
  # -a keeps times/permissions; without --delete, files already at the destination are kept.
  rsync -a "$ART_SRC/" "$ART_DST/"
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  echo "來源資料夾不存在：$ART_SRC"
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
fi


29M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


## more

In [None]:
# Start clean: remove GRAPE's pack test directory, the pipeline artifacts and the logs.
!rm -rf /content/GRAPE/pack/test
!rm -rf /content/grapt_artifacts
!rm -rf /content/logs

### OOM 問題前

In [None]:
%%bash
# GRAPE label task (y) on the year_mcar30 pack, seed 0.
PACK_ROOT=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30
LOG_DIR=/content/logs
mkdir -p "$LOG_DIR"

# Patch GRAPE so an existing log directory is tolerated.
for script in train_mdi.py train_y.py; do
  sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "/content/GRAPE/$script"
done

for s in 0; do
  echo "===== SEED = $s ====="
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset year_mcar30 --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules grape --order grape \
    --auto_prep \
    --grape.task y --grape.inject_artifact_flags \
    --grape_domain pack \
    --pack_root "$PACK_ROOT" \
    > "$LOG_DIR/run_grape_seed${s}.log" 2>&1
done

===== SEED = 0 =====

[run_cmd] CWD: /content/GRAPE
[run_cmd] CMD: python /content/GRAPE/train_y.py --seed 0 --artifact_dir /content/grapt_artifacts --dump_intermediate --prep_only pack --root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 --data year_mcar30
[child stdout] Namespace(model_types='EGSAGE_EGSAGE', post_hiddens=None, concat_states=False, norm_embs=None, aggr='mean', node_dim=16, edge_dim=16, edge_mode=1, gnn_activation='relu', impute_hiddens='', impute_activation='relu', predict_hiddens='', epochs=20000, opt='adam', opt_scheduler='none', opt_restart=0, opt_decay_step=1000, opt_decay_rate=0.9, dropout=0.0, weight_decay=0.0, lr=0.001, known=0.7, valid=0.0, seed=0, log_dir='y0', artifact_dir='/content/grapt_artifacts', dump_intermediate=True, prep_only=True, root='/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30', data='year_mcar30', domain='pack')
[child stdout] GPU mem: 12, Select GPU 0
[child stdout] Using GPU 0
[child stdout] ['EGSAGE', 'EGSAGE'] [True

[child stderr] Traceback (most recent call last):
[child stderr]   File "/content/GRAPE/train_y.py", line 91, in <module>
[child stderr]     main()
[child stderr]   File "/content/GRAPE/train_y.py", line 88, in main
[child stderr]     train_gnn_y(data, args, log_path, device)
[child stderr]   File "/content/GRAPE/training/gnn_y.py", line 297, in train_gnn_y
[child stderr]     x_embd = model(x, known_edge_attr, known_edge_index)
[child stderr]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
[child stderr]     return self._call_impl(*args, **kwargs)
[child stderr]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
[child stderr]     return forward_call(*args, **kwargs)
[child stderr]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr]   File "/

### OOM 方案一: --known 失敗

In [None]:
%%bash
# OOM plan 1: shrink the GRAPE label model (--node_dim/--edge_dim 4) and use --known 0.0005.
mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

set -e
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

for s in 0; do
  echo "===== SEED = $s ====="
  # --grape.task y: the recorded run of this cell (without it) launched train_mdi.py
  # with its default sizes and failed there, so the custom train_y command below was
  # never reached. The earlier y-only cell passes the same flag.
  # The loop seed is forwarded to both the pipeline and the custom command.
  python pipelines/run_pipeline.py \
    --grape_root /content/GRAPE \
    --dataset year_mcar30 --seed "$s" \
    --artifact_dir /content/grapt_artifacts \
    --modules grape --order grape \
    --auto_prep \
    --grape_domain pack \
    --pack_root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 \
    --grape.task y --grape.inject_artifact_flags \
    --grape.y_cmd "python /content/GRAPE/train_y.py \
                  --seed $s \
                  --node_dim 4 --edge_dim 4 \
                  --known 0.0005 \
                  pack --root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 \
                  --data year_mcar30"
  # Output stays on the notebook console; to capture it, append
  #   > "/content/logs/run_grape_seed${s}.log" 2>&1
  # to the command above.
done

===== SEED = 0 =====

[run_cmd] CWD: /content/GRAPE
[run_cmd] CMD: python /content/GRAPE/train_mdi.py --seed 0 --artifact_dir /content/grapt_artifacts --dump_intermediate pack --root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 --data year_mcar30
[child stdout] Namespace(model_types='EGSAGE_EGSAGE_EGSAGE', post_hiddens=None, concat_states=False, norm_embs=None, aggr='mean', node_dim=64, edge_dim=64, edge_mode=1, gnn_activation='relu', impute_hiddens='64', impute_activation='relu', epochs=20000, opt='adam', opt_scheduler='none', opt_restart=0, opt_decay_step=1000, opt_decay_rate=0.9, dropout=0.0, weight_decay=0.0, lr=0.001, known=0.7, auto_known=False, loss_mode=0, valid=0.0, seed=0, log_dir='0', save_model=False, save_prediction=False, transfer_dir=None, transfer_extra='', mode='train', artifact_dir='/content/grapt_artifacts', dump_intermediate=True, prep_only=False, root='/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30', data='year_mcar30', domain='pack')
[child

[child stderr] Traceback (most recent call last):
[child stderr]   File "/content/GRAPE/train_mdi.py", line 103, in <module>
[child stderr]     main()
[child stderr]   File "/content/GRAPE/train_mdi.py", line 99, in main
[child stderr]     train_gnn_mdi(data, args, log_path, device)
[child stderr]   File "/content/GRAPE/training/gnn_mdi.py", line 324, in train_gnn_mdi
[child stderr]     x_embd = model(x, known_edge_attr, known_edge_index)
[child stderr]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
[child stderr]     return self._call_impl(*args, **kwargs)
[child stderr]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1784, in _call_impl
[child stderr]     return forward_call(*args, **kwargs)
[child stderr]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[child stderr

CalledProcessError: Command 'b'mkdir -p /content/logs\nsed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py\nsed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py\n\nset -e\nexport PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n\nfor s in 0; do\n  echo "===== SEED = $s ====="\n  python pipelines/run_pipeline.py \\\n    --grape_root /content/GRAPE \\\n    --dataset year_mcar30 --seed 0 \\\n    --artifact_dir /content/grapt_artifacts \\\n    --modules grape --order grape \\\n    --auto_prep \\\n    --grape_domain pack \\\n    --pack_root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 \\\n    --grape.inject_artifact_flags \\\n    --grape.y_cmd "python /content/GRAPE/train_y.py \\\n                  --seed 0 \\\n                  --node_dim 4 --edge_dim 4 \\\n                  --known 0.0005 \\\n                  pack --root /content/drive/MyDrive/data-t2g/year/baseline/year_mcar30 \\\n                  --data year_mcar30" # \\\n    # > "/content/logs/run_grape_seed${s}.log" 2>&1\ndone\n'' returned non-zero exit status 1.

### OOM 方案二 row_cap

In [None]:
%%bash
# Create the log directory and patch both GRAPE entry scripts so that an
# already existing log directory is tolerated.
mkdir -p /content/logs
for script in train_mdi.py train_y.py; do
  sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" "/content/GRAPE/$script"
done

In [None]:
%%bash
mkdir -p /content/logs
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_mdi.py
sed -i "s/os.makedirs(log_path)/os.makedirs(log_path, exist_ok=True)/" /content/GRAPE/train_y.py

# ===== A) Build a "row-capped" copy of the baseline pack =====
# Exported so the Python step below reads these values instead of repeating them.
export SRC=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30
export DST=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30_cap10k_s0
export CAP=10000     # total number of rows to keep; lower it if training still runs out of memory
export SEED=0

python - <<'PY'
import json
import os
from pathlib import Path

import numpy as np

# Settings come from the environment exported by the surrounding shell cell.
SRC = Path(os.environ["SRC"])
DST = Path(os.environ["DST"])
CAP = int(os.environ["CAP"])
SEED = int(os.environ["SEED"])

DST.mkdir(parents=True, exist_ok=True)
X = np.load(SRC/"X_norm.npy")     # (N,d)
y = np.load(SRC/"y.npy")          # (N,)
M = np.load(SRC/"mask.npy")       # (N,d) 0/1
with open(SRC/"split_idx.json", "r") as f:
    split = json.load(f)

N = X.shape[0]
tr = np.asarray(split["train"], dtype=np.int64)
va = np.asarray(split.get("val", []), dtype=np.int64)
te = np.asarray(split["test"], dtype=np.int64)

# Share CAP between the splits in proportion to their original sizes;
# the test split takes the remainder (at least one row).
n_tr = int(round(CAP * (tr.size / N)))
n_va = int(round(CAP * (va.size / N))) if va.size else 0
n_te = max(CAP - n_tr - n_va, 1)

rng = np.random.default_rng(SEED)
keep_tr = rng.choice(tr, size=min(n_tr, tr.size), replace=False)
keep_va = rng.choice(va, size=min(n_va, va.size), replace=False) if va.size else np.array([], np.int64)
keep_te = rng.choice(te, size=min(n_te, te.size), replace=False)
keep_rows = np.concatenate([keep_tr, keep_va, keep_te]); keep_rows.sort()

X2 = X[keep_rows]; y2 = y[keep_rows]; M2 = M[keep_rows]
# Map original row indices to their positions in the reduced arrays.
remap = {old:i for i,old in enumerate(keep_rows.tolist())}
split2 = {
  "train":[remap[i] for i in keep_tr.tolist()],
  "val":[remap[i] for i in keep_va.tolist()] if keep_va.size else [],
  "test":[remap[i] for i in keep_te.tolist()],
}

np.save(DST/"X_norm.npy", X2)
np.save(DST/"y.npy", y2)
np.save(DST/"mask.npy", M2.astype(np.uint8))
with open(DST/"split_idx.json", "w") as f:
    json.dump(split2, f, ensure_ascii=False)

# Short summary of the result
m = M2.sum()
d = X2.shape[1]
print(f"[row_cap] rows={X2.shape[0]}, cols={d}, mask_ones={int(m)} (≈{m/(X2.shape[0]*d):.2%})")
PY


[row_cap] rows=10000, cols=90, mask_ones=631245 (≈70.14%)


#### GRAPE

In [None]:
%%bash
set -euo pipefail
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Row-capped pack (input), the baseline directory primed for the pipeline, and the run log.
# SEED is shared by the primed directory, the pipeline, the training command and the log
# name so they cannot drift apart.
SEED=0
SRC=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30_cap10k_s0
DST=/content/grapt_artifacts/baseline/year_mcar30/seed${SEED}
LOG=/content/logs/run_grape_rowcap10k_seed${SEED}.log

echo "[STEP 1] Rebuild mask = ~isnan(X_norm)"
# NOTE(review): this overwrites mask.npy inside $SRC and assumes missing cells are
# stored as NaN in X_norm.npy -- confirm against how the pack was produced.
PACK_ROOT="$SRC" python - <<'PY'
import os

import numpy as np

root = os.environ["PACK_ROOT"]   # passed in from the shell; the heredoc itself is not expanded
X = np.load(os.path.join(root, "X_norm.npy"))
m = (~np.isnan(X)).astype('uint8')
np.save(os.path.join(root, "mask.npy"), m)
# Count rows / columns that still have at least one mask entry set.
print("[SRC] rows_keep =", int(m.any(axis=1).sum()), "/", m.shape[0],
      "| cols_keep =", int(m.any(axis=0).sum()), "/", m.shape[1])
PY

echo "[STEP 2] Prime artifacts baseline with 4 files"
mkdir -p "$DST"
cp -f "$SRC"/{X_norm.npy,mask.npy,y.npy,split_idx.json} "$DST"/
rm -rf "$DST/variants"   # drop variants left over from a previous run
echo "[CHK] baseline primed:"
ls -lh "$DST"/X_norm.npy "$DST"/mask.npy "$DST"/y.npy "$DST"/split_idx.json

echo "[STEP 3] Run pipeline (no error swallowing)"
mkdir -p "$(dirname "$LOG")"
# Output goes to the log file; on failure the tail is shown and the cell fails,
# which is what the step label promises.
if ! python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset year_mcar30 --seed "$SEED" \
  --artifact_dir /content/grapt_artifacts \
  --modules grape --order grape \
  --auto_prep \
  --grape_domain pack \
  --pack_root "$SRC" \
  --grape.allow_fallback \
  --grape.task y \
  --grape.y_cmd "python /content/GRAPE/train_y.py --seed $SEED \
                 --node_dim 8 --edge_dim 8 \
                 --known 0.01 --opt_scheduler none \
                 --dump_intermediate \
                 pack --root $SRC --data year_mcar30" \
  > "$LOG" 2>&1
then
  echo "[ERR] pipeline failed; last lines of $LOG:" >&2
  tail -n 40 "$LOG" >&2 || true
  exit 1
fi
# NOTE: with --grape.inject_artifact_flags the run produced NaN;
# without it the run failed with FileNotFoundError (allow_fallback is enabled
# but no result.pkl was found).

[STEP 1] Rebuild mask = ~isnan(X_norm)
[SRC] rows_keep = 10000 / 10000 | cols_keep = 90 / 90
[STEP 2] Prime artifacts baseline with 4 files
[CHK] baseline primed:
-rw------- 1 root root 880K Sep  4 11:04 /content/grapt_artifacts/baseline/year_mcar30/seed0/mask.npy
-rw------- 1 root root  58K Sep  4 11:04 /content/grapt_artifacts/baseline/year_mcar30/seed0/split_idx.json
-rw------- 1 root root 3.5M Sep  4 11:04 /content/grapt_artifacts/baseline/year_mcar30/seed0/X_norm.npy
-rw------- 1 root root  79K Sep  4 11:04 /content/grapt_artifacts/baseline/year_mcar30/seed0/y.npy
[STEP 3] Run pipeline (no error swallowing)


#### T2G GRAPE

In [None]:
%%bash
set -euo pipefail
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# === Paths ===
SRC=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30_cap10k_s0   # row-capped baseline source
DST=/content/grapt_artifacts/baseline/year_mcar30/seed0           # baseline destination used by the orchestrator
T2G_REPO=/content/T2G-FORMER                        # T2G-Former repository root (directory)
LOG=/content/logs/run_t2g_grape_seed0.log

echo "[STEP 1] Rebuild mask = ~isnan(X_norm)"
# NOTE(review): this overwrites mask.npy inside $SRC and assumes missing cells are
# stored as NaN in X_norm.npy -- confirm against how the pack was produced.
PACK_ROOT="$SRC" python - <<'PY'
import os

import numpy as np

root = os.environ["PACK_ROOT"]   # passed in from the shell; the heredoc itself is not expanded
X = np.load(os.path.join(root, "X_norm.npy"))
m = (~np.isnan(X)).astype('uint8')
np.save(os.path.join(root, "mask.npy"), m)
# Count rows / columns that still have at least one mask entry set.
print("[SRC] rows_keep =", int(m.any(axis=1).sum()), "/", m.shape[0],
      "| cols_keep =", int(m.any(axis=0).sum()), "/", m.shape[1])
PY

echo "[STEP 2] Prime artifacts baseline with 4 files"
mkdir -p "$DST"
cp -f "$SRC"/{X_norm.npy,mask.npy,y.npy,split_idx.json} "$DST"/
rm -rf "$DST/variants"   # drop variants left over from a previous run
echo "[CHK] baseline primed:"
ls -lh "$DST"/X_norm.npy "$DST"/mask.npy "$DST"/y.npy "$DST"/split_idx.json

echo "[STEP 3] Run T2G + GRAPE pipeline"
mkdir -p "$(dirname "$LOG")"
# Best-effort run: the cell does not abort on failure, but the exit status is
# captured so that a failed run is not reported as "Done".
rc=0
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset year_mcar30 --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules t2gexp,t2g,grape --order 't2gexp>t2g>grape' \
  --mask_op AND \
  --auto_prep \
  --grape_domain pack \
  --pack_root "$SRC" \
  --t2gexp.t2g_repo "$T2G_REPO" \
  --t2gexp.epochs 50 \
  --t2gexp.train_on overlay \
  --t2g.keep_cols_ratio 0.50 \
  --grape.task y \
  --grape.y_cmd "python /content/GRAPE/train_y.py --seed 0 \
                 --node_dim 8 --edge_dim 8 \
                 --known 0.01 --opt_scheduler none \
                 --dump_intermediate \
                 pack --root $SRC --data year_mcar30" \
  > "$LOG" 2>&1 || rc=$?

if [ "$rc" -eq 0 ]; then
  echo "=== Done. 看 $LOG ==="
else
  echo "=== FAILED (exit $rc). 看 $LOG ==="
  tail -n 40 "$LOG" || true
fi


[STEP 1] Rebuild mask = ~isnan(X_norm)
[SRC] rows_keep = 10000 / 10000 | cols_keep = 90 / 90
[STEP 2] Prime artifacts baseline with 4 files
[CHK] baseline primed:
-rw------- 1 root root 880K Sep  4 13:13 /content/grapt_artifacts/baseline/year_mcar30/seed0/mask.npy
-rw------- 1 root root  58K Sep  4 13:13 /content/grapt_artifacts/baseline/year_mcar30/seed0/split_idx.json
-rw------- 1 root root 3.5M Sep  4 13:13 /content/grapt_artifacts/baseline/year_mcar30/seed0/X_norm.npy
-rw------- 1 root root  79K Sep  4 13:13 /content/grapt_artifacts/baseline/year_mcar30/seed0/y.npy
[STEP 3] Run T2G + GRAPE pipeline
=== Done. 看 /content/logs/run_t2g_grape_seed0.log ===


#### random GRAPE

In [None]:
%%bash
set -euo pipefail
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# === Paths ===
SRC=/content/drive/MyDrive/data-t2g/year/baseline/year_mcar30_cap10k_s0
DST=/content/grapt_artifacts/baseline/year_mcar30/seed0
LOG=/content/logs/run_random_grape_seed0.log

echo "[STEP 1] Rebuild mask = ~isnan(X_norm)"
# NOTE(review): this overwrites mask.npy inside $SRC and assumes missing cells are
# stored as NaN in X_norm.npy -- confirm against how the pack was produced.
PACK_ROOT="$SRC" python - <<'PY'
import os

import numpy as np

root = os.environ["PACK_ROOT"]   # passed in from the shell; the heredoc itself is not expanded
X = np.load(os.path.join(root, "X_norm.npy"))
m = (~np.isnan(X)).astype('uint8')
np.save(os.path.join(root, "mask.npy"), m)
# Count rows / columns that still have at least one mask entry set.
print("[SRC] rows_keep =", int(m.any(axis=1).sum()), "/", m.shape[0],
      "| cols_keep =", int(m.any(axis=0).sum()), "/", m.shape[1])
PY

echo "[STEP 2] Prime artifacts baseline with 4 files"
mkdir -p "$DST"
cp -f "$SRC"/{X_norm.npy,mask.npy,y.npy,split_idx.json} "$DST"/
rm -rf "$DST/variants"   # drop variants left over from a previous run
echo "[CHK] baseline primed:"
ls -lh "$DST"/X_norm.npy "$DST"/mask.npy "$DST"/y.npy "$DST"/split_idx.json

echo "[STEP 3] Run RANDOM(col drop) + GRAPE pipeline"
mkdir -p "$(dirname "$LOG")"
# Best-effort run: the cell does not abort on failure, but the exit status is
# captured so that a failed run is not reported as "Done".
rc=0
python pipelines/run_pipeline.py \
  --grape_root /content/GRAPE \
  --dataset year_mcar30 --seed 0 \
  --artifact_dir /content/grapt_artifacts \
  --modules random,grape --order 'random>grape' \
  --mask_op AND \
  --auto_prep \
  --grape_domain pack \
  --pack_root "$SRC" \
  --random.drop_cols 0.50 \
  --random.seed 0 \
  --grape.task y \
  --grape.y_cmd "python /content/GRAPE/train_y.py --seed 0 \
                 --node_dim 8 --edge_dim 8 \
                 --known 0.01 --opt_scheduler none \
                 --dump_intermediate \
                 pack --root $SRC --data year_mcar30" \
  > "$LOG" 2>&1 || rc=$?

if [ "$rc" -eq 0 ]; then
  echo "=== Done. 看 $LOG ==="
else
  echo "=== FAILED (exit $rc). 看 $LOG ==="
  tail -n 40 "$LOG" || true
fi


[STEP 1] Rebuild mask = ~isnan(X_norm)
[SRC] rows_keep = 10000 / 10000 | cols_keep = 90 / 90
[STEP 2] Prime artifacts baseline with 4 files
[CHK] baseline primed:
-rw------- 1 root root 880K Sep  4 12:55 /content/grapt_artifacts/baseline/year_mcar30/seed0/mask.npy
-rw------- 1 root root  58K Sep  4 12:55 /content/grapt_artifacts/baseline/year_mcar30/seed0/split_idx.json
-rw------- 1 root root 3.5M Sep  4 12:55 /content/grapt_artifacts/baseline/year_mcar30/seed0/X_norm.npy
-rw------- 1 root root  79K Sep  4 12:55 /content/grapt_artifacts/baseline/year_mcar30/seed0/y.npy
[STEP 3] Run RANDOM(col drop) + GRAPE pipeline
=== Done. 看 /content/logs/run_random_grape_seed0.log ===


In [None]:
%%bash
set -euo pipefail
# Rename the finished baseline directory so the next experiment starts clean.
# Refuses to run when the source is missing or the target name is already taken.
from_dir="/content/grapt_artifacts/baseline/year_mcar30"
to_dir="/content/grapt_artifacts/baseline/year_mcar30_random_grape_50"

if [ ! -d "$from_dir" ]; then
  echo "找不到 $from_dir"
  exit 1
fi

if [ -e "$to_dir" ]; then
  echo "目標已存在：$to_dir"
  exit 1
fi

mv "$from_dir" "$to_dir"


In [None]:
%%bash

# Paths
ART_SRC="/content/grapt_artifacts/baseline"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

# Nothing to back up: warn and stop without failing the cell.
if [ ! -d "$ART_SRC" ]; then
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
  exit 0
fi

# Show the size of what is about to be copied (informational only).
du -sh "$ART_SRC"

# rsync creates only the last path component, so make sure the parents exist.
mkdir -p "$ART_DST"

# Sync in archive mode (keeps times/permissions); files already present at the
# destination are not deleted. Success is reported only if rsync succeeded.
if rsync -a "$ART_SRC/" "$ART_DST/"; then
  echo "✅ 已備份 baseline 到：$ART_DST"
else
  rc=$?
  echo "❌ rsync 失敗 (exit $rc)：$ART_SRC -> $ART_DST" >&2
  exit "$rc"
fi


6.3M	/content/grapt_artifacts/baseline
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results


In [None]:
%%bash

# Paths
# NOTE(review): this shares its destination with the baseline backup cell, so the
# two trees are merged into one directory -- confirm that is intended.
ART_SRC="/content/GRAPE/pack/test"
ART_DST="/content/drive/MyDrive/grapt_artifacts/results"

# Nothing to back up: warn and stop without failing the cell.
if [ ! -d "$ART_SRC" ]; then
  echo "⚠️ 找不到來源資料夾：$ART_SRC"
  exit 0
fi

# Show the size of what is about to be copied (informational only).
du -sh "$ART_SRC"

# rsync creates only the last path component, so make sure the parents exist.
mkdir -p "$ART_DST"

# Sync in archive mode (keeps times/permissions); files already present at the
# destination are not deleted. Success is reported only if rsync succeeded, and
# the message names the actual source instead of the copy-pasted "baseline".
if rsync -a "$ART_SRC/" "$ART_DST/"; then
  echo "✅ 已備份 $ART_SRC 到：$ART_DST"
else
  rc=$?
  echo "❌ rsync 失敗 (exit $rc)：$ART_SRC -> $ART_DST" >&2
  exit "$rc"
fi


1.1M	/content/GRAPE/pack/test
✅ 已備份 baseline 到：/content/drive/MyDrive/grapt_artifacts/results
