In [None]:
%%bash
set -euo pipefail

# Keep a checkout of the project under the Kaggle working directory.
#   - no checkout yet  -> shallow-clone the repository
#   - checkout present -> fetch and hard-reset it to origin/master
REPO_URL="https://github.com/alonbebchuk/GNN-Pretraining.git"
PROJECT_NAME="gnn-pretraining"
WORKING_DIR="/kaggle/working"
PROJECT_PATH="${WORKING_DIR}/${PROJECT_NAME}"

if [ ! -d "$PROJECT_PATH" ]; then
    echo "[INITIAL] Cloning repository..."
    # `git -C` runs the command as if started in the given directory,
    # so the clone lands in ${WORKING_DIR}/${PROJECT_NAME}.
    git -C "$WORKING_DIR" clone --depth=1 "$REPO_URL" "$PROJECT_NAME"
    echo "[INITIAL] Repository cloned"
else
    echo "[RERUN] Project already exists, fetching latest changes..."
    git -C "$PROJECT_PATH" fetch origin
    # Hard reset: tracked files are overwritten with the state of origin/master.
    git -C "$PROJECT_PATH" reset --hard origin/master
    echo "[RERUN] Repository updated"
fi


In [None]:
import os
from pathlib import Path

PROJECT_NAME = "gnn-pretraining"
ENV_MARKER = Path("/kaggle/working/.wandb_configured")

# The Wandb settings are stored in os.environ, which belongs to the current
# kernel process, while ENV_MARKER is a file on disk. If the file is still
# there after the kernel is restarted, trusting it would skip the setup even
# though the variables are gone. So the decision is made from the environment
# itself; the marker file is still written as a record that setup ran.
wandb_env_ready = (
    bool(os.environ.get("WANDB_API_KEY"))
    and os.environ.get("WANDB_PROJECT") == PROJECT_NAME
)

if wandb_env_ready:
    print("[RERUN] Wandb environment already configured")
else:
    # Imported lazily: only needed when the secret actually has to be fetched.
    from kaggle_secrets import UserSecretsClient

    print("[INITIAL] Setting up Wandb API key...")
    os.environ['WANDB_API_KEY'] = UserSecretsClient().get_secret("WANDB_API_KEY")
    os.environ['WANDB_PROJECT'] = PROJECT_NAME
    ENV_MARKER.write_text("configured")
    print("[INITIAL] Wandb environment configured")


In [None]:
%%bash
set -euo pipefail

# Build the project's virtual environment (pinned CUDA 11.8 PyTorch wheels,
# PyG extensions and the remaining Python dependencies) once, and skip the
# work on later runs.
WORKING_DIR="/kaggle/working"
VENV_DIR="${WORKING_DIR}/.venv"
# Created only after the last install step succeeded. The mere existence of
# $VENV_DIR is not a reliable "done" signal: with `set -e`, a failed install
# aborts the cell and leaves a half-populated directory behind.
VENV_MARKER="${VENV_DIR}/.setup_complete"

if [ -f "$VENV_MARKER" ]; then
    echo "[RERUN] Virtual environment already exists"
else
    if [ -d "$VENV_DIR" ]; then
        # Directory without a completion marker: a previous setup did not
        # finish (or predates the marker), so rebuild from scratch.
        echo "[INITIAL] Removing incomplete virtual environment..."
        rm -rf "$VENV_DIR"
    fi

    echo "[INITIAL] Creating virtual environment with GPU support..."
    cd "$WORKING_DIR"

    # Bootstrap virtualenv with the system interpreter.
    python -m pip -q install --upgrade pip --break-system-packages
    python -m pip -q install --upgrade virtualenv --break-system-packages
    python -m virtualenv "$VENV_DIR" --no-download

    # From here on `python` resolves to the interpreter inside the venv.
    source "$VENV_DIR/bin/activate"
    python -m pip -q install --upgrade pip wheel setuptools
    python -m pip -q install "torch==2.1.0+cu118" "torchvision==0.16.0+cu118" "torchaudio==2.1.0+cu118" --index-url https://download.pytorch.org/whl/cu118
    # The wheel index must match the torch/CUDA build installed above.
    python -m pip -q install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-2.1.0+cu118.html
    python -m pip -q install torch-geometric
    python -m pip -q install "numpy==1.26.4" "scipy>=1.12" scikit-learn tqdm pyyaml "wandb>=0.16" "optuna>=3.5" "hydra-core>=1.3" tensorboard

    # Every step succeeded: record completion so reruns can skip the build.
    touch "$VENV_MARKER"
    echo "[INITIAL] Virtual environment setup complete"
fi


In [None]:
%%bash
set -euo pipefail

# Run the project's data setup module once; a marker file records success so
# reruns skip it.
ROOT="/kaggle/working/gnn-pretraining"
DATA_MARKER="${ROOT}/data/.setup_complete"
# Interpreter of the virtual environment built under /kaggle/working/.venv.
# Each %%bash cell is a separate shell, so an activation done in another cell
# does not carry over; the interpreter is therefore addressed by full path.
VENV_PYTHON="/kaggle/working/.venv/bin/python"

if [ -f "$DATA_MARKER" ]; then
    echo "[RERUN] Data setup already completed"
else
    echo "[INITIAL] Running data setup..."
    cd "$ROOT"
    export PYTHONNOUSERSITE=1

    if [ -x "$VENV_PYTHON" ]; then
        PYTHON_BIN="$VENV_PYTHON"
    else
        # Keep the cell usable without the venv, but make the fallback visible.
        echo "[WARN] ${VENV_PYTHON} not found, falling back to system python" >&2
        PYTHON_BIN="python"
    fi

    "$PYTHON_BIN" -m src.data.data_setup

    # Only reached when the setup succeeded (set -e). Make sure the marker's
    # directory exists so `touch` cannot fail after a successful run.
    mkdir -p "${ROOT}/data"
    touch "$DATA_MARKER"
    echo "[INITIAL] Data setup completed"
fi


## Step 5: Run Pretraining Experiments


In [None]:
%%bash
set -euo pipefail

# Run the pretraining experiment driver once; a marker file records success
# so reruns skip it.
ROOT="/kaggle/working/gnn-pretraining"
PRETRAIN_MARKER="${ROOT}/outputs/pretrain/.experiments_complete"
# Interpreter of the virtual environment built under /kaggle/working/.venv.
# Each %%bash cell is a separate shell, so an activation done in another cell
# does not carry over; the interpreter is therefore addressed by full path.
VENV_PYTHON="/kaggle/working/.venv/bin/python"

if [ -f "$PRETRAIN_MARKER" ]; then
    echo "[RERUN] Pretraining experiments already completed"
else
    echo "[INITIAL] Starting pretraining experiments..."
    echo "This will train 8 schemes × 3 seeds = 24 models"
    echo "Expected runtime: ~3-4 hours on GPU"

    cd "$ROOT"
    export PYTHONNOUSERSITE=1

    if [ -x "$VENV_PYTHON" ]; then
        PYTHON_BIN="$VENV_PYTHON"
    else
        # Keep the cell usable without the venv, but make the fallback visible.
        echo "[WARN] ${VENV_PYTHON} not found, falling back to system python" >&2
        PYTHON_BIN="python"
    fi

    "$PYTHON_BIN" run_pretrain_experiments.py

    # Only reached when the driver exited successfully (set -e).
    mkdir -p "${ROOT}/outputs/pretrain"
    touch "$PRETRAIN_MARKER"
    echo "[INITIAL] Pretraining experiments completed successfully"
fi


## Step 6: Run Finetuning Experiments


In [None]:
%%bash
set -euo pipefail

# Run the finetuning experiment driver once; a marker file records success
# so reruns skip it.
ROOT="/kaggle/working/gnn-pretraining"
FINETUNE_MARKER="${ROOT}/outputs/finetune/.experiments_complete"
# Interpreter of the virtual environment built under /kaggle/working/.venv.
# Each %%bash cell is a separate shell, so an activation done in another cell
# does not carry over; the interpreter is therefore addressed by full path.
VENV_PYTHON="/kaggle/working/.venv/bin/python"

if [ -f "$FINETUNE_MARKER" ]; then
    echo "[RERUN] Finetuning experiments already completed"
else
    echo "[INITIAL] Starting finetuning experiments..."
    echo "This will finetune on 6 domains × 9 schemes × 2 strategies × 3 seeds = 324 experiments"
    echo "Expected runtime: ~5-6 hours on GPU"

    cd "$ROOT"
    export PYTHONNOUSERSITE=1

    if [ -x "$VENV_PYTHON" ]; then
        PYTHON_BIN="$VENV_PYTHON"
    else
        # Keep the cell usable without the venv, but make the fallback visible.
        echo "[WARN] ${VENV_PYTHON} not found, falling back to system python" >&2
        PYTHON_BIN="python"
    fi

    "$PYTHON_BIN" run_finetune_experiments.py

    # Only reached when the driver exited successfully (set -e).
    mkdir -p "${ROOT}/outputs/finetune"
    touch "$FINETUNE_MARKER"
    echo "[INITIAL] Finetuning experiments completed successfully"
fi
