In [None]:
%%bash
set -euo pipefail

# Upstream repository and the location of its checkout on the Kaggle disk.
REPO_URL="https://github.com/alonbebchuk/GNN-Pretraining.git"
PROJECT_NAME="gnn-pretraining"
WORKING_DIR="/kaggle/working"
PROJECT_PATH="${WORKING_DIR}/${PROJECT_NAME}"

if [ ! -d "$PROJECT_PATH" ]; then
    # First run: take a shallow clone into the working directory.
    echo "[INITIAL] Cloning repository..."
    cd "$WORKING_DIR"
    git clone --depth=1 "$REPO_URL" "$PROJECT_NAME"
    echo "[INITIAL] Repository cloned"
else
    # Rerun: discard any local state and move the checkout to the tip of origin/master.
    echo "[RERUN] Project already exists, fetching latest changes..."
    git -C "$PROJECT_PATH" fetch origin
    git -C "$PROJECT_PATH" reset --hard origin/master
    echo "[RERUN] Repository updated"
fi


In [None]:
import os
from pathlib import Path

PROJECT_NAME = "gnn-pretraining"
# Marker file left on disk after the first successful configuration. It is still
# written for compatibility with earlier runs, but it is no longer used to decide
# whether configuration is needed (see below).
ENV_MARKER = Path("/kaggle/working/.wandb_configured")

# The project name is not secret, so it is (re)applied on every run.
os.environ['WANDB_PROJECT'] = PROJECT_NAME

# Environment variables live only in the current kernel process, while the marker
# file survives kernel restarts. Gating on the file would therefore skip setup in a
# fresh kernel and leave WANDB_API_KEY unset, so the check looks at the environment.
if os.environ.get('WANDB_API_KEY'):
    print("[RERUN] Wandb environment already configured")
else:
    # Imported lazily: the module is only needed when the secret has to be fetched.
    from kaggle_secrets import UserSecretsClient

    print("[INITIAL] Setting up Wandb API key...")
    os.environ['WANDB_API_KEY'] = UserSecretsClient().get_secret("WANDB_API_KEY")
    ENV_MARKER.write_text("configured")
    print("[INITIAL] Wandb environment configured")


In [None]:
%%bash
set -euo pipefail

WORKING_DIR="/kaggle/working"
VENV_DIR="${WORKING_DIR}/.venv"
# Written only after every install step below has succeeded. The directory alone is
# not a reliable signal: with `set -e`, a failed pip install aborts the cell and
# leaves a half-populated environment behind.
VENV_MARKER="${VENV_DIR}/.setup_complete"

if [ -f "$VENV_MARKER" ]; then
    echo "[RERUN] Virtual environment already exists"
else
    if [ -d "$VENV_DIR" ]; then
        # No completion marker: either an earlier setup was interrupted or the
        # environment predates the marker. Rebuild it from scratch in both cases.
        echo "[INITIAL] Removing incomplete virtual environment..."
        rm -rf "$VENV_DIR"
    fi

    echo "[INITIAL] Creating virtual environment with GPU support..."
    cd "$WORKING_DIR"

    # Bootstrap virtualenv with the system interpreter.
    python -m pip -q install --upgrade pip --break-system-packages
    python -m pip -q install --upgrade virtualenv --break-system-packages
    python -m virtualenv "$VENV_DIR" --no-download
    source "$VENV_DIR/bin/activate"

    # From here on `python` is the interpreter inside the virtual environment.
    python -m pip -q install --upgrade pip wheel setuptools
    # CUDA 11.8 builds of the PyTorch stack, then the PyG extension wheels built
    # against that exact torch/CUDA combination.
    python -m pip -q install "torch==2.1.0+cu118" "torchvision==0.16.0+cu118" "torchaudio==2.1.0+cu118" --index-url https://download.pytorch.org/whl/cu118
    python -m pip -q install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-2.1.0+cu118.html
    python -m pip -q install torch-geometric
    python -m pip -q install "numpy==1.26.4" "scipy>=1.12" scikit-learn tqdm pyyaml "wandb>=0.16" "optuna>=3.5" "hydra-core>=1.3" tensorboard

    touch "$VENV_MARKER"
    echo "[INITIAL] Virtual environment setup complete"
fi


In [None]:
%%bash
set -euo pipefail

ROOT="/kaggle/working/gnn-pretraining"
VENV_DIR="/kaggle/working/.venv"
# Marker created after a successful run so that reruns skip the data setup.
DATA_MARKER="${ROOT}/data/.setup_complete"

if [ -f "$DATA_MARKER" ]; then
    echo "[RERUN] Data setup already completed"
else
    echo "[INITIAL] Running data setup..."
    cd "$ROOT"
    # Keep packages from PYTHONPATH and the user site out of the interpreter.
    unset PYTHONPATH; export PYTHONNOUSERSITE=1
    # Run with the same virtual environment the sweep cells activate, so the data
    # setup sees the pinned dependencies. If the environment is absent, fall back
    # to the system interpreter as before.
    if [ -f "$VENV_DIR/bin/activate" ]; then
        source "$VENV_DIR/bin/activate"
    fi
    python -m src.data.data_setup
    # The marker directory is created explicitly in case the setup module did not
    # create it; otherwise `touch` would fail after a successful setup.
    mkdir -p "$(dirname "$DATA_MARKER")"
    touch "$DATA_MARKER"
    echo "[INITIAL] Data setup completed"
fi


In [None]:
import subprocess
import re
import os
import time
import torch
from concurrent.futures import ThreadPoolExecutor, as_completed

# Name of the repository checkout; also interpolated into the sweep URLs printed below.
PROJECT_NAME = "gnn-pretraining"
# Absolute path of the checkout inside the Kaggle working directory.
PROJECT_ROOT = "/kaggle/working/" + PROJECT_NAME


def create_wandb_sweep():
    """Register the systematic pre-training sweep with W&B and return its ID.

    Runs ``wandb sweep`` on ``configs/sweeps/systematic_pretrain.yaml`` inside the
    project's virtual environment and extracts the sweep ID from the CLI output.

    Returns:
        str: The ID reported in the "Created sweep with ID: ..." line.

    Raises:
        RuntimeError: If no sweep ID can be found in the command output. The
            message includes the exit code and the captured output.
    """
    print("Creating wandb sweep...")
    env = os.environ.copy()
    result = subprocess.run(
        ['bash', '-c', f'source /kaggle/working/.venv/bin/activate && wandb sweep {PROJECT_ROOT}/configs/sweeps/systematic_pretrain.yaml'],
        capture_output=True, text=True, env=env
    )
    # The wandb CLI prints its "wandb: ..." status lines to stderr, so both streams
    # are searched rather than stdout alone.
    cli_output = f"{result.stdout or ''}\n{result.stderr or ''}"
    sweep_match = re.search(r'Created sweep with ID: (\w+)', cli_output)
    if sweep_match is None:
        # Fail with the CLI output instead of an AttributeError on None.group().
        raise RuntimeError(
            f"Could not create wandb sweep (exit code {result.returncode}). "
            f"Command output:\n{cli_output.strip()}"
        )
    sweep_id = sweep_match.group(1)
    print(f"Created sweep with ID: {sweep_id}")
    return sweep_id


def run_sweep_agent(sweep_id, agent_id):
    """Run one ``wandb agent`` for the sweep and return its exit code.

    The agent runs inside the project's virtual environment, from the project
    root, and is stopped after three hours.

    Args:
        sweep_id: ID of the sweep the agent should pull runs from.
        agent_id: 1-based index of this agent; used in log messages and to pick
            the GPU the agent is pinned to.

    Returns:
        int: The agent's exit code, or 124 if the agent hit the time limit.
    """
    print(f"Starting agent {agent_id} for sweep {sweep_id}")
    env = os.environ.copy()

    # With several GPUs, give each agent its own device; otherwise concurrently
    # running agents would all default to the first GPU. An existing
    # CUDA_VISIBLE_DEVICES setting is respected.
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        env.setdefault('CUDA_VISIBLE_DEVICES', str((agent_id - 1) % gpu_count))

    # `exec` replaces the bash wrapper with the agent process, so the kill issued
    # on timeout reaches the agent itself rather than only the shell.
    cmd = f'source /kaggle/working/.venv/bin/activate && cd {PROJECT_ROOT} && exec wandb agent {sweep_id}'
    try:
        process = subprocess.run(
            ['bash', '-c', cmd],
            env=env, timeout=3 * 60 * 60
        )
    except subprocess.TimeoutExpired:
        # Report the timeout as a failed agent instead of raising, so the caller's
        # bookkeeping for the remaining agents is not aborted. 124 mirrors the
        # exit status used by the coreutils `timeout` command.
        print(f"Agent {agent_id} timed out and was terminated")
        return 124
    print(f"Agent {agent_id} completed with return code {process.returncode}")
    return process.returncode

# One agent per GPU, capped at two. At least one agent is always started:
# ThreadPoolExecutor rejects max_workers=0, which is what a session without GPUs
# (device_count() == 0) would otherwise request.
num_agents = max(1, min(torch.cuda.device_count(), 2))
print(f"Using {num_agents} parallel agents")

sweep_id = create_wandb_sweep()

print(f"\nStarting {num_agents} sweep agents...")
print(f"Sweep URL: https://wandb.ai/{PROJECT_NAME}/sweeps/{sweep_id}")

start_time = time.time()
# Each agent blocks in a subprocess call, so threads are enough to run them side by side.
with ThreadPoolExecutor(max_workers=num_agents) as executor:
    completed_agents = []
    future_to_agent = {
        executor.submit(run_sweep_agent, sweep_id, i+1): i+1
        for i in range(num_agents)
    }
    for future in as_completed(future_to_agent):
        agent_id = future_to_agent[future]
        try:
            return_code = future.result()
        except Exception as exc:
            # An agent that raised is recorded as failed (-1) so the remaining
            # agents are still collected and the summary below is still printed.
            print(f"Agent {agent_id} raised {type(exc).__name__}: {exc}")
            return_code = -1
        completed_agents.append((agent_id, return_code))
        print(f"Agent {agent_id} finished (exit code: {return_code})")
end_time = time.time()
total_time = end_time - start_time

print("\nAll sweep agents completed!")
print(f"Total runtime: {total_time/60:.1f} minutes")
print(f"Sweep results: https://wandb.ai/{PROJECT_NAME}/sweeps/{sweep_id}")

successful_agents = sum(1 for _, code in completed_agents if code == 0)
print(f"Successful agents: {successful_agents}/{num_agents}")