In [1]:
import os
from pathlib import Path
from urllib.parse import parse_qsl, urlsplit, urlunsplit

import requests
from dotenv import load_dotenv
from supabase import create_client

# Configuration for the whole notebook: credentials, source table, and the
# local directory that receives the downloaded assets.

# python-dotenv: expected to copy entries from a local .env file into
# os.environ before the lookups below.
load_dotenv()

# Required settings -- a missing variable raises KeyError right here.
supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_KEY"]

# Optional setting -- falls back to the "assets" table when unset.
supabase_table = os.environ.get("SUPABASE_ASSETS_TABLE", "assets")

# Download root, relative to the current working directory; created on
# first run and reused afterwards.
base_dir = Path("LayoutVLM/objaverse_processed")
base_dir.mkdir(parents=True, exist_ok=True)

client = create_client(supabase_url, supabase_key)

print(f"Assets root: {base_dir.resolve()}")

Assets root: /Users/macoblle/MEGA/Projects/livinit/livinit_pipeline/notebook/LayoutVLM/objaverse_processed


In [2]:
# Fetch every asset row from Supabase, one page of up to `chunk` rows at a time.
assets = []
chunk = 1000
start = 0
while True:
    # Offset paging needs a stable ORDER BY: without one the database may
    # return rows in a different order for each request, so consecutive pages
    # could overlap or skip rows.
    # NOTE(review): assumes `name` is (close to) unique; if the table has a
    # primary-key column, ordering by that would be a stricter tiebreaker.
    page = (
        client.table(supabase_table)
        .select("name,category,model_url,metadata_url")
        .order("name")
        .range(start, start + chunk - 1)
        .execute()
    )
    batch = page.data or []
    if not batch:
        # Only an empty page proves the table is exhausted. A short page is
        # not enough: the API may cap responses below `chunk`.
        break
    assets.extend(batch)
    # Advance by what was actually returned so no rows are skipped when the
    # server hands back fewer than `chunk` rows.
    start += len(batch)
len(assets)

223

In [3]:
def add_download_flag(url: str):
    """Return `url` with a `download=1` query parameter added for Supabase hosts.

    Args:
        url: Asset URL; may be None or empty.

    Returns:
        None when `url` is falsy. The URL unchanged when its host is not
        `supabase.co` (or a subdomain), when it already has a `download`
        query parameter, or when it cannot be parsed. Otherwise the URL with
        `download=1` appended to the query string, placed before any
        `#fragment`.
    """
    if not url:
        return None

    try:
        parts = urlsplit(url)
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): pass it through untouched
        # and let the HTTP client report the problem.
        return url

    # Match on the parsed host rather than a substring of the whole URL, so a
    # path or query that merely contains "supabase.co" is not rewritten.
    host = parts.hostname or ""
    if host != "supabase.co" and not host.endswith(".supabase.co"):
        return url

    # Compare real parameter names: "nodownload=0" must not count as "download".
    query_keys = {key for key, _ in parse_qsl(parts.query, keep_blank_values=True)}
    if "download" in query_keys:
        return url

    query = f"{parts.query}&download=1" if parts.query else "download=1"
    return urlunsplit(parts._replace(query=query))

def _download_file(session, url, destination, timeout):
    """Fetch `url` with `session` and store the raw response body at `destination`.

    Args:
        session: `requests.Session` used to issue the GET.
        url: Fully qualified URL to download.
        destination: `Path` of the final file.
        timeout: Timeout in seconds passed to `session.get`.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    response = session.get(url, timeout=timeout)
    response.raise_for_status()
    # Write to a sibling temp file and rename it into place: an interrupted
    # write then never leaves a truncated file at `destination`, which the
    # caller's exists() check would otherwise treat as already downloaded.
    partial = destination.with_name(destination.name + ".part")
    # Store the bytes exactly as served. Decoding via `response.text` relies
    # on a guessed charset and can corrupt UTF-8 JSON when the guess is wrong.
    partial.write_bytes(response.content)
    partial.replace(destination)


# Download each asset's model (.glb) and metadata (data.json) into
# base_dir/<asset name>/, skipping files that already exist so the cell can be
# re-run cheaply. Any failed request raises and stops the run.
# One Session for the whole run lets requests reuse connections.
with requests.Session() as session:
    for idx, asset in enumerate(assets, 1):
        asset_name = asset["name"]
        # Without an explicit category, use the part of the name before the first "_".
        category = (asset.get("category") or asset_name.split("_")[0]).lower()
        target_dir = base_dir / asset_name
        target_dir.mkdir(parents=True, exist_ok=True)
        model_path = target_dir / f"{category}.glb"
        metadata_path = target_dir / "data.json"

        model_url = add_download_flag(asset.get("model_url"))
        if model_url and not model_path.exists():
            _download_file(session, model_url, model_path, timeout=60)

        metadata_url = add_download_flag(asset.get("metadata_url"))
        if metadata_url and not metadata_path.exists():
            _download_file(session, metadata_url, metadata_path, timeout=30)

        if idx % 25 == 0:
            print(f"{idx}/{len(assets)} done")
print("downloads complete")

25/223 done
50/223 done
75/223 done
100/223 done
125/223 done
150/223 done
175/223 done
200/223 done
downloads complete


In [4]:
# Sanity check: tally the model and metadata files present anywhere under
# base_dir (the search is recursive, so files from earlier runs count too).
total_models = sum(1 for _ in base_dir.rglob("*.glb"))
total_metadata = sum(1 for _ in base_dir.rglob("data.json"))
print(dict(models=total_models, metadata=total_metadata))

{'models': 222, 'metadata': 222}
