In [1]:
# Import objaverse and display the installed package version as the cell output.
import objaverse
objaverse.__version__

'0.1.7'

In [2]:
# Load the full list of object UIDs, then display how many there are and the
# type of the returned container.
uids = objaverse.load_uids()
len(uids), type(uids)

(798759, list)

In [None]:
# Fetch annotations for the first ten UIDs only and display them.
annotations = objaverse.load_annotations(uids[:10])
annotations

In [None]:
# Display the annotation stored under the first UID.
annotations[uids[0]]

In [None]:
import multiprocessing

# Use as many download processes as multiprocessing.cpu_count() reports.
processes = multiprocessing.cpu_count()

# NOTE(review): this requests every UID returned by load_uids(); confirm the
# disk space and bandwidth for a full download before running.
uids = objaverse.load_uids()
objects = objaverse.load_objects(
    uids=uids,
    download_processes=processes
)

# Display only the number of results: echoing the whole return value for every
# UID would flood the notebook output.
len(objects)

In [None]:
from ollama import chat, ChatResponse
import objaverse
from tqdm import tqdm

def is_furniture(name: str, description: str, tags: list[str], model: str = "llama3") -> bool:
    """Ask an Ollama chat model whether a 3D object is a piece of furniture.

    Args:
        name: Object name taken from its annotation; ``None`` is treated as "".
        description: Free-text description; ``None`` is treated as "".
        tags: Tag names attached to the object; ``None`` is treated as no tags.
        model: Name of the Ollama model to query. Defaults to ``"llama3"``.

    Returns:
        True when the model's reply begins with "yes" (case-insensitive, after
        dropping leading quotes, markdown markers and whitespace). False for
        any other reply, and also when the request fails: the error is
        printed rather than raised.
    """
    # Normalise missing fields so "None" never leaks into the prompt and a
    # non-string tag cannot make str.join raise.
    name = name or ""
    description = description or ""
    tag_text = ", ".join(str(tag) for tag in (tags or []))

    prompt = f"""
        You are an expert on object classification.
        Determine if the following 3D object is a piece of *furniture* (like a chair, table, bed, cabinet, etc).
        Respond only with 'Yes' or 'No'.

        Name: {name}
        Description: {description}
        Tags: {tag_text}
        """
    try:
        response: ChatResponse = chat(model=model, messages=[
            {'role': 'user', 'content': prompt},
        ])
        reply = (response.message.content or "").strip().lower()
    except Exception as e:
        # Best effort: a failed request counts as "not furniture" so one bad
        # call does not abort a long classification run.
        print(f" Error: {e}")
        return False

    # The prompt shows the answers in quotes, so the model may echo them
    # (e.g. 'Yes' or **Yes**). Drop such leading decoration before matching.
    leading_noise = "\"'`*_#>([{- \t\r\n"
    return reply.lstrip(leading_noise).startswith("yes")

# Number of UIDs whose annotations are requested per load_annotations call.
# NOTE(review): presumably every load_annotations call re-reads metadata from
# disk, so one call per UID repeats that work for each object; batching
# amortises it. Confirm against the objaverse docs and tune for memory.
ANNOTATION_BATCH_SIZE = 1000

uids = objaverse.load_uids()

# Mode "w" starts a fresh result file on every run. Each match is flushed
# immediately, so an interrupted run keeps everything found so far.
with open("furniture_uids.txt", "w") as f_out, tqdm(total=len(uids), desc="判斷中") as pbar:
    for start in range(0, len(uids), ANNOTATION_BATCH_SIZE):
        batch = uids[start:start + ANNOTATION_BATCH_SIZE]
        try:
            batch_annos = objaverse.load_annotations(batch)
        except Exception as e:
            # Fall back to per-UID loading below so one bad batch does not
            # drop its whole range of objects.
            print(f" Failed to load annotations for batch at offset {start}: {e}")
            batch_annos = {}

        for uid in batch:
            try:
                if uid in batch_annos:
                    anno = batch_annos[uid]
                else:
                    anno = objaverse.load_annotations([uid])[uid]
                # `or` also covers keys that are present but hold None.
                name = anno.get("name") or ""
                desc = anno.get("description") or ""
                tags = [t["name"] for t in (anno.get("tags") or [])]

                if is_furniture(name, desc, tags):
                    f_out.write(uid + "\n")
                    f_out.flush()  # push the hit to disk right away
            except Exception as e:
                print(f" Failed to load or process {uid}: {e}")
            pbar.update(1)

print(" 處理完成,結果已儲存在 furniture_uids.txt")


In [3]:

# build_corpus_from_objaverse.py
import json, objaverse, tqdm

# Read the furniture UID list, one UID per line, skipping blank lines.
with open("furniture_uids.txt") as f:
    uids = [line.strip() for line in f if line.strip()]

# Fetch all annotations in a single batched call.
annos = objaverse.load_annotations(uids=uids)        # dict: uid -> annotation

corpus = []
for uid, a in tqdm.tqdm(annos.items()):
    # `or` also covers keys that are present but hold None.
    name = a.get("name") or ""
    desc = a.get("description") or ""
    tags = [t["name"] for t in (a.get("tags") or [])]
    tag_sent = ", ".join(tags)
    # Build one readable caption from the non-empty parts.
    caption = ". ".join(part for part in (name, desc, tag_sent) if part)
    if caption:
        corpus.append({"id": uid, "text": caption})

print(f"Collected {len(corpus)} captions.")
# Write through a context manager so the handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open("semantic_corpus.json", "w") as f_out:
    json.dump(corpus, f_out, indent=2)

 99%|█████████▉| 159/160 [04:16<00:01,  1.61s/it]
100%|██████████| 8632/8632 [00:00<00:00, 356084.35it/s]

Collected 8632 captions.



