In [15]:
# Subsample GLB point cloud (rate=0.06), PRESERVE COLOR_0 (RGB/RGBA), save as GLB.
# Compatible with older pygltflib (no AccessorType). Fixes "NoneType subscriptable" by attaching binary blob properly.

import os, base64, numpy as np
from typing import Optional

from pygltflib import GLTF2, Buffer, BufferView, Accessor, Scene, Node, Mesh, Primitive

# -------- config --------
SRC_PATH = "1.glb"      # change if needed
DST_PATH = "1_subsampled.glb"
RATE     = 0.06
SEED     = None  # set to an int (e.g., 42) for deterministic subsampling
# ------------------------

if SEED is not None:
    np.random.seed(SEED)

# glTF component type IDs
FLOAT          = 5126
UNSIGNED_BYTE  = 5121
BYTE           = 5120
UNSIGNED_SHORT = 5123
SHORT          = 5122
UNSIGNED_INT   = 5125

BYTES_PER_COMPONENT = {
    FLOAT: 4,
    UNSIGNED_BYTE: 1,
    BYTE: 1,
    SHORT: 2,
    UNSIGNED_SHORT: 2,
    UNSIGNED_INT: 4,
}

NUM_COMPONENTS = {"SCALAR": 1, "VEC2": 2, "VEC3": 3, "VEC4": 4}

DTYPE_FOR_COMPONENT = {
    FLOAT: np.float32,
    UNSIGNED_BYTE: np.uint8,
    BYTE: np.int8,
    SHORT: np.int16,
    UNSIGNED_SHORT: np.uint16,
    UNSIGNED_INT: np.uint32,
}

def _get_buffer_data(gltf: GLTF2, buffer_index: int, src_dir: Optional[str]) -> bytes:
    buf = gltf.buffers[buffer_index]

    # Try embedded bytes (newer pygltflib fills this for GLB)
    data = getattr(buf, "byteData", None)
    if isinstance(data, (bytes, bytearray)) and (buf.byteLength is None or len(data) >= buf.byteLength):
        return bytes(data)

    # Try GLB's binary blob
    if hasattr(gltf, "binary_blob"):
        try:
            bb = gltf.binary_blob()
            if isinstance(bb, (bytes, bytearray)):
                return bytes(bb)
        except Exception:
            pass

    # Data URI
    if getattr(buf, "uri", None):
        uri = buf.uri
        if uri.startswith("data:"):
            comma = uri.find(",")
            meta, payload = uri[:comma], uri[comma+1:]
            if ";base64" in meta:
                return base64.b64decode(payload)
            else:
                return payload.encode("utf-8")
        else:
            # external .bin next to gltf
            if src_dir is None:
                raise RuntimeError("External buffer URI found but source directory is unknown.")
            with open(os.path.join(src_dir, uri), "rb") as f:
                return f.read()

    raise RuntimeError("Could not resolve buffer bytes (unexpected GLB/GLTF layout).")

def read_accessor(gltf: GLTF2, accessor_index: int, src_dir: Optional[str]) -> np.ndarray:
    acc: Accessor = gltf.accessors[accessor_index]
    bv: BufferView = gltf.bufferViews[acc.bufferView]
    raw = _get_buffer_data(gltf, bv.buffer, src_dir)

    comp_type = acc.componentType
    ncomp     = NUM_COMPONENTS[acc.type if isinstance(acc.type, str) else str(acc.type)]
    comp_size = BYTES_PER_COMPONENT[comp_type]
    dtype     = DTYPE_FOR_COMPONENT[comp_type]

    base   = (bv.byteOffset or 0) + (acc.byteOffset or 0)
    stride = bv.byteStride or (ncomp * comp_size)
    count  = acc.count

    if stride == ncomp * comp_size:
        start = base
        end   = base + count * stride
        arr   = np.frombuffer(raw[start:end], dtype=dtype, count=count * ncomp)
        out   = arr.reshape((count, ncomp))
    else:
        out = np.empty((count, ncomp), dtype=dtype)
        for i in range(count):
            s = base + i * stride
            e = s + ncomp * comp_size
            out[i] = np.frombuffer(raw[s:e], dtype=dtype, count=ncomp)

    # Normalize if flagged (e.g., normalized integer colors)
    if getattr(acc, "normalized", False) and np.issubdtype(out.dtype, np.integer):
        info = np.iinfo(out.dtype)
        out  = out.astype(np.float32) / float(info.max)

    return out

def set_attr(prim: Primitive, key: str, value: int):
    attrs = getattr(prim, "attributes", None)
    # Try object-style
    try:
        setattr(attrs, key, value)
        prim.attributes = attrs
        return
    except Exception:
        pass
    # Fallback dict-style
    if attrs is None or not isinstance(attrs, dict):
        attrs = {}
    attrs[key] = value
    prim.attributes = attrs

def get_attr(prim: Primitive, key: str):
    attrs = getattr(prim, "attributes", None)
    if attrs is None:
        return None
    try:
        return getattr(attrs, key)
    except Exception:
        pass
    try:
        return attrs.get(key, None)
    except Exception:
        return None

# ---------- Load source ----------
src_dir = os.path.dirname(os.path.abspath(SRC_PATH))
gltf = GLTF2().load(SRC_PATH)

def first_primitive_with_position(gltf: GLTF2):
    scene_index = gltf.scene if gltf.scene is not None else 0
    scene = gltf.scenes[scene_index]
    node_indices = list(scene.nodes or [])
    visited = set()
    while node_indices:
        ni = node_indices.pop(0)
        if ni in visited:
            continue
        visited.add(ni)
        node = gltf.nodes[ni]
        if node.mesh is not None:
            mesh = gltf.meshes[node.mesh]
            for p_idx, prim in enumerate(mesh.primitives):
                pos_idx = get_attr(prim, "POSITION")
                if pos_idx is not None:
                    return ni, node.mesh, p_idx
        if node.children:
            node_indices.extend(node.children)
    raise RuntimeError("No mesh primitive with POSITION found.")

node_idx, mesh_idx, prim_idx = first_primitive_with_position(gltf)
mesh = gltf.meshes[mesh_idx]
prim = mesh.primitives[prim_idx]

pos_acc_idx = get_attr(prim, "POSITION")
positions = read_accessor(gltf, pos_acc_idx, src_dir).astype(np.float32, copy=False)
n_points = positions.shape[0]
if n_points == 0:
    raise RuntimeError("POSITION accessor has zero points.")

col_acc_idx = get_attr(prim, "COLOR_0")
if col_acc_idx is not None:
    colors = read_accessor(gltf, col_acc_idx, src_dir).astype(np.float32, copy=False)
    if colors.shape[1] == 3:
        channels = 3
    elif colors.shape[1] == 4:
        channels = 4
    else:
        # Force into RGB(A)
        if colors.shape[1] < 3:
            pad = np.ones((colors.shape[0], 3), dtype=np.float32)
            pad[:, :colors.shape[1]] = colors
            colors = pad
            channels = 3
        else:
            channels = 4
            a = np.ones((colors.shape[0], 1), dtype=np.float32)
            colors = np.concatenate([colors[:, :3], a], axis=1)
else:
    channels = 3
    colors = np.ones((n_points, 3), dtype=np.float32)

# ---------- Subsample ----------
keep = np.random.rand(n_points) < RATE
if not np.any(keep):
    keep[np.random.randint(0, n_points)] = True

sub_pos = positions[keep].astype(np.float32)
sub_col = colors[keep].astype(np.float32)
if sub_pos.shape[0] == 0:
    raise RuntimeError("No points after subsampling (unexpected).")

# ---------- Build output GLB ----------
out = GLTF2(
    scenes=[], nodes=[], meshes=[], buffers=[], bufferViews=[], accessors=[]
)

# Combine both arrays into a single binary blob and attach at GLTF2 level
data_bytes = bytearray()

# positions accessor
pos_offset = len(data_bytes)
data_bytes += sub_pos.tobytes(order="C")
pos_bv_index = len(out.bufferViews)
out.bufferViews.append(BufferView(buffer=0, byteOffset=pos_offset, byteLength=sub_pos.nbytes))
pos_acc = Accessor(
    bufferView=pos_bv_index, byteOffset=0, componentType=FLOAT,
    count=sub_pos.shape[0], type="VEC3",
    min=np.min(sub_pos, axis=0).astype(float).tolist(),
    max=np.max(sub_pos, axis=0).astype(float).tolist()
)
out.accessors.append(pos_acc)
pos_accessor_index = len(out.accessors) - 1

# colors accessor (FLOAT [0,1] for max viewer compatibility)
col_offset = len(data_bytes)
data_bytes += sub_col.tobytes(order="C")
col_bv_index = len(out.bufferViews)
out.bufferViews.append(BufferView(buffer=0, byteOffset=col_offset, byteLength=sub_col.nbytes))
col_type = "VEC3" if sub_col.shape[1] == 3 else "VEC4"
col_acc = Accessor(
    bufferView=col_bv_index, byteOffset=0, componentType=FLOAT,
    count=sub_col.shape[0], type=col_type
)
out.accessors.append(col_acc)
col_accessor_index = len(out.accessors) - 1

# Single buffer entry; length must match the combined blob. Leave uri unset for GLB.
out.buffers.append(Buffer(byteLength=len(data_bytes)))
# Attach binary to the GLTF2 object (older pygltflib expects this for GLB)
blob = bytes(data_bytes)
attached = False
for attr_name in ("set_binary_blob", "_set_binary_blob", "SetBinaryBlob"):
    if hasattr(out, attr_name):
        try:
            getattr(out, attr_name)(blob)
            attached = True
            break
        except Exception:
            pass
# Common fallback names used across versions
if not attached:
    for attr_name in ("_glb_data", "binary_blob", "_binary_blob"):
        try:
            setattr(out, attr_name, blob)
            attached = True
            break
        except Exception:
            pass
if not attached:
    raise RuntimeError("Could not attach GLB binary blob for this pygltflib version.")

# Primitive as POINTS
prim_new = Primitive()
set_attr(prim_new, "POSITION", pos_accessor_index)
set_attr(prim_new, "COLOR_0", col_accessor_index)
prim_new.mode = 0  # 0 = POINTS

mesh_new = Mesh(primitives=[prim_new])
out.meshes.append(mesh_new)

node_new = Node(mesh=0, name="subsampled_pointcloud")
out.nodes.append(node_new)

scene_new = Scene(nodes=[0])
out.scenes.append(scene_new)
out.scene = 0

# Save GLB
os.makedirs(os.path.dirname(DST_PATH) or ".", exist_ok=True)
out.save_binary(DST_PATH)
print(f"Done. Subsampled GLB written to: {DST_PATH}  |  Points kept: {sub_pos.shape[0]}")


Done. Subsampled GLB written to: 1_subsampled.glb  |  Points kept: 1357943


In [17]:
# Subsample all .glb point cloud files in a folder (rate=0.06), preserving COLOR_0 colors.
# Saves each as <filename>_subsampled.glb in the same folder.
# Compatible with older pygltflib.

import os, base64, numpy as np
from typing import Optional
from pygltflib import GLTF2, Buffer, BufferView, Accessor, Scene, Node, Mesh, Primitive

# -------- config --------
FOLDER  = "public/unik3d"   # folder containing your .glb files
RATE    = 0.06
SEED    = None          # set to int for deterministic sampling
# ------------------------

if SEED is not None:
    np.random.seed(SEED)

# glTF constants
FLOAT          = 5126
UNSIGNED_BYTE  = 5121
BYTE           = 5120
UNSIGNED_SHORT = 5123
SHORT          = 5122
UNSIGNED_INT   = 5125

BYTES_PER_COMPONENT = {
    FLOAT: 4, UNSIGNED_BYTE: 1, BYTE: 1, SHORT: 2, UNSIGNED_SHORT: 2, UNSIGNED_INT: 4,
}
NUM_COMPONENTS = {"SCALAR":1,"VEC2":2,"VEC3":3,"VEC4":4}
DTYPE_FOR_COMPONENT = {
    FLOAT: np.float32, UNSIGNED_BYTE: np.uint8, BYTE: np.int8,
    SHORT: np.int16, UNSIGNED_SHORT: np.uint16, UNSIGNED_INT: np.uint32,
}

# ---------- helpers ----------
def _get_buffer_data(gltf: GLTF2, buffer_index: int, src_dir: Optional[str]) -> bytes:
    buf = gltf.buffers[buffer_index]
    data = getattr(buf, "byteData", None)
    if isinstance(data, (bytes, bytearray)) and (buf.byteLength is None or len(data) >= buf.byteLength):
        return bytes(data)
    if hasattr(gltf, "binary_blob"):
        try:
            bb = gltf.binary_blob()
            if isinstance(bb, (bytes, bytearray)):
                return bytes(bb)
        except: pass
    if getattr(buf, "uri", None):
        uri = buf.uri
        if uri.startswith("data:"):
            comma = uri.find(","); meta, payload = uri[:comma], uri[comma+1:]
            return base64.b64decode(payload) if ";base64" in meta else payload.encode("utf-8")
        else:
            with open(os.path.join(src_dir, uri), "rb") as f: return f.read()
    raise RuntimeError("Could not resolve buffer data")

def read_accessor(gltf: GLTF2, accessor_index: int, src_dir: Optional[str]) -> np.ndarray:
    acc, bv = gltf.accessors[accessor_index], gltf.bufferViews[gltf.accessors[accessor_index].bufferView]
    raw = _get_buffer_data(gltf, bv.buffer, src_dir)
    comp_type, ncomp = acc.componentType, NUM_COMPONENTS[acc.type]
    comp_size, dtype = BYTES_PER_COMPONENT[comp_type], DTYPE_FOR_COMPONENT[comp_type]
    base, stride, count = (bv.byteOffset or 0)+(acc.byteOffset or 0), bv.byteStride or (ncomp*comp_size), acc.count
    if stride == ncomp*comp_size:
        arr = np.frombuffer(raw[base:base+count*stride], dtype=dtype, count=count*ncomp)
        out = arr.reshape((count, ncomp))
    else:
        out = np.empty((count,ncomp),dtype=dtype)
        for i in range(count): out[i]=np.frombuffer(raw[base+i*stride:base+i*stride+ncomp*comp_size],dtype=dtype,count=ncomp)
    if getattr(acc,"normalized",False) and np.issubdtype(out.dtype,np.integer):
        out=out.astype(np.float32)/float(np.iinfo(out.dtype).max)
    return out

def set_attr(prim: Primitive, key: str, value: int):
    attrs = getattr(prim, "attributes", None)
    try: setattr(attrs,key,value); prim.attributes=attrs; return
    except: pass
    if attrs is None or not isinstance(attrs, dict): attrs = {}
    attrs[key]=value; prim.attributes=attrs

def get_attr(prim: Primitive, key: str):
    attrs = getattr(prim,"attributes",None)
    try: return getattr(attrs,key)
    except: pass
    try: return attrs.get(key,None)
    except: return None

def process_file(src_path: str, rate: float):
    src_dir = os.path.dirname(os.path.abspath(src_path))
    gltf = GLTF2().load(src_path)

    # --- find first primitive with POSITION ---
    scene = gltf.scenes[gltf.scene if gltf.scene is not None else 0]
    node_indices, visited = list(scene.nodes or []), set()
    node_idx=mesh_idx=prim_idx=None
    while node_indices:
        ni=node_indices.pop(0)
        if ni in visited: continue
        visited.add(ni); node=gltf.nodes[ni]
        if node.mesh is not None:
            mesh=gltf.meshes[node.mesh]
            for p_idx, prim in enumerate(mesh.primitives):
                if get_attr(prim,"POSITION") is not None:
                    node_idx,mesh_idx,prim_idx=ni,node.mesh,p_idx; break
        if node.children: node_indices.extend(node.children)
        if prim_idx is not None: break
    if prim_idx is None: raise RuntimeError("No primitive with POSITION in "+src_path)

    mesh, prim = gltf.meshes[mesh_idx], gltf.meshes[mesh_idx].primitives[prim_idx]
    pos_acc_idx = get_attr(prim,"POSITION"); positions = read_accessor(gltf,pos_acc_idx,src_dir).astype(np.float32)
    n_points=positions.shape[0]
    col_acc_idx = get_attr(prim,"COLOR_0")
    if col_acc_idx is not None:
        colors=read_accessor(gltf,col_acc_idx,src_dir).astype(np.float32)
        channels=colors.shape[1] if colors.shape[1] in (3,4) else 3
        if channels==3: colors=colors[:,:3]
        if channels==4 and colors.shape[1]==3: 
            a=np.ones((colors.shape[0],1),dtype=np.float32); colors=np.c_[colors,a]
    else: colors=np.ones((n_points,3),dtype=np.float32); channels=3

    keep=np.random.rand(n_points)<rate
    if not np.any(keep): keep[np.random.randint(0,n_points)]=True
    sub_pos,sub_col=positions[keep],colors[keep]

    # --- build new GLB ---
    out=GLTF2(scenes=[],nodes=[],meshes=[],buffers=[],bufferViews=[],accessors=[])
    data_bytes=bytearray()
    pos_offset=len(data_bytes); data_bytes+=sub_pos.astype(np.float32).tobytes()
    out.bufferViews.append(BufferView(buffer=0,byteOffset=pos_offset,byteLength=sub_pos.nbytes))
    out.accessors.append(Accessor(bufferView=len(out.bufferViews)-1,byteOffset=0,componentType=FLOAT,count=sub_pos.shape[0],type="VEC3",min=np.min(sub_pos,axis=0).tolist(),max=np.max(sub_pos,axis=0).tolist()))
    pos_accessor_index=len(out.accessors)-1
    col_offset=len(data_bytes); data_bytes+=sub_col.astype(np.float32).tobytes()
    out.bufferViews.append(BufferView(buffer=0,byteOffset=col_offset,byteLength=sub_col.nbytes))
    out.accessors.append(Accessor(bufferView=len(out.bufferViews)-1,byteOffset=0,componentType=FLOAT,count=sub_col.shape[0],type="VEC3" if channels==3 else "VEC4"))
    col_accessor_index=len(out.accessors)-1
    out.buffers.append(Buffer(byteLength=len(data_bytes)))
    blob=bytes(data_bytes)
    if hasattr(out,"set_binary_blob"): out.set_binary_blob(blob)
    else: setattr(out,"_glb_data",blob)

    prim_new=Primitive(); set_attr(prim_new,"POSITION",pos_accessor_index); set_attr(prim_new,"COLOR_0",col_accessor_index); prim_new.mode=0
    out.meshes.append(Mesh(primitives=[prim_new]))
    out.nodes.append(Node(mesh=0,name="subsampled"))
    out.scenes.append(Scene(nodes=[0])); out.scene=0

    dst_path=os.path.splitext(src_path)[0]+"_subsampled.glb"
    out.save_binary(dst_path)
    print(f"✔ {os.path.basename(src_path)} -> {os.path.basename(dst_path)} (kept {sub_pos.shape[0]} points)")
    return dst_path

# ---------- run on folder ----------
for fname in os.listdir(FOLDER):
    if fname.lower().endswith(".glb"):
        try:
            process_file(os.path.join(FOLDER,fname), RATE)
        except Exception as e:
            print(f"✘ Failed {fname}: {e}")


✔ 10.glb -> 10_subsampled.glb (kept 1414654 points)
✔ 11.glb -> 11_subsampled.glb (kept 1372445 points)
✔ 12.glb -> 12_subsampled.glb (kept 1418076 points)
✔ 13.glb -> 13_subsampled.glb (kept 1372374 points)
✔ 14.glb -> 14_subsampled.glb (kept 1363553 points)
✔ 15.glb -> 15_subsampled.glb (kept 1395021 points)
✔ 16.glb -> 16_subsampled.glb (kept 1349515 points)
✔ 17.glb -> 17_subsampled.glb (kept 1347879 points)
✔ 18.glb -> 18_subsampled.glb (kept 1350110 points)
✔ 19.glb -> 19_subsampled.glb (kept 1390957 points)
✔ 1b.glb -> 1b_subsampled.glb (kept 1360663 points)
✘ Failed 1_subsampled.glb: unpack requires a buffer of 4 bytes
✔ 2.glb -> 2_subsampled.glb (kept 1350471 points)
✔ 20.glb -> 20_subsampled.glb (kept 1406698 points)
✔ 3.glb -> 3_subsampled.glb (kept 1373524 points)
✔ 5.glb -> 5_subsampled.glb (kept 1433529 points)
✔ 7.glb -> 7_subsampled.glb (kept 44070 points)
✔ 8.glb -> 8_subsampled.glb (kept 1395599 points)
