To start running this notebook: in a shell, go to the home directory of the github repo.
```
cd src
conda create geocache python=3.11
conda activate geocache
conda install numpy jupyter
jupyter notebook usd-separate.ipynb
```
Then you can shift-enter to run each cell here.

To run the Alembic parts you'll need to have run the Running Alembic part of the [README](https://github.com/imgspc/geocache-compression/blob/main/README.md#running-alembic).

In [None]:
# Make sure we have USD installed.
!pip install usd-core

In [1]:
# Some file locations. I've put everything in $HOME/projects, but you can change these paths.
from pathlib import Path

home = Path.home()
infile = str(home / "projects/ALab/ALab/entry.usda")
outdir = str(home / "projects/geocache-compression/build")
octopus = str(home / "projects/geocache-compression/build/octopus.json")
octopus_bindir = home / "projects/geocache-compression/build"

kmeans = True
clustersize = 1000
quality = 0.999

In [2]:
from pxr import Usd
from embedding import clustering
from embedding.io import separate_usd
from embedding.io import create_embedding
from embedding.io import read_embedding
from embedding.metric import Report
from embedding.io import read_binfile, parse_json_file, run_all_reports
import os
import math

if kmeans:
    cluster_fn = clustering.cluster_kmeans
else:
    cluster_fn = clustering.cluster_by_index

In [3]:
stage = Usd.Stage.Open(infile)
mpu = stage.GetMetadata("metersPerUnit")
print(mpu)

0.01


In [4]:
package = separate_usd(infile, outdir, verbose=False)
print(f"{package.inputfile} has {len(package.headers)} animations")

/Users/bhudson/projects/ALab/ALab/entry.usda has 355 animations


In [6]:
# Test just doing one embedding.
header = max(package.headers, key=lambda h: h.numbytes())  # compress the biggest file
# header = package.get_header("/root/remi/head_M_hrc/GEO/head_M_hrc/facePlate_M_geo")
print(f"{header}")
files = create_embedding(
    header, cluster_fn=cluster_fn, cluster_size=clustersize, verbose=True
)

print(files)

/root/stoat/backpack_M_hrc/GEO/backpack_M_hrc/transistorRadio_M_hrc/wireAdaptor01_M_geo -- 3 float32_t per point, 26624 points, 106 samples
reducing dimension of /root/stoat/backpack_M_hrc/GEO/backpack_M_hrc/transistorRadio_M_hrc/wireAdaptor01_M_geo -- 3 float32_t per point, 26624 points, 106 samples
  read 33865728 bytes
  created 27 clusters
chose 27 dimensions among [0.42268527 0.6927411  0.93774635 0.96568865 0.968732   0.9712005
 0.9735567  0.9757012  0.9777527  0.97962254 0.9812565  0.98277944
 0.9842339  0.98559976 0.98691905 0.9882155  0.9894337  0.990626
 0.99177295 0.9928766  0.99395293 0.9949954  0.9959906  0.9969605
 0.9979098  0.9987846  0.9995942  1.0000001  1.0000001  1.0000001
 1.0000001  1.0000001  1.0000001  1.0000001  1.0000001  1.0000001
 1.0000001  1.0000001  1.0000001  1.0000001  1.0000001  1.0000001
 1.0000001  1.0000001  1.0000001  1.0000001  1.0000001  1.0000001
 1.0000001  1.0000001  1.0000001  1.0000001  1.0000001  1.0000001
 1.0000001  1.0000001  1.0000001  

In [7]:
postdata = read_embedding(header, files, verbose=True)
predata = read_binfile(header)
report = Report(predata, postdata, sum(os.path.getsize(path) for path in files))
print(f"{header.path}")
report.print_report(mpu)
for path in files:
    print(f"  {os.path.getsize(path)} {path}")

/root/stoat/backpack_M_hrc/GEO/backpack_M_hrc/transistorRadio_M_hrc/wireAdaptor01_M_geo
33865728 reduced to 9361561: 72.36% reduction
Range: 0.01 0.01 0.01 m
Hausdorff (pointwise): 0.009746862469096689 m
Linf: 0.008644123077392579 m
Linf after error-correct: 0.0 m
0 uncorrectable entries; 0.00% of entries
  8945853 /Users/bhudson/projects/geocache-compression/build/root-stoat-backpack_M_hrc-GEO-backpack_M_hrc-transistorRadio_M_hrc-wireAdaptor01_M_geo.embed-header.bin
  309096 /Users/bhudson/projects/geocache-compression/build/root-stoat-backpack_M_hrc-GEO-backpack_M_hrc-transistorRadio_M_hrc-wireAdaptor01_M_geo.embed.bin
  106612 /Users/bhudson/projects/geocache-compression/build/root-stoat-backpack_M_hrc-GEO-backpack_M_hrc-transistorRadio_M_hrc-wireAdaptor01_M_geo.embed-clusters.bin


In [8]:
# Test *all* the animations.
reports = run_all_reports(
    package,
    quality=quality,
    cluster_fn=cluster_fn,
    cluster_size=clustersize,
    verbose=True,
)
print("done")

 6.55% | 773064 | /root/remi/body_M_hrc/GEO/body_M_hrc/flag_M_hrc/flag_M_geo
 93.54% | 4305720 | /root/remi/body_M_hrc/GEO/body_M_hrc/flag_M_hrc/flagPole_M_geo
 95.46% | 288744 | /root/remi/body_M_hrc/GEO/body_M_hrc/flag_M_hrc/ring01_M_geo
 95.46% | 288744 | /root/remi/body_M_hrc/GEO/body_M_hrc/flag_M_hrc/ring02_M_geo
 96.78% | 3608664 | /root/remi/body_M_hrc/GEO/body_M_hrc/body_M_geo
 96.78% | 4296816 | /root/remi/body_M_hrc/GEO/body_M_hrc/bodyBase_M_geo
 96.57% | 305280 | /root/remi/body_M_hrc/GEO/body_M_hrc/label_M_geo
 86.01% | 1193136 | /root/remi/body_M_hrc/GEO/body_M_hrc/tape_M_hrc/tape_M_geo
 95.81% | 1259280 | /root/remi/body_M_hrc/GEO/body_M_hrc/tape_M_hrc/tapeEnd_M_geo
 95.24% | 53424 | /root/remi/body_M_hrc/GEO/body_M_hrc/insideBits_M_hrc/innerWheel_M_geo
 96.70% | 559680 | /root/remi/body_M_hrc/GEO/body_M_hrc/insideBits_M_hrc/innerFrame_M_geo
 96.76% | 1818960 | /root/remi/body_M_hrc/GEO/body_M_hrc/screws_M_hrc/bodyScrew01_M_geo
 96.76% | 1818960 | /root/remi/body_M_hrc/GE

In [9]:
# Play around with the overall set of reports.
orig = sum(r.original_size for r in reports.values())
compressed = sum(r.compressed_size for r in reports.values())
print(f"{orig} vs {compressed} -- {1-compressed/orig:.2%} compression")

h = max(r.hausdorff for r in reports.values()) * mpu
linf = max(r.Linf for r in reports.values()) * mpu
print(f"Max error: {h} m distance, {linf} m any single coordinate")

corrected = max(r.corrected_Linf for r in reports.values()) * mpu
uncorrected = sum(r.numuncorrectable for r in reports.values())
numvalues = sum(r.original_numvalues for r in reports.values())
print(f"{uncorrected} uncorrectable out of {numvalues} ({uncorrected/numvalues:%})")

469454616 vs 94475706 -- 79.88% compression
Max error: 0.33314059165552573 m distance, 0.3249586868286133 m any single coordinate
6978370 uncorrectable out of 117363654 (5.945938%)


In [None]:
############ Alembic fun times
abc_package = parse_json_file(octopus)
for h in abc_package.headers:
    if not os.path.isabs(h.binpath):
        h.binpath = str(octopus_bindir / h.binpath)

mpu = 0.01
reports = run_all_reports(
    abc_package,
    cluster_fn=cluster_fn,
    cluster_size=clustersize,
    quality=quality,
    verbose=True,
)