To run this notebook, open a shell and go to the root directory of the GitHub repo, then run:
```
cd src
conda create -n geocache python=3.11
conda activate geocache
conda install numpy jupyter usd-core
jupyter notebook usd-separate.ipynb
```
Then press Shift+Enter to run each cell in order.

To run the Alembic cells, first complete the steps in the "Running Alembic" section of the [README](https://github.com/imgspc/geocache-compression/blob/main/README.md#running-alembic).

In [None]:
# Settings start here. Evaluate this cell whenever needed.
from pathlib import Path

home = Path.home()

# USD stage that the loading cell opens and separates.
infile = str(home / "projects/ALab/ALab/entry.usda")

# Build directory of the geocache-compression checkout. It is defined once and
# the related settings are derived from it, so they cannot drift apart when
# the directory moves.
octopus_bindir = home / "projects/geocache-compression/build"
outdir = str(octopus_bindir)  # output directory handed to separate_usd()
octopus = str(octopus_bindir / "octopus.json")  # JSON file for parse_json_file()

# Clustering / quality knobs used by the cells below.
check_static = False  # True: wrap cluster_fn in clustering.cluster_static_first
kmeans = False  # True: clustering.cluster_pca_kmeans, False: clustering.cluster_by_index
clustersize = 1000  # passed as cluster_size=
quality = 0.999  # passed as quality=

########################################
from pxr import Usd
from embedding import clustering
from embedding.io import separate_usd
from embedding.io import create_embedding
from embedding.io import read_embedding
from embedding.metric import Report
from embedding.io import read_binfile, parse_json_file, run_all_reports
import numpy as np
import os
import math

# Base strategy: clustering.cluster_pca_kmeans when `kmeans` is set,
# clustering.cluster_by_index otherwise.
cluster_fn = (
    clustering.cluster_pca_kmeans if kmeans else clustering.cluster_by_index
)

# When `check_static` is set, route through clustering.cluster_static_first,
# handing it the base strategy chosen above as its `cluster_fn` keyword.
if check_static:
    import functools

    cluster_fn = functools.partial(
        clustering.cluster_static_first, cluster_fn=cluster_fn
    )

In [None]:
# Load the files.
# Open the stage to read its "metersPerUnit" metadata; `mpu` is later handed
# to print_report() by the report cells.
stage = Usd.Stage.Open(infile)
mpu = stage.GetMetadata("metersPerUnit")
print(mpu)

# Run separate_usd() on the input file with `outdir` as its second argument.
# The returned package exposes the input file name and a list of headers,
# one per animation according to the message printed below.
usd_package = separate_usd(infile, outdir, verbose=False)
print(f"{usd_package.inputfile} has {len(usd_package.headers)} animations")

In [None]:
# Test just doing one embedding.
# Compress the biggest file: the header reporting the largest numbytes().
header = max(usd_package.headers, key=lambda candidate: candidate.numbytes())
# header = usd_package.get_header("/root/remi/head_M_hrc/GEO/head_M_hrc/facePlate_M_geo")
print(f"{header}")

files = create_embedding(
    header,
    quality=quality,
    cluster_size=clustersize,
    cluster_fn=cluster_fn,
    verbose=True,
)
print(files)

# Read the embedding back, load the original binary data, and feed both to
# Report together with the total on-disk size of the embedding files.
postdata = read_embedding(header, files, verbose=True)
predata = read_binfile(header)
report = Report(predata, postdata, sum(map(os.path.getsize, files)))

print(f"{header.path}")
report.print_report(mpu)

# Per-file size breakdown.
for path in files:
    print(f"  {os.path.getsize(path)} {path}")

In [None]:
# Display the compression ratio of the most recently built `report` as the
# cell's output.
report.compression_ratio

In [None]:
# Test *all* the animations.
reports = run_all_reports(
    usd_package,
    quality=quality,
    cluster_fn=cluster_fn,
    cluster_size=clustersize,
    verbose=True,
)
# The other cells in this notebook combine the result of run_all_reports()
# through .values(); do the same here so that combine_reports() iterates the
# Report objects rather than the mapping's keys.
overall = Report.combine_reports(reports.values())
overall.print_report(mpu)

In [None]:
############ Alembic fun times
abc_package = parse_json_file(octopus)

# Relative binpaths are resolved against octopus_bindir; absolute ones are
# left untouched.
for h in abc_package.headers:
    if os.path.isabs(h.binpath):
        continue
    h.binpath = str(octopus_bindir / h.binpath)

# NOTE: this rebinds the notebook-wide `mpu` that the loading cell reads from
# the USD stage. Re-run the loading cell before re-running the USD reports.
mpu = 0.01
reports = run_all_reports(
    abc_package,
    quality=quality,
    cluster_size=clustersize,
    cluster_fn=cluster_fn,
    verbose=True,
)
overall = Report.combine_reports(reports.values())
overall.print_report(mpu)

In [None]:
### Run the full report for all tests:
class Scenario:
    """One benchmark configuration: a package, a quality and a clustering function."""

    def __init__(self, package, packagename, quality, cluster_fn):
        """Store the configuration; nothing is computed until run() is called."""
        self.package, self.packagename = package, packagename
        self.quality, self.cluster_fn = quality, cluster_fn

    def run(self):
        """Run all reports for this scenario's package and wrap the result.

        The cluster size is read from the notebook-level `clustersize` setting
        at call time. Returns a Summary built from this scenario and the
        combined report.
        """
        per_item_reports = run_all_reports(
            self.package,
            quality=self.quality,
            cluster_size=clustersize,
            cluster_fn=self.cluster_fn,
            verbose=True,
        )
        combined = Report.combine_reports(per_item_reports.values())
        return Summary(self, combined)


class Summary:
    """Printable one-line result of running a Scenario.

    Attributes:
        packagename: copied from the scenario.
        quality: copied from the scenario.
        clustername: display name of the scenario's clustering callable.
        report: the combined report; __str__ reads its `compression_ratio`
            and `hausdorff` attributes.
    """

    def __init__(self, scenario, report):
        self.packagename = scenario.packagename
        self.quality = scenario.quality
        # Plain functions expose __name__, but functools.partial objects (such
        # as the cluster_static_first wrapper built in the settings cell) do
        # not, so the name is derived by a helper instead of read directly.
        self.clustername = self._callable_name(scenario.cluster_fn)
        self.report = report

    @staticmethod
    def _callable_name(fn):
        """Return a readable name for *fn*.

        Uses ``fn.__name__`` when present. For partial-like objects (anything
        with a ``func`` attribute) the wrapped callable's name is used, followed
        by any callable keyword arguments, e.g.
        ``cluster_static_first(cluster_fn=cluster_by_index)``. Falls back to
        ``repr(fn)`` when neither is available.
        """
        name = getattr(fn, "__name__", None)
        if name is not None:
            return name

        wrapped = getattr(fn, "func", None)
        if wrapped is None:
            return repr(fn)

        base = Summary._callable_name(wrapped)
        keywords = getattr(fn, "keywords", None) or {}
        bound = [
            f"{key}={Summary._callable_name(value)}"
            for key, value in keywords.items()
            if callable(value)
        ]
        return f"{base}({', '.join(bound)})" if bound else base

    def __str__(self):
        return f"{self.packagename} , {self.clustername} , {self.quality} , {self.report.compression_ratio:.2%} , {self.report.hausdorff}"


def make_scenarios():
    """Build the full grid of scenarios.

    Every combination of package (usd, abc), quality (0.99, 0.999) and
    clustering function is produced, with the package varying slowest and the
    clustering function varying fastest.
    """
    grid = []
    for package, packagename in ((usd_package, "usd"), (abc_package, "abc")):
        for quality in (0.99, 0.999):
            for cluster_fn in (
                clustering.cluster_pca_kmeans,
                clustering.cluster_kmeans,
                clustering.cluster_by_index,
            ):
                grid.append(Scenario(package, packagename, quality, cluster_fn))
    return grid


# Default to a single scenario (USD package, quality 0.99, cluster_by_index).
# Swap in the commented-out line below to run the full grid from
# make_scenarios() instead.
scenarios = [Scenario(usd_package, "usd", 0.99, clustering.cluster_by_index)]
# scenarios = make_scenarios()

In [None]:
# This takes a long time!
# Append as each scenario finishes (rather than building the list in one
# expression) so the summaries already computed remain available in
# `summaries` if a later scenario raises.
summaries = []
for scenario in scenarios:
    summaries.append(scenario.run())

In [None]:
# Print one line per summary, formatted by Summary.__str__.
for summary in summaries:
    print(summary)