To start running this notebook, open a shell in the root directory of the GitHub repo and run:
```
cd src
conda create -n geocache python=3.11
conda activate geocache
conda install numpy jupyter usd-core
jupyter notebook usd-separate.ipynb
```
Then you can shift-enter to run each cell here.

To run the Alembic parts, you first need to follow the "Running Alembic" section of the [README](https://github.com/imgspc/geocache-compression/blob/main/README.md#running-alembic).

In [None]:
# Settings start here. Evaluate this cell whenever needed.
from pxr import Usd
from embedding import clustering
from embedding.embedding import PCAEmbedding, RoundedPCAEmbedding, embed_or_raw
from embedding.io import separate_usd
from embedding.io import create_embedding
from embedding.io import read_embedding
from embedding.metric import Report
from embedding.io import read_binfile, parse_json_file, run_all_reports
import numpy as np
import os
import math
from pathlib import Path

############# Settings until the next line of ########
# Input/output locations, all relative to the current user's home directory.
home = Path.home()
# USD file that is opened as a stage and handed to separate_usd().
infile = str(home / "projects/ALab/ALab/entry.usda")
# Second argument to separate_usd() (presumably where the separated data is written).
outdir = str(home / "projects/geocache-compression/build")
# JSON file read by parse_json_file() to build the Alembic package.
octopus = str(home / "projects/geocache-compression/build/octopus.json")
# Directory prepended to any relative binpath found in the Alembic package headers.
octopus_bindir = home / "projects/geocache-compression/build"

# When True, cluster_fn is wrapped in clustering.cluster_static_first further down.
check_static = False
# Clustering function passed as cluster_fn= to create_embedding() / run_all_reports().
cluster_fn = clustering.cluster_last_axis  # clustering.cluster_near_quaternions
# Only read by quality(): RoundedPCAEmbedding selects quality_meters, anything else
# (including None) selects quality_fraction.
# NOTE(review): with None, quality() always returns quality_fraction, whereas
# make_scenarios() pairs embed_or_raw with 1e-4 / mpu -- confirm which is intended.
embed_cls = None
# Passed as cluster_size= to create_embedding() / run_all_reports().
clustersize = 1000
# Divided by the file's meters-per-unit in quality() when embed_cls is RoundedPCAEmbedding.
quality_meters = 1e-4
# Returned unchanged by quality() for every other embed_cls.
quality_fraction = 0.999


########################################
# Embedding function passed as embed_fn= to create_embedding() / run_all_reports().
embed_fn = embed_or_raw


def quality(mpu: float):
    """Return the quality value to request for data whose scale is *mpu*.

    *mpu* is the meters-per-unit of the source file. When ``embed_cls`` is
    ``RoundedPCAEmbedding`` the result is ``quality_meters / mpu``; for any
    other ``embed_cls`` (including ``None``) the result is
    ``quality_fraction`` and *mpu* is not used.
    """
    return (
        quality_meters / mpu
        if embed_cls is RoundedPCAEmbedding
        else quality_fraction
    )


# Optionally wrap the selected clustering function: clustering.cluster_static_first
# is called in its place and receives the original function as its cluster_fn keyword.
# NOTE(review): presumably this handles static (non-animated) data before delegating
# to the wrapped function -- confirm against clustering.cluster_static_first.
if check_static:
    import functools

    cluster_fn = functools.partial(
        clustering.cluster_static_first, cluster_fn=cluster_fn
    )

In [None]:
# Load the files.

# USD: the stage is opened here only to read its metersPerUnit metadata;
# separate_usd() is given the file path, not the stage.
stage = Usd.Stage.Open(infile)
usd_mpu = stage.GetMetadata("metersPerUnit")
usd_package = separate_usd(infile, outdir, verbose=False)
print(
    f"{usd_package.inputfile} has {len(usd_package.headers)} animations, mpu {usd_mpu}"
)

# Alembic: the package comes from a JSON description and the unit scale is
# hard-coded rather than read from the file.
abc_mpu = 0.01
abc_package = parse_json_file(octopus)
for h in abc_package.headers:
    if os.path.isabs(h.binpath):
        continue
    # Relative binary paths are resolved against octopus_bindir.
    h.binpath = str(octopus_bindir / h.binpath)
print(
    f"{abc_package.inputfile} has {len(abc_package.headers)} animations, mpu {abc_mpu}"
)

In [None]:
# Test just doing one embedding.
# Alternative header choices, kept here for quick switching:
# header = max(
#    usd_package.headers, key=lambda h: h.numbytes()
# )  # compress the biggest file
# header = usd_package.get_header("/root/remi/head_M_hrc/GEO/head_M_hrc/facePlate_M_geo")
header = usd_package.get_header(
    "/root/remi/body_M_hrc/GEO/body_M_hrc/flag_M_hrc/flag_M_geo"
)
print(f"{header}")

# Build the embedding for this one animation using the notebook-wide settings.
files = create_embedding(
    header,
    quality=quality(usd_mpu),
    cluster_fn=cluster_fn,
    cluster_size=clustersize,
    embed_fn=embed_fn,
    verbose=True,
)

print(files)

# Read the embedding back and compare it with the original data; the third
# Report argument is the summed on-disk size of the files returned above.
postdata = read_embedding(header, files, verbose=True)
predata = read_binfile(header)
report = Report(predata, postdata, sum(map(os.path.getsize, files)))
print(f"{header.path}")
report.print_report(usd_mpu)
for path in files:
    print(f"  {os.path.getsize(path)} {path}")

In [None]:
# Test *all* the animations.
reports = run_all_reports(
    usd_package,
    quality=quality(usd_mpu),
    cluster_fn=cluster_fn,
    embed_fn=embed_fn,
    cluster_size=clustersize,
    verbose=True,
)
# Combine the individual Report objects, not the container returned by
# run_all_reports: every other call site in this notebook passes
# reports.values(), so iterating the container itself would yield its keys.
overall = Report.combine_reports(reports.values())
overall.print_report(usd_mpu)

In [None]:
############ Alembic fun times
# Run every animation in the Alembic package with the notebook-wide settings,
# then merge the per-animation reports and print the result at the Alembic scale.
reports = run_all_reports(
    abc_package,
    quality=quality(abc_mpu),
    cluster_fn=cluster_fn,
    embed_fn=embed_fn,
    cluster_size=clustersize,
    verbose=True,
)
overall = Report.combine_reports(reports.values())
overall.print_report(abc_mpu)

In [None]:
### Run the full report for all tests:
def _callable_name(fn):
    """Return a printable name for the callable *fn*.

    Functions, methods and classes report their ``__qualname__``.
    ``functools.partial`` objects have no ``__qualname__``, so they are shown
    as the wrapped callable's name followed by the bound arguments, e.g.
    ``cluster_static_first(cluster_fn=cluster_last_axis)``. Anything else
    falls back to ``repr(fn)``.
    """
    name = getattr(fn, "__qualname__", None)
    if name is not None:
        return name

    # functools.partial exposes the wrapped callable as .func and the bound
    # arguments as .args / .keywords.
    wrapped = getattr(fn, "func", None)
    if wrapped is None:
        return repr(fn)

    def describe(value):
        return _callable_name(value) if callable(value) else repr(value)

    bound = [describe(arg) for arg in getattr(fn, "args", ())]
    bound += [
        f"{key}={describe(value)}"
        for key, value in (getattr(fn, "keywords", None) or {}).items()
    ]
    # Join with "; " rather than ", " so the name stays a single field in the
    # comma-separated line produced by Summary.__str__.
    return f"{_callable_name(wrapped)}({'; '.join(bound)})"


class Scenario:
    """One combination of package, quality, clustering and embedding to evaluate."""

    def __init__(self, package, packagename, quality, cluster_fn, embed_fn):
        """Store the scenario parameters.

        package: the package handed to run_all_reports().
        packagename: short label used in printed output.
        quality: value passed as quality= to run_all_reports().
        cluster_fn: callable passed as cluster_fn= to run_all_reports().
        embed_fn: callable passed as embed_fn= to run_all_reports().
        """
        self.package = package
        self.packagename = packagename
        self.quality = quality
        self.cluster_fn = cluster_fn
        self.embed_fn = embed_fn

    def run(self):
        """Run all reports for this scenario and return a Summary of the combined report.

        Prints a banner before the run and the summary line after it. The
        cluster size is taken from the notebook-level ``clustersize`` setting.
        """
        print(
            f"************************************\n"
            f"Running: {self.packagename} with quality {self.quality} "
            f"{_callable_name(self.cluster_fn)} / {_callable_name(self.embed_fn)}"
        )
        reports = run_all_reports(
            self.package,
            cluster_fn=self.cluster_fn,
            embed_fn=self.embed_fn,
            cluster_size=clustersize,
            quality=self.quality,
            verbose=True,
        )
        summary = Summary(self, Report.combine_reports(reports.values()))
        print(f"{summary}\n" f"************************************")
        return summary


class Summary:
    """Printable result of one Scenario: its parameters plus the combined report."""

    def __init__(self, scenario, report):
        """Copy the descriptive fields from *scenario* and keep *report*.

        Only names are stored for the clustering and embedding callables, so
        the summary does not hold on to the scenario or its package.
        """
        self.packagename = scenario.packagename
        self.quality = scenario.quality
        self.clustername = _callable_name(scenario.cluster_fn)
        self.embedname = _callable_name(scenario.embed_fn)
        self.report = report

    def __str__(self):
        """Return one " , "-separated line.

        Fields: package, clustering, embedding, quality, compression ratio
        (as a percentage) and the report's ``hausdorff`` value.
        """
        return f"{self.packagename} , {self.clustername} , {self.embedname} , {self.quality} , {self.report.compression_ratio:.2%} , {self.report.hausdorff}"


def make_scenarios():
    """Return the list of Scenario objects for the full run.

    One Scenario is created for every combination of package, (embedding
    function, quality) pair and clustering function listed below. The order
    is: packages outermost, then embedding options, then clustering functions.
    Commented-out entries are alternatives that can be switched back on.
    """
    packages = (
        (usd_package, "usd", usd_mpu),
        # (abc_package, "abc", abc_mpu),
    )
    cluster_fns = (
        # clustering.cluster_monolithic,
        clustering.cluster_by_index,
        # clustering.cluster_pca_kmeans,
        # clustering.cluster_kmeans,
        # clustering.cluster_near_quaternions,
        clustering.cluster_first_axis,
        clustering.cluster_last_axis,
    )

    result = []
    for package, packagename, mpu in packages:
        # The quality values depend on the package's meters-per-unit, so this
        # table has to be rebuilt for each package.
        embed_options = (
            # (RoundedPCAEmbedding.from_data, 1e-2 / mpu),
            (embed_or_raw, 1e-4 / mpu),
            # (RoundedPCAEmbedding.from_data, 1e-4 / mpu),
            # (PCAEmbedding.from_data, 0.99),
            # (PCAEmbedding.from_data, 0.999),
        )
        for embed_fn, quality in embed_options:
            for cluster_fn in cluster_fns:
                result.append(
                    Scenario(package, packagename, quality, cluster_fn, embed_fn)
                )
    return result


# Build the scenario list consumed by the long-running cell below.
# To run a single hand-picked scenario instead, use e.g.:
# scenarios = [Scenario(abc_package, "abc", quality(abc_mpu), clustering.cluster_by_index, embed_fn)]
scenarios = make_scenarios()

In [None]:
# This takes a long time!
# Results are collected one at a time so that the summaries already finished
# remain available in `summaries` if a later scenario raises or the kernel is
# interrupted part-way through.
summaries = []
for scenario in scenarios:
    summaries.append(scenario.run())

In [None]:
# Print one line per scenario, formatted by Summary.__str__: package name,
# clustering function, embedding function, quality, compression ratio and
# the report's hausdorff value.
for summary in summaries:
    print(f"{summary}")