# Calculate Compression Ratios

This notebook compares the on-disk file sizes of segmentation volumes and of the meshes generated from them at different levels of mesh simplification, for bounding boxes of different sizes.

## Imports and functions

In [37]:
from caveclient import CAVEclient
import imageryclient as ic
import numpy as np
from zmesh import Mesher
import os
import shutil
from tqdm import tqdm
import pandas as pd


In [31]:
def get_size(start_path = '.'):
    """Return the combined size, in bytes, of every regular file under a directory.

    The directory tree is walked recursively with ``os.walk``; symbolic links
    are skipped so that their targets are not counted.

    Parameters
    ----------
    start_path : str
        Directory whose contents should be measured (defaults to the
        current working directory).

    Returns
    -------
    int
        Total size in bytes of all non-symlink files found below
        ``start_path``; 0 if nothing is found.
    """
    return sum(
        os.path.getsize(file_path)
        for folder, _subfolders, file_names in os.walk(start_path)
        for file_path in (os.path.join(folder, name) for name in file_names)
        # symbolic links are not counted
        if not os.path.islink(file_path)
    )

## Collect sizes

In [32]:
# Empty results table; one row is filled in per bounding-box size.
#   BBox         -> size of the volume (the bbox_size tuple)
#   Segmentation -> size on disk of the saved segmentation
#   Red_000      -> size on disk of the meshes with reduction factor 0
#   Red_010      -> size on disk of the meshes with reduction factor 10
#   Red_100      -> size on disk of the meshes with reduction factor 100
sizes_base = pd.DataFrame(
    columns=[
        "BBox",
        "Segmentation",
        "Red_000",
        "Red_010",
        "Red_100",
    ]
)

In [27]:
# client setup
# CAVEclient is constructed with 'minnie65_public' (presumably the datastack
# name — confirm against the caveclient docs). The ImageryClient built from
# it is what the segmentation cutout further down is requested through.
client = CAVEclient('minnie65_public')

img_client = ic.ImageryClient(client=client)

In [48]:
# Extent of the volume to collect: three times the base box of
# 1024 x 1024 x 128 along each axis.
# NOTE(review): units are presumably voxels — confirm against the cutout call.
bbox_size = (3 * 1024, 3 * 1024, 3 * 128)

In [None]:
# collect segmentation volume centered on [240640, 207872, 21360]
# bbox_size is passed as the extent of the cutout around that centre point.
# NOTE(review): the coordinate resolution/units of the centre and of bbox_size
# are not stated in this notebook — confirm against the ImageryClient docs.
seg = img_client.segmentation_cutout([240640, 207872, 21360], bbox_size=bbox_size)

In [None]:
# Write the raw segmentation array to data/segmentation/ as a .npy file
# named after the bounding-box dimensions, creating the folder if needed.
seg_dir = "data/segmentation"
os.makedirs(seg_dir, exist_ok=True)
seg_path = f"data/segmentation/{bbox_size[0]}_{bbox_size[1]}_{bbox_size[2]}.npy"
with open(seg_path, "wb") as seg_file:
    np.save(seg_file, seg)

In [None]:
# Store the bounding box and the on-disk size (bytes) of the saved
# segmentation in the results table, then delete the folder to free disk space.
# NOTE(review): the row label is hard-coded to 2; the mesh-size cell writes to
# the same row, so the two must be changed together.
row_label = 2
sizes_base.loc[row_label, "BBox"] = bbox_size
segmentation_bytes = get_size("data/segmentation")
sizes_base.loc[row_label, "Segmentation"] = segmentation_bytes
shutil.rmtree("data/segmentation/")

In [46]:
# Get meshes
# Mesh the segmentation once, then extract every object's mesh at three
# simplification levels. For each level the meshes are written to disk in
# precomputed format, the folder size is recorded in `sizes_base`, and the
# folder is removed again to free disk space.
mesher = Mesher((4, 4, 40))
mesher.mesh(seg, close=False)

# One entry per pass:
# (reduction_factor, tag used in the folder / column name, meshing message, saving message)
# reduction_factor tries to reduce triangles by this factor; 0 disables simplification.
mesh_passes = [
    (0, "000", "meshing without compression", "saving without compression"),
    (10, "010", "meshing with reduction factor 10", "saving with reduction factor 10"),
    (100, "100", "meshing with reduction factor 100", "saving with reduction factor 100"),
]

# Row of `sizes_base` to fill in; the segmentation-size cell writes to the same row label.
row_label = 2
bbox_tag = f"{bbox_size[0]}_{bbox_size[1]}_{bbox_size[2]}"

for pass_idx, (reduction_factor, tag, mesh_msg, save_msg) in enumerate(mesh_passes):
    # The high-resolution meshes held by the mesher are only released during
    # the final pass, because earlier passes still need them.
    is_last_pass = pass_idx == len(mesh_passes) - 1

    print(mesh_msg)
    meshes = []  # rebinding here drops the previous pass before the next one is built
    for obj_id in tqdm(mesher.ids()):
        meshes.append(
            mesher.get(
                obj_id,
                normals=False,  # whether to calculate normals or not
                reduction_factor=reduction_factor,
                # Max tolerable error in physical distance
                # note: if max_error is not set, the max error
                # will be set equivalent to one voxel along the
                # smallest dimension.
                max_error=8,
                # whether meshes should be centered in the voxel
                # on (0,0,0) [False] or (0.5,0.5,0.5) [True]
                voxel_centered=False,
            )
        )
        if is_last_pass:
            mesher.erase(obj_id)  # delete high res mesh

    if is_last_pass:
        mesher.clear()  # clear memory retained by mesher

    # make mesh dir
    mesh_root = f"data/meshes/red_{tag}"
    mesh_dir = f"{mesh_root}/{bbox_tag}"
    os.makedirs(mesh_dir, exist_ok=True)

    print(save_msg)
    # save each mesh as precomputed file; tqdm wraps the list (not the
    # enumerate object) so the progress bar knows the total
    for i, mesh in enumerate(tqdm(meshes)):
        with open(f"{mesh_dir}/10001001:{i:03d}", "wb") as f:
            f.write(mesh.to_precomputed())

    # save data size and free disk space
    sizes_base.loc[row_label, f"Red_{tag}"] = get_size(mesh_root)
    shutil.rmtree(mesh_root)

meshing without compression


100%|█████████████████████████████████████████████████████████████████████████| 3303/3303 [04:59<00:00, 11.03it/s]


saving without compression


3303it [00:11, 288.10it/s] 


meshing with reduction factor 10


100%|█████████████████████████████████████████████████████████████████████████| 3303/3303 [17:54<00:00,  3.08it/s]


saving with reduction factor 10


3303it [00:00, 8127.07it/s]


meshing with reduction factor 100


100%|█████████████████████████████████████████████████████████████████████████| 3303/3303 [18:53<00:00,  2.91it/s]


saving without reduction factor 100


3303it [00:00, 10666.31it/s]


In [47]:
# Display the results table: bounding box plus the on-disk size in bytes of
# the segmentation and of the meshes at each reduction factor.
sizes_base

Unnamed: 0,BBox,Segmentation,Red_000,Red_010,Red_100
0,"(1024, 1024, 128)",268435584,403846696,41103496,26299684
1,"(2048, 2048, 256)",2147483776,3350707824,308354340,173631540
