In [2]:
import varints
import sys
import numpy as np
from skimage.io import imread, imsave
from scipy import sparse
import numpy as np
import gzip
import os
from pathlib import Path
from process import process
from typing import Any


sparse_proto_direct_reduce_root = Path("./results/sparse_proto_direct")
proto_direct_reduce_root = Path("./results/proto_direct")
proto_delta_reduce_root = Path("./results/proto_delta")
proto_raw_reduce_root = Path("./results/proto_raw")
BLOCK_SIZE = 8192
ALGOS = ["sqliteu", "dlugoszu", "leb128s", "leb128u"]
            
        
def run_experiment(png_src: np.ndarray, result_file: Path):
    """
    Apply each candidate varint algoritm to the given condensed byte sequence.  Report on the sizing of each any
    ensure that each algorithm's work is reversible without any knowledge other than which algorithm was used to
    condense the bit sequence.
    """
    print(result_file)
    as_list = png_src.tolist()
    full_size = len(png_src)
    padding = (full_size % BLOCK_SIZE)
    if (padding > 0):
        padding = BLOCK_SIZE - padding
        as_list.extend([0] * padding)
        full_size = full_size + padding
        png_src = np.fromiter(as_list, np.uint16)
    block_count: int = round(full_size / BLOCK_SIZE)
    algo_idx = 0
    for varint_algo in [varints.sqliteu, varints.dlugoszu]:  #, varints.leb128s, varints.leb128u]:
        algo = ALGOS[algo_idx]
        algo_idx = algo_idx + 1
        algo_result_file = f"{str(result_file)}-{algo}.dat"
        approx = 0
        block_idx = 0
        last_block = block_count - 1
        with gzip.open(algo_result_file, 'wb', compresslevel=9) as woo:
            for chunk in np.split(png_src, block_count):
                block_idx += 1
                if block_idx == last_block:
                    chunk = chunk[:(-1 * padding)]
                var = varint_algo.encode(chunk.tolist())
                approx += sys.getsizeof(var)
                woo.write(var)
#         with gzip.open(algo_result_file, 'rb', compresslevel=9) as woo:
#             var = woo.read()
#         decoded_bytes = varint_algo.decode(var)
#         rehydrated_png = np.array(decoded_bytes, np.uint16)
#         if not (png_src == rehydrated_png).all():
#             raise ValueError(f"Comparison on reading back varint encoding failed")
        print(f"|{approx}|{algo}|{result_file}|")

In [None]:
root = Path('./ee19f416b1735c6ec5fc2ff3c1524a761032d001')
for (parent, dirs, files) in os.walk(root):
    for file in files:
        if file.endswith('.png'):
            result = parent + '/' + file
            shaped_data = imread(result)
            raw_data = shaped_data.flatten()
            print(f"Raw size: {sys.getsizeof(raw_data)} bytes in {len(raw_data)} items")
            result_path = Path(result).relative_to(root)
            (compact_direct, direct_key, compact_delta, delta_key) = process(shaped_data, result_path)
            # TODO: Save the direct_key and delta_key contents!
            print(f"Direct png compaction: {sys.getsizeof(compact_direct)} bytes in {len(compact_direct)} items using {direct_key['algorithm']}")
            if compact_delta is not None:
                print(f"Delta png compaction: {sys.getsizeof(compact_delta)} bytes in {len(compact_delta)} items using {delta_key['algorithm']}")
            else:
                print("Delta png compaction was not available on this input")
            print("Raw Variant Encoding")
            run_experiment(raw_data, proto_raw_reduce_root / result_path)
            print("Direct Compact Varint Encoding")
            run_experiment(compact_direct, proto_direct_reduce_root / result_path)
            print("Sparse Direct Compact Varint Encoding")
            bsr_rep = sparse.bsr_matrix(compact_direct)
            run_experiment(bsr_rep.data.flatten(), sparse_proto_direct_reduce_root / result_path)
            if compact_delta is not None:
                print("Delta Compact Varint Encoding")
                run_experiment(compact_delta, proto_delta_reduce_root / result_path)
            sparse.bsr_matrix()
            print("\n")


Raw size: 6096480 bytes in 3048192 items
Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/450_accum16_20200212_163253.png
|3653662|sqliteu|results/proto_raw/450_accum16_20200212_163253.png|
|3965723|dlugoszu|results/proto_raw/450_accum16_20200212_163253.png|
|4064030|leb128s|results/proto_raw/450_accum16_20200212_163253.png|
|3965723|leb128u|results/proto_raw/450_accum16_20200212_163253.png|
Direct Compact Variant Encoding
results/proto_direct/450_accum16_20200212_163253.png
|3217421|sqliteu|results/proto_direct/450_accum16_20200212_163253.png|
|3603350|dlugoszu|results/proto_direct/450_accum16_20200212_163253.png|
|3869942|leb128s|results/proto_direct/450_accum16_20200212_163253.png|
|3603350|leb128u|results/proto_direct/450_accum16_20200212_163253.png|
Delta Compact Variant Encoding
results/proto_delta/450_accum16_20200212_163253.png
|3069202|sqliteu|resu

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/1500_accum16_20200212_163302.png
|3177589|sqliteu|results/proto_raw/1500_accum16_20200212_163302.png|
|3729776|dlugoszu|results/proto_raw/1500_accum16_20200212_163302.png|
|3958620|leb128s|results/proto_raw/1500_accum16_20200212_163302.png|
|3729776|leb128u|results/proto_raw/1500_accum16_20200212_163302.png|
Direct Compact Variant Encoding
results/proto_direct/1500_accum16_20200212_163302.png
|3061088|sqliteu|results/proto_direct/1500_accum16_20200212_163302.png|
|3290410|dlugoszu|results/proto_direct/1500_accum16_20200212_163302.png|
|3682483|leb128s|results/proto_direct/1500_accum16_20200212_163302.png|
|3290410|leb128u|results/proto_direct/1500_accum16_20200212_163302.png|
Delta Compact Variant Encoding
results/proto_delta/1500_accum16_20200212_163302.png
|3063179|sqliteu|results/proto_delta/1500_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/1950_accum16_20200212_163306.png
|3099777|sqliteu|results/proto_raw/1950_accum16_20200212_163306.png|
|3570551|dlugoszu|results/proto_raw/1950_accum16_20200212_163306.png|
|3907021|leb128s|results/proto_raw/1950_accum16_20200212_163306.png|
|3570551|leb128u|results/proto_raw/1950_accum16_20200212_163306.png|
Direct Compact Variant Encoding
results/proto_direct/1950_accum16_20200212_163306.png
|3060844|sqliteu|results/proto_direct/1950_accum16_20200212_163306.png|
|3168293|dlugoszu|results/proto_direct/1950_accum16_20200212_163306.png|
|3613517|leb128s|results/proto_direct/1950_accum16_20200212_163306.png|
|3168293|leb128u|results/proto_direct/1950_accum16_20200212_163306.png|
Delta Compact Variant Encoding
results/proto_delta/1950_accum16_20200212_163306.png
|3062392|sqliteu|results/proto_delta/1950_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/2100_accum16_20200212_163307.png
|3090146|sqliteu|results/proto_raw/2100_accum16_20200212_163307.png|
|3500384|dlugoszu|results/proto_raw/2100_accum16_20200212_163307.png|
|3879066|leb128s|results/proto_raw/2100_accum16_20200212_163307.png|
|3500384|leb128u|results/proto_raw/2100_accum16_20200212_163307.png|
Direct Compact Variant Encoding
results/proto_direct/2100_accum16_20200212_163307.png
|3060715|sqliteu|results/proto_direct/2100_accum16_20200212_163307.png|
|3134382|dlugoszu|results/proto_direct/2100_accum16_20200212_163307.png|
|3581000|leb128s|results/proto_direct/2100_accum16_20200212_163307.png|
|3134382|leb128u|results/proto_direct/2100_accum16_20200212_163307.png|
Delta Compact Variant Encoding
results/proto_delta/2100_accum16_20200212_163307.png
|3062152|sqliteu|results/proto_delta/2100_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/1200_accum16_20200212_163300.png
|3271667|sqliteu|results/proto_raw/1200_accum16_20200212_163300.png|
|3786999|dlugoszu|results/proto_raw/1200_accum16_20200212_163300.png|
|3978187|leb128s|results/proto_raw/1200_accum16_20200212_163300.png|
|3786999|leb128u|results/proto_raw/1200_accum16_20200212_163300.png|
Direct Compact Variant Encoding
results/proto_direct/1200_accum16_20200212_163300.png
|3061306|sqliteu|results/proto_direct/1200_accum16_20200212_163300.png|
|3348489|dlugoszu|results/proto_direct/1200_accum16_20200212_163300.png|
|3721912|leb128s|results/proto_direct/1200_accum16_20200212_163300.png|
|3348489|leb128u|results/proto_direct/1200_accum16_20200212_163300.png|
Delta Compact Variant Encoding
results/proto_delta/1200_accum16_20200212_163300.png
|3063829|sqliteu|results/proto_delta/1200_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/2400_accum16_20200212_163310.png
|3083037|sqliteu|results/proto_raw/2400_accum16_20200212_163310.png|
|3392632|dlugoszu|results/proto_raw/2400_accum16_20200212_163310.png|
|3828725|leb128s|results/proto_raw/2400_accum16_20200212_163310.png|
|3392632|leb128u|results/proto_raw/2400_accum16_20200212_163310.png|
Direct Compact Variant Encoding
results/proto_direct/2400_accum16_20200212_163310.png
|3060580|sqliteu|results/proto_direct/2400_accum16_20200212_163310.png|
|3106227|dlugoszu|results/proto_direct/2400_accum16_20200212_163310.png|
|3524568|leb128s|results/proto_direct/2400_accum16_20200212_163310.png|
|3106227|leb128u|results/proto_direct/2400_accum16_20200212_163310.png|
Delta Compact Variant Encoding
results/proto_delta/2400_accum16_20200212_163310.png
|3061867|sqliteu|results/proto_delta/2400_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/600_accum16_20200212_163254.png
|3528307|sqliteu|results/proto_raw/600_accum16_20200212_163254.png|
|3901969|dlugoszu|results/proto_raw/600_accum16_20200212_163254.png|
|4041992|leb128s|results/proto_raw/600_accum16_20200212_163254.png|
|3901969|leb128u|results/proto_raw/600_accum16_20200212_163254.png|
Direct Compact Variant Encoding
results/proto_direct/600_accum16_20200212_163254.png
|3117619|sqliteu|results/proto_direct/600_accum16_20200212_163254.png|
|3518738|dlugoszu|results/proto_direct/600_accum16_20200212_163254.png|
|3827505|leb128s|results/proto_direct/600_accum16_20200212_163254.png|
|3518738|leb128u|results/proto_direct/600_accum16_20200212_163254.png|
Delta Compact Variant Encoding
results/proto_delta/600_accum16_20200212_163254.png
|3066383|sqliteu|results/proto_delta/600_accum16_20200212_1632

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/150_accum16_20200212_163250.png
|3769369|sqliteu|results/proto_raw/150_accum16_20200212_163250.png|
|3992491|dlugoszu|results/proto_raw/150_accum16_20200212_163250.png|
|4067947|leb128s|results/proto_raw/150_accum16_20200212_163250.png|
|3992491|leb128u|results/proto_raw/150_accum16_20200212_163250.png|
Direct Compact Variant Encoding
results/proto_direct/150_accum16_20200212_163250.png
|3310818|sqliteu|results/proto_direct/150_accum16_20200212_163250.png|
|3685285|dlugoszu|results/proto_direct/150_accum16_20200212_163250.png|
|3917281|leb128s|results/proto_direct/150_accum16_20200212_163250.png|
|3685285|leb128u|results/proto_direct/150_accum16_20200212_163250.png|
Delta Compact Variant Encoding
results/proto_delta/150_accum16_20200212_163250.png
|3071623|sqliteu|results/proto_delta/150_accum16_20200212_1632

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/1650_accum16_20200212_163303.png
|3139929|sqliteu|results/proto_raw/1650_accum16_20200212_163303.png|
|3689266|dlugoszu|results/proto_raw/1650_accum16_20200212_163303.png|
|3947109|leb128s|results/proto_raw/1650_accum16_20200212_163303.png|
|3689266|leb128u|results/proto_raw/1650_accum16_20200212_163303.png|
Direct Compact Variant Encoding
results/proto_direct/1650_accum16_20200212_163303.png
|3060853|sqliteu|results/proto_direct/1650_accum16_20200212_163303.png|
|3254375|dlugoszu|results/proto_direct/1650_accum16_20200212_163303.png|
|3662281|leb128s|results/proto_direct/1650_accum16_20200212_163303.png|
|3254375|leb128u|results/proto_direct/1650_accum16_20200212_163303.png|
Delta Compact Variant Encoding
results/proto_delta/1650_accum16_20200212_163303.png
|3062878|sqliteu|results/proto_delta/1650_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/1050_accum16_20200212_163258.png
|3329853|sqliteu|results/proto_raw/1050_accum16_20200212_163258.png|
|3812345|dlugoszu|results/proto_raw/1050_accum16_20200212_163258.png|
|3991357|leb128s|results/proto_raw/1050_accum16_20200212_163258.png|
|3812345|leb128u|results/proto_raw/1050_accum16_20200212_163258.png|
Direct Compact Variant Encoding
results/proto_direct/1050_accum16_20200212_163258.png
|3062140|sqliteu|results/proto_direct/1050_accum16_20200212_163258.png|
|3376360|dlugoszu|results/proto_direct/1050_accum16_20200212_163258.png|
|3745085|leb128s|results/proto_direct/1050_accum16_20200212_163258.png|
|3376360|leb128u|results/proto_direct/1050_accum16_20200212_163258.png|
Delta Compact Variant Encoding
results/proto_delta/1050_accum16_20200212_163258.png
|3064074|sqliteu|results/proto_delta/1050_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/2250_accum16_20200212_163308.png
|3085486|sqliteu|results/proto_raw/2250_accum16_20200212_163308.png|
|3441009|dlugoszu|results/proto_raw/2250_accum16_20200212_163308.png|
|3852642|leb128s|results/proto_raw/2250_accum16_20200212_163308.png|
|3441009|leb128u|results/proto_raw/2250_accum16_20200212_163308.png|
Direct Compact Variant Encoding
results/proto_direct/2250_accum16_20200212_163308.png
|3060644|sqliteu|results/proto_direct/2250_accum16_20200212_163308.png|
|3116206|dlugoszu|results/proto_direct/2250_accum16_20200212_163308.png|
|3552366|leb128s|results/proto_direct/2250_accum16_20200212_163308.png|
|3116206|leb128u|results/proto_direct/2250_accum16_20200212_163308.png|
Delta Compact Variant Encoding
results/proto_delta/2250_accum16_20200212_163308.png
|3062064|sqliteu|results/proto_delta/2250_accum16_2

  imsave(dst, reduced)
  imsave(dst, reduced)


Direct png compaction: 6096480 bytes in 3048192 items using digitize16
Delta png compaction: 6096480 bytes in 3048192 items using digitize16
Raw Variant Encoding
results/proto_raw/900_accum16_20200212_163257.png
|3355242|sqliteu|results/proto_raw/900_accum16_20200212_163257.png|
|3824909|dlugoszu|results/proto_raw/900_accum16_20200212_163257.png|
|3999523|leb128s|results/proto_raw/900_accum16_20200212_163257.png|


In [2]:
from IPython.display import display
from ipywidgets import FileUpload

f = FileUpload()
display(f)


FileUpload(value={}, description='Upload')