### Compression on npy files

In [7]:
import os
import sys
import numpy as np
from typing import Dict
import gzip
import subprocess


def load_episode(path, compressed=True) -> Dict[str, np.ndarray]:
    """Read an episode file from disk and return its entries as a plain dict.

    Args:
        path: Location of the file to read.
        compressed: When True the file is loaded with pickling disabled and
            indexed by key directly. When False pickling is allowed and the
            loaded object is unwrapped with ``.item()`` before being indexed.

    Returns:
        A dict mapping every key of the loaded container to its value.
    """
    with open(path, "rb") as handle:
        loaded = np.load(handle, allow_pickle=not compressed)
        # The non-compressed path stores the mapping inside a single array
        # element, so it has to be pulled out before keys can be looked up.
        container = loaded if compressed else loaded.item()
        # Build the result inside the `with` block, while the file is open.
        return {name: container[name] for name in container.keys()}


def compare(o, n):
    """Check that two episode dicts hold exactly the same arrays.

    Intended for validating lossless round trips, so values must match
    exactly rather than within a tolerance.

    Args:
        o: Reference episode, mapping key -> array.
        n: Episode to validate against ``o``.

    Raises:
        AssertionError: If the key sets differ, or if any pair of values
            differs in shape, dtype or element values.
    """
    # Raise explicitly instead of using `assert`, so the checks are not
    # stripped when Python runs with -O.
    if o.keys() != n.keys():
        raise AssertionError(f"key sets differ: {list(o)} vs {list(n)}")

    for key in o:
        expected = np.asarray(o[key])
        actual = np.asarray(n[key])
        # Shape is checked on its own because element-wise comparisons
        # broadcast, which would let e.g. (3,) vs (1,) slip through.
        if expected.shape != actual.shape:
            raise AssertionError(
                f"{key!r}: shape {expected.shape} != {actual.shape}"
            )
        if expected.dtype != actual.dtype:
            raise AssertionError(
                f"{key!r}: dtype {expected.dtype} != {actual.dtype}"
            )
        if not np.array_equal(expected, actual):
            raise AssertionError(f"{key!r}: array contents differ")


# npy
# Benchmark: re-save one episode as an uncompressed .npy file, then measure
# how small each external compressor makes it and verify that decompressing
# gives back the same data.
PATH = "/checkpoint/qingfeiyou/offlinerl/compressed/temp/original/20230601T164859_1900_1001_142_19.00.npz"
print(f"npz file size: {os.path.getsize(PATH)}")

original = load_episode(PATH, compressed=True)

# The whole dict is written as a single .npy file; it is read back below with
# compressed=False, which enables pickling and unwraps the dict via .item().
np.save("original.npy", original)
print(f"npy file size: {os.path.getsize('original.npy')}")

# `zip` updates an existing archive in place, so an archive left behind by an
# earlier run could carry stale members and inflate the reported size.
try:
    os.remove("zip_compressed.zip")
except FileNotFoundError:
    pass

# (label, compress command, compressed artifact, decompress command)
compressors = [
    (
        "xz",
        ["xz", "-9", "original.npy"],
        "original.npy.xz",
        ["xz", "-d", "original.npy.xz"],
    ),
    (
        "zip",
        ["zip", "-9", "zip_compressed.zip", "original.npy"],
        "zip_compressed.zip",
        ["unzip", "-o", "zip_compressed.zip"],  # Overwrite existing file
    ),
    (
        "bzip2",
        ["bzip2", "-zf9", "original.npy"],
        "original.npy.bz2",
        ["bzip2", "-d", "original.npy.bz2"],
    ),
    (
        "gzip",
        ["gzip", "-9", "original.npy"],
        "original.npy.gz",
        ["gzip", "-d", "original.npy.gz"],
    ),
]

for label, compress_cmd, artifact, decompress_cmd in compressors:
    # check=True: a failing tool must stop the benchmark instead of letting
    # the size report / round-trip check run against leftover files.
    subprocess.run(compress_cmd, check=True)
    print(f"{label} file size: {os.path.getsize(artifact)}")
    subprocess.run(decompress_cmd, check=True)
    new_episode = load_episode("original.npy", compressed=False)
    compare(original, new_episode)


npz file size: 11087109
npy file size: 49266920
xz file size: 535536
updating: original.npy (deflated 78%)
zip file size: 10869016
Archive:  zip_compressed.zip
  inflating: original.npy            
bzip2 file size: 1476344
gzip file size: 10868873


### Compression on npz files

In [1]:
import os
import sys
import numpy as np
from typing import Dict
import gzip
import subprocess


def load_episode(path, compressed=True) -> Dict[str, np.ndarray]:
    """Read an episode file from disk and return its entries as a plain dict.

    Args:
        path: Location of the file to read.
        compressed: When True the file is loaded with pickling disabled and
            indexed by key directly. When False pickling is allowed and the
            loaded object is unwrapped with ``.item()`` before being indexed.

    Returns:
        A dict mapping every key of the loaded container to its value.
    """
    with open(path, "rb") as handle:
        loaded = np.load(handle, allow_pickle=not compressed)
        # The non-compressed path stores the mapping inside a single array
        # element, so it has to be pulled out before keys can be looked up.
        container = loaded if compressed else loaded.item()
        # Build the result inside the `with` block, while the file is open.
        return {name: container[name] for name in container.keys()}


def compare(o, n):
    """Check that two episode dicts hold exactly the same arrays.

    Intended for validating lossless round trips, so values must match
    exactly rather than within a tolerance.

    Args:
        o: Reference episode, mapping key -> array.
        n: Episode to validate against ``o``.

    Raises:
        AssertionError: If the key sets differ, or if any pair of values
            differs in shape, dtype or element values.
    """
    # Raise explicitly instead of using `assert`, so the checks are not
    # stripped when Python runs with -O.
    if o.keys() != n.keys():
        raise AssertionError(f"key sets differ: {list(o)} vs {list(n)}")

    for key in o:
        expected = np.asarray(o[key])
        actual = np.asarray(n[key])
        # Shape is checked on its own because element-wise comparisons
        # broadcast, which would let e.g. (3,) vs (1,) slip through.
        if expected.shape != actual.shape:
            raise AssertionError(
                f"{key!r}: shape {expected.shape} != {actual.shape}"
            )
        if expected.dtype != actual.dtype:
            raise AssertionError(
                f"{key!r}: dtype {expected.dtype} != {actual.dtype}"
            )
        if not np.array_equal(expected, actual):
            raise AssertionError(f"{key!r}: array contents differ")


# npz
# Benchmark: re-save one episode with np.savez_compressed, then measure how
# much further each external compressor shrinks that .npz file and verify
# that decompressing gives back the same data.
PATH = "/checkpoint/qingfeiyou/offlinerl/compressed/temp/original/20230601T164859_1900_1001_142_19.00.npz"
print(f"npz file size: {os.path.getsize(PATH)}")

original = load_episode(PATH, compressed=True)

np.savez_compressed("original.npz", **original)
print(f"npz file size: {os.path.getsize('original.npz')}")

# `zip` updates an existing archive in place, so an archive left behind by an
# earlier run could carry stale members and inflate the reported size.
try:
    os.remove("zip_compressed.zip")
except FileNotFoundError:
    pass

# (label, compress command, compressed artifact, decompress command)
compressors = [
    (
        "xz",
        ["xz", "-9", "original.npz"],
        "original.npz.xz",
        ["xz", "-d", "original.npz.xz"],
    ),
    (
        "zip",
        ["zip", "-9", "zip_compressed.zip", "original.npz"],
        "zip_compressed.zip",
        ["unzip", "-o", "zip_compressed.zip"],  # Overwrite existing file
    ),
    (
        "bzip2",
        ["bzip2", "-zf9", "original.npz"],
        "original.npz.bz2",
        ["bzip2", "-d", "original.npz.bz2"],
    ),
    (
        "gzip",
        ["gzip", "-9", "original.npz"],
        "original.npz.gz",
        ["gzip", "-d", "original.npz.gz"],
    ),
]

for label, compress_cmd, artifact, decompress_cmd in compressors:
    # check=True: a failing tool must stop the benchmark instead of letting
    # the size report / round-trip check run against leftover files.
    subprocess.run(compress_cmd, check=True)
    print(f"{label} file size: {os.path.getsize(artifact)}")
    subprocess.run(decompress_cmd, check=True)
    new_episode = load_episode("original.npz", compressed=True)
    compare(original, new_episode)


npz file size: 11087109
npz file size: 11087109
xz file size: 5410472
  adding: original.npz (deflated 10%)
zip file size: 9927207
Archive:  zip_compressed.zip
  inflating: original.npz            
bzip2 file size: 8375949
gzip file size: 9927065
