In [4]:
%pip install netCDF4
%pip install zstandard

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
from netCDF4 import Dataset
import zstandard as zstd
import numpy as np

In [6]:
def read_file_with_var(read_file_name, var_name):
    """Dump one variable of a netCDF file to a raw binary file.

    Opens ``data/<read_file_name>``, reads the whole variable (``var[:]``),
    serialises it with ``tobytes()`` and writes the bytes to
    ``data/output/<read_file_name>_<var_name>.bin``. The output directory is
    created when missing and the written path is printed.

    Args:
        read_file_name: Name of the dataset file, relative to ``data/``.
        var_name: Key of the variable to read from ``Dataset.variables``.

    Returns:
        None. The result is the file written to disk.

    Raises:
        Any exception raised while opening the dataset, looking up the
        variable or writing the output propagates unchanged; the dataset
        is closed in every case.
    """
    # The context manager guarantees the dataset is closed even when the
    # variable lookup or the read fails; a trailing close() call would be
    # skipped on any exception and leak the open file handle.
    with Dataset("data/" + read_file_name) as data:
        output = data.variables[var_name][:].tobytes()

    write_path = "data/output/" + read_file_name + "_" + var_name + ".bin"
    os.makedirs(os.path.dirname(write_path), exist_ok=True)

    with open(write_path, "wb") as file:
        file.write(output)
        print(f"File written to {write_path}")

In [7]:
def zstd_compress(file_name, compressed_file_name):
    """Compress a file with Zstandard and write the result to another file.

    The whole input file is read into memory, compressed in one call with a
    default ``zstd.ZstdCompressor()`` and written to ``compressed_file_name``.
    The destination directory is created when missing and the destination
    path is printed.

    Args:
        file_name: Path of the file to compress.
        compressed_file_name: Path of the compressed output file. May be a
            bare file name, in which case it is written to the current
            working directory.

    Returns:
        None. The result is the file written to disk.
    """
    cctx = zstd.ZstdCompressor()

    with open(file_name, "rb") as file:
        file_data = file.read()

    compressed = cctx.compress(file_data)

    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when the target
    # path actually has a directory component.
    target_dir = os.path.dirname(compressed_file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    with open(compressed_file_name, "wb") as compressed_file:
        compressed_file.write(compressed)
        print(f"File compressed and written to {compressed_file_name}")

In [8]:
def get_file_size(file_name):
    """Print and return the size of ``file_name`` in bytes.

    Args:
        file_name: Path of the file to measure.

    Returns:
        The file size in bytes, as reported by the file system.
    """
    size_in_bytes = os.stat(file_name).st_size
    print(f"Size of {file_name}: {size_in_bytes} bytes")
    return size_in_bytes

In [9]:
def compression_ratio(original_file, compressed_file):
    """Print and return how many times smaller the compressed file is.

    Both sizes are obtained through ``get_file_size`` (which prints them),
    then the quotient original / compressed is printed with two decimals.

    Args:
        original_file: Path of the uncompressed file.
        compressed_file: Path of the compressed file.

    Returns:
        The original size divided by the compressed size, as a float.
    """
    # Measure the original first so the printed sizes keep their order.
    size_before = get_file_size(original_file)
    size_after = get_file_size(compressed_file)
    quotient = size_before / size_after

    print(f"Compression ratio: {quotient:.2f}")
    return quotient

In [10]:
def compress_all_vars(read_file_name):
    """Dump and compress every variable of a dataset and collect the ratios.

    For each variable name in ``data/<read_file_name>`` this writes the raw
    bytes to ``data/output/<read_file_name>_<var>.bin`` (via
    ``read_file_with_var``), compresses that file to
    ``data/output/<read_file_name>_<var>_compressed.bin`` (via
    ``zstd_compress``) and measures the ratio (via ``compression_ratio``).

    Args:
        read_file_name: Name of the dataset file, relative to ``data/``.

    Returns:
        dict mapping each variable name to its compression ratio.
    """
    # This handle is only needed to list the variable names, so release it
    # right away: read_file_with_var() reopens the file for every variable,
    # and the context manager also closes it if listing the names fails.
    with Dataset("data/" + read_file_name) as data:
        var_names = list(data.variables.keys())

    compression_ratios = {}

    for var_name in var_names:
        file_path = "data/output/" + read_file_name + "_" + var_name
        raw_path = file_path + ".bin"
        compressed_path = file_path + "_compressed.bin"

        read_file_with_var(read_file_name, var_name)
        zstd_compress(raw_path, compressed_path)
        compression_ratios[var_name] = compression_ratio(raw_path, compressed_path)

    return compression_ratios

In [11]:
# Dump and compress every variable of the sample file, then print the
# returned {variable name: compression ratio} dict.
print(compress_all_vars("GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4"))

File written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lon.bin
File compressed and written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lon_compressed.bin
Size of data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lon.bin: 9216 bytes
Size of data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lon_compressed.bin: 1462 bytes
Compression ratio: 6.30
File written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lat.bin
File compressed and written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lat_compressed.bin
Size of data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lat.bin: 5768 bytes
Size of data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_lat_compressed.bin: 724 bytes
Compression ratio: 7.97
File written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01.nc4_time.bin
File compressed and written to data/output/GEOS.fp.asm.inst1_2d_lfo_Nx.20200303_0000.V01