```json
{
    "mask_name": "mask_00123.npy",
    "mask_height": 672,
    "mask_width": 630,
    "promote_type": "mask",
    "labels": {
        "3": {
            "instance_id": 3,
            "class_name": "table",
            "x1": 156,
            "y1": 333,
            "x2": 299,
            "y2": 414,
            "logit": 0.0
        },
        "5": {
            "instance_id": 5,
            "class_name": "table",
            "x1": 268,
            "y1": 463,
            "x2": 447,
            "y2": 666,
            "logit": 0.0
        }
    }
}
```

In [6]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

def analyze_npy_files(directory, show_plots=True):
    """Count the non-zero entries of every ``.npy`` file in a directory.

    Each ``*.npy`` file found directly inside ``directory`` is loaded, its
    non-zero entries are added to a running total and, when ``show_plots`` is
    true, a histogram of its values is displayed with one bin per integer.

    Args:
        directory: Directory (``str`` or ``Path``) searched non-recursively
            for ``*.npy`` files.
        show_plots: Draw one histogram per non-empty file. Defaults to
            ``True``; pass ``False`` to only compute the total.

    Returns:
        int: Number of array entries across all files that are not equal to 0.
        The result is 0 when no ``.npy`` file is found.
    """
    directory = Path(directory)
    total_nonzero_count = 0

    # Sorted so the histograms appear in a stable, reproducible order.
    for npy_file in sorted(directory.glob("*.npy")):
        mask = np.load(npy_file)
        total_nonzero_count += int(np.count_nonzero(mask))

        # An empty array has no min/max to derive bins from, so it gets no plot.
        if not show_plots or mask.size == 0:
            continue

        unique_values = np.unique(mask)
        # .item() yields plain Python numbers, so "highest + 2" cannot wrap
        # around in a narrow dtype such as uint8 or uint16.
        lowest = unique_values[0].item()
        highest = unique_values[-1].item()
        # Unit-wide bins centered on each integer value.
        bin_edges = np.arange(lowest, highest + 2) - 0.5

        fig, ax = plt.subplots()
        ax.hist(mask.ravel(), bins=bin_edges, edgecolor='black')
        ax.set_xticks(unique_values)  # Only label values that actually occur
        ax.set_title(f"Value Distribution in {npy_file.name}")
        ax.set_xlabel("Mask Value")
        ax.set_ylabel("Count")
        plt.show()
        plt.close(fig)  # Release the figure so long loops do not pile up memory

    print(f"Total count of all values except 0: {total_nonzero_count}")
    return total_nonzero_count

# Example usage. "path_to_npy_files" is a placeholder directory name: when the
# glob inside analyze_npy_files matches no .npy files, nothing is plotted and
# the reported total is 0.
directory = Path("path_to_npy_files")
total_nonzero_count = analyze_npy_files(directory)

Total count of all values except 0: 0


In [None]:
# Plot a value histogram for each .npy file in /tmp/mask_data and print the
# combined count of non-zero entries.
analyze_npy_files("/tmp/mask_data")

In [15]:
import numpy as np
from pathlib import Path
from PIL import Image
import matplotlib.cm as cm
import matplotlib.colors as mcolors

def generate_color_map(unique_values, invert_background=False):
    """Assign an RGB color to each mask value.

    Colors are taken from matplotlib's ``tab20`` palette and assigned by the
    position of a value in ``unique_values``, not by its magnitude, so
    unevenly spaced values (e.g. 0, 1, 2, 100) still get different colors.
    The palette is cycled, so colors repeat once there are more values than
    palette entries.

    Args:
        unique_values: Sequence of distinct mask values, e.g. the result of
            ``np.unique(mask)``.
        invert_background: When true and the value 0 is present, 0 is drawn
            white and the largest non-zero value is drawn black.

    Returns:
        dict: Maps each value to an ``(r, g, b)`` tuple of ints in 0-255.
        Empty when ``unique_values`` is empty.
    """
    if len(unique_values) == 0:
        return {}

    # Read the palette from the colormap object itself; cm.get_cmap() is
    # deprecated and no longer exists in recent matplotlib releases.
    palette = cm.tab20.colors
    color_map = {
        val: tuple(int(round(channel * 255)) for channel in palette[rank % len(palette)][:3])
        for rank, val in enumerate(unique_values)
    }

    if invert_background and 0 in color_map:
        color_map[0] = (255, 255, 255)  # White background
        max_val = max(unique_values)
        # Guard: if 0 is the largest value, keep the background white instead
        # of overwriting it with black.
        if max_val != 0:
            color_map[max_val] = (0, 0, 0)  # Black max value

    return color_map

def convert_npy_to_png(npy_file, png_file, invert_background=False):
    """Render a mask stored in a .npy file as an RGB .png image.

    Every distinct value in the mask is painted with the color that
    ``generate_color_map`` assigns to it; ``invert_background`` is forwarded
    to that function unchanged. The mask must be two-dimensional.
    """
    label_mask = np.load(npy_file)
    palette = generate_color_map(np.unique(label_mask), invert_background)

    height, width = label_mask.shape
    rgb = np.zeros((height, width, 3), dtype=np.uint8)
    # Paint one value at a time using a boolean selection of its pixels.
    for label in palette:
        rgb[label_mask == label] = palette[label]

    Image.fromarray(rgb, mode="RGB").save(png_file)
    print(f"Saved: {png_file}")

def convert_png_to_npy(png_file, npy_file):
    """Save a grayscale rendition of a PNG as a uint16 .npy array.

    The image is converted to grayscale before saving, so the stored numbers
    are pixel intensities that only approximate the original mask values.
    """
    grayscale = Image.open(png_file).convert("L")
    # uint16 is used to match the NPY mask format.
    np.save(npy_file, np.array(grayscale, dtype=np.uint16))
    print(f"Saved: {npy_file}")

def process_npy_directory(input_dir, output_dir, invert_background=False):
    """Write a color PNG into ``output_dir`` for each .npy file in ``input_dir``.

    ``output_dir`` is created (including parents) if it does not exist. Each
    PNG keeps the stem of its source file; ``invert_background`` is passed
    through to ``convert_npy_to_png``.
    """
    source_root, target_root = Path(input_dir), Path(output_dir)
    target_root.mkdir(parents=True, exist_ok=True)

    # Pair every source mask with its destination before converting anything.
    jobs = [
        (mask_path, target_root / (mask_path.stem + ".png"))
        for mask_path in source_root.glob("*.npy")
    ]
    for mask_path, image_path in jobs:
        convert_npy_to_png(mask_path, image_path, invert_background)

def process_png_directory(input_dir, output_dir):
    """Write a .npy file into ``output_dir`` for each .png file in ``input_dir``.

    ``output_dir`` is created (including parents) if it does not exist. Each
    output keeps the stem of its source image and is produced by
    ``convert_png_to_npy``.
    """
    source_root, target_root = Path(input_dir), Path(output_dir)
    target_root.mkdir(parents=True, exist_ok=True)

    # Pair every source image with its destination before converting anything.
    jobs = [
        (image_path, target_root / (image_path.stem + ".npy"))
        for image_path in source_root.glob("*.png")
    ]
    for image_path, array_path in jobs:
        convert_png_to_npy(image_path, array_path)

# # Example usage
# directory = Path("path_to_npy_files")
# output_directory = Path("path_to_png_files")
# process_npy_directory(directory, output_directory, invert_background=True)  # Set to False to keep standard colors

# # Example reverse conversion
# png_directory = Path("path_to_png_files")
# npy_output_directory = Path("path_to_reconstructed_npy")
# process_png_directory(png_directory, npy_output_directory)


In [None]:
# Convert every .npy mask in /tmp/mask_data/ to a color PNG in /tmp/mask_png.
# The third argument is invert_background (True here).
process_npy_directory("/tmp/mask_data/", "/tmp/mask_png", True)

In [19]:
import numpy as np
from pathlib import Path

def convert_npy_to_npz(input_dir, output_npz):
    """Bundle all ``.npy`` files of a directory into one compressed ``.npz``.

    Each array is stored under the stem of its file name (``mask_1.npy`` is
    stored as ``mask_1``).

    Args:
        input_dir: Directory (``str`` or ``Path``) searched non-recursively
            for ``*.npy`` files.
        output_npz: Destination archive. For a ``str`` or ``Path`` the parent
            directory is created if needed and ``.npz`` is appended when
            missing, which is what numpy does on its own when saving. Any
            other object (e.g. an open binary file) is handed to numpy as is.

    Returns:
        None. The path that was actually written is printed.
    """
    input_dir = Path(input_dir)

    if isinstance(output_npz, (str, Path)):
        target = str(output_npz)
        # numpy appends ".npz" silently; do it here so the message below
        # reports the file that really exists on disk.
        if not target.endswith(".npz"):
            target += ".npz"
        # Same convenience as the other helpers: create missing directories.
        Path(target).parent.mkdir(parents=True, exist_ok=True)
    else:
        target = output_npz

    # Sorted so the archive members are stored in a reproducible order.
    # NOTE: a source file named "file.npy" would clash with the first
    # parameter of np.savez_compressed when expanded as a keyword below.
    data_dict = {
        npy_file.stem: np.load(npy_file)
        for npy_file in sorted(input_dir.glob("*.npy"))
    }

    np.savez_compressed(target, **data_dict)
    print(f"Saved NPZ archive: {target}")

def extract_npz_to_npy(npz_file, output_dir):
    """Write every array of an ``.npz`` archive to its own ``.npy`` file.

    Args:
        npz_file: Path of the ``.npz`` archive to read.
        output_dir: Directory that receives one ``<key>.npy`` file per array
            in the archive; created (including parents) if missing.

    Returns:
        None. The path of each extracted file is printed.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # np.load returns an object that holds the archive open; the with-block
    # guarantees it is closed even if saving one of the arrays fails.
    with np.load(npz_file) as data:
        for key in data.files:
            npy_path = output_dir / f"{key}.npy"
            np.save(npy_path, data[key])
            print(f"Extracted: {npy_path}")

# # Example usage
# input_directory = Path("path_to_npy_files")
# output_npz_file = Path("output_dataset.npz")
# convert_npy_to_npz(input_directory, output_npz_file)

# # Example extraction
# extracted_npy_directory = Path("extracted_npy_files")
# extract_npz_to_npy(output_npz_file, extracted_npy_directory)

In [22]:
# Pack every .npy file in /tmp/mask_data/ into one compressed archive at
# /tmp/mask.npz.
convert_npy_to_npz("/tmp/mask_data/", "/tmp/mask.npz")

Saved NPZ archive: /tmp/mask.npz
