In [5]:
import os
import glob
import numpy as np

from skimage.transform import resize
import gwyfile
from gwyfile import util as gwyutil


# ==========================
# CONFIGURATION
# ==========================
# Inputs: raw Gwyddion scans and their matching .npy segmentation masks.
GWY_DIR = ".//data_sergio_12.10.25//topography"
MASK_DIR = ".//data_sergio_12.10.25//masks"

# Outputs: resized topography / mask arrays, written as .npy files.
OUT_TOPO_DIR = ".//data_sergio_12.10.25//unet_dataset//topography"
OUT_MASK_DIR = ".//data_sergio_12.10.25//unet_dataset//masks"

# Spatial size every array is resampled to, as (H, W).
TARGET_SIZE = (256, 256)

# Create the output folders up front; exist_ok makes re-running the cell safe.
for _out_dir in (OUT_TOPO_DIR, OUT_MASK_DIR):
    os.makedirs(_out_dir, exist_ok=True)


In [7]:

def log_transform_sign_preserving(arr, eps=0.0):
    """
    Apply a log-like transform that works with negative values.

        y = sign(x) * log(1 + |x| + eps)

    With the default ``eps=0.0`` this is exactly ``sign(x) * log(1 + |x|)``.

    Parameters
    ----------
    arr : array-like
        Input values of any real dtype; converted to float32 before the
        transform.
    eps : float, optional
        Extra offset added to ``|x|`` inside the logarithm. Defaults to 0.0
        because ``log1p`` is already finite at ``|x| == 0``, so no guard
        against ``log(0)`` is needed. A non-zero value shifts every non-zero
        sample by roughly ``sign(x) * eps``, which introduces a jump at zero
        crossings and swamps inputs whose magnitude is comparable to ``eps``.
        NOTE(review): Gwyddion height channels are presumably stored in
        metres (values around 1e-9..1e-6) - confirm; if so, the previous
        default of 1e-9 was on the order of the signal itself.

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as ``arr``.
    """
    values = np.asarray(arr).astype(np.float32)
    magnitude = np.abs(values)
    if eps:
        # Only honour an explicitly requested offset.
        magnitude = magnitude + eps
    return np.sign(values) * np.log1p(magnitude)


def process_gwy_files():
    """
    Convert every ``.gwy`` file in GWY_DIR into a resized float32 ``.npy`` array.

    For each file (processed in sorted path order):
      1. load it and list its data channels by title;
      2. pick the channel titled "Topography", or the first channel if no
         channel has that title;
      3. apply ``log_transform_sign_preserving``;
      4. resize to TARGET_SIZE with bilinear interpolation;
      5. save the result to OUT_TOPO_DIR as ``<basename>_topo256.npy``.

    Note that the ``_topo256`` suffix is fixed text; it does not follow
    TARGET_SIZE if that constant is changed.

    Raises
    ------
    ValueError
        If a file contains no data channels at all.
    """
    gwy_paths = sorted(glob.glob(os.path.join(GWY_DIR, "*.gwy")))
    print(f"Found {len(gwy_paths)} .gwy files.")

    for path in gwy_paths:
        base = os.path.splitext(os.path.basename(path))[0]
        print(f"\nProcessing GWY: {base}")

        # Load file
        root = gwyfile.load(path)

        # Get all datafields as a dict: {title: GwyDataField}
        channels = gwyutil.get_datafields(root)
        print("  Available channels:", list(channels.keys()))

        # Without this guard the fallback below would die with a bare
        # StopIteration that does not say which file is at fault.
        if not channels:
            raise ValueError(f"GWY file {path} contains no data channels")

        # Try to get the "Topography" channel by title
        if "Topography" in channels:
            topo_field = channels["Topography"]
        else:
            # Fallback: take the first channel if "Topography" not found
            # (you can remove this if you want to fail loudly instead)
            topo_field = next(iter(channels.values()))
            print("  'Topography' not found, using first channel instead.")

        # GwyDataField -> NumPy array, cast once to float32
        topo = np.asarray(topo_field.data, dtype=np.float32)

        # Log transform (sign-preserving)
        topo_log = log_transform_sign_preserving(topo)

        # Resize to TARGET_SIZE
        topo_resized = resize(
            topo_log,
            TARGET_SIZE,
            order=1,              # bilinear interpolation
            mode="reflect",
            anti_aliasing=True,
            preserve_range=True,  # keep the transformed value range, no rescaling
        ).astype(np.float32)

        # Save as .npy
        out_path = os.path.join(OUT_TOPO_DIR, base + "_topo256.npy")
        np.save(out_path, topo_resized)
        print(f"  -> Saved: {out_path}")


# ==========================
# PROCESS MASK FILES (resize -> npy)
# ==========================
def process_mask_files():
    """
    Resize every ``.npy`` mask in MASK_DIR to TARGET_SIZE and save it.

    Each mask is loaded, stripped of a singleton leading or trailing channel
    axis if it is 3D, resized with nearest-neighbour sampling so label values
    are not blended, and written to OUT_MASK_DIR as ``<basename>_mask256.npy``.
    Integer masks keep their original dtype; any other dtype is stored as
    float32.

    Raises
    ------
    ValueError
        If a mask is not 2D once a singleton channel axis has been dropped.
    """
    mask_files = sorted(glob.glob(os.path.join(MASK_DIR, "*.npy")))
    print(f"Found {len(mask_files)} mask .npy files.")

    for mask_file in mask_files:
        stem, _ext = os.path.splitext(os.path.basename(mask_file))
        print(f"Processing mask: {stem}")

        mask = np.load(mask_file)

        # Drop a singleton channel axis: (1, H, W) first, otherwise (H, W, 1).
        if mask.ndim == 3:
            if mask.shape[0] == 1:
                mask = mask[0]
            elif mask.shape[-1] == 1:
                mask = mask[..., 0]

        if mask.ndim != 2:
            raise ValueError(f"Mask {mask_file} is not 2D after squeezing; got shape {mask.shape}")

        # order=0 is nearest-neighbour, so no new label values are invented.
        resized = resize(
            mask,
            TARGET_SIZE,
            order=0,
            mode="edge",
            anti_aliasing=False,
            preserve_range=True,
        )

        # Restore the source dtype for integer label maps; everything else
        # (e.g. a float 0/1 mask) is stored as float32.
        keeps_integer_dtype = np.issubdtype(mask.dtype, np.integer)
        if keeps_integer_dtype:
            resized = np.rint(resized).astype(mask.dtype)
        else:
            resized = resized.astype(np.float32)

        target = os.path.join(OUT_MASK_DIR, stem + "_mask256.npy")
        np.save(target, resized)
        print(f"  -> Saved: {target}")


if __name__ == "__main__":
    # Run both conversion stages in order: topography scans, then masks.
    for stage in (process_gwy_files, process_mask_files):
        stage()
    print("Done.")


Found 10 .gwy files.

Processing GWY: CdTe_0per_350C_2
  Available channels: ['Topography', 'Filtered Data 1']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topography\CdTe_0per_350C_2_topo256.npy

Processing GWY: CdTe_0per_350C_3_T
  Available channels: ['Topography']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topography\CdTe_0per_350C_3_T_topo256.npy

Processing GWY: CdTe_7per_350C_5_T
  Available channels: ['Topography']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topography\CdTe_7per_350C_5_T_topo256.npy

Processing GWY: CdTe_7per_350C_6_T
  Available channels: ['Topography']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topography\CdTe_7per_350C_6_T_topo256.npy

Processing GWY: CdTe_7per_350C_8_T
  Available channels: ['Topography']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topography\CdTe_7per_350C_8_T_topo256.npy

Processing GWY: CdTe_7per_400C_16_T
  Available channels: ['Topography']
  -> Saved: .//data_sergio_12.10.25//unet_dataset//topograph