In [79]:
import numpy as np
import sys
import os

def inspect_npz(filepath):
    """
    Load and inspect an npz file, printing names and shapes of all arrays.

    For every array in the archive this prints its name, shape, dtype, element
    count and in-memory size, followed by the combined size of all arrays.

    Each array is read from the archive exactly once: indexing an ``NpzFile``
    re-reads the member from disk on every access, so the combined size is
    accumulated inside the reporting loop rather than by loading every array
    a second time for the summary.

    Args:
        filepath (str): Path to the npz file

    Returns:
        None. Results and errors are reported with ``print``; any exception
        raised while loading is caught and printed rather than propagated.
    """
    try:
        # Check if file exists
        if not os.path.exists(filepath):
            print(f"Error: File '{filepath}' not found.")
            return

        # Load the npz file (the context manager closes the underlying zip)
        with np.load(filepath) as data:
            print(f"Inspecting NPZ file: {filepath}")
            print("-" * 50)

            # Get all array names
            array_names = list(data.files)

            if not array_names:
                print("No arrays found in the file.")
                return

            print(f"Total arrays: {len(array_names)}")
            print()

            # Print details for each array, summing the sizes as we go so that
            # no array has to be loaded twice.
            total_memory = 0
            for name in array_names:
                array = data[name]
                total_memory += array.nbytes
                print(f"Array name: '{name}'")
                print(f"  Shape: {array.shape}")
                print(f"  Data type: {array.dtype}")
                print(f"  Size: {array.size} elements")
                print(f"  Memory usage: {array.nbytes / 1024:.2f} KB")
                print()

            # Summary statistics
            print("-" * 50)
            print(f"Total memory usage: {total_memory / 1024:.2f} KB ({total_memory / (1024*1024):.2f} MB)")

    except Exception as e:
        print(f"Error loading npz file: {e}")



In [80]:
# List every array stored in fly001.npz (name, shape, dtype, element count, memory).
inspect_npz('fly001.npz')

Inspecting NPZ file: fly001.npz
--------------------------------------------------
Total arrays: 8

Array name: 'xcoords'
  Shape: (10304,)
  Data type: float64
  Size: 10304 elements
  Memory usage: 80.50 KB

Array name: 'ycoords'
  Shape: (10304,)
  Data type: float64
  Size: 10304 elements
  Memory usage: 80.50 KB

Array name: 'xcoords_start'
  Shape: (10304,)
  Data type: float64
  Size: 10304 elements
  Memory usage: 80.50 KB

Array name: 'ycoords_start'
  Shape: (10304,)
  Data type: float64
  Size: 10304 elements
  Memory usage: 80.50 KB

Array name: 'diff3d'
  Shape: (10304, 64, 64)
  Data type: uint16
  Size: 42205184 elements
  Memory usage: 82432.00 KB

Array name: 'probeGuess'
  Shape: (64, 64)
  Data type: complex128
  Size: 4096 elements
  Memory usage: 64.00 KB

Array name: 'objectGuess'
  Shape: (232, 232)
  Data type: complex128
  Size: 53824 elements
  Memory usage: 841.00 KB

Array name: 'scan_index'
  Shape: (10304,)
  Data type: int64
  Size: 10304 elements
  Memor

In [1]:
# #!/usr/bin/env python3
# """
# transpose_diffraction.py

# Swap the (row, col) image dimensions of the ‘diff3d’ array
# while preserving every other field in the NPZ archive.
# Usage:
#     python transpose_diffraction.py fly001.npz           # -> fly001_transposed.npz
#     python transpose_diffraction.py in.npz out.npz       # -> custom output name
# """
# import sys
# from pathlib import Path
# import numpy as np


# def transpose_diffraction(in_file: str | Path, out_file: str | Path | None = None) -> None:
#     in_path = Path(in_file)
#     if out_file is None:
#         out_file = in_path.with_stem(in_path.stem + "_transposed")

#     # Load existing archive (no pickles for safety)
#     npz = np.load(in_path, allow_pickle=False)

#     # Transpose last two axes of diffraction stack: (scan, H, W) → (scan, W, H)
#     diff_t = npz["diff3d"].transpose(0, 2, 1)

#     # Re-assemble the archive, replacing only ‘diff3d’
#     save_dict = {k: (diff_t if k == "diff3d" else npz[k]) for k in npz.files}

#     # Save compressed to keep file size reasonable
#     np.savez_compressed(out_file, **save_dict)
#     print(f"✔ Saved transposed data → {out_file}")


# # if __name__ == "__main__":
# #     if len(sys.argv) < 2:
# #         print(__doc__)
# #         sys.exit(1)

# #     transpose_diffraction(*sys.argv[1:])


In [153]:
#!/usr/bin/env python3
"""
transpose_rename_convert.py

- Loads an NPZ archive.
- Moves the scan axis of 'diff3d' from first to last: (scan, H, W) → (H, W, scan).
- Renames 'diff3d' → 'diffraction'.
- Converts all uint16 arrays to float32.
- Writes out a new compressed NPZ.

Usage:
    python transpose_rename_convert.py input.npz
        → input_transposed.npz

    python transpose_rename_convert.py input.npz output.npz
        → custom output filename
"""
import sys
from pathlib import Path
import numpy as np

"""
transpose_rename_convert.py

- Loads an NPZ archive.
- Moves the scan axis of 'diff3d' from first to last: (scan, H, W) → (H, W, scan).
- Renames 'diff3d' → 'diffraction'.
- Converts all uint16 arrays to float32.
- Optionally flips X and/or Y coordinate arrays.
- Writes out a new compressed NPZ.

Usage:
    python transpose_rename_convert.py input.npz
        → input_transposed.npz

    python transpose_rename_convert.py input.npz output.npz --flipx --flipy
        → custom output, with both axes flipped in coords
"""
import sys
from pathlib import Path
import numpy as np
import argparse

def transpose_rename_convert(
    in_file: str | Path,
    out_file: str | Path | None = None,
    flipx: bool = False,
    flipy: bool = False,
) -> None:
    in_path = Path(in_file)
    if out_file is None:
        out_file = in_path.with_stem(in_path.stem + "_transposed")

    npz = np.load(in_path, allow_pickle=False)
    save_dict: dict[str, np.ndarray] = {}

    for key in npz.files:
        arr = npz[key]

        # 1) Transpose & rename diff3d → diffraction
        if key == "diff3d":
            arr = arr.transpose(1, 2, 0)
            new_key = "diffraction"
        else:
            new_key = key

        # 2) Convert uint16 → float32
        if arr.dtype == np.uint16:
            arr = arr.astype(np.float32)

        # 3) Flip coords if requested
        #   - any array with shape[1]==2 is treated as (x, y) pairs
        #   - any 1D array named exactly 'x' or 'y'
        if flipx or flipy:
            # 2-column arrays: flip last-axis components
            if arr.ndim == 2 and arr.shape[1] == 2 and arr.dtype.kind in "if":
                if flipx:
                    arr[:, 0] = -arr[:, 0]
                if flipy:
                    arr[:, 1] = -arr[:, 1]

            # 1D arrays named 'x' or 'y'
            elif arr.ndim == 1 and new_key.lower() in ("x", "y") and arr.dtype.kind in "if":
                if new_key.lower() == "x" and flipx:
                    arr = -arr
                if new_key.lower() == "y" and flipy:
                    arr = -arr

        save_dict[new_key] = arr

    np.savez_compressed(out_file, **save_dict)
    print(f"✔ Saved converted data → {out_file}")


In [151]:
# transpose_diffraction('fly001.npz')

In [158]:
# transpose_and_rename('fly001.npz')
# NOTE(review): transpose_rename_convert only negates 2-column (x, y) arrays and
# 1-D arrays named exactly 'x' or 'y'. The inspect_npz listing of fly001.npz earlier
# in this notebook shows 1-D 'xcoords' / 'ycoords' arrays instead, so, assuming
# fly/fly001.npz has the same keys, flipx/flipy leave the coordinates unchanged
# here. TODO confirm whether the flip was meant to apply.
transpose_rename_convert('fly/fly001.npz', flipx = True, flipy = True)

✔ Saved converted data → fly/fly001_transposed.npz


In [None]:
# prepare_probe_data.py
import numpy as np
import os
import argparse

def extract_probes(source_dir, probe_output_dir):
    """
    Extracts 'probeGuess' from NPZ files in a source directory and saves them
    into a new directory, formatted for the ptycho_torch loader.

    Every entry of ``source_dir`` whose name ends in ``.npz`` is opened; when it
    contains a 'probeGuess' array, that array is written to a same-named file in
    ``probe_output_dir`` under the key 'probe'. Archives without 'probeGuess'
    are skipped with a warning, and a failure on one file is reported without
    stopping the remaining files. Entries are processed in sorted name order so
    the log is reproducible.

    Because output files reuse the source file names, writing into the source
    directory itself would replace each full archive with a probe-only one; the
    function refuses to run in that case.

    Args:
        source_dir (str): The directory containing the main ptychography data NPZ files.
        probe_output_dir (str): The directory where the extracted probes will be saved.
            Created if missing. Must not be the same directory as ``source_dir``.

    Returns:
        None. Progress, warnings and errors are reported with ``print``.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(probe_output_dir, exist_ok=True)
    print(f"Ensured probe output directory exists: {probe_output_dir}")

    # Get the list of files from the source directory (sorted: os.listdir
    # returns entries in arbitrary order)
    try:
        filenames = sorted(os.listdir(source_dir))
    except FileNotFoundError:
        print(f"Error: Source directory not found at '{source_dir}'")
        return
    except NotADirectoryError:
        print(f"Error: Source path '{source_dir}' is not a directory")
        return

    # Both directories exist at this point, so samefile() cannot fail on a
    # missing path. Same directory => outputs would overwrite their own inputs.
    if os.path.samefile(source_dir, probe_output_dir):
        print(
            f"Error: probe_output_dir '{probe_output_dir}' is the same directory as "
            "source_dir; refusing to overwrite the source archives."
        )
        return

    print(f"Found {len(filenames)} file(s) in {source_dir}")

    # Iterate over each file in the source directory
    for filename in filenames:
        if not filename.endswith('.npz'):
            continue

        source_path = os.path.join(source_dir, filename)

        try:
            # Load the data from the source file
            with np.load(source_path) as data:
                # Check the key list rather than the archive itself so the
                # membership test never needs to read an array.
                if 'probeGuess' not in data.files:
                    print(f"Warning: 'probeGuess' not found in {source_path}. Skipping this file.")
                    continue

                # Extract the probe array (fully read before the archive closes)
                probe_array = data['probeGuess']

            # Define the path for the new probe file
            output_path = os.path.join(probe_output_dir, filename)

            # Save the probe into a new NPZ file.
            # The loader expects the key to be 'probe', not 'probeGuess'.
            np.savez(output_path, probe=probe_array)

            print(f"Successfully extracted probe from {filename} and saved to {output_path}")

        except Exception as e:
            print(f"An error occurred while processing {filename}: {e}")


In [None]:
# NOTE: extract_probes() creates its output directory itself via
# os.makedirs(..., exist_ok=True), so this cell is optional.
!mkdir probes

In [94]:
# Copy the 'probeGuess' array of every .npz archive in the current directory into ./probes.
extract_probes('.', 'probes')

Ensured probe output directory exists: probes
Found 5 file(s) in .
Successfully extracted probe from fly001.npz and saved to probes/fly001.npz
Successfully extracted probe from fly001_transposed.npz and saved to probes/fly001_transposed.npz
