In [1]:
from astropy.io import fits
import numpy as np
import re
from astropy.io.fits import Header

In [2]:
hdul_cube = fits.open("M33_cube.fits")
hdul_maps = fits.open("M33_MAPS.fits")
hdul_wl = fits.open("M33_whitelight.fits")

In [3]:
for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    print(hdul.info())

Filename: M33_cube.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     238   ()      
  1  DATA          1 ImageHDU        46   (20, 20, 3761)   float32   
  2  STAT          1 ImageHDU        47   (20, 20, 3761)   float32   
  3  DQ            1 ImageHDU        46   (20, 20, 3761)   uint8   
None
Filename: M33_MAPS.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     273   ()      
  1  ID            1 ImageHDU        25   (20, 20)   float64   
  2  FLUX          1 ImageHDU        26   (20, 20)   float64   
  3  SNR           1 ImageHDU        25   (20, 20)   float64   
  4  SNRBIN        1 ImageHDU        25   (20, 20)   float64   
  5  BIN_ID        1 ImageHDU        25   (20, 20)   float64   
  6  V_STARS       1 ImageHDU        26   (20, 20)   float64   
  7  FORM_ERR_V_STARS    1 ImageHDU        26   (20, 20)   float64   
  8  SIGMA_STARS    1 ImageHDU        26   (20, 20)   float

# Check and remove duplicates for OBID and PROV keywords in FITS headers

In [4]:
def dedupe_compact_prefix_keywords(
    header: Header,
    prefix: str = "OBID",
    normalize=lambda v: v.strip() if isinstance(v, str) else v,
):
    """
    De-duplicate and compact numbered FITS keywords with a common prefix.

    Example (prefix="OBID"):
      OBID1="A", OBID2="A", OBID3="B"  ->  OBID1="A", OBID2="B"
      OBID1="A", OBID3="A", OBID5="C"  ->  OBID1="A", OBID2="C"

    Rules:
      - Operates on keywords matching r'^{prefix}(\\d+)$'
      - Keeps the *first* occurrence of each value (after `normalize`)
      - Removes all duplicates
      - Renumbers sequentially starting at 1
      - Preserves the comment from the first kept occurrence of each value
      - Inserts the compacted block back at (approximately) the location of the
        first matching keyword in the original header.

    Parameters
    ----------
    header : astropy.io.fits.Header
        Header to modify in-place.
    prefix : str
        Keyword prefix, e.g. "OBID" or "PROV" (for "PROV1", "PROV2", ...).
    normalize : callable
        Function applied to values before comparing for duplicates.
        Default strips whitespace for strings.

    Returns
    -------
    header : Header
        The same header object (modified in-place).
    changed : bool
        True if any modifications were made.
    """
    pat = re.compile(rf"^{re.escape(prefix)}(\d+)$")

    # Collect matching cards (in header order), keeping original keyword and comment
    matches = []
    indices = []
    for idx, card in enumerate(header.cards):
        m = pat.match(card.keyword)
        if m:
            matches.append((card.keyword, card.value, card.comment))
            indices.append(idx)

    if not matches:
        return header, False

    # Determine reinsertion anchor: immediately after the card preceding the first match
    first_idx = min(indices)
    anchor_key = header.cards[first_idx - 1].keyword if first_idx > 0 else None

    # Build de-duplicated list (preserve first occurrence)
    seen = set()
    kept = []
    for key, val, com in matches:
        norm = normalize(val)
        if norm in seen:
            continue
        seen.add(norm)
        kept.append((val, com))

    # If already compact and unique with sequential numbering, we can still leave as-is,
    # but easiest is to rewrite deterministically.
    # Remove all matching keys first
    for key, _, _ in matches:
        if key in header:
            del header[key]

    # Insert back compacted keys with new numbering
    changed = True
    for i, (val, com) in enumerate(kept, start=1):
        newkey = f"{prefix}{i}"
        card_tuple = (newkey, val, com)

        if anchor_key is None:
            # insert at top, preserving order as a block
            header.insert(i - 1, card_tuple)
        else:
            # insert after anchor; update anchor so the block stays ordered
            header.insert(anchor_key, card_tuple, after=True)
            anchor_key = newkey

    return header, changed


def dedupe_compact_prefix_in_hdulist(hdulist, prefix: str = "OBID", **kwargs):
    """
    Apply `dedupe_compact_prefix_keywords` to every HDU header in an HDUList-like object.

    Returns a list of booleans indicating which HDUs were changed.
    """
    changed_flags = []
    for hdu in hdulist:
        _, changed = dedupe_compact_prefix_keywords(hdu.header, prefix=prefix, **kwargs)
        changed_flags.append(changed)
    return changed_flags

hdul_cube[0].header, changed = dedupe_compact_prefix_keywords(hdul_cube[0].header, prefix="OBID")
hdul_maps[0].header, changed = dedupe_compact_prefix_keywords(hdul_maps[0].header, prefix="OBID")
hdul_wl[0].header, changed = dedupe_compact_prefix_keywords(hdul_wl[0].header, prefix="OBID")

hdul_cube[0].header, changed = dedupe_compact_prefix_keywords(hdul_cube[0].header, prefix="PROV")
hdul_maps[0].header, changed = dedupe_compact_prefix_keywords(hdul_maps[0].header, prefix="PROV")
hdul_wl[0].header, changed = dedupe_compact_prefix_keywords(hdul_wl[0].header, prefix="PROV")

# Remove and replace keywords from all HDUs

Updates: 
- Remove `COMMENT` 
- Update `PROCSOFT` -> "pymusepipe" (take from the paper)
- Update `REFERENC` -> "doi:10.48550/arXiv.2509.15305"
- `ABMAGLIM` -> **Need to get value from code provided**
- `PIXNOISE` -> **Need to get value from code provided**
- Update `BUNIT` -> following ESO convensions for cube and whitelight, and also maps (where needed)
- Remove `PCi_j` in all HDUs (I think they were only present in the MAPS file?)
- Update `WCSAXES` -> 2 (for whitelight image)
- Update `RADESYS` -> "FK5" (in primary header of all files)

This need updating after the cube and whitelight files have the same number of extensions: 
- Update `HDUDOC` -> "SPD" (for cube and whitelight headers - except primary)
- Update `HDUVERS` -> "SDP Version 8" (for cube and whitelight headers - except primary)

In [5]:
for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    for hdu in hdul:
        if "COMMENT" in hdu.header:
            del hdu.header["COMMENT"]

PROCSOFT = "pymusepipe"
REFERENC =  "doi:10.48550/arXiv.2509.15305"

ABMAGLIM = 10
PIXNOISE = 10

HDUDOC  = 'SDP'
HDUVERS = "SDP Version 8"

for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    hdul[0].header["PROCSOFT"] = PROCSOFT
    hdul[0].header["REFERENC"] = REFERENC
    hdul[0].header["ABMAGLIM"] = ABMAGLIM
    hdul[0].header["PIXNOISE"] = PIXNOISE

# Remove PC1_1 and PC1_2 from all HDUs
for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    for hdu in hdul:
        for PC in ["PC1_1", "PC1_2", "PC2_1", "PC2_2"]:
            if PC in hdu.header:
                del hdu.header[PC]

# This won't work as the number of HDUs differs bewteen the cube and the whitelight
for hdul in [hdul_cube, hdul_wl]:
    try:
        for hdu in [hdul[1], hdul[2], hdul[3]]:
            hdu.header["HDUDOC"] = HDUDOC
            hdu.header["HDUVERS"] = HDUVERS
    except IndexError:
        print("HDU index out of range, skipping...")
        pass

# Update format for units for cube and whitelight
for hdul in [hdul_cube, hdul_wl]:
    hdul[1].header["10**(-20)angstrom**(-1).cm**(-2).erg.s**(-1)"]
    hdul[2].header["10**(-40)angstrom**(-2).cm**(-4).erg**(2).s**(-2)"]

# Update format for unit format for maps
for hdu in hdul_maps[1:]:
    if "1e-20 erg/s/cm2/spaxel" in hdu.header.get("BUNIT", ""):
        hdu.header["BUNIT"] = "10**(-20)erg.s**(-1).cm**(-2).spaxel**(-1)"

# Set RADESYS to FK5 in primary headers
for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    hdul[0].header["RADESYS"] = "FK5"
    hdul[0].header["EQUINOX"] = 2000.0

# Set WCSAXES to 2 in relevant HDUs
hdul_wl[1].header["WCSAXES"] = 2
hdul_wl[2].header["WCSAXES"] = 2

HDU index out of range, skipping...


In [6]:
## REMOVE -- ONLY FOR TESTING 
CRVAL1 = 23.462454565353
CRVAL2 = 30.624825604782

for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    for hdu in hdul:
        if "RA" in hdu.header:
            hdu.header["RA"] = CRVAL1
        if "DEC" in hdu.header:
            hdu.header["DEC"] = CRVAL2
        if "CRPIX1" in hdu.header:
            hdu.header["CRPIX1"] = 1.0
        if "CRPIX2" in hdu.header:
            hdu.header["CRPIX2"] = 1.0

In [7]:
for hdul in [hdul_cube, hdul_maps, hdul_wl]:
    for hdu in hdul:
        hdu.add_datasum()
        hdu.add_checksum()

# Save to disk
hdul_cube.writeto("M33_cube_updated.fits", overwrite=True)
hdul_maps.writeto("M33_MAPS_updated.fits", overwrite=True)
hdul_wl.writeto("M33_whitelight_updated.fits", overwrite=True)