# Generate mask data files

To self-generate files containing mask data, download the following files for each mask type. Warning: these files are large and may take a while to download.

- ["BRIGHT" and "MEDIUM-bright" stars](https://portal.nersc.gov/cfs/cosmo/data/legacysurvey/dr10/masking/gaia-mask-dr10.fits.gz) (2.68 GB)
- [Globular Clusters and Planetary Nebulae](https://portal.nersc.gov/cfs/cosmo/data/legacysurvey/dr10/masking/NGC-star-clusters.fits) (33 KB)
- [Large Galaxies](https://portal.nersc.gov/cfs/cosmo/data/legacysurvey/dr10/masking/SGA-ellipse-v3.0.kd.fits) (5.78 GB)

Import required libraries.

In [None]:
import os

import numpy as np
import numpy.lib.recfunctions  # explicit import so that np.lib.recfunctions is always available
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy.io import fits as fits

## Mask Data File for Stars
Load the downloaded file into the workspace. Clean the data so that only the ra, dec, and radius columns remain and all objects at or above a declination of 30 are removed. The cell expects the decompressed file `gaia-mask-dr10.fits` in the working directory. Warning: the cell below takes around 4 minutes to run due to the large file size.

In [None]:
# Build the star mask table from the Gaia mask file: keep only stars flagged
# bright or medium-bright, keep only the ra, dec and radius columns, and drop
# every star at or above the declination limit. The result is `mask_data_cut`.
path = os.path.abspath('gaia-mask-dr10.fits')

print("Loading file...")
# The context manager closes the memory-mapped file even if a step below fails.
with fits.open(path, memmap=True) as hdu:
    print("File loaded. Assigning header and data...")
    mask_header = hdu[1].header
    mask_data = hdu[1].data

    # keep only rows flagged as bright or medium-bright
    print("Checking for bright/medium stars in DESI footprint...")
    mask_data_cut = mask_data[(mask_data['isbright'] | mask_data['ismedium'])]

    # drop unneeded columns
    print("Dropping unneeded columns...(Retaining ra, dec, radius)")
    allowed_fields = ['ra', 'dec', 'radius']
    drop_fields = [field for field in mask_data.names if field not in allowed_fields]
    # Drop from the filtered table, not from the full one; otherwise the
    # bright/medium selection made above would be discarded.
    mask_data_cut = np.lib.recfunctions.rec_drop_fields(mask_data_cut, drop_fields)

    # Cut out all stars at declinations of DEC and above
    dec = 30
    print(f"Removing stars above DEC = {dec}")
    mask_data_cut = mask_data_cut[mask_data_cut['dec'] < dec]

# the file is closed at this point; only the cleaned copy is needed from here on
print("Data cleaned!")
del hdu

Divide the cleaned data into more manageable chunks by setting ra_divs and dec_divs, the number of divisions across the sky in right ascension and declination. Each chunk is saved to a separate compressed file; these files are later loaded and combined when generating the sky catalogue.

In [None]:
# Split the cleaned star table (`mask_data_cut`) into ra_divs x dec_divs sky
# boxes and save each box to its own compressed file:
# mask_data_0.npz, mask_data_1.npz, ... (RA is the outer loop, DEC the inner).
ra_divs = 2
dec_divs = 6

# N divisions need N + 1 edges; each pair of consecutive edges bounds one box.
ra_ranges = np.linspace(0, 360, ra_divs + 1)
dec_ranges = np.linspace(-90, 30, dec_divs + 1)

print(ra_ranges)
print(dec_ranges)

mask_box = []

for ra_lo, ra_hi in zip(ra_ranges[:-1], ra_ranges[1:]):
    # Half-open [lo, hi) intervals: a star lying exactly on an interior edge
    # falls into exactly one box instead of being lost or duplicated.
    # NOTE(review): assumes RA values lie in [0, 360) - confirm for this catalogue.
    ra_bounds = (mask_data_cut['ra'] >= ra_lo) & (mask_data_cut['ra'] < ra_hi)

    for dec_lo, dec_hi in zip(dec_ranges[:-1], dec_ranges[1:]):
        # The table was already cut to dec < 30, so excluding the top edge
        # of the last box loses nothing.
        dec_bounds = (mask_data_cut['dec'] >= dec_lo) & (mask_data_cut['dec'] < dec_hi)

        print(f"RA: {ra_lo}, DEC: {dec_lo}")
        mask_box.append(mask_data_cut[ra_bounds & dec_bounds])

for i, box in enumerate(mask_box):
    print(f"Saving mask_data_{i}")
    np.savez_compressed(f'mask_data_{i}', box)

print("Done")

## Mask Data File for Large Galaxies
Load the downloaded file into the workspace. Select objects with measured magnitudes brighter than the 21 mag limit. Remove unneeded columns and convert galaxy major-axis diameters to radii so that only ra, dec, and radius values remain in the dataframe. Save the resulting dataframe. Warning: the cell below takes a while to run due to the large file size.

In [None]:
# Build the large-galaxy mask table: keep galaxies with a measured magnitude
# of 21 or brighter in at least one of the g, r, z bands, keep only the RA, DEC
# and DIAM columns, drop galaxies at or above the declination limit, convert
# the diameter to a radius, and save the result to mask_data_galaxies.npz.
path = os.path.abspath('SGA-ellipse-v3.0.kd.fits')

print("Loading file...")
# The context manager closes the memory-mapped file even if a step below fails.
with fits.open(path, memmap=True) as hdu:
    print("File loaded. Assigning header and data...")
    mask_header = hdu[1].header
    mask_data = hdu[1].data

    # A galaxy is kept if, in any band, its magnitude is non-negative
    # (negative values are treated as "not measured") and no fainter than 21.
    print("Selecting rows with g magnitude < 21...")
    bright_in_any_band = np.zeros(len(mask_data), dtype=bool)
    for band_column in ('G_MAG_SB22', 'R_MAG_SB22', 'Z_MAG_SB22'):
        band_mag = mask_data[band_column]
        bright_in_any_band |= (band_mag >= 0) & (band_mag <= 21)
    mask_data_cut = mask_data[bright_in_any_band]

    print("Dropping unneeded columns...(Retaining ra, dec, diam)")
    allowed_fields = ['RA', 'DEC', 'DIAM']
    drop_fields = [field for field in mask_data.names if field not in allowed_fields]
    mask_data_cut = np.lib.recfunctions.rec_drop_fields(mask_data_cut, drop_fields)

    # Cut out all galaxies at declinations of dec = 30 and above
    dec = 30
    print(f"Removing galaxies above DEC = {dec}")
    mask_data_cut = mask_data_cut[mask_data_cut['DEC'] < dec]

    # Halve the diameter values (in arcmin) to get the radius of the semi-major axis
    mask_data_cut['DIAM'] = mask_data_cut['DIAM'] / 2

    # Rename 'DIAM' to 'RADIUS' by name rather than by position, so the
    # result is correct whatever order the columns have in the file.
    mask_data_cut = np.lib.recfunctions.rename_fields(mask_data_cut, {'DIAM': 'RADIUS'})

# the file is closed at this point; only the cleaned copy is needed from here on
print("Data cleaned!")
del hdu

# Generate mask data file
print("Saving mask_data_galaxies...")
np.savez_compressed('mask_data_galaxies', mask_data_cut)

## Mask Data File for Globular Clusters and Planetary Nebulae
Load the downloaded file into the workspace. No magnitude check is needed, since all objects are brighter than 21 mag. Remove unneeded columns and retain only ra, dec, and radius. Save the resulting dataframe.

In [None]:
# Build the globular-cluster / planetary-nebula mask table: keep only the
# ra, dec and radius columns, drop objects at or above the declination limit,
# and save the result to mask_data_clusters.npz.
path = os.path.abspath('NGC-star-clusters.fits')

print("Loading file...")
# The context manager closes the file even if a step below fails.
with fits.open(path, memmap=True) as hdu:
    print("File loaded. Assigning header and data...")
    mask_header = hdu[1].header
    mask_data = hdu[1].data

    # drop unneeded columns
    print("Dropping unneeded columns...(Retaining ra, dec, radius)")
    allowed_fields = ['ra', 'dec', 'radius']
    drop_fields = [field for field in mask_data.names if field not in allowed_fields]
    mask_data_cut = np.lib.recfunctions.rec_drop_fields(mask_data, drop_fields)

    # Cut out all objects at declinations of DEC and above
    dec = 30
    print(f"Removing stars above DEC = {dec}")
    mask_data_cut = mask_data_cut[mask_data_cut['dec'] < dec]

# the file is closed at this point; only the cleaned copy is needed from here on
print("Data cleaned!")
del hdu

# generate file (left uncompressed, as before)
print("Saving mask_data_clusters...")
np.savez('mask_data_clusters', mask_data_cut)

To speed up the cataloguing process, proactively remove object masks that fall within the Galactic Plane, LMC, or SMC. These regions are automatically designated as forbidden in the catalogue pipeline.

In [None]:
# Reload the saved cluster masks, remove every object that lies in the
# Galactic Plane band or in the LMC / SMC boxes, and overwrite
# mask_data_clusters.npz with the filtered table.
with np.load("mask_data_clusters.npz") as mask_data:
    mask_array = mask_data['arr_0']

# pandas needs native byte order. Casting to the native-order dtype converts
# the values whatever order they were stored in, and replaces
# byteswap().newbyteorder(), which no longer exists in NumPy 2.0.
native_array = mask_array.astype(mask_array.dtype.newbyteorder('='))

# create new dataframe to work with
mask_df = pd.DataFrame(native_array)

ra_deg = mask_df['ra'].to_numpy()
dec_deg = mask_df['dec'].to_numpy()

# flag points within 19 degrees of the Galactic equator
print("Removing objects in Galactic Plane...")
c_icrs = SkyCoord(ra=ra_deg, dec=dec_deg, frame='icrs', unit='degree')
in_galactic_plane = np.abs(c_icrs.galactic.b.value) <= 19

# flag points inside the rectangular LMC and SMC boxes (bounds inclusive)
print("Removing objects in LMC and SMC...")
in_lmc = (ra_deg >= 76) & (ra_deg <= 86) & (dec_deg >= -76) & (dec_deg <= -64)
in_smc = (ra_deg >= 11) & (ra_deg <= 16) & (dec_deg >= -76) & (dec_deg <= -70)

# One vectorised selection instead of dropping rows one at a time in a loop.
mask_df = mask_df[~(in_galactic_plane | in_lmc | in_smc)].reset_index(drop=True)

# Save the new mask data file. np.savez overwrites the existing file, so the
# old one is never deleted before its replacement has been written.
# NOTE(review): saving a DataFrame stores a plain 2-D array, so the column
# names are not kept in the file - confirm the catalogue pipeline expects this.
np.savez('mask_data_clusters', mask_df)