**Author:** Anowar Shajib

*With thanks to David Law.*

In this notebook, we process our data through the Spec2 pipeline to produce Level 2b data products, including calibrated slope images, quick-look data cubes, and 1D spectra.

For more details, see the [Spec2 pipeline documentation](https://jwst-pipeline.readthedocs.io/en/latest/jwst/pipeline/calwebb_spec2.html).

During stage 2 of the pipeline, the countrate (slope) image products from stage 1, which have units of DN/s, are converted to units of surface brightness (MJy/sr) for both extended and point sources.

If pixel-based background subtraction is chosen, it will be applied during Spec2. This requires creating an association file to associate the background files with individual science files.

In [None]:
import pathlib

import numpy as np
from scipy.ndimage import binary_dilation, median_filter
from tqdm import tqdm

from util import *

In [None]:
# Run the Spec2 pipeline on the rate files; when False, only a skip message is printed.
run_pipeline = True
# Build a median image from the LeakCal exposures and subtract it from the science rate files.
subtract_leakcal_manual = True
# Re-flag outlier pixels in the calibrated (cal) files after Spec2 has run.
do_extra_cleaning = True
# Pixel-based background subtraction in Spec2 (direct pixel subtraction).
# Deep background exposures are needed so that this does not add noise.
subtract_pixel_based_background = False

# Substrings used to identify the LeakCal exposures among the rate file paths.
msa_leakage_file_ids = ["10101", "12101", "14101", "16101"]

In [None]:
# Define a function that will call the spec2 pipeline with our desired set of parameters
# We'll list the individual steps just to make it clear what's running


def run_spec2_pipeline(
    file_name, output_directory, no_cubes=False, is_background=False
):
    """
    Run the Spec2Pipeline on a given file and save the results.

    Pixel-based background subtraction is enabled or disabled according to the
    module-level flag ``subtract_pixel_based_background``. The ``nsclean`` step
    is always skipped.

    :param file_name: str, name of fits file to run pipeline on
    :param output_directory: str, path to output directory
    :param no_cubes: bool, if True, skip cube building and 1d spectral extraction
    :param is_background: bool, if True, the file is treated as a background
        exposure: cube building, 1d spectral extraction, and pixel-based
        background subtraction are all skipped
    :return: None
    :outputs: pipeline products are saved into ``output_directory``
    """
    # An f-string (rather than "{0:s}".format) also accepts path-like objects.
    print(f"Running Spec2Pipeline on {file_name}...")

    # Set default configuration from CRDS param reference files.
    # -This is required when running the pipeline in a function.
    crds_config = Spec2Pipeline.get_config_from_reference(file_name)
    spec2 = Spec2Pipeline.from_config_section(crds_config)

    spec2.output_dir = output_directory

    # Assign_wcs overrides
    # spec2.assign_wcs.override_distortion = 'myfile.asdf'
    # spec2.assign_wcs.override_regions = 'myfile.asdf'
    # spec2.assign_wcs.override_specwcs = 'myfile.asdf'
    # spec2.assign_wcs.override_wavelengthrange = 'myfile.asdf'

    # Pixel-based background subtraction runs only when requested through the
    # module-level flag.
    spec2.bkg_subtract.skip = not subtract_pixel_based_background

    # Flatfield overrides
    # spec2.flat_field.override_flat = 'myfile.fits'

    # Straylight overrides
    # spec2.straylight.override_mrsxartcorr = 'myfile.fits'

    # Fringe overrides
    # spec2.fringe.override_fringe = 'myfile.fits'

    # Photom overrides
    # spec2.photom.override_photom = 'myfile.fits'

    # Cubepar overrides
    # spec2.cube_build.override_cubepar = 'myfile.fits'

    # Extract1D overrides
    # spec2.extract_1d.override_extract1d = 'myfile.asdf'
    # spec2.extract_1d.override_apcorr = 'myfile.asdf'

    # Overrides for whether or not certain steps should be skipped
    # spec2.assign_wcs.skip = False
    # spec2.bkg_subtract.skip = True
    # spec2.flat_field.skip = False
    # spec2.srctype.skip = False
    # spec2.straylight.skip = False
    # spec2.fringe.skip = False
    # spec2.photom.skip = False
    # spec2.residual_fringe.skip = False #does a residual fringe correction across the entire cube
    # spec2.cube_build.skip = False
    # spec2.extract_1d.skip = False
    spec2.nsclean.skip = True

    # spec2.flat_field.override_sflat = "./jwst_nirspec_sflat_0194_cleaned.fits"

    # Run pixel replacement code to extrapolate values for otherwise bad pixels.
    # This can help mitigate small 5-10% negative dips in spectra of bright sources.
    # spec2.pixel_replace.skip = True
    # spec2.pixel_replace.algorithm='mingrad'

    # The no_cubes option skips cube building and 1d spectral extraction for
    # the given exposure.
    if no_cubes:
        spec2.cube_build.skip = True
        spec2.extract_1d.skip = True

    # Background exposures never get cubes or 1d spectra here, and are never
    # background-subtracted themselves, regardless of the flags above.
    if is_background:
        spec2.cube_build.skip = True
        spec2.extract_1d.skip = True
        spec2.bkg_subtract.skip = True

    # Some cube building options
    # spec2.cube_build.weighting='drizzle'
    # spec2.cube_build.coord_system='ifualign' # If aligning cubes with IFU axes instead of sky

    spec2.save_results = True
    spec2(file_name)

In [None]:
if subtract_leakcal_manual:
    # Gather every NRS1 rate file. The pattern is joined with pathlib so the
    # result does not depend on whether the directory string ends with a
    # path separator.
    all_files = sorted(
        glob.glob(
            str(pathlib.Path(stage1_nsclean_directory) / "*nrs1_rate.*fits")
        )
    )

    # LeakCal exposures are the files whose path contains one of the known IDs.
    leak_files = [
        file
        for file in all_files
        if any(file_id in file for file_id in msa_leakage_file_ids)
    ]

    print("LeakCal files:")
    print(leak_files)

    # Without any leak exposure the median image would be all NaN, and the
    # subtraction below would silently turn every science frame into NaN.
    if not leak_files:
        raise ValueError(
            "No LeakCal files matching msa_leakage_file_ids were found; "
            "cannot build the median leak image."
        )

    # Read in all the leak files (copied to float64 so the data remain valid
    # after each file is closed); the frame shape is taken from the data.
    leak_frames = []
    for leak_file in leak_files:
        with fits.open(leak_file) as hdu:
            leak_frames.append(np.array(hdu["SCI"].data, dtype=np.float64))
    big_array = np.stack(leak_frames)

    # Make a median leak image
    median_leak = np.nanmedian(big_array, axis=0)

    # Clean up the leak image
    leak2 = median_leak.copy()

    # First mask anything with counts > 0.5 DN/s
    leak2[median_leak > 0.5] = np.nanmedian(leak2)

    # Then deal with NaNs
    leak2[np.isnan(leak2)] = np.nanmedian(leak2)

    # Then apply a horizontal median filter to take out RMS noise
    leak2 = median_filter(leak2, size=(1, 51))

    # Subtract leak image from science files (everything that is not a leak file)
    leak_file_set = set(leak_files)
    sci_files = [file for file in all_files if file not in leak_file_set]

    leakcal_output_directory = pathlib.Path(stage2_leakcal_directory)
    for sci_file in sci_files:
        with fits.open(sci_file) as hdu:
            hdu["SCI"].data = hdu["SCI"].data - leak2
            name = pathlib.Path(sci_file).name
            hdu.writeto(str(leakcal_output_directory / name), overwrite=True)

In [None]:
# Collect the leak-subtracted NRS1 rate files in sorted order and show how many were found.
rate_files = glob.glob(f"{stage2_leakcal_directory}/*nrs1_rate.*fits")
rate_files.sort()

len(rate_files)

In [None]:
# Run Spec2 on every rate file, skipping cube building and 1d extraction.
# Pixel-based background subtraction happens inside run_spec2_pipeline only
# when it has been enabled; otherwise background subtraction is left for Spec3.
if not run_pipeline:
    print("Skipping Spec2 processing for SCI data")
else:
    for file in tqdm(rate_files):
        # Files whose path contains this identifier are treated as background exposures.
        is_background = "01794002001" in file

        run_spec2_pipeline(
            file,
            stage2_directory,
            no_cubes=True,
            is_background=is_background,
        )

# Extra processing to flag bad pixels again

In [None]:
def _flag_outliers(sci, median_scale, threshold, above):
    """
    Set outlier pixels of a 2D image, and their 3x3 neighbourhoods, to NaN in place.

    Non-finite pixels are treated as 0 for the comparison. A pixel is an
    outlier when ``value - median_scale * row_median`` lies beyond
    ``threshold``, where ``row_median`` is an 11-pixel median filter along
    the second axis.

    :param sci: 2D float array, modified in place
    :param median_scale: float, factor applied to the median-filtered image
    :param threshold: float, limit on the difference image
    :param above: bool, if True flag differences > threshold, otherwise
        flag differences < threshold
    :return: None
    """
    filled = sci.copy()
    filled[~np.isfinite(filled)] = 0.0
    smoothed = median_filter(filled, size=(1, 11))
    diff = filled - smoothed * median_scale
    outliers = diff > threshold if above else diff < threshold

    # Grow by 1 pixel in every direction. Dilation stops at the array edges,
    # so flagged edge pixels neither wrap around to the opposite side nor
    # index out of bounds.
    grown = binary_dilation(outliers, structure=np.ones((3, 3), dtype=bool))
    sci[grown] = np.nan


if do_extra_cleaning:
    files = sorted(glob.glob(stage2_directory + "/*nrs1_cal.fits"))

    # Joined with pathlib so the output location does not depend on whether
    # the directory string ends with a path separator.
    processed_directory = pathlib.Path(stage2_processed_directory)

    for file in files:
        with fits.open(file) as hdu:
            sci = hdu["SCI"].data

            # Flag positive pixels
            _flag_outliers(sci, median_scale=2.0, threshold=100.0, above=True)

            # Flag negative pixels (evaluated after the positive flags are applied)
            _flag_outliers(sci, median_scale=0.5, threshold=-100.0, above=False)

            hdu["SCI"].data = sci
            name = pathlib.Path(file).name
            hdu.writeto(str(processed_directory / name), overwrite=True)