In [None]:
# --- What to reduce ---
# Proposal ID passed to the MAST observation query
PROPOSAL_ID = 2609
# Exact target name to select. If None, matches against the whole program
TARGET_NAME = "NGC6791-NEWCATALOG-GAIAEDR3"
# Slit identifier compared against the slit token parsed from each obs_id
SLIT_NAME = "s00066"

# --- Where things live ---
# Directory the notebook changes into before doing anything else
CONTENT_DIR = "/containerapp/content"
# Directory that receives every downloaded file
DL_PATH_NAME = "mastDownload/flat/"
# Base directory under which the per-slit reduction directory is created
REDUX_BASE_NAME = "miscdata/jwst_outputs/"

In [None]:
import re
import os
import json
import datetime
from pathlib import Path
from copy import deepcopy

import numpy as np

from astroquery.mast import Observations

from astropy.io import fits
from astropy import table

from jwst import datamodels
from jwst.pipeline import Spec2Pipeline

%matplotlib inline
from matplotlib import pyplot as plt

The `chdir` command below makes all relative paths resolve against the content directory, regardless of where this particular notebook is located.

In [None]:
# Switch the working directory so the relative paths used below resolve against CONTENT_DIR.
os.chdir(CONTENT_DIR)

In [None]:
# Every download lands in this one flat directory; create it, and any missing parents, up front.
flat_dl_root = Path(str(DL_PATH_NAME))
flat_dl_root.mkdir(exist_ok=True, parents=True)

The `login` below might succeed even if the token is invalid - that just means you won't have proprietary access. You will see a warning in the cell output if this happens.

In [None]:
# Log in to MAST with the token read from the MAST_TOKEN environment variable.
# Indexing os.environ directly means a missing variable raises KeyError here.
Observations.login(os.environ['MAST_TOKEN'])

# Download all the necessary files for the particular target

In [None]:
# The proposal-wide observation table is cached on disk next to the downloads.
obses_table_path = flat_dl_root.joinpath(f'obses{PROPOSAL_ID}.ecsv')

cache_missing = not obses_table_path.exists()
if cache_missing:
    # Only query MAST when no cached copy exists yet, then persist the result.
    obses = Observations.query_criteria(proposal_id=PROPOSAL_ID)
    obses.write(obses_table_path, format='ascii.ecsv')

# Always load from the cache file, so a fresh query and a cached run
# both work from the table as it is stored on disk.
obses = table.Table.read(obses_table_path, format='ascii.ecsv')

obses

In [None]:
# Keep the calibration-level-3 rows that are not images.
specs3 = obses[(obses['calib_level']==3) & (obses['dataproduct_type'] != 'image')]

# Build a row mask: the slit token parsed from obs_id must equal SLIT_NAME and,
# when TARGET_NAME is set, the target name must match it exactly.
msk = []
for row in specs3:
    m = re.match('jw.*_(s.*?)_.*', row['obs_id'])
    # `m is not None` yields a real bool; `m and ...` would store None in the
    # mask for any obs_id that does not fit the pattern.
    match = m is not None and m.group(1) == SLIT_NAME
    if TARGET_NAME is not None:
        match = match and (row['target_name']==TARGET_NAME)
    msk.append(match)

# Force a boolean dtype: an empty list would otherwise become a float array and
# a list containing None an object array, neither of which is a valid index.
spec_tab = specs3[np.array(msk, dtype=bool)]

spec_tab

In [None]:
# Ask MAST for the data products belonging to the selected observations;
# the later cells pick the association, exposure and MSA files out of this table.
spec_prods = Observations.get_product_list(spec_tab)
spec_prods

The cell below is purely informational; it doesn't actually need to be present to run the pipeline.

In [None]:
# Select the association product: subgroup 'ASN' with type 'D'. Exactly one is expected.
is_asn = spec_prods['productSubGroupDescription'] == 'ASN'
is_type_d = spec_prods['type'] == 'D'
spec3_asn_prod = spec_prods[is_asn & is_type_d]
assert len(spec3_asn_prod) == 1

# Download it into the flat download directory under its own product filename.
spec3_asn_path = flat_dl_root / spec3_asn_prod['productFilename'][0]
dl_result = Observations.download_file(
    spec3_asn_prod['dataURI'][0],
    local_path=spec3_asn_path,
)

# Parse the association JSON and show it as the cell output.
with spec3_asn_path.open() as f:
    spec3_asn = json.load(f)
spec3_asn

In [None]:
# Pick out the association ('ASN') products whose filename contains 'spec2'.
spec2_asn_prods = spec_prods[(spec_prods['productSubGroupDescription']=='ASN') & (np.char.find(spec_prods['productFilename'], 'spec2')!=-1)]

# Download each association file and keep its parsed JSON, keyed by product filename.
spec2_asns = {}
for prod in spec2_asn_prods:
    spec2_asn_path = flat_dl_root / prod['productFilename']
    Observations.download_file(prod['dataURI'], local_path=spec2_asn_path)

    # Read through a context manager so the file handle is closed right after
    # parsing instead of being left open for the life of the kernel.
    with spec2_asn_path.open() as f:
        spec2_asns[prod['productFilename']] = json.load(f)

In [None]:
# Gather the exposure filenames the spec2 associations refer to, keeping only
# members whose exptype is science or background.
wanted_exptypes = ('science', 'background')

to_dl_fns = []
for asn in spec2_asns.values():
    products = asn['products']
    assert len(products) == 1
    to_dl_fns.extend(
        memb['expname']
        for memb in products[0]['members']
        if memb['exptype'] in wanted_exptypes
    )
to_dl_fns

In [None]:
# Resolve every required filename to the single data URI it is served from.
fn_to_uri = {}
product_filenames = spec_prods['productFilename']
for fn in to_dl_fns:
    matches = spec_prods[product_filenames == fn]
    assert len(matches)>0,f"no files found with name {fn}!  This shouldn't happen, freaking out"

    # Several product rows may share a filename, but they must all agree on the URI.
    uris = matches['dataURI']
    assert len(np.unique(uris)) == 1, "two different dataURIs match!  This shouldn't happen, freaking out"
    fn_to_uri[fn] = uris[0]

fn_to_uri

In [None]:
#papermill_description=spec2_input_download

# Download every required exposure into the flat download directory.
# input_paths gets the local path of each completed download, or None otherwise.
input_paths = []
failed_downloads = []
for nm, uri in fn_to_uri.items():
    local_path = flat_dl_root / nm
    res = Observations.download_file(uri, local_path=local_path)
    if res[0] == 'COMPLETE':
        input_paths.append(local_path)
    else:
        input_paths.append(None)
        # Remember what went wrong so the failure below is actionable.
        # res[1] is presumably the message MAST returned - confirm against the astroquery docs.
        failed_downloads.append((nm, res[0], res[1]))

# Stop here if anything is missing, naming the files that did not download.
assert not failed_downloads, f'downloads did not complete: {failed_downloads}'

input_paths

This might be overkill, but we simply assume that all of the MSA files in the matched product set are needed.

In [None]:
#papermill_description=MSA_download

# Fetch every product in the 'MSA' subgroup into the flat download directory,
# each under its own product filename.
msa_prods = spec_prods[spec_prods['productSubGroupDescription']=='MSA']
for row in msa_prods:
    msa_local_path = flat_dl_root / row['productFilename']
    Observations.download_file(row['dataURI'], local_path=msa_local_path)

# Set up the pipeline

In [None]:
# Respect a CRDS cache location that is already configured; otherwise fall back
# to the default cache directory.
if 'CRDS_PATH' not in os.environ:
    os.environ['CRDS_PATH'] = '/containerapp/crds_cache'

# The CRDS server URL is always set, overriding any existing value.
os.environ['CRDS_SERVER_URL'] = 'https://jwst-crds.stsci.edu'

In [None]:
# Shared base directory for reductions, plus the per-slit directory this run works in.
redux_base = Path(REDUX_BASE_NAME)
redux_path = redux_base / f'redux_{SLIT_NAME}'

# parents=True so the run also works when the base directory does not exist yet
# (matching how the download directory is created).
redux_path.mkdir(parents=True, exist_ok=True)

In [None]:
def setup_paths(asn_fn):
    """Stage one spec2 association inside ``redux_path``.

    For every member of the association's single product, a symlink named
    after the member's ``expname`` is created in ``redux_path`` pointing at the
    file of the same name in ``flat_dl_root``. Each member's FITS header is
    read for ``MSAMETFL``; every file named there is linked in the same way.
    Finally the association itself is written as JSON to ``redux_path / asn_fn``.

    Parameters
    ----------
    asn_fn : str
        Key into ``spec2_asns``; also used as the output JSON filename.
    """
    def _link_into_redux(filename):
        # Create the symlink unless one is already present, then return its path.
        link = redux_path / filename
        if not link.is_symlink():
            link.symlink_to((flat_dl_root / filename).resolve())
        return link

    # Work on a copy so the shared association dict can never be altered from here.
    asn = deepcopy(spec2_asns[asn_fn])
    assert len(asn['products']) == 1

    # Link each member and collect the MSA metadata filename from its header
    # (None when the keyword is absent).
    msas = [
        fits.getheader(_link_into_redux(member['expname'])).get('MSAMETFL', None)
        for member in asn['products'][0]['members']
    ]

    for msa in msas:
        if msa is None:
            continue
        _link_into_redux(msa)

    with (redux_path / asn_fn).open('w') as f:
        json.dump(asn, f)

def setup_spec2d():
    """Build the ``Spec2Pipeline`` instance used for each association.

    The pipeline saves its results into ``redux_path``, treats the source as a
    point source, saves the interpolated flat, runs background subtraction, and
    skips the ``resample_spec`` and ``extract_1d`` steps. The ``slit_y_high`` and
    ``slit_y_low`` values of the ``assign_wcs`` step are doubled from their defaults.

    Returns
    -------
    Spec2Pipeline
        The configured, not yet executed, pipeline.
    """
    pipeline = Spec2Pipeline()

    # Where and whether results are written
    pipeline.save_results = True
    pipeline.output_dir = str(redux_path)

    # Step options
    pipeline.srctype.source_type = 'POINT'
    pipeline.flat_field.save_interpolated_flat = True

    # Which steps run
    skip_flags = (
        (pipeline.bkg_subtract, False),
        (pipeline.resample_spec, True),
        (pipeline.extract_1d, True),
    )
    for step, skipped in skip_flags:
        step.skip = skipped

    # Double both slit y limits relative to whatever the step starts with
    wcs_step = pipeline.assign_wcs
    wcs_step.slit_y_high *= 2
    wcs_step.slit_y_low *= 2

    return pipeline

In [None]:
# Stage the links and the association file in redux_path for every spec2 association.
for asn_fn in spec2_asns:
    setup_paths(asn_fn)

In [None]:
#papermill_description=Running_spec2

# Run the spec2 pipeline for every association, sharing outputs through redux_base:
#  * if both expected outputs already exist in redux_base, link to them from redux_path;
#  * otherwise reserve them with placeholder files, run the pipeline, and replace the
#    reservations in redux_base with symlinks to the outputs written to redux_path.
# `outputs` collects (path, 'real' | 'symlink') pairs and `dts` the start time of each run.
outputs = []
dts = []
for asn_fn, asn in spec2_asns.items():
    print(f'\n\n\nSTARTING redux for {asn_fn}')
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # strftime format used below produces the same text either way.
    dts.append((asn_fn, datetime.datetime.now(datetime.timezone.utc)))

    assert len(asn['products']) == 1
    expected_outputs = [asn['products'][0]['name'] + '_cal.fits',
                        asn['products'][0]['name'] + '_interpolatedflat.fits']

    if all((redux_base / outname).exists() for outname in expected_outputs):
        print("Ouputs", expected_outputs, 'already exist, symlinking')
        for outname in expected_outputs:
            output = redux_path / outname
            target = redux_base / outname
            if output.is_file():
                print(output, 'already exists, not symlinking')
                outputs.append((output, 'real'))
            else:
                output.symlink_to(os.path.relpath(target, output.parent))
                outputs.append((output, 'symlink'))

    else:
        # we do this step to "reserve" the outputs, because otherwise another process might try to create it while we are waiting for this one to finish
        reserved = []
        for outname in expected_outputs:
            target = redux_base / outname
            try:
                # Exclusive create: never write through an existing file or
                # symlink, which could truncate another run's real output.
                with target.open('x') as f:
                    f.write('placeholder')
            except FileExistsError:
                continue
            reserved.append(target)

        try:
            spec2 = setup_spec2d()
            result = spec2(redux_path / asn_fn)

            for outname in expected_outputs:
                output = redux_path / outname
                if not output.is_file():
                    raise IOError(f'Output {output} is missing! this is highly irregular...')
        except BaseException:
            # On any failure (including an interrupt) release our reservations,
            # otherwise later runs would mistake the placeholders for finished outputs.
            for target in reserved:
                if target.is_file() and not target.is_symlink():
                    target.unlink()
            raise

        for outname in expected_outputs:
            output = redux_path / outname
            target = redux_base / outname

            # critical because the "reserved" outputs need to be removed
            # (is_symlink also catches a dangling link, which exists() reports as absent)
            if target.exists() or target.is_symlink():
                target.unlink()
            target.symlink_to(output.relative_to(target.parent))

            outputs.append((output, 'real'))

# Record when each association started and when the whole run finished.
dts.append(('Done', datetime.datetime.now(datetime.timezone.utc)))
with open(redux_path / 'completed_pipeline_run', 'w') as f:
    for label, dt in dts:
        dtstr = dt.strftime('%Y_%m_%dZ%H_%M_%S')
        f.write(f'{label},{dtstr}\n')

In [None]:
# List every recorded output together with its 'real' / 'symlink' label.
for o in outputs:
    print(o)