In [1]:
# Basic import necessary for configuration.
import os
import warnings
import requests

warnings.simplefilter("ignore", RuntimeWarning)

maindir = os.path.join(os.getcwd(), '../Data/')

# Set CRDS cache directory to user home if not already set.
if os.getenv('CRDS_PATH') is None:
    os.environ['CRDS_PATH'] = os.path.join(os.path.expanduser('~'), 'crds_cache')

# Check whether the CRDS server URL has been set. If not, set it.
if os.getenv('CRDS_SERVER_URL') is None:
    os.environ['CRDS_SERVER_URL'] = 'https://jwst-crds.stsci.edu'

# Output the current CRDS path and server URL in use.
print('CRDS local filepath:', os.environ['CRDS_PATH'])
print('CRDS file server:', os.environ['CRDS_SERVER_URL'])

CRDS local filepath: /home/fran/crds_cache
CRDS file server: https://jwst-crds.stsci.edu


In [2]:
# ----------------------General Imports----------------------
import time
import glob
import json
import itertools
import numpy as np
import pandas as pd

# --------------------Astroquery Imports---------------------
from astroquery.mast import Observations

# ----------------------Astropy Imports----------------------
# Astropy utilities for opening FITS files, downloading demo files, etc.
from astropy.table import Table
from astropy.stats import sigma_clip
from astropy.visualization import ImageNormalize, ManualInterval, LogStretch
from astropy.visualization import LinearStretch, AsinhStretch, simple_norm

# ----------------------Plotting Imports---------------------
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection

# -------------------File Download Imports-------------------
from tqdm.auto import tqdm
from multiprocessing import Pool, cpu_count
from functools import partial

In [3]:
# ----------------------JWST Calibration Pipeline Imports----------------------
import jwst  # Import the base JWST and CRDS packages.
import crds
from crds.client import api
from stpipe import crds_client

# JWST pipelines (each encompassing many steps).
from jwst.pipeline import Detector1Pipeline  # calwebb_detector1
from jwst.pipeline import Spec2Pipeline  # calwebb_spec2
from jwst.pipeline import Tso3Pipeline  # calwebb_tso3
from jwst.extract_1d import Extract1dStep  # Extract1D Step

# JWST pipeline utilities
from jwst import datamodels  # JWST pipeline utilities: datamodels.
from jwst.associations import asn_from_list  # Tools for creating association files.
from jwst.associations.lib.rules_level2b import Asn_Lv2SpecTSO
from jwst.associations.lib.rules_level3 import DMS_Level3_Base

# Check the default context for the Pipeline version
default_context = crds.get_default_context('jwst', state='build')
print("JWST Calibration Pipeline Version = {}".format(jwst.__version__))
print(f"Default CRDS Context for JWST Version {jwst.__version__}: {default_context}")
print(f"Using CRDS Context: {os.environ.get('CRDS_CONTEXT', default_context)}")

CRDS - INFO -  Calibration SW Found: jwst 1.18.0 (/home/fran/.local/lib/python3.13/site-packages/jwst-1.18.0.dist-info)


JWST Calibration Pipeline Version = 1.18.0
Default CRDS Context for JWST Version 1.18.0: jwst_1364.pmap
Using CRDS Context: jwst_1364.pmap


In [4]:
# --------------Program and observation information--------------
program = "01366"
sci_observtn = "003"
bg_observtn = None
filters = ["G395H"]

# ----------Define the base and observation directories----------
basedir = os.path.join(maindir, f'data_{program}')
sci_dir = os.path.join(basedir, f'Obs{sci_observtn}')
uncal_dir = os.path.join(sci_dir, 'uncal/')

os.makedirs(uncal_dir, exist_ok=True)

In [5]:
sci_obs_id_table = Observations.query_criteria(instrument_name=['NIRSPEC/SLIT'],
                                                provenance_name=["CALJWST"],
                                                obs_id=[f'*{program}*{sci_observtn}*'])
sci_downloads, asn_downloads = [], []

file_criteria = {'filters': filters, 'calib_level': [1],
                'productSubGroupDescription': ['UNCAL']}



In [6]:
def matches_criteria(prod, criteria):
    '''
    Check if a product matches the given criteria.
    :param prod: The product to check.
    :param criteria: The criteria to match against.
    :return: True if the product matches the criteria, False otherwise.
    '''

    for key, values in criteria.items():
        prod_value = prod.get(key)
        if prod_value is None:
            return False

        # For string matching inside string (e.g., 'UNCAL' in 'PRODUCT_UNCAL')
        if isinstance(prod_value, str):
            if not any(val in prod_value for val in values):
                return False
        # For list matching (e.g., ['F200W'] in ['F200W', 'F444W'])
        elif isinstance(prod_value, list):
            if not any(val in prod_value for val in values):
                return False
        else:
            # Direct equality (e.g., calib_level)
            if prod_value not in values:
                return False

    return True

matching_unique = [
    prod
    for exposure in sci_obs_id_table
    for prod in Observations.get_product_list(exposure)
    if matches_criteria(prod, file_criteria)
]

pd.DataFrame(matching_unique)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,87769340,JWST,timeseries,jw01366003001_04101_00001-seg001_nrs2,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg001_nrs2_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H
1,87769341,JWST,timeseries,jw01366003001_04101_00001-seg003_nrs1,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg003_nrs1_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H
2,87769342,JWST,timeseries,jw01366003001_04101_00001-seg002_nrs2,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg002_nrs2_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H
3,87769343,JWST,timeseries,jw01366003001_04101_00001-seg001_nrs1,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg001_nrs1_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H
4,87769344,JWST,timeseries,jw01366003001_04101_00001-seg003_nrs2,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg003_nrs2_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H
5,87769345,JWST,timeseries,jw01366003001_04101_00001-seg002_nrs1,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01366003001_04101_00001-se...,SCIENCE,--,UNCAL,--,CALJWST,--,1366,jw01366003001_04101_00001-seg002_nrs1_uncal.fits,1422204480,233644122,PUBLIC,1,F290LP;G395H


In [7]:
sci_downloads = [matching_unique[i]['dataURI'] for i in range(len(matching_unique))]

# Filter out other observations and remove duplicates.
sci_downloads = {f for f in sci_downloads if f"jw{program}{sci_observtn}" in f}

print(f"Science files selected for downloading: {len(sci_downloads)}")

Science files selected for downloading: 6


In [8]:
# Constants
BASE_URL = "https://mast.stsci.edu/api/v0.1/Download/file?uri="

# ---------- Worker function for one file ----------
def download_file(uri, target_dir):
    fname = os.path.basename(uri)
    target_path = os.path.join(target_dir, fname)

    # Skip if file already exists
    if os.path.exists(target_path):
        print(f"→ {fname} already exists. Skipping.")
        return

    # HEAD request to get size
    url = BASE_URL + uri
    head = requests.head(url)
    total_size = int(head.headers.get('Content-Length', 0))
    size_mb = total_size / (1024 * 1024)
    print(f"Downloading {fname} ({size_mb:.2f} MB)")

    # GET request with streaming & progress bar
    with requests.get(url, stream=True) as r, \
         open(target_path, 'wb') as f, \
         tqdm(total=total_size, unit='B', unit_scale=True, desc=fname) as pbar:

        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
                pbar.update(len(chunk))

    print(f"✔ Finished {fname}")

# ---------- Main Execution ----------
def main(sci_downloads, uncal_dir):
    # 1. Filter files that don't exist yet
    to_download = [uri for uri in sci_downloads
                   if not os.path.exists(os.path.join(uncal_dir, os.path.basename(uri)))]

    print(f"\nTotal files to download: {len(to_download)}\n")

    if not to_download:
        print("All files already present. Nothing to do.")
        return

    # 2. Set up multiprocessing pool
    num_workers = cpu_count()
    print(f"Using {num_workers} parallel workers.\n")

    # 3. Use partial to fix the target directory argument
    download_func = partial(download_file, target_dir=uncal_dir)

    with Pool(num_workers) as pool:
        pool.map(download_func, to_download)

# ---------- Example usage ----------
main(sci_downloads, uncal_dir)  # <-- Call this when you're ready



Total files to download: 6

Using 16 parallel workers.



Downloading jw01366003001_04101_00001-seg003_nrs1_uncal.fits (1356.32 MB)Downloading jw01366003001_04101_00001-seg003_nrs2_uncal.fits (1356.32 MB)

Downloading jw01366003001_04101_00001-seg001_nrs2_uncal.fits (1356.32 MB)
Downloading jw01366003001_04101_00001-seg002_nrs2_uncal.fits (1356.32 MB)
Downloading jw01366003001_04101_00001-seg001_nrs1_uncal.fits (1356.32 MB)
Downloading jw01366003001_04101_00001-seg002_nrs1_uncal.fits (1356.32 MB)


jw01366003001_04101_00001-seg003_nrs2_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

jw01366003001_04101_00001-seg001_nrs2_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

jw01366003001_04101_00001-seg003_nrs1_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

jw01366003001_04101_00001-seg002_nrs1_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

jw01366003001_04101_00001-seg002_nrs2_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

jw01366003001_04101_00001-seg001_nrs1_uncal.fits:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

✔ Finished jw01366003001_04101_00001-seg001_nrs2_uncal.fits
✔ Finished jw01366003001_04101_00001-seg001_nrs1_uncal.fits
✔ Finished jw01366003001_04101_00001-seg003_nrs1_uncal.fits
✔ Finished jw01366003001_04101_00001-seg002_nrs1_uncal.fits
✔ Finished jw01366003001_04101_00001-seg003_nrs2_uncal.fits
✔ Finished jw01366003001_04101_00001-seg002_nrs2_uncal.fits
