In [6]:
# -*- coding: utf-8 -*-
# Notebook Setup Cell

import argparse
import os
import sys
import glob
import time
import numpy as np
import pandas as pd
import yaml
from datetime import datetime, timedelta
import logging

# Astropy imports
from astropy.time import Time, TimeDelta
from astropy.coordinates import SkyCoord, Angle, EarthLocation
import astropy.units as u
from astropy.table import Table
from astropy.io import fits
from astropy.wcs import WCS

# --- IMPORTANT: Adjust sys.path if needed ---
# If your notebook is NOT in the same directory as the 'pipeline' folder,
# add the parent directory to the path so Python can find the modules.
# Example: If notebook is in '/data/jfaber/dsa110-contimg/' and pipeline
# modules are in '/data/jfaber/dsa110-contimg/pipeline/', this should work.
# If notebook is elsewhere, adjust the path accordingly.
pipeline_parent_dir = '/data/jfaber/dsa110-contimg/' # Adjust if needed
if pipeline_parent_dir not in sys.path:
    # Prepend so the 'pipeline' package under this directory is found first.
    sys.path.insert(0, pipeline_parent_dir)

# Pipeline module imports
try:
    from pipeline import config_parser
    from pipeline import pipeline_utils
    from pipeline import ms_creation
    from pipeline import calibration
    from pipeline import skymodel
    from pipeline import imaging
    from pipeline import mosaicking
    from pipeline import photometry
    from pipeline import dsa110_utils # Needed for location
except ImportError:
    # Show the search path to make a wrong 'pipeline_parent_dir' easy to spot,
    # then re-raise with a bare 'raise' so the original traceback is kept intact.
    print("ERROR: Failed to import pipeline modules. Check sys.path.")
    print(f"Current sys.path: {sys.path}")
    raise

# pyuvdata is an optional import here: it is only used to read HDF5 headers.
# Assume it is present, and clear the flag if the import turns out to fail.
pyuvdata_available = True
try:
    from pyuvdata import UVData
except ImportError:
    pyuvdata_available = False # Script will likely fail later
    print("ERROR: pyuvdata is required to read HDF5 metadata.")

# --- Define Paths and Parameters ---
# These replace command-line arguments
CONFIG_PATH = 'config/pipeline_config.yaml' # Relative path from notebook location
HDF5_DIR = '/data/incoming/'
# Optional: Force a specific BPCAL name for testing, otherwise set to None
BCAL_NAME_OVERRIDE = None
VERBOSE_LOGGING = True # Set True for DEBUG level, False for INFO

# --- Setup Logging ---
# Load config minimally just to get log path
try:
    with open(CONFIG_PATH, 'r') as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the default log directory below still applies.
        temp_config = yaml.safe_load(f) or {}
    # 'paths:' may be present but null in the YAML, hence the 'or {}'.
    log_dir = (temp_config.get('paths') or {}).get('log_dir', 'logs')
    # Resolve log_dir relative to pipeline parent dir if needed
    if not os.path.isabs(log_dir):
        log_dir = os.path.join(pipeline_parent_dir, log_dir)
    os.makedirs(log_dir, exist_ok=True)
    log_level = logging.DEBUG if VERBOSE_LOGGING else logging.INFO
    # Setup logger (might print duplicate messages if run multiple times in notebook kernel)
    logger = pipeline_utils.setup_logging(log_dir, config_name=f"notebook_test_{datetime.now().strftime('%H%M%S')}")
    logger.setLevel(log_level)
    # Suppress overly verbose CASA logs if desired
    # from casatasks import casalog
    # casalog.filter('INFO') # Filter to show only INFO and above for CASA tasks
    logger.info("Setup cell executed.")
except Exception as e:
    print(f"ERROR during setup: {e}")
    # Stop execution if setup fails; chain the original exception explicitly so
    # the root cause is shown as the direct cause in the traceback.
    raise RuntimeError("Setup failed") from e

2025-05-09 18:14:45 [INFO ] [MainThread] [root] CASA log file set to: /data/jfaber/dsa110-contimg/../logs/casa_20250509_181445.log
2025-05-09 18:14:45 [INFO ] [MainThread] [root] Pipeline logging configured. Log file: /data/jfaber/dsa110-contimg/../logs/notebook_test_181445_20250509_181445.log
2025-05-09 18:14:45 [INFO ] [MainThread] [root] Setup cell executed.


In [None]:
# Notebook Cell: Helper Function Definitions

# --- Copy the function definitions from test_pipeline_10min.py ---
# get_obs_declination(config, hdf5_dir)
# select_bcal_for_test(config, fixed_dec_deg, bcal_name_override=None)
# calculate_next_transit(bcal_info, telescope_loc)
# find_hdf5_chunks_around_time(config, hdf5_dir, target_time)
# --- Make sure they use 'logging.' instead of 'logger.' if logger wasn't passed ---
# Or modify them to accept logger as an argument

# Example (showing one function, copy others similarly):
def get_obs_declination(config, hdf5_dir):
    """Read the observation declination from one HDF5 file's metadata.

    Looks for files matching ``20*_sb00.hdf5`` in ``hdf5_dir``, reads the
    metadata (no visibility data) of the first one in sorted order with
    pyuvdata, and converts the ``phase_center_dec`` extra keyword from
    radians to degrees.

    Args:
        config: Pipeline configuration. Not used by this function; kept so the
            signature matches the other helper functions.
        hdf5_dir: Directory to search for ``*_sb00.hdf5`` files.

    Returns:
        The declination in degrees, or ``None`` if pyuvdata is unavailable,
        no matching file exists, the metadata key is missing, or reading the
        file fails. Failures are logged rather than raised.
    """
    if not pyuvdata_available:
        # Say why we bail out instead of returning None silently.
        logging.error("pyuvdata is not available; cannot read HDF5 metadata to determine Declination.")
        return None
    logging.info("Attempting to determine observation declination from HDF5 metadata...")
    try:
        pattern = os.path.join(hdf5_dir, "20*_sb00.hdf5")
        # glob order is filesystem dependent; sort so repeated runs read the same file.
        hdf5_files = sorted(glob.glob(pattern))
        if not hdf5_files:
            raise FileNotFoundError(f"No '*_sb00.hdf5' files found in {hdf5_dir} to read metadata.")
        metadata_file = hdf5_files[0]
        uvd = UVData()
        logging.debug(f"Reading metadata from: {metadata_file}")
        uvd.read(metadata_file, read_data=False)
        # Keep the KeyError handling tight around the lookup so that a KeyError
        # raised while reading the file is not misreported as a missing keyword.
        try:
            fixed_dec_rad = uvd.extra_keywords['phase_center_dec']
        except KeyError:
            logging.error(f"Metadata key 'phase_center_dec' not found in {metadata_file}. Cannot determine Dec.")
            return None
        fixed_dec_deg = np.rad2deg(fixed_dec_rad)
        logging.info(f"Determined observation Declination: {fixed_dec_deg:.4f} degrees")
        return fixed_dec_deg
    except Exception as e:
        logging.error(f"Failed to read HDF5 metadata to determine Declination: {e}", exc_info=True)
        return None

# --- PASTE THE OTHER HELPER FUNCTIONS HERE ---
# TODO: the helpers listed below are called by the "Load Config and Auto-Select
# Data" cell but are not defined in the cells shown here. Until they are pasted
# in (from test_pipeline_10min.py), that cell will fail with NameError.
# select_bcal_for_test(config, fixed_dec_deg, bcal_name_override=None)
# calculate_next_transit(bcal_info, telescope_loc)
# find_hdf5_chunks_around_time(config, hdf5_dir, target_time)

# Marker in the log that this cell ran to completion.
logging.info("Helper functions defined.")

In [None]:
# Notebook Cell: Load Config and Auto-Select Data

config = config_parser.load_config(CONFIG_PATH)
if not config:
    raise ValueError("Failed to load configuration.")

# Ensure HDF5 handling is correct for test
config['services']['hdf5_post_handle'] = 'none'
logging.info("Ensuring HDF5 post_handle is set to 'none' for test.")

# --- Stage 0: Determine Dec, Select BPCAL, Find Chunks ---
logging.info("--- Stage 0: Determine Dec, Select BPCAL, Find Chunks ---")
fixed_dec_deg = get_obs_declination(config, HDF5_DIR)
if fixed_dec_deg is None: raise RuntimeError("Failed to get observation declination.")
config['calibration']['fixed_declination_deg'] = fixed_dec_deg # Update in memory

selected_bcal_info = select_bcal_for_test(config, fixed_dec_deg, BCAL_NAME_OVERRIDE)
if selected_bcal_info is None: raise RuntimeError("Failed to select BPCAL for test.")

transit_time = calculate_next_transit(selected_bcal_info, utils_dsa110.loc_dsa110)
if transit_time is None: raise RuntimeError("Failed to calculate transit time.")

hdf5_files_1, hdf5_files_2, start_time_1, start_time_2 = find_hdf5_chunks_around_time(config, HDF5_DIR, transit_time)
if not hdf5_files_1 or not hdf5_files_2: raise RuntimeError("Failed to find HDF5 chunks around transit time.")

ts1_str = start_time_1.strftime("%Y%m%dT%H%M%S")
ts2_str = start_time_2.strftime("%Y%m%dT%H%M%S") # This is the transit chunk

logging.info(f"Selected HDF5 sets for processing: {ts1_str} and {ts2_str}")

# Store paths for next cell
%store ts1_str ts2_str hdf5_files_1 hdf5_files_2 selected_bcal_info config

In [None]:
# Notebook Cell: MS Creation
%store -r ts1_str ts2_str hdf5_files_1 hdf5_files_2 selected_bcal_info config # Load variables

logging.info("--- Stage 1: MS Creation ---")
ms_path_1 = ms_creation.process_hdf5_set(config, ts1_str, hdf5_files_1)
ms_path_2 = ms_creation.process_hdf5_set(config, ts2_str, hdf5_files_2)

if not ms_path_1 or not ms_path_2:
    raise RuntimeError("MS Creation failed for one or both chunks.")

logging.info(f"Created MS files: {os.path.basename(ms_path_1)}, {os.path.basename(ms_path_2)}")
ms_files_to_process = [ms_path_1, ms_path_2]

# Store paths for next cell
%store ms_files_to_process selected_bcal_info config

In [None]:
# Notebook Cell: Calibration and Imaging
%store -r ms_files_to_process selected_bcal_info config # Load variables

logging.info("--- Stage 2: Calibration and Imaging ---")
processed_images = []
processed_pbs = []
block_mask_path = None
template_image_path = None
gcal_table_path = None
cl_path_bcal = None
paths_config = config['paths'] # Get paths config

# 2a. Pick the BPCAL table to apply: the last "*.bcal" path in sorted order
try:
    cal_tables_dir = paths_config['cal_tables_dir']
    bcal_files = glob.glob(os.path.join(cal_tables_dir, "*.bcal"))
    bcal_files.sort()
    if len(bcal_files) == 0:
        raise RuntimeError(f"No BPCAL tables (*.bcal) found in {cal_tables_dir}.")
    latest_bcal_table = bcal_files[-1]
    logging.info(f"Using BPCAL table: {os.path.basename(latest_bcal_table)}")
except Exception as e:
    # Nothing downstream can run without a bandpass table: log and abort the cell.
    logging.critical(f"Failed to find BPCAL table: {e}. Aborting test.")
    raise e # Stop execution

# 2b. Generate Calibrator Model & Gain Cal Table (using transit chunk only)
# Best effort: any failure here is logged and the run continues without gain
# solutions (gcal_table_path is left as None).
try:
    skymodels_dir = paths_config['skymodels_dir']
    cl_bcal_filename = f"bcal_sky_{selected_bcal_info['name']}_test.cl" # Add suffix
    cl_bcal_output_path = os.path.join(skymodels_dir, cl_bcal_filename)
    cl_path_bcal, _ = skymodel.create_calibrator_component_list(config, selected_bcal_info, cl_bcal_output_path)
    if not cl_path_bcal: raise RuntimeError("Failed to create BPCAL sky model.")

    # Use the second MS (transit chunk) for gain cal
    ms_path_transit = ms_files_to_process[1]
    # Label for the gain table: the second '_'-separated token of the MS file
    # name. Fall back to the whole name when there is no underscore, so that an
    # unexpected file name does not disable gain calibration via IndexError.
    ms_name_parts = os.path.basename(ms_path_transit).split('_')
    ts_transit = (ms_name_parts[1] if len(ms_name_parts) > 1 else ms_name_parts[0]).replace('.ms', '')
    logging.info(f"Performing gain calibration on transit chunk: {os.path.basename(ms_path_transit)}")
    gcal_time_str = f"bcal_test_{ts_transit}"
    gcal_table_path = calibration.perform_gain_calibration(config, [ms_path_transit], cl_path_bcal, gcal_time_str, solint='inf')
    if not gcal_table_path: raise RuntimeError("Gain calibration on BPCAL failed.")
    logging.info(f"Gain table generated: {os.path.basename(gcal_table_path)}")
except Exception as e:
    logging.error(f"Failed during gain calibration setup stage: {e}", exc_info=True)
    logging.warning("Proceeding without gain calibration solutions.")
    # Keep the "path string or None" convention this variable is initialised
    # with, rather than switching its type to a list on failure.
    gcal_table_path = None

# 2c. Decide whether a clean mask will be used. Only the output path is chosen
# here; the mask itself is built later, once a template image exists.
mask_created = False
mask_output_path = None
use_mask_config = config.get('imaging',{}).get('use_clean_mask', False)
if not (use_mask_config and cl_path_bcal):
    logging.info("Masking disabled or BPCAL model missing, skipping mask.")
else:
    mask_output_path = os.path.join(skymodels_dir, f"mask_bcal_test_{selected_bcal_info['name']}.mask")
    logging.info(f"Will attempt to create mask: {mask_output_path}")

# 2d. Loop through MS files
images_dir = paths_config['images_dir']
for i, ms_path in enumerate(ms_files_to_process):
    logging.info(f"Processing MS {i+1}/{len(ms_files_to_process)}: {os.path.basename(ms_path)}")
    ms_base = os.path.splitext(os.path.basename(ms_path))[0]
    image_base = os.path.join(images_dir, f"{ms_base}_test")

    try:
        if not calibration.flag_rfi(config, ms_path): raise RuntimeError("RFI Flagging failed.")
        if not calibration.flag_general(config, ms_path): raise RuntimeError("General Flagging failed.")

        gcal_list = [gcal_table_path] if gcal_table_path and isinstance(gcal_table_path, str) else []
        if not calibration.apply_calibration(config, ms_path, latest_bcal_table, gcal_list):
            raise RuntimeError("ApplyCal failed.")

        ms_to_image = ms_path
        current_mask_path = None
        if use_mask_config and mask_output_path:
            if not mask_created:
                if template_image_path:
                    logging.info(f"Creating block mask {mask_output_path} using template {template_image_path}")
                    if imaging.create_clean_mask(config, cl_path_bcal, template_image_path, mask_output_path):
                        mask_created = True
                    else: logging.warning("Failed to create mask. Proceeding without.")
                else: logging.debug("Template image not yet available for mask creation.")
            if mask_created: current_mask_path = mask_output_path

        logging.info("Running tclean...")
        tclean_image_basename = imaging.run_tclean(config, ms_to_image, image_base, cl_path=None, mask_path=current_mask_path)

        if tclean_image_basename:
            img_path = f"{tclean_image_basename}.image"
            pb_path = f"{tclean_image_basename}.pb"
            if os.path.exists(img_path) and os.path.exists(pb_path):
                processed_images.append(img_path); processed_pbs.append(pb_path)
                logging.info(f"Successfully imaged {ms_path}")
                if template_image_path is None: template_image_path = img_path
            else: raise RuntimeError(f"tclean image/pb missing for {tclean_image_basename}")
        else: raise RuntimeError("tclean failed.")

    except Exception as e_ms:
        logging.error(f"Failed processing MS {ms_path}: {e_ms}", exc_info=True)
        raise e_ms # Stop execution on failure

# Store results for next cell
%store processed_images processed_pbs config selected_bcal_info ts1_str ts2_str

In [None]:
# Notebook Cell: Mosaicking
%store -r processed_images processed_pbs config selected_bcal_info ts1_str ts2_str # Load variables

mosaic_img_path = None
if len(processed_images) == 2:
    logging.info("--- Stage 3: Mosaicking ---")
    # Use timestamps from original chunks for naming
    mosaic_basename = f"mosaic_test_{ts1_str}_{ts2_str}"
    try:
        mosaic_img_path, _ = mosaicking.create_mosaic(config, processed_images, processed_pbs, mosaic_basename)
        if not mosaic_img_path: raise RuntimeError("Mosaicking function returned None.")
        logging.info(f"Mosaic created: {mosaic_img_path}")
        # Store for next cell
        %store mosaic_img_path config selected_bcal_info ts1_str ts2_str
    except Exception as e_mosaic:
        logging.error(f"Mosaicking failed: {e_mosaic}", exc_info=True)
        raise e_mosaic # Stop execution
else:
    raise RuntimeError(f"Could not proceed to mosaicking: Only {len(processed_images)} images were created.")

In [None]:
# Notebook Cell: Photometry
%store -r mosaic_img_path config selected_bcal_info ts1_str ts2_str # Load variables

if mosaic_img_path:
    logging.info("--- Stage 4: Photometry ---")
    mosaic_fits_path = f"{os.path.splitext(mosaic_img_path)[0]}.linmos.fits"
    if not os.path.exists(mosaic_fits_path):
         logging.warning(f"Mosaic FITS {mosaic_fits_path} not found, attempting export...")
         mosaic_fits_path = imaging.export_image_to_fits(config, mosaic_img_path, suffix='.linmos')

    if mosaic_fits_path and os.path.exists(mosaic_fits_path):
        logging.info(f"Running photometry on mosaic: {mosaic_fits_path}")
        try:
            targets, references = photometry.identify_sources(config, mosaic_fits_path)
            # Convert to pandas DataFrames for easier handling below
            phot_targets_df = pd.DataFrame(targets) if targets is not None else pd.DataFrame()
            phot_references_df = pd.DataFrame(references) if references is not None else pd.DataFrame()

            # Add BPCAL to targets list if not already there
            if selected_bcal_info and selected_bcal_info['name'] not in phot_targets_df['name'].values:
                 try:
                      bcal_coord = SkyCoord(ra=selected_bcal_info['ra'], dec=selected_bcal_info['dec'], unit=(u.hourangle, u.deg), frame='icrs')
                      with fits.open(mosaic_fits_path) as hdul: wcs = WCS(hdul[0].header).celestial
                      xpix, ypix = wcs.world_to_pixel(bcal_coord)
                      # Create row ensuring necessary columns exist
                      bcal_row_data = {'name': selected_bcal_info['name'], 'source_id': selected_bcal_info['name'],
                                      'RAJ2000': selected_bcal_info['ra'], 'DEC_J2000': selected_bcal_info['dec'],
                                      'xpix': xpix, 'ypix': ypix}
                      for col in phot_targets_df.columns:
                           if col not in bcal_row_data: bcal_row_data[col] = np.nan
                      phot_targets_df = pd.concat([phot_targets_df, pd.DataFrame([bcal_row_data])], ignore_index=True)
                      logging.info(f"Added BPCAL {selected_bcal_info['name']} to target list for photometry.")
                 except Exception as e_add: logging.warning(f"Could not add BPCAL to target list: {e_add}")


            if not phot_targets_df.empty and not phot_references_df.empty:
                phot_table = photometry.perform_aperture_photometry(config, mosaic_fits_path, phot_targets_df, phot_references_df)
                if phot_table is not None:
                    rel_flux_table = photometry.calculate_relative_fluxes(config, phot_table) # Assumes returns DF
                    if rel_flux_table is not None:
                        logging.info("Photometry successful. Relative flux results:")
                        print("\n--- Relative Photometry Results ---")
                        # Display relevant columns using pandas display
                        display_cols = ['source_id', 'relative_flux', 'relative_flux_error', 'median_reference_flux', 'reference_source_ids']
                        # Ensure columns exist before displaying
                        display_cols = [col for col in display_cols if col in rel_flux_table.columns]
                        display(rel_flux_table[display_cols]) # Use IPython display

                        # Save to a test CSV
                        test_output_csv = os.path.join(config['paths']['photometry_dir'], f"test_photometry_{ts1_str}_{ts2_str}.csv")
                        rel_flux_table.to_csv(test_output_csv, index=False, float_format='%.4f', na_rep='NaN')
                        logging.info(f"Saved test photometry results to: {test_output_csv}")
                    else: logging.error("Relative flux calculation failed.")
                else: logging.error("Aperture photometry failed.")
            elif phot_targets_df.empty: logging.warning("No target sources identified/valid for photometry.")
            else: logging.error("Reference source identification failed or references missing.")
        except Exception as e_phot: logging.error(f"Photometry stage failed: {e_phot}", exc_info=True)
    else: logging.error(f"Mosaic FITS file missing: {mosaic_fits_path}. Cannot run photometry.")

logging.info("--- Notebook Test Run Finished ---")