In [1]:
from astropy.io import fits
from astropy import stats
from pyvo.dal import tap
from astropy.time import Time
import numpy as np

In [2]:
### DEFINE CONSTANTS AND PATHS ###

input_path = "../data/raw/" # Path to the input data files
output_path = "../data/processed/" # Path to the output data files

ORIGFILE = "cra-extended-map2.fits" # Original file name

PROG_ID = "081.C-0204(A)" # Program ID
REFERENC = "2011ApJ...736..137S" # Reference for the data

In [3]:
### PROCESSING THE DATA ###

# Open the FITS file and extract header and data
hdu = fits.open(f"{input_path}{ORIGFILE}")
hdr = hdu[0].header
data = hdu[0].data

# Get the object name from the header
OBJECT = hdu[0].header["OBJECT"]

# Make TAP query to get observation details
def tap_query(query, ESO_TAP_OBS="https://archive.eso.org/tap_obs"):
    """Function to perform a TAP query to the ESO archive."""
    tapobs = tap.TAPService(ESO_TAP_OBS)
    result = tapobs.search(query=query, maxrec=1000).to_qtable()
    return result

query = f"""SELECT dp_id, exposure, prog_id, object, dp_tech, instrument, ra, dec, exp_start, origfile
            FROM dbo.raw
            WHERE dp_id like 'APEXBOL.%%'
                AND object = '{OBJECT}'
                AND prog_id = '{PROG_ID}'
                AND dp_cat = 'SCIENCE'""" 

result = tap_query(query)
result

dp_id,exposure,prog_id,object,dp_tech,instrument,ra,dec,exp_start,origfile
Unnamed: 0_level_1,s,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,deg,deg,Unnamed: 8_level_1,Unnamed: 9_level_1
object,float32,object,object,object,object,float64,float64,object,object
APEXBOL.2008-06-21T07:49:56.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T07:49:55.997Z,APEX-23705-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:01:47.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:01:46.997Z,APEX-23706-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:08:39.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:08:39.150Z,APEX-23707-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:20:31.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:20:31.150Z,APEX-23708-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:27:27.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:27:27.120Z,APEX-23709-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:39:23.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:39:22.997Z,APEX-23710-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:46:16.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:46:16.400Z,APEX-23711-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:58:07.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:58:07.100Z,APEX-23712-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-22T02:52:32.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-22T02:52:32.160Z,APEX-23964-2008-06-22-E-081.C-0204A-2008
...,...,...,...,...,...,...,...,...,...


In [4]:
scans = [int(x.split("-")[1]) for x in result['origfile']]

# Scans from notes - where there are 110, and 2 have been removed (bad scans)
# 39345: server restarted
# 40165, 40166: look weird
scans_notes = [23705,23706,23707,23708,23709,23710,23711,23712,23964,23965,23966,23967,23968,23969,23970,23971,26044,
                26045,26046,26047,26048,26049,26050,26051,26052,26055,26289,26290,26291,26292,26293,26294,26295,26296,
                26299,26300,26301,26302,26303,26304,39219,39220,39221,39222,39223,39224,39225,39226,39227,39228,39342,
                39343,39344,39348,39349,39350,40161,40162,40163,40164,40167,40168,40169,40170,40452,40453,40454,40455,
                40456,40457,40458,40459,40461,40462,40463,40464,40465,40576,40577,40578,40579,40580,40581,40582,40583,
                40585,40586,40587,40589,40590,40591,40592,40595,40596,40597,40598,40599,40600,40601,40602,40605,40606,
                40607,40608,40609,40610,40611,40612]

for i in range(len(scans)):
    if scans[i] not in scans_notes:
        print(f"Scan {scans[i]} is missing from the notes.")
    # Remove scans row from result table
        result.remove_row(i) 

result

Scan 39345 is missing from the notes.
Scan 40165 is missing from the notes.
Scan 40166 is missing from the notes.


dp_id,exposure,prog_id,object,dp_tech,instrument,ra,dec,exp_start,origfile
Unnamed: 0_level_1,s,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,deg,deg,Unnamed: 8_level_1,Unnamed: 9_level_1
object,float32,object,object,object,object,float64,float64,object,object
APEXBOL.2008-06-21T07:49:56.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T07:49:55.997Z,APEX-23705-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:01:47.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:01:46.997Z,APEX-23706-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:08:39.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:08:39.150Z,APEX-23707-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:20:31.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:20:31.150Z,APEX-23708-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:27:27.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:27:27.120Z,APEX-23709-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:39:23.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:39:22.997Z,APEX-23710-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:46:16.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:46:16.400Z,APEX-23711-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-21T08:58:07.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-21T08:58:07.100Z,APEX-23712-2008-06-21-E-081.C-0204A-2008
APEXBOL.2008-06-22T02:52:32.000,40.0,081.C-0204(A),CRA_CENTER,CONTINUUM,APEXBOL,285.48333305,-36.96361111,2008-06-22T02:52:32.160Z,APEX-23964-2008-06-22-E-081.C-0204A-2008
...,...,...,...,...,...,...,...,...,...


In [5]:
# Get total exposure time 
TEXPTIME = float(np.sum(result["exposure"]).to("d").value)

# Get the first and last exposure start times
exp_start = result["exp_start"]
exposure = result["exposure"]
dp_id = result["dp_id"]

MJDOBS = exp_start[0]
MJDEND = exp_start[-1]
DATEOBS = MJDOBS.replace("Z", "")

MJDOBS = Time(MJDOBS, scale="utc")
MJDEND = Time(MJDEND, scale="utc")

MJDOBS = float(round(MJDOBS.mjd, 5))
MJDEND = round(float(MJDEND.mjd) + float(exposure[-1].to("d").value), 5)

# Get the provenance information
NCOMBINE = len(result)
PROV = []
for i in range(NCOMBINE):
    PROV.append(dp_id[i])

print(f"Total exposure time: {TEXPTIME} d")
print(f"Observation date: {DATEOBS}")
print(f"Start MJD: {MJDOBS}, End MJD: {MJDEND}")
print(f"Number of scans combined: {NCOMBINE}")
print(f"Provenance IDs: {PROV}")

Total exposure time: 0.049999999999999996 d
Observation date: 2008-06-21T07:49:55.997
Start MJD: 54638.32634, End MJD: 54702.17628
Number of scans combined: 108
Provenance IDs: ['APEXBOL.2008-06-21T07:49:56.000', 'APEXBOL.2008-06-21T08:01:47.000', 'APEXBOL.2008-06-21T08:08:39.000', 'APEXBOL.2008-06-21T08:20:31.000', 'APEXBOL.2008-06-21T08:27:27.000', 'APEXBOL.2008-06-21T08:39:23.000', 'APEXBOL.2008-06-21T08:46:16.000', 'APEXBOL.2008-06-21T08:58:07.000', 'APEXBOL.2008-06-22T02:52:32.000', 'APEXBOL.2008-06-22T03:04:28.000', 'APEXBOL.2008-06-22T03:11:18.000', 'APEXBOL.2008-06-22T03:23:18.000', 'APEXBOL.2008-06-22T03:30:14.000', 'APEXBOL.2008-06-22T03:42:20.000', 'APEXBOL.2008-06-22T03:49:16.000', 'APEXBOL.2008-06-22T04:01:22.000', 'APEXBOL.2008-06-29T05:45:14.000', 'APEXBOL.2008-06-29T05:46:20.000', 'APEXBOL.2008-06-29T05:58:16.000', 'APEXBOL.2008-06-29T06:05:08.000', 'APEXBOL.2008-06-29T06:16:58.000', 'APEXBOL.2008-06-29T06:23:42.000', 'APEXBOL.2008-06-29T06:35:38.000', 'APEXBOL.2008-06-

In [6]:
# Dictionary with header information
# This is a template, some values will be filled later from original file
# Otherwise they will added from the custom keys defined above

dict = {'SIMPLE': True,
        'BITPIX': None,
        'NAXIS': None,
        'NAXIS1': None,
        'NAXIS2': None,
        'PRODCATG': 'ANCILLARY.FILTERED',
        'ORIGIN': 'APEX',
        'TELESCOP': 'APEX-12m',
        'INSTRUME': 'APEXBOL',
        'MAPMODE': 'OTF',
        'OBSTECH': 'CONTINUUM',
        'OBJECT': None,
        'RA': None,
        'DEC': None,
        'CRVAL1': None,
        'CRPIX1': None,
        'CTYPE1': None,
        'CUNIT1': None,
        'CRVAL2': None,
        'CRPIX2': None,
        'CTYPE2': None,
        'CUNIT2': None,
        'CD1_1': None,
        'CD1_2': -0.0,
        'CD2_1': -0.0,
        'CD2_2': None,
        'EQUINOX': 2000.0,
        'RADESYS': None,
        'FEBE1': 'LABOCA-ABBA',
        'FILTER': '870u',
        'RESTFREQ': 666.199982,
        'BMAJ': None,
        'BMIN': None,
        'BPA': None,
        'WAVELMIN': 799400.0,
        'WAVELMAX': 951700.0,
        'WAVE': 870.0,
        'SKY_RES': None,
        'BUNIT': None,
        'FLUXCAL': 'ABSOLUTE',
        'FLUXERR': 15,
        'BNOISE': None,
        'TIMESYS': 'TAI',
        'TEXPTIME': None,
        'MJD-OBS': None,
        'MJD-END': None,
        'DATE': None,
        'PROG_ID': None,
        'PROCSOFT': 'BoA',
        'REFERENC': None,
        'NOESODAT': False,
        'P3ORIG': 'EDP',
        'ARCFILE': 'tmp.fits',
        'ORIGFILE': None,
        'CHECKSUM': "-",
        'DATASUM': "-"}

# Dictionary with comments for each header keyword
dict_comments = {'SIMPLE': 'conforms to FITS standard',
        'BITPIX': 'array data type',
        'NAXIS': 'number of array dimensions',
        'NAXIS1': 'length of data axis 1',
        'NAXIS2': 'length of data axis 2',
        'PRODCATG': 'Data product category',
        'ORIGIN': 'Facility',
        'TELESCOP': 'Telescope name',
        'INSTRUME': 'Instrument',
        'MAPMODE': 'APEX mapping mode',
        'OBSTECH': 'Technique of observation',
        'COMMENT': '',
        'OBJECT': 'Name of Object',
        'RA': '[deg] Image centre Right Ascension',
        'DEC': '[deg] Image centre Declination',
        'CRVAL1': 'Reference Value Axis 1',
        'CRPIX1': 'Reference Pixel Axis 1',
        'CTYPE1': 'WCS projection type for this axis 1',
        'CUNIT1': 'Unit Axis 1',
        'CRVAL2': 'Reference Value Axis 2',
        'CRPIX2': 'Reference Pixel Axis 2',
        'CTYPE2': 'WCS projection type for this axis 2',
        'CUNIT2': 'Unit Axis 2',
        'CD1_1': 'Linear projection matrix',
        'CD1_2': 'Linear projection matrix',
        'CD2_1': 'Linear projection matrix',
        'CD2_2': 'Linear projection matrix',
        'EQUINOX': '[years] Standard FK5',
        'RADESYS': 'Coordinate reference frame',
        'FEBE1': 'Frontend-backend combination',
        'FILTER': 'Filter used for the observation',
        'RESTFREQ': '[GHz] Rest Frequency',
        'BMAJ': '[deg] Beam major axis',
        'BMIN': '[deg] Beam minor axis',
        'BPA': '[deg] Beam position angle',
        'WAVELMIN': '[nm] Minimum wavelength',
        'WAVELMAX': '[nm] Maximum wavelength',
        'SKY_RES': '[arcsec] FWHM effective beam size',
        'BTYPE': 'Physical type of image',
        'BUNIT': 'Physical unit of image',
        'BCONV': 'Conversion factor from Jy/beam to MJy/sr',
        'FLUXCAL': 'Certifies the validity of BUNIT',
        'FLUXERR': '[%] uncertainty of the flux calibration',
        'BNOISE': 'Median RMS noise in map',
        'TIMESYS': 'Time system for MJD and DATE-OBS',
        'TEXPTIME': '[s] Total exposure time',
        'MJD-OBS': 'Begin of Observations',
        'MJD-END': 'End of Observations',
        'DATE': 'Date HDU creation',
        'WAVE': '[micron] Central wavelength of the filter',
        'PROG_ID': 'ESO programme identification',
        'NCOMBINE': '# of combined raw science data files',
        'PROCSOFT': 'Software used for data processing',
        'REFERENC': 'Bibliographic reference',
        'HORIGIN': 'Origin of used Herschel product',
        'HCALVERS': 'Version of the cal tree',
        'HLEVEL': 'The level of the product',
        'DETECTOR': 'Name of bolometer array',
        'HOBSID01': 'Observation identifier',
        'HOBSID02': 'Observation identifier',
        'NOESODAT': 'Not ESO data product',
        'CHECKSUM': '',
        'DATASUM': '',
        'ARCFILE': 'Archive File Name',
        'ORIGFILE': 'Original File Name',
        'P3ORIG': 'ESO external data product'}

In [7]:
hdr_updated = fits.Header()
for key, value in dict.items():


    #####################
    ### GENERAL CASES ###

    # Not None in dict, we will not add it to the header from dict
    if value is not None:
        hdr_updated[key] = value

    # None in dict, exists in header, we will add it from header
    elif value is None and key in hdr.keys():
        hdr_updated[key] = hdr[key]


    #####################
    ### SPECIAL CASES ###

    # Special cases for CDELT1, CDELT2 and CD1_1, CD2_2
    if key == "CD1_1":
        hdr_updated[key] = hdr['CDELT1']
    if key == "CD2_2":
        hdr_updated[key] = hdr['CDELT2']

    if key == "PROG_ID":
        hdr_updated[key] = PROG_ID

    if key == "SKY_RES":
        hdr_updated[key] = hdr['BMAJ'] * 3600.0

    if key == "BNOISE":
        rms = stats.mad_std(data, ignore_nan=True)
        hdr_updated[key] = rms

    if key == "RA": 
        if key not in hdr_updated.keys():
            hdr_updated[key] = hdr['CRVAL1']
    if key == "DEC":
        if key not in hdr_updated.keys():
            hdr_updated[key] = hdr['CRVAL2']
    
    if key == "REFERENC":
        hdr_updated[key] = REFERENC

    if key == "TEXPTIME":
        hdr_updated[key] = TEXPTIME

    if key == "MJD-OBS":
        hdr_updated[key] = MJDOBS

    if key == "MJD-END":
        hdr_updated[key] = MJDEND
    
    # elif key == "DATE":
    #     hdr_updated[key] = DATEOBS
        
    if key == "ORIGFILE":
        hdr_updated[key] = ORIGFILE
    
    #####################
    ### ADD COMMENTS ###

    # Add comments to the header for all keys
    if key in hdr_updated.keys():        
        hdr_updated.comments[key] = dict_comments.get(key, '')


#######################
### ADDITIONAL KEYS ###

# Add the NCOMBINE and PROV keys
hdr_updated.insert("CHECKSUM", ('NCOMBINE', NCOMBINE))
hdr_updated.comments["NCOMBINE"] = dict_comments.get("NCOMBINE", '')

for i in range(NCOMBINE):
    hdr_updated.insert("CHECKSUM", (f'PROV{i+1}', PROV[i]))
    hdr_updated.comments[f"PROV{i+1}"] = "Original science file name"




##############
### CHECKS ###
for key, value in dict.items():
    if key not in hdr_updated.keys():
        print(f"Key {key} is missing in the updated header.")
print("Header update completed successfully.")

Header update completed successfully.


In [8]:
hdu_updated = fits.PrimaryHDU(data=data, header=hdr_updated)
hdu_updated.writeto(f"{output_path}{ORIGFILE}", checksum=True, overwrite=True)

In [9]:
hdu_updated.header

SIMPLE  =                    T / conforms to FITS standard                      
BITPIX  =                  -32 / array data type                                
NAXIS   =                    2 / number of array dimensions                     
NAXIS1  =                  443                                                  
NAXIS2  =                  361                                                  
PRODCATG= 'ANCILLARY.FILTERED' / Data product category                          
ORIGIN  = 'APEX    '           / Facility                                       
TELESCOP= 'APEX-12m'           / Telescope name                                 
INSTRUME= 'APEXBOL '           / Instrument                                     
MAPMODE = 'OTF     '           / APEX mapping mode                              
OBSTECH = 'CONTINUUM'          / Technique of observation                       
OBJECT  = 'CrA_Center'         / Name of Object                                 
RA      =                285