# Abstract

Create a SILO dataset from a combination of Build7 headers and test data for NIRCAM Grism data

# Environment

In [1]:
from glob import glob
from itertools import count
from os import path
from shutil import copy2
from astropy.io import fits

In [2]:
from jwst.associations import load_asn
from jwst import datamodels
from jwst.exp_to_source import exp_to_source
import json

# Main - Association information

In [3]:
# This is the directory for 7.1
test_dir = '/grp/jwst/ssb/test_build7.1/examples_for_dms/level2/nrc_grism'
!cd /grp/jwst/ssb/test_build7.1/examples_for_dms/level2/nrc_grism
!pwd

/grp/jwst/ssb/test_build7.1/examples_for_dms/level2/nrc_grism


In [4]:
# starting file
asn_startname = 'nircam_grism_asn.json'
# Read the association through a context manager so the file handle is
# closed as soon as it has been parsed instead of being left open.
with open(asn_startname, 'r') as asn_file:
    asn_data = load_asn(asn_file)
asn_data

{'asn_pool': 'dummy',
 'asn_rule': 'DMSLevel2bBase',
 'asn_type': 'None',
 'code_version': '0.7.1.beta5',
 'products': [{'members': [{'expname': 'dispersed_ramp_from_test.fits',
     'exptype': 'SCIENCE'},
    {'expname': 'test_disperse_f335m_rate_updated_bboxradecskybox_short.ecsv',
     'exptype': 'sourcecat'}],
   'name': 'test_disperse_f335m'}],
 'program': 'none',
 'target': 'singleton',
 'version_id': None}

In [5]:
# The science exposure is the first member of the first product in the
# association; open it and list its extensions. The handle is left open
# because later cells read its data and headers.
science_member = asn_data['products'][0]['members'][0]
grism_name = science_member['expname']
grism_fits = fits.open(grism_name)
grism_fits.info()

Filename: dispersed_ramp_from_test.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     170   ()      
  1  SCI           1 ImageHDU        44   (2048, 2048)   float32   
  2  DQ            1 ImageHDU        11   (2048, 2048)   int32 (rescales to uint32)   
  3  ERR           1 ImageHDU         9   (2048, 2048)   float32   
  4  ZEROFRAME     1 ImageHDU         9   (2048, 2048)   float32   
  5  AREA          1 ImageHDU         9   (2048, 2048)   float32   
  6  RELSENS       1 BinTableHDU     14   0R x 2C   [D, D]   
  7  RELSENS2D     1 ImageHDU         9   (2048, 2048)   float32   
  8  WAVELENGTH    1 ImageHDU         9   (2048, 2048)   float32   
  9  PATHLOSS_POINTSOURCE    1 ImageHDU         8   (2048,)   float32   
 10  WAVELENGTH_POINTSOURCE    1 ImageHDU         8   (2048,)   float32   
 11  PATHLOSS_UNIFORMSOURCE    1 ImageHDU         8   (2048,)   float32   
 12  WAVELENGTH_UNIFORMSOURCE    1 ImageHDU         8   (2048,)   f

In [7]:
# Look at the example level2a image: open the existing rate file and list its
# extensions so they can be compared with the simulated grism file above
l2a_example_name = 'jw94015001002_02104_00001_nrcalong_rate.fits'
l2a_example = fits.open(l2a_example_name)
l2a_example.info()

Filename: jw94015001002_02104_00001_nrcalong_rate.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     255   ()      
  1  SCI           1 ImageHDU        48   (2048, 2048)   float32   
  2  ERR           1 ImageHDU        10   (2048, 2048)   float32   
  3  DQ            1 ImageHDU        11   (2048, 2048)   int32 (rescales to uint32)   
  4  VAR_POISSON    1 ImageHDU         9   (2048, 2048)   float32   
  5  VAR_RNOISE    1 ImageHDU         9   (2048, 2048)   float32   
  6  ASDF          1 ImageHDU         7   (6175,)   uint8   


### File naming conventions
#### https://jwst-docs.stsci.edu/display/JDAT/File+Naming+Conventions+and+Data+Products
The names of the exposure-level data products are constructed from information in the science header of the exposure, allowing users to map them to the observation in their corresponding APT files:

    jw<PPPPP><OOO><VVV>_<GGSAA>_<EEEEE>_<detector>_<suffix>.fits

    PPPPP = Program ID
    OOO = Observation number
    VVV = Visit number
    GG = Visit Group
    S = Parallel Sequence id (1 prime, 2-5 parallel)
    AA = Activity number (base 36)
    EEEEE = Exposure number
    <detector> = detector name; e.g. nrca1, nrclong, mirimage
    <suffix> = product type; e.g. uncal, rate


In [9]:
# Overwrite the example image's pixel arrays with the simulated grism arrays,
# one extension at a time
for extname in ('SCI', 'DQ', 'ERR'):
    l2a_example[extname].data = grism_fits[extname].data

# Give the updated test file a new standard-convention name, record that name
# in the primary header, then write the result to disk and release the handle
newname = 'jw11111001001_01104_00001_nrcalong_rate.fits'
primary_header = l2a_example[0].header
primary_header['filename'] = newname
l2a_example.verify('fix')
l2a_example.writeto(newname, overwrite=True)
l2a_example.close()

In [10]:
# now finish updating the new image: reopen the file that was just written in
# update mode, so the header edits made in the following cells apply to it
l2a = fits.open(newname, mode='update')

In [11]:
# Compare the SCI headers: a = simulated grism image, b = newly written file
simulated_sci_header = grism_fits['SCI'].header
new_sci_header = l2a['SCI'].header
diff = fits.diff.HeaderDiff(simulated_sci_header, new_sci_header)
print(diff.report())

 Headers have different number of cards:
  a: 38
  b: 42
 Extra keyword 'BUNIT'  in b: 'DN/s'
 Extra keyword 'PA_APER' in b: 0.0
 Extra keyword 'S_REGION' in b: 'POLYGON ICRS  80.53853031592796 -69.35531484211675 80.43565889909475 -69.35557881776934 80.43669256854682 -69.3194285544607 80.53697234769284 -69.31943727146422'
 Extra keyword 'VA_SCALE' in b: 1.000000033314531
 Keyword CDELT1   has different values:
    a> -8.6513888888888e-06
    b> 1.74460027777777e-05
 Keyword CDELT1   has different comments:
    a> first axis increment per pixel (deg)
     ?                               ------
    b> first axis increment per pixel
 Keyword CDELT2   has different values:
    a> 8.70374999999998e-06
    b> 1.75306861111111e-05
 Keyword CDELT2   has different comments:
    a> second axis increment per pixel (deg)
     ?                                ------
    b> second axis increment per pixel
 Keyword CRVAL1   has different values:
    a> 0.0
    b> 80.4875
 Keyword CRVAL1   has differe

In [12]:
# Copy the listed WCS/pointing keywords from the simulated SCI header into the
# SCI header of the new file.
# NOTE(review): only PC1_1 of the PC matrix is listed here; confirm whether
# PC1_2 / PC2_1 / PC2_2 are intentionally left at the example file's values.
wcs = [
    'CRPIX1', 'CRPIX2', 'CDELT1', 'CDELT2', 'CRVAL1', 'CRVAL2',
    'DEC_REF', 'DEC_V1', 'PC1_1', 'RA_REF', 'RA_V1', 'ROLL_REF',
    'V2_REF', 'V3I_YANG', 'V3_REF',
]
source_sci_header = grism_fits['SCI'].header
target_sci_header = l2a['SCI'].header
for keyword in wcs:
    target_sci_header[keyword] = source_sci_header[keyword]

In [13]:
# Compare the primary headers: a = new file, b = simulated grism image.
# Blank cards are ignored so they do not show up as differences.
new_primary_header = l2a[0].header
simulated_primary_header = grism_fits[0].header
diff = fits.diff.HeaderDiff(new_primary_header, simulated_primary_header, ignore_blanks=True)
print(diff.report())

 Headers have different number of cards:
  a: 201
  b: 132
 Extra keyword 'APERNAME' in a: 'NRCA5_FULL'
 Extra keyword 'BARTDELT' in a: 78.31810454372317
 Extra keyword 'BENDTIME' in a: 57410.63556227637
 Extra keyword 'BKGDTARG' in a: False
 Extra keyword 'BMIDTIME' in a: 57410.63518946309
 Extra keyword 'BSTRTIME' in a: 57410.63481664982
 Extra keyword 'CAL_VCS' in a: 'c4224ab6'
 Extra keyword 'COMMENT' in a: '/ Program information'
 Extra keyword 'COMPRSSD' in a: False
 Extra keyword 'CORONMSK' in a: 'NONE'
 Extra keyword 'DATAPROB' in a: False
 Extra keyword 'DATE-END' in a: '2016-01-23'
 Extra keyword 'DRPFRMS1' in a: 0
 Extra keyword 'DRPFRMS3' in a: 0
 Extra keyword 'DVA_DEC' in a: -1.7660554078774e-07
 Extra keyword 'DVA_RA' in a: -2.1064958141811e-07
 Extra keyword 'ENG_QUAL' in a: 'SUSPECT'
 Extra keyword 'EPH_TIME' in a: 57410.6340278
 Extra keyword 'EPH_TYPE' in a: 'Predicted'
 Extra keyword 'EXPCOUNT' in a: 4
 Extra keyword 'EXPRIPAR' in a: 'PRIME'
 Extra keyword 'FRMDIVSR

In [14]:
# Keywords present in BOTH primary headers whose values differ, as
# (value in a, value in b) pairs; `diff` is the primary-header comparison
# built in the previous cell (a = new file, b = simulated grism image)
diff.diff_keyword_values

defaultdict(list,
            {'': [None,
              ('Program information', 'Basic information'),
              ('Observation identifiers', 'Programmatic information'),
              ('Visit information', 'Observation identifiers'),
              None,
              None,
              None,
              None,
              None,
              ('JWST ephemeris information', 'Reference file information'),
              ('Aperture information', 'CRDS parameters'),
              ('Velocity aberration correction information',
               'Dark reference file information'),
              ('Time information', 'Gain reference file information'),
              ('Guide star information',
               'Linearity reference file information'),
              ('Reference file information',
               'Mask reference file information'),
              ('CRDS parameters', 'Read noise reference file information'),
              ('Dark reference file information',
               'Saturation

In [15]:
# Carry the instrument-configuration keywords over from the simulated image's
# primary header into the primary header of the new file
mkeys = ['EXP_TYPE', 'INSTRUME', 'PUPIL', 'FILTER', 'MODULE', 'DETECTOR', 'CHANNEL']
source_primary_header = grism_fits[0].header
target_primary_header = l2a[0].header
for keyword in mkeys:
    target_primary_header[keyword] = source_primary_header[keyword]

In [17]:
# diff keywords is a 2-tuple, [0] = list of kw only in a, [1] = list of kw only in b
# (for the primary-header comparison: a = new file, b = simulated grism image)
diff.diff_keywords[1]  # only in the simulated image; should be mostly WCS keywords

['CAL_SVN',
 'CDELT1',
 'CDELT2',
 'COORDSYS',
 'CRPIX1',
 'CRPIX2',
 'CRVAL1',
 'CRVAL2',
 'CTYPE1',
 'CTYPE2',
 'CUNIT1',
 'CUNIT2',
 'DEC_REF',
 'DEC_V1',
 'PATTSIZE',
 'PA_V3',
 'PC1_1',
 'PC1_2',
 'PC2_1',
 'PC2_2',
 'RADESYS',
 'RA_REF',
 'RA_V1',
 'ROLL_REF',
 'SUBPXTYP',
 'V2_REF',
 'V3I_YANG',
 'V3_REF',
 'VPARITY',
 'WCSAXES']

In [18]:
diff.diff_keywords[0]  # keywords found only in a, i.e. additional keywords in the new example image

['APERNAME',
 'BARTDELT',
 'BENDTIME',
 'BKGDTARG',
 'BMIDTIME',
 'BSTRTIME',
 'CAL_VCS',
 'COMMENT',
 'COMPRSSD',
 'CORONMSK',
 'DATAPROB',
 'DATE-END',
 'DRPFRMS1',
 'DRPFRMS3',
 'DVA_DEC',
 'DVA_RA',
 'ENG_QUAL',
 'EPH_TIME',
 'EPH_TYPE',
 'EXPCOUNT',
 'EXPRIPAR',
 'FRMDIVSR',
 'GDSTARID',
 'GSENDTIM',
 'GSSTRTTM',
 'GS_DEC',
 'GS_MAG',
 'GS_ORDER',
 'GS_RA',
 'GS_UDEC',
 'GS_UMAG',
 'GS_URA',
 'HELIDELT',
 'HENDTIME',
 'HGA_MOVE',
 'HMIDTIME',
 'HSTRTIME',
 'INTARGET',
 'JWST_DX',
 'JWST_DY',
 'JWST_DZ',
 'JWST_X',
 'JWST_Y',
 'JWST_Z',
 'LAMP',
 'MU_DEC',
 'MU_EPOCH',
 'MU_RA',
 'NEXPOSUR',
 'NEXTEND',
 'NSAMPLES',
 'OBSLABEL',
 'PCS_MODE',
 'PILIN',
 'PNTG_SEQ',
 'PPS_APER',
 'PROGRAM',
 'PROP_DEC',
 'PROP_RA',
 'REFFRAME',
 'R_PERSAT',
 'R_TRPDEN',
 'R_TRPPAR',
 'SCA_NUM',
 'SDP_VER',
 'S_GANSCL',
 'S_GRPSCL',
 'S_PERSIS',
 'TARGOOPP',
 'TARGPROP',
 'TARGTYPE',
 'TARGUDEC',
 'TARGURA',
 'TEMPLATE',
 'TIME-END',
 'TIMESYS',
 'TSOVISIT',
 'VISITEND',
 'VISITSTA',
 'VISITYPE',
 'VI

In [19]:
# Re-check the science extension after the keyword copy:
# a = new file, b = simulated grism image
new_sci_header = l2a['SCI'].header
simulated_sci_header = grism_fits['SCI'].header
diff = fits.diff.HeaderDiff(new_sci_header, simulated_sci_header)
diff.diff_keywords

(['BUNIT', 'PA_APER', 'S_REGION', 'VA_SCALE'], [])

In [20]:
# Check the dq extension, then the err extension: for each one, print the
# keywords that appear in only one of the two headers
# (a = new file, b = simulated grism image)
for extname in ('DQ', 'ERR'):
    diff = fits.diff.HeaderDiff(l2a[extname].header, grism_fits[extname].header)
    print(diff.diff_keywords)

()
(['BUNIT'], [])


In [21]:
# Done with both FITS files. l2a was opened with mode='update', so closing it
# is the point at which the header edits above are saved to the file.
grism_fits.close()
l2a.close()
# Show the association as it was loaded, before any member is renamed
asn_data

{'asn_pool': 'dummy',
 'asn_rule': 'DMSLevel2bBase',
 'asn_type': 'None',
 'code_version': '0.7.1.beta5',
 'products': [{'members': [{'expname': 'dispersed_ramp_from_test.fits',
     'exptype': 'SCIENCE'},
    {'expname': 'test_disperse_f335m_rate_updated_bboxradecskybox_short.ecsv',
     'exptype': 'sourcecat'}],
   'name': 'test_disperse_f335m'}],
 'program': 'none',
 'target': 'singleton',
 'version_id': None}

In [19]:
# Point the association's science member (first member of the first product)
# at the newly written exposure file
science_member = asn_data['products'][0]['members'][0]
science_member['expname'] = newname
asn_data

{'asn_pool': 'dummy',
 'asn_rule': 'DMSLevel2bBase',
 'asn_type': 'None',
 'code_version': '0.7.1.beta5',
 'products': [{'members': [{'expname': 'jw11111001001_01104_00001_nrcalong_rate.fits',
     'exptype': 'SCIENCE'},
    {'expname': 'test_disperse_f335m_rate_updated_bboxradecskybox_short.ecsv',
     'exptype': 'sourcecat'}],
   'name': 'test_disperse_f335m'}],
 'program': 'none',
 'target': 'singleton',
 'version_id': None}

In [20]:
# Give the source catalog a name that matches the science exposure. The name
# of the direct image is not needed here, so the grism image's name is reused
# with a _cat suffix. The original catalog is copied, not moved.
catalog_member = asn_data['products'][0]['members'][1]
old_catalog = catalog_member['expname']
catalog_name = 'jw11111001001_01104_00001_nrcalong_cat.ecsv'
catalog_member['expname'] = catalog_name
copy2(old_catalog, catalog_name)

'jw11111001001_01104_00001_nrcalong_cat.ecsv'

In [21]:
# Derive the product rootname from the catalog member's file name: keep the
# first three underscore-separated fields and drop the rest
catalog_expname = asn_data['products'][0]['members'][1]['expname']
rootname_fields = catalog_expname.split('_')[:3]
asn_data['products'][0]['name'] = '_'.join(rootname_fields)
asn_data

{'asn_pool': 'dummy',
 'asn_rule': 'DMSLevel2bBase',
 'asn_type': 'None',
 'code_version': '0.7.1.beta5',
 'products': [{'members': [{'expname': 'jw11111001001_01104_00001_nrcalong_rate.fits',
     'exptype': 'SCIENCE'},
    {'expname': 'jw11111001001_01104_00001_nrcalong_cat.ecsv',
     'exptype': 'sourcecat'}],
   'name': 'jw11111001001_01104_00001'}],
 'program': 'none',
 'target': 'singleton',
 'version_id': None}

In [22]:
# set the code version: overwrite the value recorded in the association when
# it was loaded ('0.7.1.beta5') with the version string for this dataset
asn_data['code_version'] = '0.7.8rc9'

In [23]:
# Display the association after all of the edits above
asn_data

{'asn_pool': 'dummy',
 'asn_rule': 'DMSLevel2bBase',
 'asn_type': 'None',
 'code_version': '0.7.8rc9',
 'products': [{'members': [{'expname': 'jw11111001001_01104_00001_nrcalong_rate.fits',
     'exptype': 'SCIENCE'},
    {'expname': 'jw11111001001_01104_00001_nrcalong_cat.ecsv',
     'exptype': 'sourcecat'}],
   'name': 'jw11111001001_01104_00001'}],
 'program': 'none',
 'target': 'singleton',
 'version_id': None}

Docs: http://ssb.stsci.edu/doc/jwst_dev/jwst/associations/level2_asn_technical.html#

The naming convention for the association table is as follows:

jwPPPPP-TNNNN_YYYYMMDDtHHMMSS_ATYPE_MMM_asn.json

    jw: All JWST-related products begin with jw
    PPPPP: 5 digit proposal number
    TNNNN: Candidate Identifier. Can be one of the following:
        oNNN: Observation candidate specified by the letter o followed by a 3 digit number.
        c1NNN: Association candidate, specified by the letter ‘c’, followed by a number starting at 1001.
        a3NNN: Discovered whole program associations, specified by the letter ‘a’, followed by a number starting at 3001
        rNNNN: Reserved for future use. If you see this in practice, file an issue to have this document updated.
    YYYYMMDDtHHMMSS: This is generically referred to as the version_id. A timestamp provided by the DMS workflow. Note: When used outside the workflow, this field is user-specifiable.
    ATYPE: The type of association. See level3-asn-association-types (spec2 I think)
    MMM: A counter for each type of association created.


In [24]:
# write out the new asn file
# The name follows jwPPPPP-TNNNN_YYYYMMDDtHHMMSS_ATYPE_MMM_asn.json:
#   PPPPP is the 5-digit program number (11111, matching the exposure names);
#         the previous 'jw111110' carried a stray sixth digit
#   the date/time separator in the version_id is a lower-case 't'
new_asn_name = 'jw11111-a3001_20171109t145456_spec2_001_asn.json'
with open(new_asn_name, 'w') as f:
    json.dump(asn_data, f)