In [1]:
# Notebook authorship metadata (module-level dunder attributes)
__author__ = 'Monika Soraisam'
__email__ = 'monika.soraisam@noirlab.edu'

In [2]:
import glob
import json
import os
import shutil
import subprocess
import urllib
import urllib.request
from pathlib import Path

import astrodata
import astropy.units as u
import gemini_instruments
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy.time import Time
from gempy.adlibrary import dataselect

# for programmatically querying the GOA with astroquery
from astroquery.gemini import Observations


In [40]:
# Construct the URL for getting a summary of the files
## Example program_id used here = GS-2021A-DD-102
## Specific observation ID of interest for the above program = GS-2021A-DD-102-9
## At Gemini, every successful observing proposal is assigned a program ID. A program may have one target or multiple targets and, for each target, one or more sets of observations.
## E.g., say we have a program to observe two supernovae X and Y. For X, say we took observations with the instrument GMOS on day 1 and 2. For Y, say we took only one observation on day 3 with instrument NIRI.
## Every set of observations is then given an observation ID, typically the program ID suffixed with a running number.

url = "https://archive.gemini.edu/jsonsummary/canonical/NotFail/GS-2021A-DD-102-9" # can use either program ID or observation ID. With the former, you get all the observations under that program

# Open the URL and fetch the JSON document text.
# The `with` block closes the connection even if the read fails, and the handle is
# deliberately not called `u`, which would clobber the `astropy.units as u` import.
with urllib.request.urlopen(url) as response:
    jsondoc = response.read()

# Decode the JSON
#files_summary = json.loads(jsondoc)  # plain Python objects, as decoded by json
files_summary = pd.DataFrame.from_dict(json.loads(jsondoc)) # pandas dataframe

In [41]:
# Cap the dataframe display at 10 columns; wider frames are shown with the
# middle columns elided ('...')
pd.options.display.max_columns = 10

In [42]:
# Display the file summary dataframe (bare last expression -> rich notebook repr)
files_summary

Unnamed: 0,name,filename,path,compressed,file_size,...,qa_state,release,reduction,types,phot_standard
0,S20210219S0075.fits,S20210219S0075.fits.bz2,,True,4697027,...,Pass,2021-08-19,RAW,"{'SOUTH', 'SIDEREAL', 'UNPREPARED', 'SPECT', '...",
1,S20210219S0076.fits,S20210219S0076.fits.bz2,,True,4679182,...,Pass,2021-08-19,RAW,"{'GEMINI', 'UNPREPARED', 'GMOS', 'SOUTH', 'SPE...",
2,S20210219S0077.fits,S20210219S0077.fits.bz2,,True,7639332,...,Pass,2021-02-19,RAW,"{'GCALFLAT', 'SOUTH', 'LS', 'FLAT', 'RAW', 'GM...",
3,S20210219S0078.fits,S20210219S0078.fits.bz2,,True,7706318,...,Pass,2021-02-19,RAW,"{'GCALFLAT', 'SOUTH', 'LS', 'FLAT', 'RAW', 'GM...",
4,S20210219S0079.fits,S20210219S0079.fits.bz2,,True,4734016,...,Pass,2021-08-19,RAW,"{'GEMINI', 'UNPREPARED', 'GMOS', 'SOUTH', 'SPE...",


In [43]:
# List the column names (metadata fields) available in the summary dataframe
files_summary.columns

Index(['name', 'filename', 'path', 'compressed', 'file_size', 'data_size',
       'file_md5', 'data_md5', 'lastmod', 'mdready', 'entrytime', 'size',
       'md5', 'pending_ingest', 'program_id', 'engineering',
       'science_verification', 'procmode', 'calibration_program',
       'observation_id', 'data_label', 'telescope', 'instrument',
       'ut_datetime', 'local_time', 'observation_type', 'observation_class',
       'object', 'ra', 'dec', 'azimuth', 'elevation', 'cass_rotator_pa',
       'airmass', 'filter_name', 'exposure_time', 'disperser', 'camera',
       'central_wavelength', 'wavelength_band', 'focal_plane_mask',
       'detector_binning', 'detector_gain_setting', 'detector_roi_setting',
       'detector_readspeed_setting', 'detector_welldepth_setting',
       'detector_readmode_setting', 'spectroscopy', 'mode', 'adaptive_optics',
       'laser_guide_star', 'wavefront_sensor', 'gcal_lamp', 'raw_iq', 'raw_cc',
       'raw_wv', 'raw_bg', 'requested_iq', 'requested_cc', 'reque

In [44]:
# Inspect some useful keywords of the files, one line per file with a 1-based counter.
# Rows are iterated directly rather than by feeding index labels to the positional
# `.iloc`, which only lines up when the frame carries a default 0..n-1 index.
summary_keys = ['filename', 'observation_type', 'observation_class', 'qa_state', 'object', 'exposure_time']
for row_number, (_, record) in enumerate(files_summary.iterrows(), start=1):
    print(row_number, *(record[key] for key in summary_keys))

1 S20210219S0075.fits.bz2 OBJECT science Pass AT2020caa 900.0
2 S20210219S0076.fits.bz2 OBJECT science Pass AT2020caa 900.0
3 S20210219S0077.fits.bz2 FLAT partnerCal Pass GCALflat 1.0
4 S20210219S0078.fits.bz2 FLAT partnerCal Pass GCALflat 1.0
5 S20210219S0079.fits.bz2 OBJECT science Pass AT2020caa 900.0


**Note that DRAGONS reductions happen per observation ID. When the data are retrieved from the archive by observation ID, the associated calibrations sometimes do not include the corresponding standard star observations. What I've noticed is that the latter are generally accessible at the program ID level, but that involves some filtering to pick the appropriate standard star if multiple standard stars were observed for the program.**

In [91]:
# Let's use astroquery to look for the standard stars associated with the program.
# The query returns every OBJECT-type observation of the program, not only standards.
# (An instrument filter, e.g. instrument='GMOS-S', can be added to narrow the query,
# and 'ra' / 'dec' can be appended to the printed columns.)
query_filters = {
    'program_id': 'GS-2021A-DD-102',  ## note that I'm using program ID here
    'observation_type': "OBJECT",
}
data = Observations.query_criteria(**query_filters)

summary_columns = ['detector_roi_setting', 'filename',
                   'observation_class', 'object', 'ut_datetime', 'observation_id', 'mode']
print(data[summary_columns])
print(data.columns)

detector_roi_setting         filename        observation_class   object         ut_datetime           observation_id    mode 
-------------------- ----------------------- ----------------- --------- -------------------------- ----------------- -------
          Full Frame S20210218S0029.fits.bz2            dayCal  Twilight 2021-02-17 23:39:53.700000 GS-2021A-DD-102-7      LS
                CCD2 S20210219S0072.fits.bz2               acq AT2020caa 2021-02-19 08:07:53.700000 GS-2021A-DD-102-8 imaging
       Central Stamp S20210219S0073.fits.bz2               acq AT2020caa 2021-02-19 08:09:47.700000 GS-2021A-DD-102-8 imaging
       Central Stamp S20210219S0074.fits.bz2               acq AT2020caa 2021-02-19 08:11:24.200000 GS-2021A-DD-102-8 imaging
          Full Frame S20210219S0075.fits.bz2           science AT2020caa 2021-02-19 08:14:01.700000 GS-2021A-DD-102-9      LS
          Full Frame S20210219S0076.fits.bz2           science AT2020caa 2021-02-19 08:29:40.200000 GS-2021A-DD-102-9 

In [72]:
# URLs and local paths for downloading the program data in bulk (as tar archives)
archive_download = "https://archive.gemini.edu/download"
program = "GS-2021A-DD-102"
local_dir = f"{Path.cwd().as_posix()}/GMOS_observations"

# OBJECT frames only: the calibration files are downloaded separately (see below)
url = f"{archive_download}/canonical/{program}/OBJECT"
tar_local = f"{local_dir}/gmos.tar"

# associated calibration files
url_cal = f"{archive_download}/associated_calibrations/canonical/{program}"
tar_cal_local = f"{local_dir}/gmos_cal.tar"

In [73]:
# Plan: use the known observation ID and the time of observation to select the standard
# star observed closest in time from the table above, then use that standard star's
# observation ID to pull the data required for its reduction.
# NOTE(review): `in` yields a single True/False for the whole column, not a per-row
# mask -- confirm whether a row-wise selection of 'acq' entries was intended here.
observation_classes = data['observation_class']
exclude = 'acq' in observation_classes

In [96]:
# Row by row: does the 'types' entry contain 'ACQUISITION'?
[('ACQUISITION' in type_entry) for type_entry in data['types'].data]

[False, True, True, True, False, False, False, True, True, True, True, False]

In [99]:
# Single True/False: is any row's observation_class equal to 'acq'?
any(obs_class == 'acq' for obs_class in data['observation_class'].data)

True

#### A note about associated calibrations for a particular program: unlike the program/object files (see above), these files have no corresponding jsonsummary API *feature*.

In [14]:
# # Download the OBJECT files for the program
# with urllib.request.urlopen(url) as response:
#     with open(tar_local, 'wb') as out_file:
#         shutil.copyfileobj(response, out_file)

In [13]:
# # untar the file
# shutil.unpack_archive(tar_local, Path(tar_local).parent) # destination is the parent directory of the tar (see above)
# os.remove(tar_local)

In [12]:
# # Download the associated calibration files for the program 
# with urllib.request.urlopen(url_cal) as response:
#     with open(tar_cal_local, 'wb') as out_file:
#         shutil.copyfileobj(response, out_file)

In [11]:
# # untar the file 
# shutil.unpack_archive(tar_cal_local, Path(tar_cal_local).parent) # destination is the parent directory of the tar
# os.remove(tar_cal_local)

In [10]:
# # Let's decompress the .bz2 fits files
# subp = subprocess.Popen(('bunzip2 {0}'.format(str(Path(tar_local).parent) + '/*.bz2')), shell=True)

### Let's inspect the metadata of these fits files using astrodata

In [9]:
# for F in list(Path(tar_local).parent.glob('*.fits')):
#     ad = astrodata.open(F)
#     if ad.tags.intersection({'ACQUISITION'}):
#         continue
#     print (F.name, ad.tags)

### Create file lists

In [8]:
# #all_files = glob.glob('../gmos/gmosls_tutorial/playdata/*.fits')
# all_files = [str(pp) for pp in list(Path(tar_local).parent.glob('*.fits'))]
# all_files.sort()
# print (f'The total number of files is {len(all_files)}')

In [7]:
# # for bias
# all_biases = dataselect.select_data(all_files, tags=['BIAS', 'UNPREPARED'])
# for bias in all_biases:
#     ad = astrodata.open(bias)
#     print(bias, '  ', ad.detector_roi_setting())

In [1]:
# # bias list for spectrophotometric standard (central spectrum ROI)
# biasstd = dataselect.select_data(
#     all_files,
#     ['BIAS', 'UNPREPARED'],
#     [],
#     dataselect.expr_parser('detector_roi_setting=="Central Spectrum"')
# )
# print (f"The total number of bias files for standard is {len(biasstd)}")


# # bias list for science target (full frame ROI)
# biassci = dataselect.select_data(
#     all_files,
#     ['BIAS', 'UNPREPARED'],
#     [],
#     dataselect.expr_parser('detector_roi_setting=="Full Frame"')
# )
# print (f"The total number of bias files for science is {len(biassci)}")

In [14]:
# confirm the bias groupings
# for bias in biassci:
#     ad = astrodata.open(bias)
#     print(bias, '  ', ad.detector_roi_setting())

In [6]:
# # flats
# flats = dataselect.select_data(all_files, ['FLAT','UNPREPARED'])
# print (f'Total number of flat files is {len(flats)}')

In [5]:
# # arcs
# arcs = dataselect.select_data(all_files, ['ARC', 'UNPREPARED'])
# print (f'Total number of arcs files is {len(arcs)}')

# for F in arcs:
#     ad = astrodata.open(F)
#     print (f'Central wavelength for arc {F} is {ad.central_wavelength()} m')

In [4]:
# # spectrophotometric standard
# stdstar = dataselect.select_data(all_files, ['STANDARD', 'UNPREPARED', 'SPECT'])
# print (f'Total number of standard star files is {len(stdstar)}')

# for F in stdstar:
#     ad = astrodata.open(F)
#     print (f'Central wavelegnth for stdstar {F} is {ad.central_wavelength()} m, {ad.object()}')

In [3]:
# for F in stdstar:
#     ad = astrodata.open(F)
#     print (f'{F}\n Tags:\n{ad.tags}\n Descriptors:\n{ad.descriptors}\n')
#     print (f'Central wavelength is {ad.central_wavelength()} m')

In [2]:
# # science target list
# all_science = dataselect.select_data(all_files, ['UNPREPARED', 'SPECT'], ['CAL'])
# for sci in all_science:
#     ad = astrodata.open(sci)
#     print(sci, '  ', ad.object(), ad.tags, ad.central_wavelength())

### Download the data from GOA

In [11]:
# if not Path("./data_dump/").exists():
#     os.mkdir("./data_dump/")

In [12]:
# obs_id = "GN-2019A-Q-901-74"
# url = f"https://archive.gemini.edu/download/canonical/{obs_id}/OBJECT" # adding OBJECT selection criterion as cal files are downloaded separately (see below)
# tar_local = "./data_dump/gmos.tar"

# # for associated calibration files
# url_cal = f"https://archive.gemini.edu/download/associated_calibrations/canonical/{obs_id}"
# tar_cal_local = "./data_dump/gmos_cal.tar"

In [13]:
# # Download the OBJECT files for the program
# with urllib.request.urlopen(url) as response:
#     with open(tar_local, 'wb') as out_file:
#         shutil.copyfileobj(response, out_file)
        
# # untar the file
# shutil.unpack_archive(tar_local, Path(tar_local).parent) # destination is the parent directory of the tar (see above)
# os.remove(tar_local)

In [14]:
# # Download the associated calibration files for the program 
# with urllib.request.urlopen(url_cal) as response:
#     with open(tar_cal_local, 'wb') as out_file:
#         shutil.copyfileobj(response, out_file)
        
# # untar the file 
# shutil.unpack_archive(tar_cal_local, Path(tar_cal_local).parent) # destination is the parent directory of the tar
# os.remove(tar_cal_local)

In [15]:
# # Let's decompress the .bz2 fits files
# subp = subprocess.Popen(('bunzip2 {0}'.format(str(Path(tar_local).parent) + '/*.bz2')), shell=True)

In [16]:
# # Inspect the metadata 
# for F in list(Path(tar_local).parent.glob('*.fits')):
#     ad = astrodata.open(F)
#     if ad.tags.intersection({'ACQUISITION'}):
#         continue
#     print (F.name, ad.tags)