In [1]:
# Notebook authorship metadata
__author__ = 'Monika Soraisam'
__email__ = 'monika.soraisam@noirlab.edu'

In [2]:
import bz2
import glob
import json
import os
import shutil
import subprocess
import urllib
import urllib.request
from pathlib import Path

import astrodata
import astropy.units as uts
import gemini_instruments
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy.table import vstack
from astropy.time import Time
from gempy.adlibrary import dataselect

# for programmatically querying the GOA with astroquery
from astroquery.gemini import Observations

In [3]:
# Construct the URL for getting a summary of the files
## Example program_id used here = GS-2021A-DD-102
## Specific observation ID of interest for the above program = GS-2021A-DD-102-9 (https://archive.gemini.edu/searchform/GS-2021A-DD-102-9/cols=CTOWBEQ/not_site_monitoring/NotFail/notengineering#)

## At Gemini, every successful observing proposal is assigned a program ID. Now, the program may have one target or multiple targets, and for each target, one set or several sets of observations.
## E.g., say we have a program to observe two supernovae X and Y. For X, say we took observations with the instrument GMOS on day 1 and 2. For Y, say we took only one observation on day 3 with instrument NIRI.
## Every set of observations is then given an observation ID, typically the program ID suffixed with some running numbers.


# Let's get a 'summary' of the observations taken for this observation ID
# Open the URL and fetch the JSON document text into a string
obs_id = 'GS-2021A-DD-102-9'
url = f"https://archive.gemini.edu/jsonsummary/canonical/NotFail/{obs_id}" # can use either program ID or observation ID. With the former, you get all the observations under that program

# The `with` block closes the HTTP response even if reading it raises
with urllib.request.urlopen(url) as u:
    jsondoc = u.read()

# Decode the JSON
#files_summary = json.loads(jsondoc)  # dict format
files_summary = pd.DataFrame.from_dict(json.loads(jsondoc)) # pandas dataframe

In [6]:
# Raise the column display limit to 100 so wide dataframes are shown without elided columns
pd.options.display.max_columns = 100

In [7]:
# Display the summary dataframe built from the jsonsummary response (one row per file)
files_summary

Unnamed: 0,name,filename,path,compressed,file_size,data_size,file_md5,data_md5,lastmod,mdready,entrytime,size,md5,pending_ingest,program_id,engineering,science_verification,procmode,calibration_program,observation_id,data_label,telescope,instrument,ut_datetime,local_time,observation_type,observation_class,object,ra,dec,azimuth,elevation,cass_rotator_pa,airmass,filter_name,exposure_time,disperser,camera,central_wavelength,wavelength_band,focal_plane_mask,detector_binning,detector_gain_setting,detector_roi_setting,detector_readspeed_setting,detector_welldepth_setting,detector_readmode_setting,spectroscopy,mode,adaptive_optics,laser_guide_star,wavefront_sensor,gcal_lamp,raw_iq,raw_cc,raw_wv,raw_bg,requested_iq,requested_cc,requested_wv,requested_bg,qa_state,release,reduction,types,phot_standard
0,S20210219S0075.fits,S20210219S0075.fits.bz2,,True,4697027,14846400,241fbb9fd26f01d6c5c75bb34e3b277f,29367a37bb40b628a1a38db411aeb5c4,2021-02-19 08:55:35.535194+00:00,True,2021-02-19 08:55:35.560699+00:00,4697027,241fbb9fd26f01d6c5c75bb34e3b277f,False,GS-2021A-DD-102,False,False,,False,GS-2021A-DD-102-9,GS-2021A-DD-102-9-001,Gemini-South,GMOS-S,2021-02-19 08:14:01.700000,05:14:01.700000,OBJECT,science,AT2020caa,214.827395,0.057922,24.30986,57.498224,-189.181782,1.177,GG455,900.0,R400,GMOS-S,0.65,r,1.0arcsec,2x2,low,Full Frame,slow,,Classic,True,LS,False,False,PWFS2,,70,50,,50,70,70,100,80,Pass,2021-08-19,RAW,"{'SOUTH', 'SIDEREAL', 'UNPREPARED', 'SPECT', '...",
1,S20210219S0076.fits,S20210219S0076.fits.bz2,,True,4679182,14846400,976ccf6dbbbf23fe181f778f26fee548,28332d096ac069d692f2aa374f5e17e6,2021-02-19 08:55:24.760125+00:00,True,2021-02-19 08:55:24.782954+00:00,4679182,976ccf6dbbbf23fe181f778f26fee548,False,GS-2021A-DD-102,False,False,,False,GS-2021A-DD-102-9,GS-2021A-DD-102-9-002,Gemini-South,GMOS-S,2021-02-19 08:29:40.200000,05:29:39.700000,OBJECT,science,AT2020caa,214.826553,0.053842,17.260807,58.708008,-183.201241,1.165,GG455,900.0,R400,GMOS-S,0.65,r,1.0arcsec,2x2,low,Full Frame,slow,,Classic,True,LS,False,False,PWFS2,,70,50,,50,70,70,100,80,Pass,2021-08-19,RAW,"{'GEMINI', 'UNPREPARED', 'GMOS', 'SOUTH', 'SPE...",
2,S20210219S0077.fits,S20210219S0077.fits.bz2,,True,7639332,14849280,f59dc221d3d328a05931f01d1075096a,1fa46343d9971b6d93d3d2862c7cb01f,2021-02-19 08:55:13.982055+00:00,True,2021-02-19 08:55:14.011915+00:00,7639332,f59dc221d3d328a05931f01d1075096a,False,GS-2021A-DD-102,False,False,,False,GS-2021A-DD-102-9,GS-2021A-DD-102-9-003,Gemini-South,GMOS-S,2021-02-19 08:45:27.700000,05:45:26.700000,FLAT,partnerCal,GCALflat,214.826553,0.053842,9.723564,59.509594,-176.737662,1.16,GG455,1.0,R400,GMOS-S,0.65,r,1.0arcsec,2x2,low,Full Frame,slow,,Classic,True,LS,False,False,PWFS2,QH,70,50,,50,70,70,100,80,Pass,2021-02-19,RAW,"{'GCALFLAT', 'SOUTH', 'LS', 'FLAT', 'RAW', 'GM...",
3,S20210219S0078.fits,S20210219S0078.fits.bz2,,True,7706318,14849280,1e6821e12375f9edd087453953dafe24,9d472c0911310f7ca54122554b125336,2021-02-19 08:55:00.256967+00:00,True,2021-02-19 08:55:00.288761+00:00,7706318,1e6821e12375f9edd087453953dafe24,False,GS-2021A-DD-102,False,False,,False,GS-2021A-DD-102-9,GS-2021A-DD-102-9-004,Gemini-South,GMOS-S,2021-02-19 08:46:19.700000,05:46:18.700000,FLAT,partnerCal,GCALflat,214.826553,0.053842,9.300617,59.540915,-176.373689,1.16,GG455,1.0,R400,GMOS-S,0.66,r,1.0arcsec,2x2,low,Full Frame,slow,,Classic,True,LS,False,False,PWFS2,QH,70,50,,50,70,70,100,80,Pass,2021-02-19,RAW,"{'GCALFLAT', 'SOUTH', 'LS', 'FLAT', 'RAW', 'GM...",
4,S20210219S0079.fits,S20210219S0079.fits.bz2,,True,4734016,14846400,16e787a039618485ab980581b0d26d47,8719c470e2685389018542cfd3448f8f,2021-02-19 09:03:40.996314+00:00,True,2021-02-19 09:03:41.023211+00:00,4734016,16e787a039618485ab980581b0d26d47,False,GS-2021A-DD-102,False,False,,False,GS-2021A-DD-102-9,GS-2021A-DD-102-9-005,Gemini-South,GMOS-S,2021-02-19 08:47:11.700000,05:47:11.700000,OBJECT,science,AT2020caa,214.827395,0.057922,8.8766,59.566794,-176.009024,1.157,GG455,900.0,R400,GMOS-S,0.66,r,1.0arcsec,2x2,low,Full Frame,slow,,Classic,True,LS,False,False,PWFS2,,70,50,,50,70,70,100,80,Pass,2021-08-19,RAW,"{'GEMINI', 'UNPREPARED', 'GMOS', 'SOUTH', 'SPE...",


In [7]:
# List the metadata columns available in the summary dataframe
files_summary.columns

Index(['name', 'filename', 'path', 'compressed', 'file_size', 'data_size',
       'file_md5', 'data_md5', 'lastmod', 'mdready', 'entrytime', 'size',
       'md5', 'pending_ingest', 'program_id', 'engineering',
       'science_verification', 'procmode', 'calibration_program',
       'observation_id', 'data_label', 'telescope', 'instrument',
       'ut_datetime', 'local_time', 'observation_type', 'observation_class',
       'object', 'ra', 'dec', 'azimuth', 'elevation', 'cass_rotator_pa',
       'airmass', 'filter_name', 'exposure_time', 'disperser', 'camera',
       'central_wavelength', 'wavelength_band', 'focal_plane_mask',
       'detector_binning', 'detector_gain_setting', 'detector_roi_setting',
       'detector_readspeed_setting', 'detector_welldepth_setting',
       'detector_readmode_setting', 'spectroscopy', 'mode', 'adaptive_optics',
       'laser_guide_star', 'wavefront_sensor', 'gcal_lamp', 'raw_iq', 'raw_cc',
       'raw_wv', 'raw_bg', 'requested_iq', 'requested_cc', 'reque

In [8]:
# Inspect some useful keywords of the files
# enumerate() supplies the 1-based counter, so the loop does not rely on the
# index labels doubling as row positions (which only holds for a default RangeIndex)
for row_number, (_, F) in enumerate(files_summary.iterrows(), start=1):
    print (row_number, F['filename'], F['observation_type'], F['observation_class'], F['qa_state'], F['object'], F['exposure_time'])

1 S20210219S0075.fits.bz2 OBJECT science Pass AT2020caa 900.0
2 S20210219S0076.fits.bz2 OBJECT science Pass AT2020caa 900.0
3 S20210219S0077.fits.bz2 FLAT partnerCal Pass GCALflat 1.0
4 S20210219S0078.fits.bz2 FLAT partnerCal Pass GCALflat 1.0
5 S20210219S0079.fits.bz2 OBJECT science Pass AT2020caa 900.0


#### A note about associated calibrations for a particular program: the jsonsummary API offers no equivalent *feature* for these files, unlike for the program/object files.

**Let's download the observation data along with the required calibration files. A nice thing about the Gemini archive is that it does a smart association of the latter for a specific observation ID, so one doesn't need to scour for them.**

In [9]:
# Make sure the folder that will hold the downloaded data is present
if Path("../../goa_download/").exists():
    print ("folder to store data already exists")
else:
    os.mkdir("../../goa_download/")
    print ("folder created to store data")

folder to store data already exists


In [10]:
# Remote endpoints for the bulk (tar archive) downloads of this observation ID:
# the OBJECT files themselves, and the calibration files the archive associates with them
url = f"https://archive.gemini.edu/download/canonical/NotFail/{obs_id}/OBJECT"
url_cal = f"https://archive.gemini.edu/download/associated_calibrations/canonical/NotFail/{obs_id}"

# Local paths of the two tar archives, built from the current working directory
tar_local = f"{Path.cwd().as_posix()}/../../goa_download/gmos.tar"
tar_cal_local = f"{Path.cwd().as_posix()}/../../goa_download/gmos_cal.tar"

print(f"Raw data for observation ID {obs_id} will be stored here {tar_local}")
print(f"Associated calibration data for observation ID {obs_id} will be stored here {tar_cal_local}")

Raw data for observation ID GS-2021A-DD-102-9 will be stored here /Users/monika.soraisam/Desktop/tomdev/real_goats/goats/scripts/../../goa_download/gmos.tar
Associated calibration data for observation ID GS-2021A-DD-102-9 will be stored here /Users/monika.soraisam/Desktop/tomdev/real_goats/goats/scripts/../../goa_download/gmos_cal.tar


In [11]:
# Stream the tar archive of OBJECT files from the archive straight to disk
with urllib.request.urlopen(url) as response, open(tar_local, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)

# Unpack the archive into the folder that holds it, then discard the tar itself
shutil.unpack_archive(tar_local, extract_dir=Path(tar_local).parent)
os.remove(tar_local)

print (f"Download of {tar_local} completed")

Download of /Users/monika.soraisam/Desktop/tomdev/real_goats/goats/scripts/../../goa_download/gmos.tar completed


In [12]:
# Stream the tar archive of associated calibration files for the observation ID to disk
with urllib.request.urlopen(url_cal) as response, open(tar_cal_local, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)

# Unpack the archive into the folder that holds it, then discard the tar itself
shutil.unpack_archive(tar_cal_local, extract_dir=Path(tar_cal_local).parent)
os.remove(tar_cal_local)

print (f"Download of {tar_cal_local} completed")

Download of /Users/monika.soraisam/Desktop/tomdev/real_goats/goats/scripts/../../goa_download/gmos_cal.tar completed


In [13]:
# Show what the two unpacked archives left in the download folder
downloaded_files = os.listdir("../../goa_download/")
print (f'The downloaded files: \n {downloaded_files}')

The downloaded files: 
 ['S20210220S0363.fits.bz2', 'S20210220S0362.fits.bz2', 'md5sums.txt', 'S20210211S0072.fits.bz2', 'S20210221S0278.fits.bz2', 'S20210221S0280.fits.bz2', 'S20210218S0135.fits.bz2', 'S20210214S0042.fits.bz2', 'S20210213S0060.fits.bz2', 'S20210217S0120.fits.bz2', 'S20210217S0121.fits.bz2', 'S20210223S0105.fits.bz2', 'S20210223S0104.fits.bz2', 'S20210223S0108.fits.bz2', 'S20210217S0119.fits.bz2', 'S20210217S0118.fits.bz2', 'S20210221S0286.fits.bz2', 'S20210213S0058.fits.bz2', 'S20210213S0059.fits.bz2', 'S20210218S0138.fits.bz2', 'S20210218S0139.fits.bz2', 'S20210219S0121.fits.bz2', 'S20210219S0120.fits.bz2', 'S20210214S0038.fits.bz2', 'S20210214S0039.fits.bz2', 'S20210211S0068.fits.bz2', 'S20210211S0069.fits.bz2', 'S20210219S0102.fits.bz2', 'S20210220S0364.fits.bz2', 'S20210220S0365.fits.bz2', 'S20210216S0123.fits.bz2', 'S20210216S0122.fits.bz2', 'S20210219S0077.fits.bz2', 'S20210219S0076.fits.bz2', 'S20210223S0106.fits.bz2', 'S20210223S0107.fits.bz2', 'S20210217S0117

In [14]:
# Note that the downloaded fits files are compressed (as bz2 files) as listed above. Let's decompress them.
# The standard-library bz2 module is used instead of shelling out to `bunzip2`: it needs no external
# binary, works when the folder path contains spaces, and raises if a file cannot be decompressed.
for compressed_path in sorted(Path(tar_local).parent.glob('*.bz2')):
    # with_suffix('') strips only the trailing ".bz2" (e.g. X.fits.bz2 -> X.fits)
    with bz2.open(compressed_path, 'rb') as src, open(compressed_path.with_suffix(''), 'wb') as dst:
        shutil.copyfileobj(src, dst)
    compressed_path.unlink()  # drop the compressed copy once its contents are written out
print ("Downloaded fits files decompressed")

Downloaded fits files decompressed


### Let's inspect the metadata of these fits files using astrodata -- a library created by the DRAGONS team for working with fits files 

In [15]:
# Print the file name, object name and astrodata tags of every decompressed FITS file;
# the tags give an idea of what file is what.
# (To skip acquisition frames, test `ad.tags.intersection({'ACQUISITION'})` and `continue`.)
for fits_path in Path(tar_local).parent.glob('*.fits'):
    ad = astrodata.open(fits_path)
    print (fits_path.name, ad.object(), ad.tags)

S20210221S0286.fits Bias {'BIAS', 'GEMINI', 'UNPREPARED', 'SOUTH', 'CAL', 'GMOS', 'RAW'}
S20210219S0121.fits Bias {'BIAS', 'GEMINI', 'UNPREPARED', 'SOUTH', 'CAL', 'GMOS', 'RAW'}
S20210211S0071.fits Bias {'BIAS', 'GEMINI', 'SOUTH', 'AZEL_TARGET', 'RAW', 'UNPREPARED', 'CAL', 'NON_SIDEREAL', 'AT_ZENITH', 'GMOS'}
S20210219S0101.fits CuAr {'GEMINI', 'SOUTH', 'SPECT', 'RAW', 'UNPREPARED', 'CAL', 'ARC', 'LS', 'GMOS'}
S20210213S0059.fits Bias {'BIAS', 'GEMINI', 'SOUTH', 'AZEL_TARGET', 'RAW', 'UNPREPARED', 'CAL', 'NON_SIDEREAL', 'AT_ZENITH', 'GMOS'}
S20210217S0119.fits Bias {'BIAS', 'GEMINI', 'UNPREPARED', 'SOUTH', 'CAL', 'GMOS', 'RAW'}
S20210223S0105.fits Bias {'BIAS', 'GEMINI', 'SOUTH', 'AZEL_TARGET', 'RAW', 'UNPREPARED', 'CAL', 'NON_SIDEREAL', 'AT_ZENITH', 'GMOS'}
S20210223S0104.fits Bias {'BIAS', 'GEMINI', 'SOUTH', 'AZEL_TARGET', 'RAW', 'UNPREPARED', 'CAL', 'NON_SIDEREAL', 'AT_ZENITH', 'GMOS'}
S20210217S0118.fits Bias {'BIAS', 'GEMINI', 'UNPREPARED', 'SOUTH', 'CAL', 'GMOS', 'RAW'}
S20210213

**Note that DRAGONS reduction happens per observation ID. When using an observation ID to retrieve the data from the archive, *sometimes* the *associated calibration files* don't include the corresponding standard star observations -- generally important for spectroscopic observations. What I've noticed is that the latter are generally accessible at the program ID level, but that involves some filtering to get the appropriate standard star if multiple standard stars were observed for the program.**

In [16]:
# The standard-star check only matters when the observation ID was taken in spectroscopy mode,
# so first establish the mode from the jsonsummary.

## keep only the science frames (observation_type OBJECT, observation_class science)
is_science_frame = (files_summary['observation_type'] == "OBJECT") & (files_summary['observation_class'] == "science")
check_mode = files_summary[is_science_frame]

all_spectroscopy = np.all(check_mode['spectroscopy'].values==True)
print (f"Is {obs_id} in spectroscopy mode? {all_spectroscopy}")

Is GS-2021A-DD-102-9 in spectroscopy mode? True


In [17]:
## Look through the 'downloaded' files for a standard star: an OBJECT frame whose
## observation_class is progCal or partnerCal. Stop at the first match.
calibration_classes = ("progCal", "partnerCal")
specphot_exists = False
for fits_path in Path(tar_local).parent.glob('*.fits'):
    ad = astrodata.open(fits_path)
    if ad.observation_class() not in calibration_classes:
        continue
    if ad.observation_type() == 'OBJECT':
        specphot_exists = True
        break

print (f"specphot exists among downloaded raw data: {specphot_exists}")

specphot exists among downloaded raw data: False


**So, unfortunately, the default calibrations that the archive links to the example observation ID above don't include the specphot (i.e., the standard star). We therefore need to find an appropriate one ourselves.**

## The block below queries the archive to find *possible* specphots, a.k.a. standard stars, for the spectroscopic observations

In [18]:
# Let's use astroquery to get a summary of possible standard stars:
# OBJECT frames of class partnerCal / progCal taken with the same instrument and mode,
# within one day either side of the first science frame.
first_science_time = Time(check_mode.ut_datetime.values[0])
search_window = ((first_science_time - 1.0 * uts.day).to_datetime(),
                 (first_science_time + 1.0 * uts.day).to_datetime())

# one query per calibration class, in the order partnerCal then progCal
per_class_results = [
    Observations.query_criteria(
        instrument = check_mode['instrument'].values[0],
        observation_class = calibration_class,
        observation_type = "OBJECT",
        utc_date = search_window,
        mode = check_mode['mode'].values[0]
    )
    for calibration_class in ("partnerCal", "progCal")
]
data1, data2 = per_class_results

query_result = vstack(per_class_results)
columns_to_show = ['detector_roi_setting','filename',
                   'observation_class', 'object', 'ut_datetime',
                   'observation_id', 'program_id', 'mode']
print (query_result[columns_to_show])

detector_roi_setting         filename        observation_class  object ...   observation_id     program_id   mode
-------------------- ----------------------- ----------------- ------- ... ----------------- --------------- ----
    Central Spectrum S20210219S0084.fits.bz2        partnerCal LTT6248 ... GS-2021A-DD-102-5 GS-2021A-DD-102   LS
    Central Spectrum S20210219S0089.fits.bz2        partnerCal LTT6248 ...  GS-2021A-Q-301-5  GS-2021A-Q-301   LS


In [19]:
## The first specphot above has the same 'program ID' as our science case. We will need to download the raw data for the observation ID of this specphot --
## note that the specphot also comes with its own associated calibrations. Normally, when the specphot is already linked to the science data (i.e., no missing-specphot case),
## even the calibration files of the specphot would have been pulled automatically from the archive with the science data

# keep the candidates from our own program and take the observation ID of the first one
same_program = query_result['program_id'] == check_mode['program_id'].values[0]
specphot_obs_id = query_result[same_program]['observation_id'].data[0]
specphot_obs_id

'GS-2021A-DD-102-5'

## Helper download function

In [4]:
## Below I'm creating a helper function encompassing the whole download process

def download_public_goa(oid=None, dest=None):
    """
    Download the public raw OBJECT files of an observation ID, together with
    its associated calibration files, from the Gemini archive.

    Both sets are fetched as tar archives, unpacked into `dest` and the tar
    files removed. Every ``*.bz2`` file found in `dest` is then decompressed
    in place (the compressed copy is deleted).

    Parameters
    ----------
    oid: str
        Observation ID
    dest: str
        Path of the local folder to store the downloaded data. It is created,
        including any missing parent folders, if it does not exist.

    Returns
    -------
    None
        If `oid` or `dest` is None, a message is printed and nothing is done.
    """
    if oid is None or dest is None:
        print ("Required parameters None; doing nothing")
        return

    dest_dir = Path(dest)
    if not dest_dir.exists():
        # parents=True: a nested destination no longer makes the call fail
        dest_dir.mkdir(parents=True, exist_ok=True)
        print (f"{dest} created to store data")
    else:
        print (f"{dest} to store data already exists")

    # Let's define the URLs and local paths for downloading the necessary program data in bulk (as a tar archive)
    url = f"https://archive.gemini.edu/download/canonical/NotFail/{oid}/OBJECT"
    tar_local = dest_dir.as_posix() + "/gmos.tar"
    print(f"Raw data for observation ID {oid} will be stored here {tar_local}")

    # for associated calibration files
    url_cal = f"https://archive.gemini.edu/download/associated_calibrations/canonical/NotFail/{oid}"
    tar_cal_local = dest_dir.as_posix() + "/gmos_cal.tar"
    print(f"Associated calibration data for observation ID {oid} will be stored here {tar_cal_local}")

    # Same steps for both archives: OBJECT files first, then the associated calibrations
    for archive_url, archive_path in ((url, tar_local), (url_cal, tar_cal_local)):
        with urllib.request.urlopen(archive_url) as response, open(archive_path, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)

        # untar into the destination folder, then drop the tar
        shutil.unpack_archive(archive_path, dest_dir)
        os.remove(archive_path)

        print (f"Download of {archive_path} completed")

    # Note that the downloaded fits files are compressed (as bz2 files). Let's decompress them.
    # The bz2 module replaces the former `bunzip2` shell call: no external binary is needed,
    # paths containing spaces work, and a failed decompression raises instead of passing silently.
    for compressed_path in sorted(dest_dir.glob('*.bz2')):
        # with_suffix('') strips only the trailing ".bz2" (e.g. X.fits.bz2 -> X.fits)
        with bz2.open(compressed_path, 'rb') as src, open(compressed_path.with_suffix(''), 'wb') as dst:
            shutil.copyfileobj(src, dst)
        compressed_path.unlink()
    print ("Downloaded fits files decompressed")


## Helper function to check if specphot for a spectroscopic observation ID has been included in the downloaded files from the archive

In [5]:
def check_specphot_present(summary=None, data_dir=None):
    """
    Report whether a specphot (standard star) frame is among the downloaded files.

    The science frames (observation_type OBJECT, observation_class science) of
    the summary are inspected first; unless all of them are flagged as
    spectroscopy, a message is printed and nothing else is done. Otherwise the
    ``*.fits`` files in the data folder are opened with astrodata until one is
    found with observation_type OBJECT and observation_class progCal or
    partnerCal, and the outcome is printed.

    Parameters
    ----------
    summary: pandas.DataFrame, optional
        jsonsummary dataframe of the observation ID. Defaults to the
        notebook-level `files_summary`.
    data_dir: str or pathlib.Path, optional
        Folder holding the decompressed fits files. Defaults to the parent
        folder of the notebook-level `tar_local`.

    Returns
    -------
    None
        The result is only printed.
    """
    if summary is None:
        # fall back to the notebook-level dataframe so existing no-argument calls keep working
        summary = files_summary

    check_mode = summary.query('observation_type == "OBJECT" and observation_class == "science"') ## filtering the science observations
    if not np.all(check_mode['spectroscopy'].values==True):
        print ("This is not a spectroscopic observation. Nothing to do")
        return

    if data_dir is None:
        # resolved only when needed, so the notebook-level `tar_local` is required only for no-argument calls
        data_dir = Path(tar_local).parent

    ## Now, let's search the 'downloaded' progCal and partnerCal files for a standard star. Note that the latter always has observation_class as progCal or partnerCal
    specphot_exists = False
    for fits_path in Path(data_dir).glob('*.fits'):
        ad = astrodata.open(fits_path)
        if ad.observation_class() in ("progCal", "partnerCal") and ad.observation_type() == 'OBJECT':
            specphot_exists = True
            break

    print (f"specphot exists among downloaded raw data: {specphot_exists}")

In [22]:
# Run the helper on the files downloaded so far
check_specphot_present()

specphot exists among downloaded raw data: False


### Let's download the raw data for the specphot identified above, now using the helper function

In [26]:
# Fetch the specphot observation ID (OBJECT files plus its associated calibrations) into the same download folder
download_public_goa(oid=specphot_obs_id, dest="../../goa_download/")

../../goa_download/ to store data already exists
Raw data for observation ID GS-2021A-DD-102-5 will be stored here ../../goa_download/gmos.tar
Associated calibration data for observation ID GS-2021A-DD-102-5 will be stored here ../../goa_download/gmos_cal.tar
Download of ../../goa_download/gmos.tar completed
Download of ../../goa_download/gmos_cal.tar completed
Downloaded fits files decompressed


### Let's confirm that we are NO longer missing the specphot data

In [27]:
# Re-run the helper now that the specphot observation has been downloaded
check_specphot_present()

specphot exists among downloaded raw data: True


# For accessing proprietary data

## All the functionalities for downloading data are similar for proprietary data, except for the need to provide the GOA session cookie (obtained while the user is logged in) as a `Cookie` header of the download request. See the *download_proprietary_goa* function below.

Note that the jsonsummary of proprietary data, however, is still public. 

**Let's use observation ID `GS-2022B-Q-234-18`, which is still in its proprietary phase.**

In [3]:
# Let's get the jsonsummary of the above proprietary observation ID
obs_id = 'GS-2022B-Q-234-18'
url = f"https://archive.gemini.edu/jsonsummary/canonical/NotFail/{obs_id}" # can use either program ID or observation ID. With the former, you get all the observations under that program

# The `with` block closes the HTTP response even if reading it raises
with urllib.request.urlopen(url) as u:
    jsondoc = u.read()

# Decode the JSON
#files_summary = json.loads(jsondoc)  # dict format
files_summary = pd.DataFrame.from_dict(json.loads(jsondoc)) # pandas dataframe

In [4]:
# Display the summary dataframe of the proprietary observation ID (one row per file)
files_summary

Unnamed: 0,name,filename,path,compressed,file_size,data_size,file_md5,data_md5,lastmod,mdready,...,raw_bg,requested_iq,requested_cc,requested_wv,requested_bg,qa_state,release,reduction,types,phot_standard
0,S20221011S0017.fits,S20221011S0017.fits.bz2,,True,15410847,67201920,563462ae151e860df8ecacca804398c8,65e08819cb907b29cbfc1aee28cd728d,2022-10-11 07:57:30.848996+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'SIDEREAL', 'GEMINI', 'SOUTH', 'RAW', 'GSAOI'...",
1,S20221011S0018.fits,S20221011S0018.fits.bz2,,True,15407139,67201920,9aa7f539c6c974b2985ea7970f6b34c1,8c65fa83bc06f2501a08e5cb35efbdc2,2022-10-11 07:57:18.156915+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
2,S20221011S0019.fits,S20221011S0019.fits.bz2,,True,15410921,67201920,f7874ba6899ac6576a48384ceca58938,091a15d2db5d70f8d5c0cd34070d7597,2022-10-11 07:57:07.332847+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'SIDEREAL', 'GEMINI', 'SOUTH', 'RAW', 'GSAOI'...",
3,S20221011S0020.fits,S20221011S0020.fits.bz2,,True,15406151,67201920,de1afe8cd9ed3512853d810d2085dfb9,a9cf382f993cd7e95d24e9fd9ec55b82,2022-10-11 07:56:54.313765+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
4,S20221011S0021.fits,S20221011S0021.fits.bz2,,True,15402840,67201920,f47d0c6293d863387d8e2770481e743f,6ef31ea7395c156470dd2593fcdb1d88,2022-10-11 07:56:42.149688+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
5,S20221011S0022.fits,S20221011S0022.fits.bz2,,True,15397097,67201920,f54a0fbe88dad0ba1010e62759cbb7ef,a3974be64fe9666662aae6586321b654,2022-10-11 07:56:30.332613+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
6,S20221011S0023.fits,S20221011S0023.fits.bz2,,True,17829908,67201920,9f15ccdfb2a2675d4c65604ba0490008,f313b5d2189977e4bb2f2db11c82196e,2022-10-11 07:56:18.301537+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
7,S20221011S0024.fits,S20221011S0024.fits.bz2,,True,17822129,67201920,6eec84d062bbdb804342e833f8e50e14,1aed90c20da7e54c45550773f0252883,2022-10-11 07:56:06.581463+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
8,S20221011S0025.fits,S20221011S0025.fits.bz2,,True,17819853,67201920,223addeea2371a2229a22c5ee614fdd3,4033270ad805118739b86ab57d2684f5,2022-10-11 07:55:54.732388+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'GSAOI', 'IMAGE', 'GEMINI', 'UNPREPARED', 'SI...",
9,S20221011S0029.fits,S20221011S0029.fits.bz2,,True,17967717,67201920,8536b956f023b453d1c597e0f25b5034,7376f2e3e1f11af7c855bc9e6ed2d130,2022-10-11 07:55:41.179303+00:00,True,...,100,70,50,100,100,Usable,2023-10-11,RAW,"{'SIDEREAL', 'GEMINI', 'SOUTH', 'RAW', 'GSAOI'...",


In [5]:
## Below I'm creating a helper function encompassing the whole download process

def download_proprietary_goa(oid=None, dest=None, session_cookie=None):
    """
    Download the proprietary raw OBJECT files of an observation ID, together
    with its associated calibration files, from the Gemini archive.

    Each request carries the GOA session cookie in its ``Cookie`` header. Both
    sets are fetched as tar archives, unpacked into `dest` and the tar files
    removed. Every ``*.bz2`` file found in `dest` is then decompressed in place
    (the compressed copy is deleted).

    Parameters
    ----------
    oid: str
        Observation ID
    dest: str
        Path of the local folder to store the downloaded data. It is created,
        including any missing parent folders, if it does not exist.
    session_cookie: str
        GOA session cookie, while the user is logged into GOA

    Returns
    -------
    None
        If any of the three parameters is None, a message is printed and
        nothing is done.
    """
    if oid is None or dest is None or session_cookie is None:
        print ("Required parameters None; doing nothing")
        return

    dest_dir = Path(dest)
    if not dest_dir.exists():
        # parents=True: a nested destination no longer makes the call fail
        dest_dir.mkdir(parents=True, exist_ok=True)
        print (f"{dest} created to store data")
    else:
        print (f"{dest} to store data already exists")

    # Let's define the URLs and local paths for downloading the necessary program data in bulk (as a tar archive)
    url = f"https://archive.gemini.edu/download/canonical/NotFail/{oid}/OBJECT"
    tar_local = dest_dir.as_posix() + "/gmos.tar"
    print(f"Raw data for observation ID {oid} will be stored here {tar_local}")

    # for associated calibration files
    url_cal = f"https://archive.gemini.edu/download/associated_calibrations/canonical/NotFail/{oid}"
    tar_cal_local = dest_dir.as_posix() + "/gmos_cal.tar"
    print(f"Associated calibration data for observation ID {oid} will be stored here {tar_cal_local}")

    # authentication cookie sent with every download request
    cookies = f"gemini_archive_session={session_cookie}"

    # Same steps for both archives: OBJECT files first, then the associated calibrations
    for archive_url, archive_path in ((url, tar_local), (url_cal, tar_cal_local)):
        request = urllib.request.Request(archive_url, headers={'Cookie':cookies})
        with urllib.request.urlopen(request) as response, open(archive_path, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)

        # untar into the destination folder, then drop the tar
        shutil.unpack_archive(archive_path, dest_dir)
        os.remove(archive_path)

        print (f"Download of {archive_path} completed")

    # Note that the downloaded fits files are compressed (as bz2 files). Let's decompress them.
    # The bz2 module replaces the former `bunzip2` shell call: no external binary is needed,
    # paths containing spaces work, and a failed decompression raises instead of passing silently.
    for compressed_path in sorted(dest_dir.glob('*.bz2')):
        # with_suffix('') strips only the trailing ".bz2" (e.g. X.fits.bz2 -> X.fits)
        with bz2.open(compressed_path, 'rb') as src, open(compressed_path.with_suffix(''), 'wb') as dst:
            shutil.copyfileobj(src, dst)
        compressed_path.unlink()
    print ("Downloaded fits files decompressed")


In [33]:
# The GOA session cookie is a credential: read it from the GOA_SESSION_COOKIE environment
# variable rather than pasting it into the notebook. "blahblah==" is only a placeholder
# used when the variable is not set.
my_session_cookie = os.environ.get("GOA_SESSION_COOKIE", "blahblah==")

In [32]:
# Same download flow as for public data, authenticated with `my_session_cookie`
download_proprietary_goa(oid="GS-2022B-Q-234-18", dest="../../goa_download/", session_cookie=my_session_cookie)

../../goa_download/ to store data already exists
Raw data for observation ID GS-2022B-Q-234-18 will be stored here ../../goa_download/gmos.tar
Associated calibration data for observation ID GS-2022B-Q-234-18 will be stored here ../../goa_download/gmos_cal.tar
Download of ../../goa_download/gmos.tar completed
Download of ../../goa_download/gmos_cal.tar completed
Downloaded fits files decompressed
