<hr style="border:2px solid #0281c9"> </hr>

<img align="left" alt="ESO Logo" src="http://archive.eso.org/i/esologo.png">  

<div align="center">
  <h1 style="color: #0281c9; font-weight: bold;">ESO Science Archive</h1> 
  <h2 style="color: #0281c9; font-weight: bold;">Jupyter Notebooks</h2>
</div>

<hr style="border:2px solid #0281c9"> </hr>

## **Download all raw data for a given reduced data product**

The example below shows how to download all the raw data used to produce one of the reduced data products in the `195.B-0283` survey. 

---
**Note:** See `ESO_Introduction` and `ESO_Query` notebooks for more basic usage.

<hr style="border:2px solid #0281c9"> </hr>

In [1]:
# downloadURL(file_url[, dirname, filename, session]): Method to download a file given its URL,
# either anonymously or with a token.
# Returns: http status, filepath on disk (if successful)
import cgi
import os
import sys
import requests
def downloadURL(file_url, dirname='./data/', filename=None, session=None):
    """Download a file, either anonymously or authenticated, and store it on disk.

    Parameters
    ----------
    file_url : str
        URL of the file to download.
    dirname : str or None
        Directory in which the file is stored. It is created if it does not
        exist yet. If None, the file path is just the file name.
    filename : str or None
        Name of the file on disk. If None, the name is taken from the
        Content-Disposition response header or, failing that, from whatever
        follows the last '/' of the URL.
    session : object or None
        Object whose ``get(url, stream=True)`` performs the request (e.g. a
        requests session carrying a token). If None, an anonymous
        ``requests.get`` is issued.

    Returns
    -------
    tuple
        ``(http_status, filepath)``. The file is only written when the
        status is 200; for any other status the returned path does not
        refer to a newly written file.

    Notes
    -----
    Exits via ``sys.exit(1)`` when ``dirname`` is given but is not writable.
    """
    # email.message replaces cgi.parse_header, which is deprecated (PEP 594)
    # and no longer available from Python 3.13 on.
    from email.message import Message

    if dirname is not None:
        # Create the target directory on first use so that a fresh checkout
        # (without ./data/) does not abort the download.
        try:
            os.makedirs(dirname, exist_ok=True)
        except OSError:
            pass  # reported by the writability check just below
        if not os.access(dirname, os.W_OK):
            print("ERROR: Provided directory (%s) is not writable" % (dirname))
            sys.exit(1)

    if session is not None:
        response = session.get(file_url, stream=True)
    else:
        # no session -> no authentication
        response = requests.get(file_url, stream=True)

    # The request is streamed: always release the connection, also when the
    # status is not 200 and the body is never read.
    try:
        # If not provided, define the filename from the response header
        if filename is None:
            contentdisposition = response.headers.get('Content-Disposition')
            if contentdisposition is not None:
                header = Message()
                header['Content-Disposition'] = contentdisposition
                header_filename = header.get_filename()  # None if no filename parameter
                if header_filename:
                    # The name comes from the server: keep only its last path
                    # component so it cannot point outside of dirname.
                    filename = os.path.basename(header_filename) or None

            # if the response header does not provide a name, derive a name from the URL
            if filename is None:
                # last chance: get anything after the last '/'
                filename = file_url[file_url.rindex('/')+1:]

        # define the file path where the file is going to be stored
        if dirname is None:
            filepath = filename
        else:
            filepath = dirname + '/' + filename

        if response.status_code == 200:
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=50000):
                    f.write(chunk)

        return (response.status_code, filepath)
    finally:
        response.close()

In [2]:
import astroquery
print(f"astroquery version: {astroquery.__version__}") # report the installed astroquery version

from astroquery.eso import Eso
eso = Eso() # ESO archive query object reused by the following cells

instrument = "HAWKI"       # instrument name, passed to eso.query_main further down
prog_id    = "195.B-0283"  # programme ID of the survey
dp_cat     = "SCIENCE"     # data product category (not used in this cell)

eso.maxrec = 3    # For this example we limit the number of records to 3

table_reduced = eso.query_surveys(prog_id) # query the survey with the program ID
dp_ip = table_reduced[0]["dp_id"] # dp_id of the first reduced data product returned

header = eso.get_headers([dp_ip]) # header table of that single data product
# Collect the first-row value of every header column whose name starts with "PROV";
# presumably these provenance keywords hold the dp_ids of the raw files - confirm.
dp_ips_raw = [header[keyword].value[0] for keyword in header.colnames if keyword.startswith("PROV")]

# Optional: actually download the raw files (left disabled in this example)
# data_files = eso.retrieve_data(dp_ips_raw[0]) # download the first raw data product (to test)
# data_files = eso.retrieve_data(dp_ips_raw) # download raw data 

astroquery version: 0.4.11.dev10290


  warn("Partial result set. Potential causes MAXREC, async storage space, etc.",


In [3]:
# Look up one of the raw files found above (the entry at index 10 of dp_ips_raw)
# in the raw-data table of the instrument, and keep the first row of the result.
table_raw = eso.query_main(instrument, column_filters={"dp_id": dp_ips_raw[10]})[0]
# Alternative query, kept for reference:
# table_raw = eso.query_main("SPHERE", column_filters={"prog_id": "'098.C-0739(C)'", "dp_cat": "SCIENCE"})[0]
table_raw

object,ra,dec,dp_id,date_obs,prog_id,access_estsize,access_url,datalink_url,dec_pnt,det_chip1id,det_chop_ncycles,det_dit,det_expid,det_ndit,dp_cat,dp_tech,dp_type,ecl_lat,ecl_lon,exp_start,exposure,filter_path,gal_lat,gal_lon,grat_path,gris_path,ins_mode,instrument,lambda_max,lambda_min,last_mod_date,mjd_obs,ob_id,ob_name,obs_mode,origfile,period,pi_coi,prog_title,prog_type,ra_pnt,release_date,s_region,slit_path,target,tel_airm_end,tel_airm_start,tel_alt,tel_ambi_fwhm_end,tel_ambi_fwhm_start,tel_ambi_pres_end,tel_ambi_pres_start,tel_ambi_rhum,tel_az,telescope,tpl_expno,tpl_id,tpl_name,tpl_nexp,tpl_seqno,tpl_start
Unnamed: 0_level_1,deg,deg,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,kbyte,Unnamed: 7_level_1,Unnamed: 8_level_1,deg,Unnamed: 10_level_1,Unnamed: 11_level_1,s,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,deg,deg,Unnamed: 20_level_1,s,Unnamed: 22_level_1,deg,deg,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,nm,nm,Unnamed: 31_level_1,d,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,deg,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,deg,arcsec,arcsec,hPa,hPa,%,deg,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1
object,float64,float64,object,object,object,int64,object,object,float64,object,int16,float32,int16,int16,object,object,object,float64,float64,object,float32,object,float64,float64,object,object,object,object,float64,float64,object,float64,int32,object,object,object,int16,object,object,int32,float64,object,object,object,object,float32,float32,float32,float32,float32,float32,float32,float32,float32,object,int32,object,object,int32,int32,object
FAST-SGRA-10,266.55260305,-28.81358,HAWKI.2015-06-08T04:27:11.526,2015-06-08T04:27:11.5263,195.B-0283(A),188713,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:27:11.526,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,-28.81358,,--,1.2621,28368,20,SCIENCE,"IMAGE,HIT",OBJECT,-5.410595,273.033689,2015-06-08T04:27:11.527Z,25.242,"J,OPEN",-0.046583,0.172024,,,IMG,HAWKI,1335.0,1181.0,2015-06-08T13:32:04.260Z,57181.18555007,200357897,FAST_GC-10-J,v,HAWKI_IMG_OBS_FAST159_0287.fits,95,SCHOEDEL/ NEUMAYER/ NOGUERAS/ GALLEGO/ GALLEGO/ DONG/ NAJARRO/ FELDMEIER/ NISHIYAMA/ GIRARD,THE FINGERPRINT OF A GALACTIC NUCLEUS: A $0.2$-RESOLUTION $JHK$ IMAGING SURVEY OF THE CENTRE OF THE MILKY WAY,4,266.552603,2016-06-08T13:32:03.080Z,POSITION J2000 266.552603 -28.81358,,FAST-SGRA-10,1.027,1.028,76.673,1.8,1.8,745.53,745.52,6.0,291.408,ESO-VLT-U4,1,HAWKI_img_obs_FastPhot,Imaging fast photometry with jitter (no offsets,39,2,2015-06-08T04:26:55


In [4]:
# For the sake of this example, let's just consider the single raw frame selected
# in the previous cell, and take its datalink URL:
datalink_url = table_raw['datalink_url']
datalink_url

'https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526'

In [5]:
import pyvo
# Load the datalink document found at datalink_url and display its records as a table
datalink = pyvo.dal.adhoc.DatalinkResults.from_result_url(datalink_url)
datalink.to_table()

ID,access_url,service_def,error_message,semantics,description,content_type,content_length,eso_origfile,eso_category,eso_datalink
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,byte,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
object,object,object,object,object,object,object,int64,object,object,object
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:27:11.526,,,#this,Requested file,application/fits,188713993,HAWKI.2015-06-08T04:27:11.526.fits.Z,,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/ADP.2021-04-30T12:05:58.577,,,#derivation,Science product derived from the requested file,image/fits,46088640,F10_chip1_J.fits,SCIENCE.IMAGE,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?ADP.2021-04-30T12:05:58.577
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/ADP.2021-04-30T12:05:58.579,,,#derivation,Science product derived from the requested file,image/fits,46088640,F10_chip2_J.fits,SCIENCE.IMAGE,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?ADP.2021-04-30T12:05:58.579
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/ADP.2021-04-30T12:05:58.581,,,#derivation,Science product derived from the requested file,image/fits,46088640,F10_chip3_J.fits,SCIENCE.IMAGE,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?ADP.2021-04-30T12:05:58.581
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/ADP.2021-04-30T12:05:58.583,,,#derivation,Science product derived from the requested file,image/fits,46088640,F10_chip4_J.fits,SCIENCE.IMAGE,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?ADP.2021-04-30T12:05:58.583
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://archive.eso.org/calselector/v1/associations?dp_id=HAWKI.2015-06-08T04:27:11.526&mode=Raw2Raw&responseformat=votable,,,http://archive.eso.org/rdf/datalink/eso#calSelector_raw2raw,"List of access points of all the raw calibrations associated to the provided input raw file, and siblings (if any)",application/x-votable+xml,500000,,,
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://archive.eso.org/calselector/v1/associations?dp_id=HAWKI.2015-06-08T04:27:11.526&mode=Raw2Master&responseformat=votable,,,http://archive.eso.org/rdf/datalink/eso#calSelector_raw2master,"List of access points of all the master calibrations associated to the provided input raw file, and siblings (if any)",application/x-votable+xml,500000,,,
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:27:11.526.NL,,,http://archive.eso.org/rdf/datalink/eso#night_log,"The Night Log report contains comments on any issues that could have happened during the observation (e.g. instrument problems), as well as the information about the ambient conditions (airmass, seeing, transparency, etc.). The content_length provides only an order of magnitude of the file size.",text/plain,10000,HAWKI.2015-06-08T04:27:11.526.NL.txt,Night Log,


In [6]:
# Let's get the link to the processed calibration files (raw2master):
# take the first datalink record carrying the calSelector_raw2master semantics
# and read its access_url.

semantics = 'http://archive.eso.org/rdf/datalink/eso#calSelector_raw2master'

raw2master_url = next(datalink.bysemantics( semantics )).access_url
raw2master_url

'https://archive.eso.org/calselector/v1/associations?dp_id=HAWKI.2015-06-08T04:27:11.526&mode=Raw2Master&responseformat=votable'

In [7]:
# Load the document behind raw2master_url (the files associated with the raw frame)
# and preview its first 3 records.
associated_calib_files = pyvo.dal.adhoc.DatalinkResults.from_result_url(raw2master_url)
associated_calib_files.to_table()[:3]

ID,access_url,service_def,error_message,semantics,description,content_type,content_length,eso_category,eso_datalink
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,byte,Unnamed: 8_level_1,Unnamed: 9_level_1
object,object,object,object,object,object,object,int64,object,object
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:27:11.526,,,#this,"category=""HIT_SCIENCE_IMG"" certified=""false"" complete=""true"" mode=""Raw2Raw"" type=""main"" messages=""""",application/fits,188713993,HIT_JITTER_OBS,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:27:50.266,,,http://archive.eso.org/rdf/datalink/eso#sibling_raw,Any raw science file that needs to/can be calibrated together with the science raw file provided in input,application/fits,189104547,HIT_JITTER_OBS,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:27:50.266
ivo://eso.org/ID?HAWKI.2015-06-08T04:27:11.526,https://dataportal.eso.org/dataPortal/file/HAWKI.2015-06-08T04:28:29.008,,,http://archive.eso.org/rdf/datalink/eso#sibling_raw,Any raw science file that needs to/can be calibrated together with the science raw file provided in input,application/fits,188650159,HIT_JITTER_OBS,https://archive.eso.org/datalink/links?ID=ivo://eso.org/ID?HAWKI.2015-06-08T04:28:29.008


In [8]:
# Create and use a mask to keep only the #calibration entries,
# given that other entries, like #this or ...#sibling_raw, could be present.
# Only the access_url and eso_category columns are retained.
calibrator_mask = associated_calib_files['semantics'] == '#calibration'
calib_urls = associated_calib_files.to_table()[calibrator_mask]['access_url','eso_category']
calib_urls[:3]  # show the first 3 calibration files

access_url,eso_category
object,object
https://dataportal.eso.org/dataPortal/file/M.HAWKI.2017-05-05T10:55:25.030,MASTER_2MASS_CATALOGUE_ASTROM
https://dataportal.eso.org/dataPortal/file/M.HAWKI.2017-05-05T10:55:03.200,MASTER_2MASS_CATALOGUE_PHOTOM
https://dataportal.eso.org/dataPortal/file/M.HAWKI.2017-01-10T15:19:36.363,SCHLEGEL_MAP_NORTH


In [11]:
##   - printCalselectorInfo(description, mode_requested): method that returns
##     possible alerts and warnings on the obtained calibration cascade,
##     while printing most relevant info.

# calselectorInfo(description): [internal] parsing a calselector description 
import re
def calselectorInfo(description):
    """Extract the key="value" fields of a calSelector main description.

    Returns the tuple (category, complete, certified, mode, messages).
    A field that is absent, or whose value is empty, is returned as "".
    complete, certified and mode are lower-cased; category and messages
    are returned as found.
    """
    # (regular expression, lower-case the captured value?) in output order
    field_specs = (
        ('category="([^"]+)"', False),
        ('complete="([^"]+)"', True),
        ('certified="([^"]+)"', True),
        ('mode="([^"]+)"', True),
        ('messages="([^"]+)"', False),
    )

    extracted = []
    for pattern, to_lower in field_specs:
        hit = re.search(pattern, description)
        if hit is None:
            extracted.append("")
            continue
        text = hit.group(1)
        extracted.append(text.lower() if to_lower else text)

    return tuple(extracted)


def printCalselectorInfo(description, mode_requested):
    """Print the most relevant params contained in the main calselector description.

    Parameters
    ----------
    description : str
        Description string of the main (#this) calselector record.
    mode_requested : str
        Mode that was asked for (e.g. "raw2master"); compared
        case-insensitively with the mode reported in the description.

    Returns
    -------
    tuple of str
        (alert, mode_warning, certified_warning); each element is "" when
        there is nothing to report.
    """

    category, complete, certified, mode_executed, messages = calselectorInfo(description)

    alert=""
    if complete!= "true":
        alert = "ALERT: incomplete calibration cascade"

    # calselectorInfo returns the executed mode lower-cased, so the requested
    # mode is normalised the same way; otherwise a request spelled
    # "Raw2Master" would always be reported as not executed.
    mode_warning=""
    if mode_executed != str(mode_requested).lower():
        mode_warning = "WARNING: requested mode (%s) could not be executed" % (mode_requested)

    certified_warning=""
    if certified != "true":
        certified_warning = "WARNING: certified=\"%s\"" %(certified)

    print("    calibration info:")
    print("    ------------------------------------")
    print("    science category=%s" % (category))
    print("    cascade complete=%s" % (complete))
    print("    cascade messages=%s" % (messages))
    print("    cascade certified=%s" % (certified))
    print("    cascade executed mode=%s" % (mode_executed))
    print("    full description: %s" % (description))

    return alert, mode_warning, certified_warning

In [12]:
# The list "associated_calib_files" was requested for this mode:
mode_requested = "raw2master"

# Before downloading anything, report on the calibration cascade that was received:
# - is the cascade complete?
# - is the cascade certified?
# - has the cascade been generated for the requested mode (processed calibrations) or not?
#
# That information is embedded in the description field of the #this record;
# printCalselectorInfo (defined above) parses and prints it.
this_description = next(associated_calib_files.bysemantics('#this')).description
alert, mode_warning, certified_warning = printCalselectorInfo(this_description, mode_requested)

# Show every non-empty alert/warning, in the order alert, mode, certification.
for notice in (alert, mode_warning, certified_warning):
    if notice != "":
        print("%s" % (notice))

# Prepare the confirmation question; it is only built here, not asked.
question = None
answer = None
if len(calib_urls):
    print()
    question = (
        "Given the above warning(s), do you still want to download these %d calib files [y/n]? "
        if alert or mode_warning or certified_warning
        else "No warnings reported, do you want to download these %d calib files [y/n]? "
    ) % (len(calib_urls))

    calibration info:
    ------------------------------------
    science category=HIT_SCIENCE_IMG
    cascade complete=true
    cascade messages=
    cascade certified=false
    cascade executed mode=raw2raw
    full description: category="HIT_SCIENCE_IMG" certified="false" complete="true" mode="Raw2Raw" type="main" messages=""



In [13]:
print("Downloading the %d calibration reference files..." % (len(calib_urls)) )

# Fetch every calibration file anonymously into the default directory of
# downloadURL, reporting the outcome file by file.
i_calib = 0  # stays 0 when there is nothing to download
for i_calib, (url, category) in enumerate(calib_urls, start=1):
    status, filename = downloadURL(url)
    if status != 200:
        print("    CALIB: %4d/%d dp_id: %s (%s) NOT DOWNLOADED (http status:%d)"  % (i_calib, len(calib_urls), filename, category, status))
        continue
    print("    CALIB: %4d/%d dp_id: %s (%s) downloaded"  % (i_calib, len(calib_urls), filename, category))

Downloading the 51 calibration reference files...
    CALIB:    1/51 dp_id: ./data//M.HAWKI.2017-05-05T10:55:25.030.fits (MASTER_2MASS_CATALOGUE_ASTROM) downloaded
    CALIB:    2/51 dp_id: ./data//M.HAWKI.2017-05-05T10:55:03.200.fits (MASTER_2MASS_CATALOGUE_PHOTOM) downloaded


KeyboardInterrupt: 

In [None]:
# The datalink service and the calselector service use the same semantics
# to indicate two different things:
# - in datalink: the distinct list of calibration reference files (responseformat=votable);
#                its eso_category is not defined
# - in calselector: the calibration cascade description (format still XML but not votable);
#                its eso_category is set to "ASSOCIATION_TREE"
association_tree_semantics = 'http://archive.eso.org/rdf/datalink/eso#calSelector_raw2master'

# Keep only the records with that semantics, and only the two columns needed below.
association_tree_mask = associated_calib_files['semantics'] == association_tree_semantics
association_tree = associated_calib_files.to_table()[association_tree_mask]['access_url','eso_category']

for url, category in association_tree:
    # The url points to the calselector service, which needs a tokenised session for
    # metadata-protected files; here the download is attempted anonymously.
    status, filename = downloadURL(url)
    print(url)
    if status != 200:
        print("  Association tree: %s (%s) NOT DOWNLOADED (http status:%d)"  % (filename, category, status))
        continue
    print("  Association tree: %s (%s) downloaded"  % (filename, category))

https://archive.eso.org/calselector/v1/associations?dp_id=SPHER.2016-09-26T03:04:09.308&mode=Raw2Master
  Association tree: ./data//SPHER.2016-09-26T03:04:09.308_raw2master.xml (ASSOCIATION_TREE) downloaded


<hr style="border:2px solid #0281c9"> </hr>