# Downloads data from APPLAUSE  

##  WORK IN PROGRESS

This script automatically downloads from the APPLAUSE archive all the data necessary to run the analysis scripts on a plate sequence.

For each entry (plate ID) in a sequence, we need to download two kinds of data: plate scans and source tables. Analysis requires two scans and four source tables per pair of exposures:

- plate scan 1 
- plate scan 2
- sources 1
- sources calib 1
- sources 2
- sources calib 2

Data is downloaded only when not already physically present in the data directory (DATAPATH in file *settings.py*).

In [1]:
import os
from pathlib import Path
import shutil
from importlib import reload
from urllib.parse import urlsplit
import requests
import json

import pyvo as vo

from astropy.io import fits
from astropy.utils.data import download_file
from astropy.table import Table

from applause_token import token
from settings import DATAPATH, get_parameters, get_table_sources, current_dataset, images, fname, sequences

## Functions

In [2]:
def download_scan(table, plate_id, test=False):
    '''
    Download the scan image of one plate into the data directory.

    The scan URL is taken from the 'filename_scan' column of the first row
    of `table` whose 'plate_id' equals `plate_id`. The local file name is the
    last path component of that URL. If a file with that name already exists
    under DATAPATH, nothing is downloaded and the function returns early.
    Otherwise the file is fetched with astropy's `download_file`, moved into
    DATAPATH, and the module-level `images` dictionary is updated and
    written to 'images.json' in the current working directory.

    Parameters
    ----------
    table : table with 'plate_id' and 'filename_scan' columns
        Catalog supporting boolean-mask row selection (the notebook passes
        an astropy Table).
    plate_id : plate identifier
        Value compared against the 'plate_id' column.
    test : bool
        When True, print the temporary download location and a couple of
        FITS header keywords read back from the stored file.

    Raises
    ------
    IndexError
        If `plate_id` has no matching row in `table`.
    '''
    print("Downloading scan for plate: ", plate_id, "...")

    # get URL for this plate ID
    matches = table[table['plate_id'] == plate_id]
    if len(matches) == 0:
        # Same exception type the bare [0] lookup would raise, but with a
        # message that says which plate is missing.
        raise IndexError(f"plate_id {plate_id} not found in catalog table")
    scan_url = matches['filename_scan'][0]

    # get file name from URL
    filename = urlsplit(scan_url).path.split('/')[-1]

    # if file already exists in data storage, bail out
    target_path = os.path.join(DATAPATH, filename)
    if Path(target_path).is_file():
        print("Image scan for plate: ", plate_id, " already in storage.")
        return

    # Download the file and get the local filename/path
    local_file_path = download_file(scan_url)
    if test:
        print(f"File downloaded to: {local_file_path}")

    # Move and rename file into data directory
    new_path = shutil.move(local_file_path, target_path)
    print("Image scan for plate: ", plate_id, " downloaded and written to:  ", new_path)

    # TEST: open the local FITS file using the obtained path
    if test:
        with fits.open(new_path) as hdul:
            # Access the header information or data
            header_info = hdul[0].header
            print(f"TEST: telescope used: {header_info.get('TELESCOP')}")
            print(f"TEST: date: {header_info.get('DATE-AVG')}")

    # once image is successfully downloaded, we need to update the 'images.json'
    # dictionary with the new association "plate id: file name" entry.
    images[str(plate_id)] = filename

    # The context manager closes (and flushes) the file even if json.dump
    # fails part-way; a write failure is reported but does not abort the run.
    try:
        with open('images.json', 'w') as json_file:
            json.dump(images, json_file, indent=4)
    except OSError as e:
        print(f"Error writing to file: {e}")

In [3]:
def download_sources_table(plate_id, calib=False):
    '''
    Download one APPLAUSE sources table for a plate and save it as CSV.

    Queries the APPLAUSE TAP service for all rows of table
    'applause_dr4.source' (or 'applause_dr4.source_calib' when `calib` is
    True) with the given plate ID, ordered by source_id, and writes the
    result to the file given by fname(get_table_sources(plate_id, calib=calib)).
    If that file already exists, nothing is downloaded. If the remote job
    ends in error, the error is printed and the function returns without
    writing anything.

    This function requires that a 'token' variable be present in the namespace.

    The usual way to do this is to create a 1-line .py import file named 
    'applause_token'. The file will define the variable with the token definition 
    you got from APPLAUSE (look for the 'API token' entry in the dropdown menu 
    from your login name in the APPLAUSE web page). Something like:
    
    token = '2a2223453fdf3eq89aweafs6da05415hhh1d4369193e'

    Parameters
    ----------
    plate_id : plate identifier
        Inserted into the query's WHERE clause via str().
    calib : bool
        Selects the '_calib' variant of the sources table.
    '''
    # printout helper; also the suffix that distinguishes the remote table
    cal_suffix = "_calib" if calib else ""

    # Resolve the destination once, honoring `calib`, so that the existence
    # check and the final write always refer to the same file.
    table_name = get_table_sources(plate_id, calib=calib)
    file_name = fname(table_name)

    # If table already exists in data storage, bail out.
    # Only the table for this plate and this calib setting is checked.
    if Path(file_name).is_file():
        print("Table sources" + cal_suffix + " for plate: ", plate_id, " already in storage.")
        return

    print("Downloading table sources" + cal_suffix + " for plate: ", plate_id, "...")

    # setup
    url = 'https://www.plate-archive.org/tap'
    ap_token = 'Token ' + token

    # assemble query string
    qstr = ('SELECT * FROM applause_dr4.source' + cal_suffix
            + ' WHERE plate_id = ' + str(plate_id) + ' ORDER BY source_id')

    # session mechanics; the context manager releases the HTTP connections
    # when we are done with the service.
    with requests.Session() as tap_session:
        tap_session.headers['Authorization'] = ap_token

        tap_service = vo.dal.TAPService(url, session=tap_session)

        lang = 'PostgreSQL'

        job = tap_service.submit_job(qstr, language=lang)
        job.run()

        # pyvo expects the timeout in seconds: 7200 s = 2 hours.
        job.wait(phases=["COMPLETED", "ERROR", "ABORTED"], timeout=7200.0)

        # Best effort: report a failed job and give up on this table rather
        # than aborting a whole download run.
        try:
            job.raise_if_error()
        except Exception as e:
            print(e)
            return

        results = job.fetch_result()

    # save an astropy table in csv format (for backwards compatibility)
    result_table = results.to_table()
    result_table.write(file_name, format='csv', overwrite=True)

    print("Table", table_name, " downloaded and saved.")

## Define catalog and sequence

In [4]:
# Catalog table read from the local CSV file 'footprints_5.csv'. It is the
# table handed to download_scan(), which looks up 'plate_id' and
# 'filename_scan' in it.
catalog_applause = Table.read('footprints_5.csv', format='ascii.csv')

# Plate sequence to process, picked by name from the `sequences` mapping
# imported from settings; printed so the selected plate IDs are visible.
sequence = sequences['seq 11']
print(sequence)

[9534, 9535, 9536, 9537, 9538, 9539, 9540, 9543]


## Download

In [5]:
# Intended full run, currently disabled: for every plate in the sequence,
# fetch the scan plus both source tables (plain and calib).
#
# for plate_id in sequence:
#     download_scan(catalog_applause, plate_id)
#     download_sources_table(plate_id)
#     download_sources_table(plate_id, calib=True)

# Debug: request a single sources table for the first plate of the sequence.
# The APPLAUSE service seems to be broken: this call currently ends with the
# DALQueryError shown in the cell output.

download_sources_table(9534)


DALQueryError: Query Error: <No useful error from server>