# Downloading Exoplanet Data Solutions

***

This notebook contains solutions to the exercises described in the Exercises section of `downloading_exoplanet_data.ipynb`.

In [3]:
import ast
import sys
import os
import json
import pprint
import urllib.request
from urllib.parse import quote as urlencode

import numpy as np
import requests
from astropy.table import Table
from astroquery.mast import Observations

# Exercise 1
*Use the MAST API to sort Swift data on WASP-12 b by PI and download it.*

We begin with much of the same procedure as the tutorial notebook.

In [4]:
# Name of the planet we ask MAST to resolve into sky coordinates
object_of_interest = 'WASP-12 b'

# Name-lookup service request; the reply is requested as JSON
resolver_request = dict(
    service='Mast.Name.Lookup',
    params=dict(input=object_of_interest, format='json'),
)

# Serialize the request to JSON and percent-encode it in one step
req_string = urlencode(json.dumps(resolver_request))

In [5]:
# Pretty-printer with a 4-space indent, used below to display nested API responses
pp = pprint.PrettyPrinter(indent=4)

In [6]:
# NOTE: this cell rebuilds exactly the same name-lookup request as the
# earlier cell, so re-running it leaves every variable unchanged.
object_of_interest = 'WASP-12 b'

resolver_request = {
    'service': 'Mast.Name.Lookup',
    'params': {
        'input': object_of_interest,
        'format': 'json',
    },
}

# JSON-encode the request, then percent-encode it for use in the POST body
request_as_json = json.dumps(resolver_request)
req_string = urlencode(request_as_json)

In [7]:
# Version of the running interpreter as "major.minor.micro"; it is
# appended to the User-agent header below.
version = ".".join(str(part) for part in sys.version_info[:3])

# HTTP headers sent with every MAST API request
headers = {
    "Content-type": "application/x-www-form-urlencoded",
    "Accept": "text/plain",
    "User-agent": "python-requests/{}".format(version),
}


In [8]:
# Endpoint that all MAST API service requests in this notebook are posted to
request_url = 'https://mast.stsci.edu/api/v0/invoke'

# POST the url-encoded request as the "request" form field
resp = requests.post(
    request_url,
    data="request={}".format(req_string),
    headers=headers,
)

In [9]:
# Decode the JSON body of the name-lookup response into Python objects
resolved_object = resp.json()

# Show the decoded response
pp.pprint(resolved_object)

{   'resolvedCoordinate': [   {   'cached': False,
                                  'canonicalName': 'WASP-12 b',
                                  'decl': 29.672277777989713,
                                  'ra': 97.6366416613261,
                                  'resolver': 'EXO',
                                  'resolverTime': 79,
                                  'searchRadius': 0.000333,
                                  'searchString': 'wasp-12 b'}],
    'status': ''}


In [10]:
# Use the first entry of the resolver's 'resolvedCoordinate' list
resolved_coord = resolved_object['resolvedCoordinate'][0]

# The resolver reports declination under the key 'decl'
obj_ra, obj_dec = resolved_coord['ra'], resolved_coord['decl']

obj_ra, obj_dec

(97.6366416613261, 29.672277777989713)

In [11]:
# Cone search of the CAOM observation catalogue around the resolved position.
# NOTE(review): radius is presumably in degrees - confirm against the MAST API docs.
mast_request = {
    'service': 'Mast.Caom.Cone',
    'params': {'ra': obj_ra, 'dec': obj_dec, 'radius': 0.2},
    'format': 'json',
    'pagesize': 2000,
    'page': 1,
    'removenullcolumns': True,
    'removecache': True,
}

# JSON-encode and percent-encode the request in one step
req_string = urlencode(json.dumps(mast_request))

# Send it to the same endpoint, with the same headers, as the name lookup
resp = requests.post(request_url, data="request={}".format(req_string), headers=headers)

# Decode the JSON reply
mast_data = resp.json()

# Top-level keys of the reply, followed by the query status it reports
print(mast_data.keys())
print("Query status:", mast_data['status'])

dict_keys(['status', 'msg', 'data', 'fields', 'paging'])
Query status: COMPLETE


In [14]:
mast_data_table = Table()

# MAST type names that must be translated into numpy dtype strings;
# any other type name is passed to numpy unchanged.
mast_to_numpy = {"string": "str", "boolean": "bool"}

# (column name, MAST type name) for every field the query returned
col_type_list = [(field['name'], field['type']) for field in mast_data['fields']]

for col, atype in col_type_list:
    atype = mast_to_numpy.get(atype, atype)

    # One Table column per field; a row lacking the field contributes None
    column_values = [row.get(col, None) for row in mast_data['data']]
    mast_data_table[col] = np.array(column_values, dtype=atype)

print(mast_data_table)

intentType obs_collection provenance_name instrument_name project filters ... dataRights mtFlag srcDen  obsid       distance     _selected_
---------- -------------- --------------- --------------- ------- ------- ... ---------- ------ ------ -------- ---------------- ----------
   science           TESS            SPOC      Photometer    TESS    TESS ...     PUBLIC  False    nan 27266912              0.0      False
   science           TESS            SPOC      Photometer    TESS    TESS ...     PUBLIC  False    nan 65432622              0.0      False
   science           TESS            SPOC      Photometer    TESS    TESS ...     PUBLIC  False    nan 68942182              0.0      False
   science           TESS            SPOC      Photometer    TESS    TESS ...     PUBLIC  False    nan 71308547              0.0      False
   science           TESS            SPOC      Photometer    TESS    TESS ...     PUBLIC  False    nan 71741843              0.0      False
   science          

In [15]:
# Display the names of the columns available in the table
mast_data_table.columns

<TableColumns names=('intentType','obs_collection','provenance_name','instrument_name','project','filters','wavelength_region','target_name','target_classification','obs_id','s_ra','s_dec','dataproduct_type','proposal_pi','calib_level','t_min','t_max','t_exptime','em_min','em_max','obs_title','t_obs_release','proposal_id','proposal_type','sequence_number','s_region','jpegURL','dataURL','dataRights','mtFlag','srcDen','obsid','distance','_selected_')>

Next, instead of sorting by `t_min`, we sort by PI (the `proposal_pi` column).

In [17]:
# Reorder the rows of the table by the 'proposal_pi' column
# (the table itself is modified; nothing is reassigned)
mast_data_table.sort('proposal_pi')

In [19]:
# Print the table again to inspect it after the sort by PI
print(mast_data_table)

intentType obs_collection provenance_name instrument_name project filters ... dataRights mtFlag srcDen  obsid       distance     _selected_
---------- -------------- --------------- --------------- ------- ------- ... ---------- ------ ------ -------- ---------------- ----------
   science    SPITZER_SHA    SSC Pipeline            IRAC    None   IRAC1 ...     PUBLIC  False    nan  1696175              0.0      False
   science    SPITZER_SHA    SSC Pipeline            IRAC    None   IRAC3 ...     PUBLIC  False    nan  1696175              0.0      False
   science    SPITZER_SHA    SSC Pipeline            IRAC    None   IRAC2 ...     PUBLIC  False    nan  1696175              0.0      False
   science    SPITZER_SHA    SSC Pipeline            IRAC    None   IRAC4 ...     PUBLIC  False    nan  1696175              0.0      False
   science          SWIFT            None            UVOT    None    UVM2 ...     PUBLIC  False 5885.0  1513232              0.0      False
   science          

In [21]:
# The table is now ordered by PI, so index -1 picks the LAST Swift row in
# PI order, not the most recent observation.
# NOTE(review): the name `recent_index` presumably comes from the tutorial's
# time-sorted version - confirm before relying on it.
recent_index = -1

# Restrict the table to Swift observations, then take the chosen row
swift_rows = mast_data_table[mast_data_table["obs_collection"] == "SWIFT"]
interesting_observation = swift_rows[recent_index]

# Summarize the chosen observation by product type, collection and instrument
summary_columns = ['dataproduct_type', 'obs_collection', 'instrument_name']
print("Observation:", [interesting_observation[name] for name in summary_columns])

Observation: ['cube', 'SWIFT', 'UVOT']


In [23]:
# Identifier of the chosen observation, used to look up its data products
obsid = interesting_observation['obsid']

# Request the list of data products attached to that observation
product_request = {
    'service': 'Mast.Caom.Products',
    'params': {'obsid': obsid},
    'format': 'json',
    'pagesize': 100,
    'page': 1,
}

# JSON-encode and percent-encode the request in one step
req_string = urlencode(json.dumps(product_request))

# Perform the HTTP request against the same endpoint as before
resp = requests.post(request_url, data="request={}".format(req_string), headers=headers)

# Decode the HTTP result
obs_products = resp.json()

# Report how many products came back and which fields describe each of them
print("Number of data products:", len(obs_products["data"]))
print("Product information column names:")
pp.pprint(obs_products['fields'])

Number of data products: 5
Product information column names:
[   {'name': 'obsID', 'type': 'string'},
    {'name': 'obs_collection', 'type': 'string'},
    {'name': 'dataproduct_type', 'type': 'string'},
    {'name': 'obs_id', 'type': 'string'},
    {'name': 'description', 'type': 'string'},
    {'name': 'type', 'type': 'string'},
    {'name': 'dataURI', 'type': 'string'},
    {'name': 'productType', 'type': 'string'},
    {'name': 'productGroupDescription', 'type': 'string'},
    {'name': 'productSubGroupDescription', 'type': 'string'},
    {'name': 'productDocumentationURL', 'type': 'string'},
    {'name': 'project', 'type': 'string'},
    {'name': 'prvversion', 'type': 'string'},
    {'name': 'proposal_id', 'type': 'string'},
    {'name': 'productFilename', 'type': 'string'},
    {'name': 'size', 'type': 'int'},
    {'name': 'parent_obsid', 'type': 'string'},
    {'name': 'dataRights', 'type': 'string'},
    {'name': 'calib_level', 'type': 'int'},
    {'name': '_selected_', 'type': 

In [24]:
# Show each product's 'productType'; products without one are shown as ""
pp.pprint([product.get('productType', "") for product in obs_products["data"]])

['PREVIEW', 'SCIENCE', 'SCIENCE', 'SCIENCE', 'SCIENCE']


In [25]:
# Keep only the products whose 'productType' is 'SCIENCE'
sci_prod_arr = [prod for prod in obs_products['data'] if prod.get("productType", None) == 'SCIENCE']
science_products = Table()

# MAST type names translated into numpy dtype strings. "int" is widened to
# "float" because a missing value becomes nan, which numpy integer arrays
# cannot hold. Any other type name is passed to numpy unchanged.
numpy_dtype_for = {"string": "str", "boolean": "bool", "int": "float"}

# (column name, MAST type name) for every field of the products reply
col_type_list = [(field['name'], field['type']) for field in obs_products['fields']]

for col, atype in col_type_list:
    atype = numpy_dtype_for.get(atype, atype)

    # One Table column per obs_products field, filled from the science rows only
    column_values = [prod.get(col, None) for prod in sci_prod_arr]
    science_products[col] = np.array(column_values, dtype=atype)

print("Number of science products:", len(science_products))
print(science_products)

Number of science products: 4
 obsID  obs_collection dataproduct_type    obs_id   ... parent_obsid dataRights calib_level _selected_
------- -------------- ---------------- ----------- ... ------------ ---------- ----------- ----------
1468834          SWIFT             cube 00032242001 ...      1468834     PUBLIC         2.0      False
1468834          SWIFT             cube 00032242001 ...      1468834     PUBLIC         2.0      False
1468834          SWIFT             cube 00032242001 ...      1468834     PUBLIC         2.0      False
1468834          SWIFT             cube 00032242001 ...      1468834     PUBLIC         2.0      False


In [None]:
# Endpoint that returns the file identified by the "uri" query parameter
download_url = 'https://mast.stsci.edu/api/v0.1/Download/file?'

for row in science_products:

    # Files are saved under mastFiles/<obs_collection>/<obs_id>/<productFilename>;
    # exist_ok=True makes re-running the cell safe when the folder already exists.
    out_dir = os.path.join("mastFiles", row['obs_collection'], row['obs_id'])
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, os.path.basename(row['productFilename']))

    # Download the data
    payload = {"uri": row['dataURI']}
    resp = requests.get(download_url, params=payload)

    # Save to file only when the server reports success. Writing the body
    # of a failed response would leave an error page on disk under the
    # product's name and make the existence check below pass regardless.
    if resp.ok:
        with open(out_path, 'wb') as f:
            f.write(resp.content)

    # Report the outcome: a failed request is an error even if a file from
    # an earlier run is still present at out_path.
    if not resp.ok or not os.path.isfile(out_path):
        print("ERROR: " + out_path + " failed to download.")
    else:
        print("COMPLETE: ", out_path)

COMPLETE:  mastFiles/SWIFT/00032242001/sw00032242001u_ex.img.gz
COMPLETE:  mastFiles/SWIFT/00032242001/sw00032242001um2_ex.img.gz
COMPLETE:  mastFiles/SWIFT/00032242001/sw00032242001um2_rw.img.gz


In [None]:
ls mastFiles/SPITZER_SHA/000001AF1000/

# Exercise 2

# Exercise 3