# PO.DAAC Web Services Example

Notebook version of the PO.DAAC Web Services example workflow:

https://podaac.jpl.nasa.gov/ws

> This example python script shows an entire subsetting workflow using the PO.DAAC Web Services, exercising several of the services: datasets are searched (Dataset Search); variables in the found datasets are identified (Dataset Variables); granules meeting search criteria are found (Granule Search); and the identified granules are subsetted (Granule Subset). Before running this script, email_address variable need to be changed to a valid email address to receive the result. 

## Install and load dependencies

`feedparser` is the only dependency that is not part of the standard library. It is available on [PyPI](https://pypi.python.org/pypi/feedparser) and can be installed using `pip`.

    pip install feedparser --user

In [1]:
import feedparser
import httplib
import json
import os
import time
import urllib
from zipfile import ZipFile

## Search qualifiers

In [2]:
# Temporal window of the search, given as ISO 8601-style timestamp strings.
start_time, stop_time = '2014-06-02T05:31:00Z', '2014-06-02T11:45:30Z'
# Spatial window: something like the tropical Pacific.
# NOTE(review): presumably ordered west,south,east,north -- confirm against the PO.DAAC docs.
bounding_box = ','.join(('-175', '-23', '-100', '23'))

## Search for available datasets

In [3]:
# Base endpoint of the PO.DAAC dataset search service.
cws_ds_search = 'https://podaac.jpl.nasa.gov/ws/search/dataset?'
# Attributes that define the dataset search.
ds_search_args = {
    'processLevel': '2',
    'satellite': 'AQUARIUS_SAC-D',
}
# Append the URL-encoded attributes as the query string and let feedparser
# fetch and parse the resulting feed.
search_url = ''.join((cws_ds_search, urllib.urlencode(ds_search_args)))
datasets = feedparser.parse(search_url)

In [5]:
# Display the raw parsed feed returned by the dataset search.
datasets

{'bozo': 0,
 'encoding': u'utf-8',
 'entries': [{'content': [{'base': u'https://podaac.jpl.nasa.gov/ws/search/dataset?satellite=AQUARIUS_SAC-D&processLevel=2',
     'language': None,
     'type': u'text/plain',
     'value': u'The version 4.0 Aquarius CAP Level 2 product contains the third release of the AQUARIUS/SAC-D orbital/swath data based on the Combined Active Passive (CAP) algorithm. CAP is a P.I. produced data set developed and provided by the JPL Climate Oceans and Solid Earth group (S. Yueh).  This Level 2 data set contains sea surface salinity (SSS), wind speed and wind direction data derived from 3 different radiometers and the onboard scatterometer. The CAP algorithm simultaneously retrieves the salinity, wind speed and direction by minimizing the sum of squared differences between model and observations. The main improvement of CAP V4.0 is calibration of the rain roughness correction geophysical model function to HYCOM SSS adjusted by the Rain Impact Model (RIM) to accoun

In [6]:
# List the (dataset id, short name) pair of every dataset found.
# NOTE: under Python 2 the comprehension variable `d` leaks into the notebook
# namespace and stays bound to the last entry of `datasets.entries`.
[(d.podaac_datasetid, d.podaac_shortname) for d in datasets.entries]

[(u'PODAAC-AQR40-2TOCS', u'AQUARIUS_L2_SSS_CAP_V4'),
 (u'PODAAC-AQR50-2IOCS', u'AQUARIUS_L2_SSS_CAL_V5'),
 (u'PODAAC-AQR50-2JOCS', u'AQUARIUS_L2_SSS_QL_V5'),
 (u'PODAAC-AQR50-2SOCS', u'AQUARIUS_L2_SSS_V5')]

## Find the variables available in the last of the 4 datasets

In [7]:
# Build the Dataset Variables query for the last dataset returned by the search.
# Indexing with [-1] rather than a hard-coded 3 keeps selecting the last entry
# even if the search returns a different number of datasets.
search_url = 'https://podaac.jpl.nasa.gov/ws/dataset/variables?datasetId=' + datasets.entries[-1].podaac_datasetid
search_url

u'https://podaac.jpl.nasa.gov/ws/dataset/variables?datasetId=PODAAC-AQR50-2SOCS'

In [8]:
# Fetch the variables document and decode its JSON body.
search_urlobj = urllib.urlopen(search_url)
try:
    variables = json.loads(search_urlobj.read())
finally:
    # Always release the HTTP handle, even if reading or parsing fails.
    search_urlobj.close()

In [9]:
# Display the decoded variables document for the selected dataset.
variables

{u'footprint': {u's1': u'0:*,0:0',
  u's2': u'0:*,*:*',
  u'strategy': u'periodic'},
 u'image': {u'ppd': 4, u'res': 8},
 u'imgVariables': [{u'id': u'Aquarius Data/SSS',
   u'max': u'40',
   u'min': u'30',
   u'palette': u'palette_AQUARIUS_SSS',
   u'title': u'Sea Surface Salinity',
   u'units': u'PSU'},
  {u'id': u'Aquarius Data/density',
   u'max': u'1030',
   u'min': u'1012',
   u'palette': u'paletteMedspirationIndexed',
   u'title': u'Derived Sea Surface Density',
   u'units': u'kg/m^3'},
  {u'id': u'Aquarius Data/SSS_unc_ran',
   u'max': u'.5',
   u'min': u'0',
   u'palette': u'palette_AQUARIUS_SSS',
   u'title': u'Sea Surface Salinity uncertainty (random)',
   u'units': u'PSU'},
  {u'id': u'Aquarius Data/SSS_unc_sys',
   u'max': u'2',
   u'min': u'0',
   u'palette': u'palette_AQUARIUS_SSS',
   u'title': u'Sea Surface Salinity uncertainty (systematic))',
   u'units': u'PSU'},
  {u'id': u'Aquarius Data/anc_SSS_hycom',
   u'max': u'40',
   u'min': u'30',
   u'palette': u'palette_AQUA

## Search for granules in the dataset

In [11]:
# Base endpoint of the PO.DAAC granule search service.
cws_gran_search = 'https://podaac.jpl.nasa.gov/ws/search/granule?'
# Take the dataset id explicitly from the last dataset entry instead of from `d`,
# a name that only exists because Python 2 leaks list-comprehension variables
# (it would be undefined if the listing cell was skipped, or under Python 3).
gran_search_args = {'datasetId': datasets.entries[-1].podaac_datasetid,
                    'startTime': start_time,
                    'endTime': stop_time, 'itemsPerPage': 350 }
search_url = cws_gran_search + urllib.urlencode(gran_search_args)
granules = feedparser.parse(search_url)

In [12]:
# Display the raw parsed feed returned by the granule search.
granules

{'bozo': 0,
 'encoding': u'utf-8',
 'entries': [{'dcterms_date': u'2014-06-02T11:41:01.337000Z/2014-06-02T13:17:58.938000Z',
   'dcterms_identifier': u'PODAAC-AQR50-2SOCS:Q2014153114100.L2_SCI_V5.0',
   'gml_envelope': u'',
   'gml_lowercorner': u'-180 -90',
   'gml_uppercorner': u'180 90',
   'guidislink': True,
   'id': u'PODAAC-AQR50-2SOCS:Q2014153114100.L2_SCI_V5.0',
   'link': u'PODAAC-AQR50-2SOCS:Q2014153114100.L2_SCI_V5.0',
   'links': [{'href': u'http://podaac.jpl.nasa.gov/ws/search/granule?full=true&granuleName=Q2014153114100.L2_SCI_V5.0&datasetId=PODAAC-AQR50-2SOCS',
     'rel': u'enclosure',
     'title': u'PO.DAAC Metadata',
     'type': u'application/atom+xml'},
    {'href': u'http://podaac.jpl.nasa.gov/ws/metadata/granule?granuleName=Q2014153114100.L2_SCI_V5.0&datasetId=PODAAC-AQR50-2SOCS&format=iso',
     'rel': u'enclosure',
     'title': u'ISO-19115 Metadata',
     'type': u'text/xml'},
    {'href': u'http://podaac.jpl.nasa.gov/ws/metadata/granule?granuleName=Q20141531

In [13]:
# Summarize each granule found as (title, start time, end time).
[(g.title, g.time_start, g.time_end) for g in granules.entries]

[(u'Q2014153114100.L2_SCI_V5.0',
  u'2014-06-02T11:41:01.337000Z',
  u'2014-06-02T13:17:58.938000Z'),
 (u'Q2014153100300.L2_SCI_V5.0',
  u'2014-06-02T10:03:00.375000Z',
  u'2014-06-02T11:40:59.897000Z'),
 (u'Q2014153082500.L2_SCI_V5.0',
  u'2014-06-02T08:25:00.854000Z',
  u'2014-06-02T10:02:58.935000Z'),
 (u'Q2014153064700.L2_SCI_V5.0',
  u'2014-06-02T06:47:01.332000Z',
  u'2014-06-02T08:24:59.414000Z'),
 (u'Q2014153050900.L2_SCI_V5.0',
  u'2014-06-02T05:09:00.371000Z',
  u'2014-06-02T06:46:59.892000Z')]

In [14]:
# Collect the granule titles as ASCII-encoded strings for use in the subset request.
granules_list = [
    g.title.encode('ascii')
    for g in granules.entries
]
granules_list

['Q2014153114100.L2_SCI_V5.0',
 'Q2014153100300.L2_SCI_V5.0',
 'Q2014153082500.L2_SCI_V5.0',
 'Q2014153064700.L2_SCI_V5.0',
 'Q2014153050900.L2_SCI_V5.0']

## Construct the request dictionary

In [23]:
# Subset request: one query covering every granule found, restricted to the
# bounding box and carrying the dataset's variable list.
# NOTE: 'email' must be set to a valid address to receive the result.
# The dataset id is read explicitly from the last dataset entry instead of from
# `d`, a name that only exists because Python 2 leaks list-comprehension variables.
request = {'email': '',
           'query': [{'datasetId': datasets.entries[-1].podaac_datasetid,
                      'compact': 'False',
                      'bbox': bounding_box,
                      'variables' : variables['variables'],
                      'granuleIds' : granules_list }] }
request

{'email': '',
 'query': [{'bbox': '-175,-23,-100,23',
   'compact': 'False',
   'datasetId': u'PODAAC-AQR50-2SOCS',
   'granuleIds': ['Q2014153114100.L2_SCI_V5.0',
    'Q2014153100300.L2_SCI_V5.0',
    'Q2014153082500.L2_SCI_V5.0',
    'Q2014153064700.L2_SCI_V5.0',
    'Q2014153050900.L2_SCI_V5.0'],
   'variables': [u'/Aquarius Data/EIA_SSS_sens',
    u'/Aquarius Data/EIA_err',
    u'/Aquarius Data/IU_coupling_SSS_sens',
    u'/Aquarius Data/IU_coupling_err',
    u'/Aquarius Data/Kpc_HH_ant',
    u'/Aquarius Data/Kpc_HH_toa',
    u'/Aquarius Data/Kpc_HV_ant',
    u'/Aquarius Data/Kpc_HV_toa',
    u'/Aquarius Data/Kpc_VH_ant',
    u'/Aquarius Data/Kpc_VH_toa',
    u'/Aquarius Data/Kpc_VV_ant',
    u'/Aquarius Data/Kpc_VV_toa',
    u'/Aquarius Data/Kpc_total',
    u'/Aquarius Data/NEDT_3_SSS_sens',
    u'/Aquarius Data/NEDT_3_err',
    u'/Aquarius Data/NEDT_H_SSS_sens',
    u'/Aquarius Data/NEDT_H_err',
    u'/Aquarius Data/NEDT_V_SSS_sens',
    u'/Aquarius Data/NEDT_V_err',
    u'/Aquar

## Submit POST request and get token for status queries

In [16]:
# Serialize the request as compact JSON (no whitespace after the separators)
# with the keys in sorted order.
params = json.dumps(request, sort_keys=True, separators=(',', ':'))
params

'{"email":"finch@jpl.nasa.gov","query":[{"bbox":"-175,-23,-100,23","compact":"False","datasetId":"PODAAC-AQR50-2SOCS","granuleIds":["Q2014153114100.L2_SCI_V5.0","Q2014153100300.L2_SCI_V5.0","Q2014153082500.L2_SCI_V5.0","Q2014153064700.L2_SCI_V5.0","Q2014153050900.L2_SCI_V5.0"],"variables":["/Aquarius Data/EIA_SSS_sens","/Aquarius Data/EIA_err","/Aquarius Data/IU_coupling_SSS_sens","/Aquarius Data/IU_coupling_err","/Aquarius Data/Kpc_HH_ant","/Aquarius Data/Kpc_HH_toa","/Aquarius Data/Kpc_HV_ant","/Aquarius Data/Kpc_HV_toa","/Aquarius Data/Kpc_VH_ant","/Aquarius Data/Kpc_VH_toa","/Aquarius Data/Kpc_VV_ant","/Aquarius Data/Kpc_VV_toa","/Aquarius Data/Kpc_total","/Aquarius Data/NEDT_3_SSS_sens","/Aquarius Data/NEDT_3_err","/Aquarius Data/NEDT_H_SSS_sens","/Aquarius Data/NEDT_H_err","/Aquarius Data/NEDT_V_SSS_sens","/Aquarius Data/NEDT_V_err","/Aquarius Data/RFI_level_SSS_sens","/Aquarius Data/RFI_level_err","/Aquarius Data/SSS","/Aquarius Data/SSS_matchup","/Aquarius Data/SSS_nolc","/Aqua

In [17]:
# URL-encode the JSON document as the single form field 'query'.
params_url = urllib.urlencode([('query', params)])
params_url

'query=%7B%22email%22%3A%22finch%40jpl.nasa.gov%22%2C%22query%22%3A%5B%7B%22bbox%22%3A%22-175%2C-23%2C-100%2C23%22%2C%22compact%22%3A%22False%22%2C%22datasetId%22%3A%22PODAAC-AQR50-2SOCS%22%2C%22granuleIds%22%3A%5B%22Q2014153114100.L2_SCI_V5.0%22%2C%22Q2014153100300.L2_SCI_V5.0%22%2C%22Q2014153082500.L2_SCI_V5.0%22%2C%22Q2014153064700.L2_SCI_V5.0%22%2C%22Q2014153050900.L2_SCI_V5.0%22%5D%2C%22variables%22%3A%5B%22%2FAquarius+Data%2FEIA_SSS_sens%22%2C%22%2FAquarius+Data%2FEIA_err%22%2C%22%2FAquarius+Data%2FIU_coupling_SSS_sens%22%2C%22%2FAquarius+Data%2FIU_coupling_err%22%2C%22%2FAquarius+Data%2FKpc_HH_ant%22%2C%22%2FAquarius+Data%2FKpc_HH_toa%22%2C%22%2FAquarius+Data%2FKpc_HV_ant%22%2C%22%2FAquarius+Data%2FKpc_HV_toa%22%2C%22%2FAquarius+Data%2FKpc_VH_ant%22%2C%22%2FAquarius+Data%2FKpc_VH_toa%22%2C%22%2FAquarius+Data%2FKpc_VV_ant%22%2C%22%2FAquarius+Data%2FKpc_VV_toa%22%2C%22%2FAquarius+Data%2FKpc_total%22%2C%22%2FAquarius+Data%2FNEDT_3_SSS_sens%22%2C%22%2FAquarius+Data%2FNEDT_3_err%22%2

In [18]:
# Submit the subset request as a form-encoded POST and keep the token the
# service returns; the token is used afterwards to query the job status.
headers = {'Content-type': 'application/x-www-form-urlencoded',
           'Accept': '*'}
connection = httplib.HTTPSConnection('podaac.jpl.nasa.gov')
try:
    connection.request('POST', '/ws/subset/granule?request=submit',
                       params_url, headers)
    response = connection.getresponse()
    data = response.read()
finally:
    # Close the connection even if the request or the read fails.
    connection.close()
result = json.loads(data)
token = result['token']
token

u'db2d36d6-fc94-4a0c-9be6-9b8c0904943a'

## Track the progress of the request

In [19]:
# Poll the status service every 3 seconds until the job reports 'done',
# printing one progress line per poll.
while True:
    connection = httplib.HTTPSConnection('podaac.jpl.nasa.gov')
    try:
        # The token travels in the query string; a GET needs no request body.
        connection.request('GET', '/ws/subset/status?token=' + token)
        response = connection.getresponse()
        data = response.read()
    finally:
        # Close the connection even if the request or the read fails.
        connection.close()
    result = json.loads(data)
    status = result['status']

    if status == 'done':
        rstring = '{}: at {}'.format(result['status'], result['resultURLs'])
        print(rstring)
        break

    # NOTE(review): the '%' characters in this format string are printed
    # literally (e.g. "%0/%5"); left unchanged to match the recorded output.
    rstring = '{}: %{}/%{} granules'.format(result['status'],
                                            result['granuleCompleted'],
                                            result['totalNumGranule'])
    # Report first, then wait, so the status is shown as soon as it is known.
    print(rstring)
    time.sleep(3)

processing: %0/%5 granules
processing: %0/%5 granules
processing: %0/%5 granules
processing: %0/%5 granules
processing: %0/%5 granules
processing: %0/%5 granules
processing: %0/%5 granules
compressing: %5/%5 granules
done: at [u'https://podaac-tools.jpl.nasa.gov/l2ss-download/db2d36d6-fc94-4a0c-9be6-9b8c0904943a/6a00eb13-4495-4147-90e9-d7edcd73e262.zip']


In [20]:
# Download the first result URL to a local file.
filename = 'results.zip'
urllib.urlretrieve(url=result['resultURLs'][0], filename=filename)

('results.zip', <httplib.HTTPMessage instance at 0x109c3dbd8>)

In [21]:
# Unpack every member of the downloaded archive into the 'result' directory.
with ZipFile(filename) as zip_ref:  # read mode is the default
    zip_ref.extractall(path='result')

In [22]:
# List the files extracted into the 'result' directory.
os.listdir('result')

['subsetted-Q2014153050900.L2_SCI_V5.0.hdf',
 'subsetted-Q2014153064700.L2_SCI_V5.0.hdf',
 'subsetted-Q2014153082500.L2_SCI_V5.0.hdf',
 'subsetted-Q2014153100300.L2_SCI_V5.0.hdf',
 'subsetted-Q2014153114100.L2_SCI_V5.0.hdf']