<a href="https://colab.research.google.com/github/catafest/colab_google/blob/master/catafest_060.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import python modules
import getpass
import json
import sys
import urllib.error
import urllib.request
from http.cookiejar import CookieJar
from time import sleep
from urllib.parse import urlencode

import certifi
import requests
import urllib3

In [2]:
# Endpoint of the GES DISC subset service (JSON-WSP); every request is sent here.
url = 'https://disc.gsfc.nasa.gov/service/subset/jsonwsp'

# Shared urllib3 PoolManager used to make the requests. Server certificates
# are required and checked against the CA bundle shipped with certifi.
http = urllib3.PoolManager(
    cert_reqs='CERT_REQUIRED',
    ca_certs=certifi.where(),
)

In [3]:
def get_http_data(request):
    """POST a JSON-WSP request to the subset service and return the decoded reply.

    The request is sent with the module-level ``http`` pool manager to the
    module-level ``url`` endpoint.

    Args:
        request: JSON-serialisable dict describing the call (the callers in
            this notebook pass 'methodname', 'type', 'version' and 'args').

    Returns:
        The reply body decoded from JSON.

    Exits:
        Prints an 'API Error' message and calls ``sys.exit(1)`` when the reply
        body is not valid JSON or when its type is 'jsonwsp/fault'.
    """
    hdrs = {'Content-Type': 'application/json',
            'Accept'      : 'application/json'}
    data = json.dumps(request)
    r = http.request('POST', url, body=data, headers=hdrs)
    try:
        response = json.loads(r.data)
    except ValueError:
        # The body is not JSON at all (e.g. an HTML error page); report it
        # the same way as any other API error instead of a raw decode error.
        print('API Error: HTTP %s reply is not valid JSON' % r.status)
        sys.exit(1)
    # Check for errors
    if response['type'] == 'jsonwsp/fault' :
        # A fault reply is not guaranteed to echo 'methodname'; fall back to
        # the method named in the request so the report itself cannot fail.
        method = response.get('methodname') or request.get('methodname', 'unknown')
        print('API Error: faulty %s request' % method)
        fault = response.get('fault')
        if fault:
            print('Fault details: %s' % fault)
        sys.exit(1)
    return response

In [4]:
# Parameters for the third subset example: the dataset identifier and the
# variables to extract from it.
product = 'M2TMNXAER_5.12.4'
varNames = [
    'OCSMASS',
    'BCSMASS',
    'SO4SMASS',
    'DUSMASS25',
    'SSSMASS25',
]
# Bounding box around my local area
minlon, maxlon = 26, 27
minlat, maxlat = 47, 48
# First and last month of the requested period
begTime, endTime = '2022-03', '2022-04'

In [5]:
# Construct JSON WSP request for API method: subset
subset_request = {
    'methodname': 'subset',
    'type': 'jsonwsp/request',
    'version': '1.0',
    'args': {
        'role'  : 'subset',
        'start' : begTime,
        'end'   : endTime,
        'box'   : [minlon, minlat, maxlon, maxlat],
        'crop'  : True,
        # Left disabled: interp / destGrid are not defined in the cells above.
#        'mapping': interp,
#        'grid'  : destGrid,
        # One entry per name in varNames, all from the same dataset, so the
        # request always follows the variable list instead of fixed indices.
        'data'  : [{'datasetId': product, 'variable': name}
                   for name in varNames]
    }
}

In [6]:
# Send the subset request to the GES DISC server; the reply's 'result'
# member carries the job identifier and its initial status.
response = get_http_data(subset_request)
job_info = response['result']

# Keep the job ID: the status and result requests below are keyed on it.
myJobId = job_info['jobId']
print('Job ID: ' + myJobId)
print('Job status: ' + job_info['Status'])

Job ID: 66143e3c5a7cfcbade0aa947
Job status: Accepted


In [7]:
# Construct JSON WSP request for API method: GetStatus
status_request = {
    'methodname': 'GetStatus',
    'version': '1.0',
    'type': 'jsonwsp/request',
    'args': {'jobId': myJobId}
}

# Poll the job until it leaves the 'Accepted' / 'Running' states.
# The status is always fetched here, so this cell does not rely on whatever
# `response` another cell left behind and can safely be re-run on its own.
while True:
    response = get_http_data(status_request)
    status  = response['result']['Status']
    percent = response['result']['PercentCompleted']
    print ('Job status: %s (%d%c complete)' % (status,percent,'%'))
    if status not in ['Accepted', 'Running']:
        break
    sleep(5)   # brief nap between polls

if status == 'Succeeded' :
    print ('Job Finished:  %s' % response['result']['message'])
else :
    # get_http_data() already exits on 'jsonwsp/fault' replies, so a reply
    # that reaches this branch may have no 'fault' member. Prefer the fault
    # code when present, otherwise report the job's own message or status.
    fault = response.get('fault') or {}
    reason = fault.get('code') or response['result'].get('message', status)
    print('Job Failed: %s' % reason)
    sys.exit(1)

Job status: Succeeded (100% complete)
Job Finished:  Complete (M2TMNXAER_5.12.4)


In [8]:
# Construct JSON WSP request for API method: GetResult
batchsize = 20
results_request = {
    'methodname': 'GetResult',
    'version': '1.0',
    'type': 'jsonwsp/request',
    'args': {
        'jobId': myJobId,
        'count': batchsize,
        'startIndex': 0
    }
}

# Retrieve the results in JSON in batches of at most `batchsize` items.
# Progress is measured by the number of items actually received rather than
# by the page size the server reports.
results = []
while True:
    response = get_http_data(results_request)
    total = response['result']['totalResults']
    items = response['result']['items']
    results.extend(items)
    # Stop when everything has arrived. Also stop on an empty batch: without
    # this guard a reply with no items would keep the loop running forever.
    if len(results) >= total or not items:
        break
    # Ask for the next batch, starting right after the items already held
    results_request['args']['startIndex'] = len(results)

# Kept for any later cell that reads `count`
count = len(results)

# Check on the bookkeeping
print('Retrieved %d out of %d expected items' % (len(results), total))

Retrieved 3 out of 3 expected items


In [9]:
# Sort the results into documents and URLs

docs = []     # documentation
urls = []     # data URLs
for item in results :
    try:
        if item['start'] and item['end'] : urls.append(item)
    except KeyError:
        # Items without a 'start' / 'end' member are treated as documentation.
        # Only the missing-key case is caught, so unrelated errors (and a
        # keyboard interrupt) are no longer silently swallowed.
        docs.append(item)
# Print out the documentation links, but do not download them
print('\nDocumentation:')
for item in docs : print(item['label']+': '+item['link'])


Documentation:
README Document: https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2_MONTHLY/M2TMNXAER.5.12.4/doc/MERRA2.README.pdf


In [10]:
# Create a password manager to deal with the 401 response that is returned from
# Earthdata Login

username = input("Provide your EarthData userid: ")
password = getpass.getpass("Provide your EarthData password: ")

password_manager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, "https://urs.earthdata.nasa.gov", username, password)

# Create a cookie jar for storing cookies. This is used to store and return the
# session cookie given to us by the data server
cookie_jar = CookieJar()

# Install all the handlers.
opener = urllib.request.build_opener(
    urllib.request.HTTPBasicAuthHandler(password_manager),
    urllib.request.HTTPCookieProcessor(cookie_jar))
urllib.request.install_opener(opener)

# Open a request for each data URL and save the reply to the working directory
print('\nHTTP_services output:')
for item in urls:
    URL = item['link']
    file_name = item['label']
    DataRequest = urllib.request.Request(URL)
    try:
        # The context managers close the HTTP response and the output file
        # even when reading or writing fails part-way.
        with urllib.request.urlopen(DataRequest) as DataResponse:
            DataBody = DataResponse.read()
        with open(file_name, 'wb') as file_:
            file_.write(DataBody)
        print (file_name, "is downloaded")
    except urllib.error.URLError as e:
        # urlopen() reports failures as urllib.error.URLError (HTTPError is a
        # subclass), not as a `requests` exception. Report the failure and
        # carry on with the remaining files.
        print('Failed to download %s: %s' % (URL, e))

print('Downloading is done and find the downloaded files in your current working directory')

Provide your EarthData userid: catafest
Provide your EarthData password: ··········

HTTP_services output:
MERRA2_400.tavgM_2d_aer_Nx.202203.nc4.nc4 is downloaded
MERRA2_400.tavgM_2d_aer_Nx.202204.nc4.nc4 is downloaded
Downloading is done and find the downloaded files in your current working directory


In [11]:
# Shell escapes: show the working directory, then list its contents with
# sizes scaled to megabyte blocks, to confirm the files were written.
!pwd
!ls -l --block-size=M

/content
total 1M
-rw-r--r-- 1 root root 1M Apr  8 18:58 MERRA2_400.tavgM_2d_aer_Nx.202203.nc4.nc4
-rw-r--r-- 1 root root 1M Apr  8 18:58 MERRA2_400.tavgM_2d_aer_Nx.202204.nc4.nc4
drwxr-xr-x 1 root root 1M Apr  5 13:21 sample_data
