In [1]:
import requests
import math
import aiohttp
import asyncio
import time

In [2]:
# Root of the CMR search API; endpoint paths (e.g. "granules") are appended to it.
CMR_OPS = 'https://cmr.earthdata.nasa.gov/search'

In [3]:
# NOTE(review): `provider` is not referenced by any cell visible in this part of
# the notebook — confirm it is used later, otherwise it can be removed.
provider = 'LPCLOUD'

In [4]:
# Granule search endpoint; used as the request URL when querying CMR.
url = f'{CMR_OPS}/granules'

In [5]:
# Collection concept IDs to search; sent as the `concept_id` query parameter.
collections = ['C2021957657-LPCLOUD', 'C2021957295-LPCLOUD']
# Temporal filter in "start,end" form; sent as the `temporal` query parameter.
datetime_range = '2021-10-17T00:00:00Z,2021-10-17T23:59:59Z'
# Results requested per page; also used to compute how many pages must be fetched.
# NOTE(review): 2000 is presumably the largest page size CMR accepts — confirm.
page_size = 2000

---

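## Parallel Requests

Ask CMR how many granules match the search, build one URL per result page, then fetch all pages concurrently with `aiohttp` and `asyncio`.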

In [6]:
def get_cmr_pages_urls(collections, datetime_range, page_size):
    """Build the list of paged CMR granule-search URLs for a query.

    Issues one request against the module-level ``url`` to learn the total
    number of matching granules (from the ``CMR-Hits`` response header), then
    derives one ``granules.json`` URL per result page.

    Parameters
    ----------
    collections : list of str
        Collection concept IDs, sent as the ``concept_id`` query parameter.
    datetime_range : str
        Temporal filter, sent as the ``temporal`` query parameter.
    page_size : int
        Number of results per page; also sent as ``page_size``.

    Returns
    -------
    list of str
        One URL per page, with ``page_num`` running from 1 to the last page.
        Empty when the query matches nothing.

    Raises
    ------
    requests.HTTPError
        If CMR answers the initial request with an error status.
    requests.Timeout
        If CMR does not answer within the timeout.
    """
    req = requests.get(
        url,
        params={
            'concept_id': collections,
            'temporal': datetime_range,
            'page_size': page_size,
        },
        headers={'Accept': 'application/json'},
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the notebook.
        timeout=60,
    )
    # Fail here with a clear HTTP error rather than later with a KeyError on
    # the missing 'CMR-Hits' header.
    req.raise_for_status()

    hits = int(req.headers['CMR-Hits'])
    n_pages = math.ceil(hits / page_size)

    # Reuse the already-encoded query string from the request, switching the
    # endpoint to its explicit .json form. Computed once, not per page.
    json_url = req.url.replace('granules?', 'granules.json?')
    return [f'{json_url}&page_num={page}' for page in range(1, n_pages + 1)]

In [7]:
# Build one CMR URL per result page for the configured search and display them.
# `urls` is read as a module-level name by get_tasks().
urls = get_cmr_pages_urls(collections, datetime_range, page_size)
urls

['https://cmr.earthdata.nasa.gov/search/granules.json?concept_id=C2021957657-LPCLOUD&concept_id=C2021957295-LPCLOUD&temporal=2021-10-17T00%3A00%3A00Z%2C2021-10-17T23%3A59%3A59Z&page_size=2000&page_num=1',
 'https://cmr.earthdata.nasa.gov/search/granules.json?concept_id=C2021957657-LPCLOUD&concept_id=C2021957295-LPCLOUD&temporal=2021-10-17T00%3A00%3A00Z%2C2021-10-17T23%3A59%3A59Z&page_size=2000&page_num=2',
 'https://cmr.earthdata.nasa.gov/search/granules.json?concept_id=C2021957657-LPCLOUD&concept_id=C2021957295-LPCLOUD&temporal=2021-10-17T00%3A00%3A00Z%2C2021-10-17T23%3A59%3A59Z&page_size=2000&page_num=3',
 'https://cmr.earthdata.nasa.gov/search/granules.json?concept_id=C2021957657-LPCLOUD&concept_id=C2021957295-LPCLOUD&temporal=2021-10-17T00%3A00%3A00Z%2C2021-10-17T23%3A59%3A59Z&page_size=2000&page_num=4',
 'https://cmr.earthdata.nasa.gov/search/granules.json?concept_id=C2021957657-LPCLOUD&concept_id=C2021957295-LPCLOUD&temporal=2021-10-17T00%3A00%3A00Z%2C2021-10-17T23%3A59%3A59Z&pag

In [8]:
# Module-level accumulator: get_url() extends this list with the matching hrefs.
results = []

In [9]:
def get_tasks(session):
    """Return one ``session.get(...)`` call result per entry of the module-level ``urls``.

    The calls are made in ``urls`` order and their results are returned as a
    list in that same order; nothing is awaited here.
    """
    return [session.get(page_url) for page_url in urls]

In [10]:
async def get_url():
    """Fetch all CMR result pages concurrently and collect the matching links.

    Opens one ``aiohttp`` session, awaits every request produced by
    ``get_tasks(session)`` together, then parses each JSON page and extends the
    module-level ``results`` list with every link ``href`` that contains both
    ``'https'`` and ``'.tif'``.

    Returns ``None``; the output is the side effect on ``results``. Calling it
    again appends to ``results`` rather than replacing its contents.

    Raises
    ------
    aiohttp.ClientResponseError
        If any page comes back with an HTTP error status.
    """
    async with aiohttp.ClientSession() as session:
        # gather() awaits all page requests at once instead of one by one.
        responses = await asyncio.gather(*get_tasks(session))
        for response in responses:
            # Surface HTTP failures explicitly; an error body has no 'feed'
            # key and would otherwise fail below with an opaque KeyError.
            response.raise_for_status()
            page = await response.json()
            results.extend([
                link['href']
                for granule in page['feed']['entry']
                # Tolerate granule entries that carry no 'links' list.
                for link in granule.get('links', [])
                if 'https' in link['href'] and '.tif' in link['href']
            ])

In [11]:
start = time.time()

await get_url()

end = time.time()

total_time = end - start
total_time

5.689258813858032

In [12]:
# Number of hrefs accumulated in `results`.
len(results)

139005

In [13]:
# Preview only the first 25 links rather than dumping the whole list.
results[:25]

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.VZA.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B01.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B06.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B02.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B03.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.SAA.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30