In [1]:
import requests
import math
import time
from pprint import pprint

In [2]:
# Base URL of the CMR search API; endpoint paths are appended to it.
CMR_OPS = "https://cmr.earthdata.nasa.gov/search"

In [3]:
# Provider short name; it matches the suffix of the collection concept IDs queried below.
provider = "LPCLOUD"

In [4]:
# Granule search endpoint: the CMR base URL with the "granules" resource appended.
url = '/'.join((CMR_OPS, 'granules'))

In [12]:
# Search inputs shared by every CMR request in this notebook.
collections = [
    "C2021957657-LPCLOUD",
    "C2021957295-LPCLOUD",
]  # collection concept IDs, sent as the `concept_id` parameter
datetime_range = "2021-10-17T00:00:00Z,2021-10-17T23:59:59Z"  # "start,end", sent as `temporal`
page_size = 2000  # number of results requested per page

---

## CMR API Request: Get information from multiple pages 

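CMR returns at most `page_size` results per request and reports the total number of matches in the `CMR-Hits` response header, so the cells below first compute how many pages are needed and then request each page in turn. See the CMR paging details: <https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#paging-details>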

In [13]:
def get_page_total(collections, datetime_range, page_size, timeout=60):
    """Return how many pages are needed to retrieve every matching granule.

    Sends one granule search to the module-level ``url`` and reads the total
    number of matches from the ``CMR-Hits`` response header.

    Parameters
    ----------
    collections : list of str
        Collection concept IDs, sent as the ``concept_id`` parameter.
    datetime_range : str
        Temporal filter, sent as the ``temporal`` parameter.
    page_size : int
        Number of results per page used for the paged requests; must be >= 1.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60).

    Returns
    -------
    int
        ``ceil(hits / page_size)``; 0 when nothing matches.

    Raises
    ------
    ValueError
        If ``page_size`` is smaller than 1.
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Validate first: 0 would divide by zero and a negative value would
    # produce a meaningless page count.
    if page_size < 1:
        raise ValueError(f'page_size must be a positive integer, got {page_size!r}')

    response = requests.get(url,
                            params={
                                'concept_id': collections,
                                'temporal': datetime_range,
                                # Only the hit count is needed here, so ask for
                                # zero results instead of a full page.
                                'page_size': 0,
                            },
                            headers={
                                'Accept': 'application/json'
                            },
                            timeout=timeout
                           )
    # Surface HTTP errors directly rather than as a KeyError on the header.
    response.raise_for_status()

    hits = int(response.headers['CMR-Hits'])
    return math.ceil(hits / page_size)

In [14]:
# Page numbers to request, counted from 1 up to and including the page total.
page_numbers = [*range(1, get_page_total(collections, datetime_range, page_size) + 1)]
page_numbers

[1, 2, 3, 4, 5]

In [15]:
# Accumulator for the links gathered while paging through the search results.
data_urls = list()

In [16]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every URL to the results of a previous run.
data_urls = []

start = time.perf_counter()  # monotonic clock, suited to measuring elapsed time
for n in page_numbers:
    print(f'Page: {n}')
    response = requests.get(url,
                            params={
                                'concept_id': collections,
                                'temporal': datetime_range,
                                'page_size': page_size,
                                'page_num': n
                            },
                            headers={
                                'Accept': 'application/json'
                            },
                            timeout=60  # do not hang forever on a stalled connection
                           )
    print(f'Page {n} Resonse Code: {response.status_code}')
    # Stop on an HTTP error here instead of failing below with a KeyError
    # while indexing into an error payload.
    response.raise_for_status()

    granules = response.json()['feed']['entry']
    print(f'Number of Granules: {len(granules)}')

    # Keep only the links whose href contains both 'https' and '.tif'.
    for g in granules:
        data_urls.extend(x['href'] for x in g['links'] if 'https' in x['href'] and '.tif' in x['href'])
end = time.perf_counter()
print(f'Total time: {end-start}')

Page: 1
Page 1 Resonse Code: 200
Number of Granules: 2000
Page: 2
Page 2 Resonse Code: 200
Number of Granules: 2000
Page: 3
Page 3 Resonse Code: 200
Number of Granules: 2000
Page: 4
Page 4 Resonse Code: 200
Number of Granules: 2000
Page: 5
Page 5 Resonse Code: 200
Number of Granules: 121
Total time: 9.523819208145142


In [17]:
# Total number of URLs collected across all pages.
len(data_urls)

139005

In [18]:
# Preview the first 50 collected URLs rather than displaying the whole list.
data_urls[:50]

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.VZA.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B01.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B06.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B02.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.B03.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T59ULA.2021289T235621.v2.0/HLS.S30.T59ULA.2021289T235621.v2.0.SAA.tif',
 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30