In [6]:
import os
import requests
import earthaccess
import pandas as pd
import datetime as dt
import geopandas
from shapely.geometry import MultiPolygon, Polygon, box
from shapely.geometry.polygon import orient

In [2]:
geojson_filename = "11SPS.geojson"

In [3]:
doi = '10.5067/EMIT/EMITL2ARFL.001'# EMIT L2A Reflectance

# CMR API base url
cmrurl='https://cmr.earthdata.nasa.gov/search/' 

doisearch = cmrurl + 'collections.json?doi=' + doi
concept_id = requests.get(doisearch).json()['feed']['entry'][0]['id']
print(concept_id)

C2408750690-LPCLOUD


In [15]:
# Temporal Bound - Year, month, day. Hour, minutes, and seconds (ZULU) can also be included 
start_date = dt.datetime(2024, 5, 1)
end_date = dt.datetime(2024, 5, 30, 23, 23, 59)  

# CMR formatted start and end times
dt_format = '%Y-%m-%dT%H:%M:%SZ'
temporal_str = start_date.strftime(dt_format) + ',' + end_date.strftime(dt_format)
print(temporal_str)

2024-05-01T00:00:00Z,2024-05-30T23:23:59Z


In [16]:
polygon = geopandas.read_file(geojson_filename)
polygon

Unnamed: 0,Name,Land,geometry
0,11SPS,True,"POLYGON Z ((-115.92423 33.43491 0, -114.74363 ..."


In [17]:
polygon.loc[0, "geometry"] = orient(polygon.geometry[0], sign=1.0)
polygon

Unnamed: 0,Name,Land,geometry
0,11SPS,True,"POLYGON Z ((-115.92423 33.43491 0, -115.93616 ..."


In [18]:
geojson = {"shapefile": (geojson_filename, polygon.geometry.to_json(), "application/geo+json")}
geojson

{'shapefile': ('11SPS.geojson',
  '{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-115.92423373599996, 33.43490918800006, 0.0], [-115.93615876899997, 32.444635394000045, 0.0], [-114.76862412, 32.429359161000036, 0.0], [-114.74363157999994, 33.419046707000064, 0.0], [-115.92423373599996, 33.43490918800006, 0.0]]]}, "bbox": [-115.93615876899997, 32.429359161000036, -114.74363157999994, 33.43490918800006]}], "bbox": [-115.93615876899997, 32.429359161000036, -114.74363157999994, 33.43490918800006]}',
  'application/geo+json')}

In [19]:
# Search using a Polygon
# polygon = geopandas.read_file(geojson_filename)
# geojson = {"shapefile": (geojson_filename, polygon.geometry.to_json(), "application/geo+json")}

page_num = 1
page_size = 2000 # CMR page size limit

granule_arr = []

while True:
    
     # defining parameters
    cmr_param = {
        "collection_concept_id": concept_id, 
        "page_size": page_size,
        "page_num": page_num,
        "temporal": temporal_str,
        "simplify-shapefile": 'true' # this is needed to bypass 5000 coordinates limit of CMR
    }

    granulesearch = cmrurl + 'granules.json'
    response = requests.post(granulesearch, data=cmr_param, files=geojson)
    print(response)
    print(response.json())
    granules = response.json()['feed']['entry']
       
    if granules:
        for g in granules:
            granule_urls = ''
            granule_poly = ''
                       
            # read granule title and cloud cover
            granule_name = g['title']
            cloud_cover = g['cloud_cover']
    
            # reading bounding geometries
            if 'polygons' in g:
                polygons= g['polygons']
                multipolygons = []
                for poly in polygons:
                    i=iter(poly[0].split (" "))
                    ltln = list(map(" ".join,zip(i,i)))
                    multipolygons.append(Polygon([[float(p.split(" ")[1]), float(p.split(" ")[0])] for p in ltln]))
                granule_poly = MultiPolygon(multipolygons)
            
            # Get https URLs to .nc files and exclude .dmrpp files
            granule_urls = [x['href'] for x in g['links'] if 'https' in x['href'] and '.nc' in x['href'] and '.dmrpp' not in x['href']]
            # Add to list
            granule_arr.append([granule_urls, cloud_cover, granule_poly])
                           
        page_num += 1
    else: 
        break
 
print(granule_arr)

<Response [200]>
{'feed': {'updated': '2024-08-23T01:28:13.025Z', 'id': 'https://cmr.earthdata.nasa.gov:443/search/granules.json', 'title': 'ECHO granule metadata', 'entry': [{'time_start': '2024-05-21T23:27:16.000Z', 'cloud_cover': '30', 'updated': '2024-05-23T23:03:47.000Z', 'dataset_id': 'EMIT L2A Estimated Surface Reflectance and Uncertainty and Masks 60 m V001', 'data_center': 'LPCLOUD', 'title': 'EMIT_L2A_RFL_001_20240521T232716_2414216_002', 'coordinate_system': 'GEODETIC', 'day_night_flag': 'DAY', 'time_end': '2024-05-21T23:27:28.000Z', 'id': 'G3029286621-LPCLOUD', 'original_format': 'UMM_JSON', 'granule_size': '3579.7861518859863', 'browse_flag': True, 'polygons': [['33.5352974 -115.9862595 32.9145088 -116.7228775 32.3046684 -116.208931 32.925457 -115.4723129 33.5352974 -115.9862595']], 'collection_concept_id': 'C2408750690-LPCLOUD', 'online_access_flag': True, 'links': [{'rel': 'http://esipfed.org/ns/fedsearch/1.1/data#', 'title': 'Download EMIT_L2A_RFL_001_20240521T232716_24

In [20]:
# creating a pandas dataframe
cmr_results_df = pd.DataFrame(granule_arr, columns=["asset_url", "cloud_cover", "granule_poly"])
# Drop granules with empty geometry - if any exist
cmr_results_df = cmr_results_df[cmr_results_df['granule_poly'] != '']
# Expand so each row contains a single url 
cmr_results_df = cmr_results_df.explode('asset_url')
# Name each asset based on filename
cmr_results_df.insert(0,'asset_name', cmr_results_df.asset_url.str.split('/',n=-1).str.get(-1))

cmr_results_df

Unnamed: 0,asset_name,asset_url,cloud_cover,granule_poly
0,EMIT_L2A_RFL_001_20240521T232716_2414216_002.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,30,"MULTIPOLYGON (((-115.9862595 33.5352974, -116...."
0,EMIT_L2A_RFLUNCERT_001_20240521T232716_2414216...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,30,"MULTIPOLYGON (((-115.9862595 33.5352974, -116...."
0,EMIT_L2A_MASK_001_20240521T232716_2414216_002.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,30,"MULTIPOLYGON (((-115.9862595 33.5352974, -116...."
1,EMIT_L2A_RFL_001_20240521T232728_2414216_003.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,20,"MULTIPOLYGON (((-115.3462372 34.0547523, -116...."
1,EMIT_L2A_RFLUNCERT_001_20240521T232728_2414216...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,20,"MULTIPOLYGON (((-115.3462372 34.0547523, -116...."
1,EMIT_L2A_MASK_001_20240521T232728_2414216_003.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,20,"MULTIPOLYGON (((-115.3462372 34.0547523, -116...."
2,EMIT_L2A_RFL_001_20240521T232740_2414216_004.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,8,"MULTIPOLYGON (((-114.7049332 34.5700073, -115...."
2,EMIT_L2A_RFLUNCERT_001_20240521T232740_2414216...,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,8,"MULTIPOLYGON (((-114.7049332 34.5700073, -115...."
2,EMIT_L2A_MASK_001_20240521T232740_2414216_004.nc,https://data.lpdaac.earthdatacloud.nasa.gov/lp...,8,"MULTIPOLYGON (((-114.7049332 34.5700073, -115...."
