In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import re
import json
import math
import h5py
import shutil
import zipfile
import requests
import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry.polygon import orient
from shapely.geometry import Polygon, mapping
from xml.etree import ElementTree as ET

In [2]:
def make_granule_list(geojson, short_name='ATL03',
                      start_date='2018-01-01', end_date='2030-01-01',
                      start_time='00:00:00', end_time='23:59:59',
                      simplify_geometry=False,
                      write_csv=False, return_df=True,
                      list_out_name=None,
                      geojson_dir='geojsons/'):
    """Search NASA CMR for the granules of a product that intersect a region.

    The latest version of ``short_name`` is looked up in the CMR collection
    metadata, then the granule search is paged through using the exterior ring
    of the first geometry in the geojson file as the search polygon.

    Parameters
    ----------
    geojson : str
        File name of the region file, located inside ``geojson_dir``. Only the
        first feature is used, and it must expose an ``exterior`` ring.
    short_name : str
        Product short name passed to CMR.
    start_date, end_date : str
        Date parts of the temporal filter.
    start_time, end_time : str
        Time parts of the temporal filter; each bound is sent to CMR as
        ``'<date>T<time>Z'``.
    simplify_geometry : bool
        If True, simplify the region with a tolerance of 0.05 (in the
        coordinate units of the file) before building the search polygon.
    write_csv : bool
        If True, write the resulting table to ``list_out_name``.
    return_df : bool
        If True, return the table; otherwise the function returns None.
    list_out_name : str or None
        Output CSV path. When empty, a name under ``granule_lists/`` is built
        from the region name, product and date range.
    geojson_dir : str
        Directory that contains ``geojson``.

    Returns
    -------
    pandas.DataFrame or None
        One row per unique ``producer_granule_id`` (sorted by name) with the
        columns ``granule``, ``region`` and ``size_mb``.

    Raises
    ------
    requests.HTTPError
        If CMR answers a request with an error status.
    ValueError
        If CMR knows no collection for ``short_name``.
    """
    temporal = f'{start_date}T{start_time}Z,{end_date}T{end_time}Z'
    cmr_collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json'
    granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'

    # Look up the collection metadata to find out which versions exist.
    response = requests.get(cmr_collections_url, params={'short_name': short_name})
    response.raise_for_status()
    results = json.loads(response.content)
    versions = [el['version_id'] for el in results['feed']['entry']]
    if not versions:
        raise ValueError('No collection found in CMR for short_name %r.' % short_name)
    # NOTE(review): string comparison; assumes zero-padded ids such as '006'.
    latest_version = max(versions)

    # Read the region and work on its first geometry only.
    gdf = gpd.read_file(os.path.join(geojson_dir, geojson))
    region = gdf.geometry.iloc[0]
    if simplify_geometry:
        print('Simplifying input region...')
        region = region.simplify(0.05, preserve_topology=True)

    # Orient AFTER simplifying so the polygon that is actually sent has a
    # counter-clockwise exterior (sign=1.0).
    region = orient(region, sign=1.0)
    polygon = ','.join(str(c) for xy in zip(*region.exterior.coords.xy) for c in xy)
    search_params = {'short_name': short_name, 'version': latest_version, 'temporal': temporal,
                     'page_size': 100, 'page_num': 1, 'polygon': polygon}

    # Page through the granule search until an empty page comes back.
    granules = []
    headers = {'Accept': 'application/json'}
    while True:
        response = requests.get(granule_search_url, params=search_params, headers=headers)
        response.raise_for_status()
        entries = json.loads(response.content)['feed']['entry']
        if not entries:
            break
        granules.extend(entries)
        search_params['page_num'] += 1

    # De-duplicate on producer_granule_id, keeping the first occurrence, and
    # derive names and sizes from the same mapping so they cannot get out of step.
    first_by_name = {}
    for entry in granules:
        first_by_name.setdefault(entry['producer_granule_id'], entry)
    granule_list = sorted(first_by_name)
    size_mb = [float(first_by_name[name]['granule_size']) for name in granule_list]

    print('Found %i %s version %s granules over %s between %s and %s.' % (len(granule_list), short_name, latest_version,
                                                                          geojson, start_date, end_date))

    thisdf = pd.DataFrame({'granule': granule_list,
                           'region': geojson,
                           'size_mb': size_mb})

    print('The total size of all granules is %s GB.' % f'{np.round(thisdf.size_mb.sum()/1024,2):,}')

    if write_csv:
        if not list_out_name:
            list_out_name = 'granule_lists/%s_%s_%s_%s.csv' % (geojson.replace('.geojson', ''),
                                                               short_name, start_date, end_date)
        # Create the target directory on first use instead of failing in to_csv.
        out_dir = os.path.dirname(list_out_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        thisdf.to_csv(list_out_name, index=False)
        print('Wrote file: %s' % list_out_name)

    if return_df:
        return thisdf

In [4]:
# Query ATL03 granules over the region in simplified_ANT_1000_East_B-C.geojson
# for 2021-01-01 through 2021-03-01 and write the resulting list to CSV.
make_granule_list(
    'simplified_ANT_1000_East_B-C.geojson',
    start_date='2021-01-01',
    end_date='2021-03-01',
    simplify_geometry=False,
    write_csv=True,
)

Found 54 ATL03 version 006 granules over simplified_ANT_1000_East_B-C.geojson between 2021-01-01 and 2021-03-01.
The total size of all granules is 138.56 GB.
Wrote file: granule_lists/simplified_ANT_1000_East_B-C_ATL03_2021-01-01_2021-03-01.csv


Unnamed: 0,granule,region,size_mb
0,ATL03_20210102075314_01421010_006_01.h5,simplified_ANT_1000_East_B-C.geojson,5915.852291
1,ATL03_20210102204058_01501012_006_01.h5,simplified_ANT_1000_East_B-C.geojson,1209.682523
2,ATL03_20210103072735_01571010_006_01.h5,simplified_ANT_1000_East_B-C.geojson,6079.276546
3,ATL03_20210103201518_01651012_006_01.h5,simplified_ANT_1000_East_B-C.geojson,1434.215286
4,ATL03_20210106074454_02031010_006_01.h5,simplified_ANT_1000_East_B-C.geojson,5749.245593
5,ATL03_20210106203237_02111012_006_01.h5,simplified_ANT_1000_East_B-C.geojson,1493.66564
6,ATL03_20210107071914_02181010_006_01.h5,simplified_ANT_1000_East_B-C.geojson,6011.741141
7,ATL03_20210107200658_02261012_006_01.h5,simplified_ANT_1000_East_B-C.geojson,1536.959898
8,ATL03_20210110073636_02641010_006_01.h5,simplified_ANT_1000_East_B-C.geojson,4792.984506
9,ATL03_20210110202420_02721012_006_01.h5,simplified_ANT_1000_East_B-C.geojson,1546.87166
