In [1]:
# https://nasa-openscapes.github.io/2021-Cloud-Hackathon/tutorials/02_Data_Discovery_CMR-STAC_API.html

from pystac_client import Client  
from collections import defaultdict    
import json
import geopandas
# import geoviews as gv
from cartopy import crs
# gv.extension('bokeh', 'matplotlib')
import geopandas as gpd

In [16]:
# find hls tiles given a point

def find_hls_tiles(point=False, band=False, limit=False, collections = ['HLSL30.v2.0', 'HLSS30.v2.0']):
    """Return S3 links to band assets of STAC items that intersect a point.

    Searches the ``LPCLOUD`` catalog at ``https://cmr.earthdata.nasa.gov/stac``
    for items in ``collections`` intersecting ``point``, then rewrites the
    selected asset hrefs from the ``https://data.lpdaac.earthdatacloud.nasa.gov/``
    prefix to ``s3://``.

    Parameters
    ----------
    point : sequence
        Two coordinates ``[x, y]``: ``point[0]`` is used as x and ``point[1]``
        as y when building the search geometry (x = longitude, y = latitude
        for geographic coordinates).
    band : str or list/tuple of str, optional
        Asset key(s) to return for every item. When falsy, every asset whose
        key starts with ``'B'`` is returned.
    limit : int, optional
        Maximum number of items to use. When falsy, all items are used.
    collections : list of str
        Collection IDs to search.

    Returns
    -------
    list of str
        One link per selected asset, grouped by item in search order.

    Raises
    ------
    TypeError, IndexError
        If ``point`` cannot be indexed at positions 0 and 1 (a hint is printed
        before re-raising).
    KeyError
        If a requested ``band`` is not among an item's assets.
    """
    stac_url = 'https://cmr.earthdata.nasa.gov/stac'
    https_prefix = 'https://data.lpdaac.earthdatacloud.nasa.gov/'

    # Validate the point before touching the network.
    try:
        x, y = point[0], point[1]
    except (TypeError, IndexError):
        # point[0] is x and point[1] is y, i.e. longitude first.
        print("Point must be in the form of [lon,lat]")
        raise

    geometry = geopandas.points_from_xy([x], [y])[0]

    # Additional search parameters (e.g. a datetime range) can be added here.
    search_kwargs = {'collections': collections, 'intersects': geometry}
    if limit:
        # Let the search stop early instead of paging through every match
        # only to discard all but the first `limit` items afterwards.
        search_kwargs['max_items'] = limit

    catalog = Client.open(f'{stac_url}/LPCLOUD/')
    search = catalog.search(**search_kwargs)

    # NOTE(review): newer pystac-client releases reportedly deprecate
    # get_all_items() in favour of item_collection() - confirm against the
    # installed version before switching.
    items = list(search.get_all_items())
    if limit:
        items = items[:limit]

    # Normalise `band` to a list of asset keys; None means "every B* asset".
    if band:
        requested = list(band) if isinstance(band, (list, tuple)) else [band]
    else:
        requested = None

    links = []
    for item in items:
        if requested is None:
            keys = [key for key in item.assets if key.startswith('B')]
        else:
            keys = requested
        for key in keys:
            links.append(item.assets[key].href.replace(https_prefix, 's3://'))

    return links

In [17]:
# given a reach ID, find the nodes

import glob
import netCDF4
import os
import numpy as np


# Input directory handed to get_reach_nodes() as `data_dir`; every entry
# directly inside it is opened as a netCDF dataset.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
data_dir = '/home/confluence/data/mnt/input/sword'





def get_reach_nodes(data_dir, reach_id):
    """Collect the IDs of all nodes that belong to a reach.

    Every path matched by ``data_dir/*`` is opened as a netCDF dataset. In its
    ``nodes`` group, entries whose ``reach_id`` (compared as text) equals
    ``str(reach_id)`` are located and their ``node_id`` values gathered.

    Parameters
    ----------
    data_dir : str
        Directory whose entries are all opened as netCDF datasets.
    reach_id : int or str
        Reach identifier; compared via its ``str()`` form.

    Returns
    -------
    list of str
        Unique node IDs as strings, in first-seen order (files are visited in
        sorted path order so the result is reproducible).
    """
    files = sorted(glob.glob(os.path.join(data_dir, '*')))
    print(f'Searching across {len(files)} continents for nodes...')

    # Dict keys de-duplicate while preserving first-seen order, unlike a set.
    found = {}

    for path in files:
        # The context manager closes the dataset even if a read below raises.
        with netCDF4.Dataset(path, "r", format="NETCDF4") as rootgrp:
            node_vars = rootgrp.groups['nodes'].variables

            reach_ids_text = node_vars['reach_id'][:].data.astype('U')
            matches = np.where(reach_ids_text == str(reach_id))[0]

            for idx in matches:
                node_id = str(node_vars['node_id'][idx].data.astype('U'))
                found[node_id] = None

    all_nodes = list(found)
    print(f'Found {len(all_nodes)} nodes...')
    return all_nodes





# get_reach_nodes(data_dir,74270100221)

In [18]:
# given a reach ID, find the lat/lon points of all nodes



import glob
import netCDF4
import os
import numpy as np

# Input directory handed to get_reach_node_cords() as `data_dir`.
# NOTE(review): the identical path is assigned in more than one cell - a single
# configuration cell would avoid the duplication.
data_dir = '/home/confluence/data/mnt/input/sword'


def get_reach_node_cords(data_dir, reach_id):
    """Collect the coordinates of all nodes that belong to a reach.

    Every path matched by ``data_dir/*`` is opened as a netCDF dataset. In its
    ``nodes`` group, entries whose ``reach_id`` (compared as text) equals
    ``str(reach_id)`` are located and their ``x`` and ``y`` values gathered.

    Parameters
    ----------
    data_dir : str
        Directory whose entries are all opened as netCDF datasets.
    reach_id : int or str
        Reach identifier; compared via its ``str()`` form.

    Returns
    -------
    list of [str, str]
        One ``[x, y]`` pair per matching node, each coordinate converted to a
        string. Duplicates are kept. Files are visited in sorted path order.
        Presumably x is longitude and y is latitude - confirm against the
        dataset's documentation.
    """
    files = sorted(glob.glob(os.path.join(data_dir, '*')))
    print(f'Searching across {len(files)} continents for nodes...')

    all_nodes = []

    for path in files:
        # The context manager closes the dataset even if a read below raises.
        with netCDF4.Dataset(path, "r", format="NETCDF4") as rootgrp:
            node_vars = rootgrp.groups['nodes'].variables

            reach_ids_text = node_vars['reach_id'][:].data.astype('U')
            matches = np.where(reach_ids_text == str(reach_id))[0]

            for idx in matches:
                # Named after the variables they are read from; the pair order
                # (x first, then y) is what callers receive.
                x_coord = str(node_vars['x'][idx].data.astype('U'))
                y_coord = str(node_vars['y'][idx].data.astype('U'))
                all_nodes.append([x_coord, y_coord])

    print(f'Found {len(all_nodes)} nodes...')
    return all_nodes











In [19]:
# given a reach ID, create download links for any hls tiles that intersect the nodes in the reach


def find_download_links_for_reach_tiles(data_dir, reach_id):
    """Gather the distinct tile links for every node of a reach.

    Looks up the node coordinates of ``reach_id`` with
    ``get_reach_node_cords`` and, for each coordinate pair, asks
    ``find_hls_tiles`` for the links of a single item (``limit=1``).

    Returns
    -------
    set of str
        The union of all links returned for the individual nodes.
    """
    unique_links = set()
    for coordinate in get_reach_node_cords(data_dir, reach_id):
        # One search per node; the set absorbs links shared between nodes.
        unique_links.update(find_hls_tiles(coordinate, limit=1))
    return unique_links

In [20]:

# Example run: collect the tile links for one reach.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
data_dir = '/home/confluence/data/mnt/input/sword'
# Reach to look up; the node search compares it via str(reach_id).
reach_id = 74270100221

# Left as the cell's last expression so the resulting set is displayed.
find_download_links_for_reach_tiles(data_dir, reach_id)

Searching across 6 continents for nodes...
Found 42 nodes...


{'s3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B01.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B02.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B03.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B04.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B05.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B06.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B07.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/HLS.L30.T15SYB.2013103T163805.v2.0.B09.tif',
 's3://lp-prod-protected/HLSL30.020/HLS.L30.T15SYB.2013103T163805.v2.0/H