# Intake-STAC + NASA CMR

NASA's Common Metadata Repository (CMR) now has a STAC endpoint (https://github.com/nasa/cmr-stac). In theory, we can use Intake-STAC to browse NASA's entire data catalog!

**This example is experimental as the cmr-stac service is very new.**

In [None]:
import intake
import os
import hvplot.xarray
import pandas as pd

In [None]:
# testing remote reading of netcdf data
import fsspec
import aiohttp
import netrc
import xarray as xr

In [None]:
# Search not currently working:
# https://github.com/sat-utils/sat-search/issues/106
#import satsearch
#URL = 'https://cmr.earthdata.nasa.gov/cmr-stac/ASF'
#results = satsearch.Search.search(url=URL,
#                                  collections=['C1595422627-ASF']
#                                  )
#items = results.items()
#print('%s items' % len(items))

In [None]:
# `limit` is sent as the ?limit= query parameter; raise it to ask for more results
limit = 500
collections_url = f'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections?limit={limit}'
cat = intake.open_stac_catalog(collections_url)

# Tabulate the 'collections' entry of the catalog metadata, report the row
# count, and preview the first rows
collection_records = cat.metadata['collections']
col_info = pd.DataFrame(collection_records)
print(col_info.shape[0])
col_info.head()

In [None]:
# Opening item collection directly also not working
# https://github.com/sat-utils/sat-stac/issues/65
#from satstac import ItemCollection
#col = 'C1595422627-ASF'
#limit=10
#url = f'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections/{col}/items?limit={limit}'
#print(url)
#items = ItemCollection.open(url)
#print(len(items))

In [None]:
# Open a single STAC item straight from its URL.
# This works, but the item id has to be known in advance.
item_url = 'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections/C1595422627-ASF/items/G1636018550-ASF'
item = intake.open_stac_item(item_url)

In [None]:
# Show what iterating the item yields (presumably its asset names - confirm)
[*item]

In [None]:
# Thumbnails do not always require NASA EarthData authentication
browse_asset = item.browse
browse_asset.plot.thumbnail()

### Remote NetCDF + Authentication

In [None]:
%%time

# let's open this netcdf file directly (reads entire file into memory I think, not efficient or pretty, but works

(username, account, password) = netrc.netrc().authenticators("urs.earthdata.nasa.gov")
fs = fsspec.filesystem('http', client_kwargs={'auth': aiohttp.BasicAuth(username, password)})

with fs.open(item.data.urlpath) as f:
    da = xr.open_dataset(f, group='/science/grids/data', engine='h5netcdf', chunks={})

da

In [None]:
# Display the array object backing the 'amplitude' variable
amplitude = da['amplitude']
amplitude.data

In [None]:
%%time 

# OR just download the file and work with it locally:
localPath = item._stac_obj.download('data')
da = xr.open_dataset(localPath, group='/science/grids/data', engine='h5netcdf', chunks={})
da

In [None]:
# Note that it is faster to download the entire netcdf file and open it up than to read remotely (lots of network requests) 

### Remote Cloud-Optimized GeoTIFF

In [None]:
# Open an item from the NSIDC_ECS provider and print what iterating it yields
item_url = 'https://cmr.earthdata.nasa.gov/cmr-stac/NSIDC_ECS/collections/C1908075185-NSIDC_ECS/items/G1921160945-NSIDC_ECS'
item = intake.open_stac_item(item_url)
#print(item.yaml())
print([*item])

In [None]:
# Print the YAML description of the item's entry keyed '0'
entry_zero = item['0']
print(entry_zero.yaml())

In [None]:
%%time

# Assests with RasterIOSource Driver require these GDAL environment variables
# and a properly configured .netrc with NASA EarthData credentials
os.environ['GDAL_DISABLE_READDIR_ON_OPEN']='EMPTY_DIR'
os.environ['GDAL_HTTP_COOKIEFILE']='.urs_cookies' 
os.environ['GDAL_HTTP_COOKIEJAR']='.urs_cookies'

da = item['0'].to_dask()
da

In [None]:
# NOTE that reading the cloud-optimized GeoTIFF remotely is very fast! Only metadata is read to initialize