In [1]:
# Install h5coro (0.0.7 or newer) with the %pip magic; a kernel restart may be
# needed afterwards before the updated package can be imported
%pip install --quiet "h5coro>=0.0.7"

Note: you may need to restart the kernel to use updated packages.


In [1]:
%matplotlib widget

import numpy as np
import os
import matplotlib.pyplot as plt
import earthaccess
import xarray as xr
import h5py
from xarray.backends.api import open_datatree
from h5coro import h5coro, s3driver
import geopandas as gpd
import fsspec
import s3fs

In [2]:
# Authenticate with NASA Earthdata using the credentials stored in a .netrc
# file (the session is used below to read ICESat-2 and GEDI granules)
auth = earthaccess.login(strategy="netrc")

# If the netrc login did not authenticate us
if not auth.authenticated:
    # Ask for credentials interactively and persist them in a .netrc file
    auth.login(strategy="interactive", persist=True)

In [3]:
# Spatial search window, ordered (west, south, east, north)
bbox = (
    -122,  # west
    39.3,  # south
    -120,  # east
    40,    # north
)

# Temporal search window as (start, end) date strings
start_dt, end_dt = '2020-08-01', '2020-10-31'

# Open ICESat-2 ATL08 data

In [42]:
# Establish the Earthdata session from the stored .netrc credentials
auth = earthaccess.login(strategy="netrc")

# Request S3 credentials for the NSIDC DAAC; this dict is also what the
# h5coro reader is given later on
s3_creds = auth.get_s3_credentials(daac="NSIDC")

# Unpack the three credential fields, in the same order they are listed
aws_access_key_id, aws_secret_access_key, aws_session_token = (
    s3_creds[field] for field in ('accessKeyId', 'secretAccessKey', 'sessionToken')
)

# Build an s3fs filesystem authenticated with those credentials
fs = s3fs.S3FileSystem(
    token=aws_session_token,
    secret=aws_secret_access_key,
    key=aws_access_key_id,
)

In [25]:
# Query Earthdata for every cloud-hosted granule of the ICESat-2 collection
# that intersects `bbox` between `start_dt` and `end_dt`
results = earthaccess.search_data(
    cloud_hosted=True,
    concept_id='C2613553260-NSIDC_CPRD',
    temporal=(start_dt, end_dt),
    bounding_box=bbox,
)

# Report how many granules matched the query
print(f'{len(results)} TOTAL granules')

18 TOTAL granules


In [26]:
# Open the matched ICESat-2 granules through earthaccess; the first returned
# object is used below for its `.details['name']` path
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/18 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/18 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/18 [00:00<?, ?it/s]

In [91]:
# Dataset paths to read from the gt1r/land_segments group of an ICESat-2 file:
# the h_max_canopy values plus the longitude and latitude of each segment
variables = [
    'gt1r/land_segments/canopy/h_max_canopy',
    'gt1r/land_segments/longitude',
    'gt1r/land_segments/latitude',
]

In [92]:
%%timeit
# Initialize the H5Coro object with the S3 driver and credentials
h5obj = h5coro.H5Coro(paths[0].details['name'], h5coro.s3driver.S3Driver, 
                      errorChecking=True, verbose=False, 
                      credentials=s3_creds, multiProcess=False)

# Read the datasets from the ICESat-2 file
data = h5obj.readDatasets(variables, block=True, enableAttributes=False)

xr_cover_coro = xr.DataArray(data=data['gt1r/land_segments/canopy/h_max_canopy'],
                        coords={'lat':(['x'],data['gt1r/land_segments/latitude']),
                                'lon':(['x'],data['gt1r/land_segments/longitude'])},
                        dims = ['x'])

431 ms ± 57.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Open GEDI data 

In [4]:
# Query Earthdata for every cloud-hosted granule of the GEDI collection
# that intersects `bbox` between `start_dt` and `end_dt`
results = earthaccess.search_data(
    cloud_hosted=True,
    concept_id='C2142776747-LPCLOUD',
    temporal=(start_dt, end_dt),
    bounding_box=bbox,
)

# Report how many granules matched the query
print(f'{len(results)} TOTAL granules')

30 TOTAL granules


In [5]:
# Open the matched GEDI granules through earthaccess; the first returned
# object is used below for its `.details['name']` path
paths = earthaccess.open(results)

QUEUEING TASKS | :   0%|          | 0/30 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/30 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/30 [00:00<?, ?it/s]

In [13]:
# Request S3 credentials for the LPDAAC DAAC from the existing Earthdata
# session; this dict is also what the h5coro reader is given later on
s3_creds = auth.get_s3_credentials(daac="LPDAAC")

# Unpack the three credential fields, in the same order they are listed
aws_access_key_id, aws_secret_access_key, aws_session_token = (
    s3_creds[field] for field in ('accessKeyId', 'secretAccessKey', 'sessionToken')
)

# Build an s3fs filesystem authenticated with those credentials
fs = s3fs.S3FileSystem(
    token=aws_session_token,
    secret=aws_secret_access_key,
    key=aws_access_key_id,
)

In [72]:
%%timeit

# Read GEDI using h5coro + xarray
h5obj = h5coro.H5Coro(paths[0].details['name'], h5coro.s3driver.S3Driver, 
                      errorChecking=True, verbose=False, 
                      credentials=s3_creds, multiProcess=False)
variables = ['BEAM0001/geolocation/lat_lowestmode',
             'BEAM0001/geolocation/lon_lowestmode',
             'BEAM0001/cover']
data = h5obj.readDatasets(variables, block=True, enableAttributes=False)
# for variable in data:
#     print(f'{variable}: {data[variable][0:10]}')

# Convert to dataArray
xr_cover_coro = xr.DataArray(data=data['BEAM0001/cover'],
                        coords={'lat':(['x'],data['BEAM0001/geolocation/lat_lowestmode']),
                                'lon':(['x'],data['BEAM0001/geolocation/lon_lowestmode'])},
                        dims = ['x'])

824 ms ± 55.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
