---
title: USGS Landsat Cloud Data - Search and Access
---

In [None]:
#import hvplot.xarray
import rasterio as rio
from rasterio.session import AWSSession
import rioxarray
import boto3
import json
import dask
import glob
import re
from pystac_client import Client

In [None]:
# import hvplot

Search for Landsat (LS) scenes using the STAC API

In [None]:
# Endpoint of the STAC server that is opened as the catalog below
stac_url = "https://landsatlook.usgs.gov/stac-server"

In [None]:
# Open the STAC API at `stac_url`; `ls_cat` is the client used for all catalog queries below
ls_cat = Client.open(stac_url)

In [None]:
#ls_cat

In [None]:
# One "<title> --- <id>" line per direct child of the catalog
[
    f'{child.title} --- {child.id}'
    for child in ls_cat.get_children()
]

In [None]:
# Materialise the collection iterator so the notebook displays every collection object
list(ls_cat.get_all_collections())

In [None]:
# Search area, passed as the STAC `bbox` parameter: [west, south, east, north] in lon/lat degrees
bbox = [-120, 37, -119, 38]

In [None]:
# Closed time interval in "<start>/<end>" form (UTC timestamps), used as the search `datetime` filter
datetime_range = '/'.join(['2019-10-01T00:00:00Z', '2020-09-30T23:59:59Z'])
datetime_range

In [None]:
# Item search restricted to the fSCA collection, the bounding box and the time interval defined above
search = ls_cat.search(
    collections=['landsat-c2l3-fsca'],
    bbox=bbox,
    datetime=datetime_range,
)

In [None]:
# Number of items the server reports as matching the search
search.matched()

Print first 5 item ids and associated datetime

In [None]:
# Fetch every matching item into a list so it can be iterated more than once.
# `ItemSearch.items()` is the supported replacement for the deprecated `get_items()`.
items = list(search.items())

# Preview "<id> --- <datetime>" for the first five items; slice first so only those five are formatted
[f'{item.id} --- {item.datetime}' for item in items[:5]]

In [None]:
# Collect the primary href of every asset whose href ends in '.TIF', across all returned items.
# `Asset.href` is read directly: serialising each asset to JSON and parsing it back
# only to look up one key adds work without changing the value.
http_links = [
    asset.href
    for item in items
    for asset in item.assets.values()
    if asset.href.endswith('.TIF')
]
        

In [None]:
# How many '.TIF' hrefs were collected
len(http_links)

In [None]:
# Show the first five hrefs only, rather than dumping the whole list
http_links[:5]

In [None]:
# Collect the S3 ("alternate") href of every asset whose S3 href ends in '.TIF'.
s3_links = []

for item in items:
    for asset in item.assets.values():
        # Chained .get() calls: an asset may have no 'alternate' block, and an
        # 'alternate' block may have no 's3' entry or no 'href'. Those assets are
        # skipped instead of raising KeyError.
        s3_href = asset.to_dict().get('alternate', {}).get('s3', {}).get('href', '')
        if s3_href.endswith('.TIF'):
            s3_links.append(s3_href)

In [None]:
# How many S3 '.TIF' links were collected
len(s3_links)

In [None]:
# Show just the first S3 link (a slice, so an empty list displays [] instead of raising)
s3_links[:1]

In [None]:
# Keep only the S3 links whose object name ends in GROUND_SNOW.TIF, then print the first two
ground_snow_s3_links = [
    link
    for link in s3_links
    if link.endswith('GROUND_SNOW.TIF')
]
print(ground_snow_s3_links[:2])

In [None]:
# One-element slice holding the link at index 9; yields [] if there are fewer than 10 links
ground_snow_s3_links[9:10]

## Data Access - rioxarray

Set working environment

In [None]:
# boto3 session created with no arguments, so credentials and region come from boto3's default configuration
session = boto3.Session()

In [None]:
# rasterio/GDAL environment for reading from S3 with the boto3 session above:
#   - requester_pays=True: requests are made as requester-pays
#   - AWS_NO_SIGN_REQUEST='NO': requests are signed with the session's credentials
#   - GDAL_DISABLE_READDIR_ON_OPEN='TRUE': GDAL does not list sibling objects when opening a file
rio_env = rio.Env(
    AWSSession(session, requester_pays=True),
    AWS_NO_SIGN_REQUEST='NO',
    GDAL_DISABLE_READDIR_ON_OPEN='TRUE',
)
# Entered manually rather than via `with` -- presumably so the settings stay active in later cells
rio_env.__enter__()

In [None]:
# S3 location of the single VIEWABLE_SNOW GeoTIFF opened below (bucket prefix / scene folder / file)
s3_url = (
    "s3://usgs-landsat-level-3/collection02/SNOW/2020/CU/003/008/"
    "LC08_CU_003008_20200107_20210504_02_SNOW/"
    "LC08_CU_003008_20200107_20210504_02_VIEWABLE_SNOW.TIF"
)


Read the S3 asset as an xarray DataArray

In [None]:
# Open the GeoTIFF with chunked loading (chunks='auto'), then drop the 'band' dimension
# and its coordinate so the array is indexed only by its remaining dimensions
da = (
    rioxarray
    .open_rasterio(s3_url, chunks='auto')
    .squeeze('band', drop=True)
)

In [None]:
# Display the DataArray summary
da

In [None]:
# Quick-look plot of the scene; the trailing ';' keeps the plot call's return-value repr
# out of the cell output so only the figure is shown
da.plot();

Plot scene

In [None]:
# da.hvplot(x='x', y='y', crs='epsg:32617', rasterize=True, width=800, height=600, tiles = 'ESRI') # TODO: how to exclude fill values
# da.hvplot(x='x', y='y', crs='epsg:4326', rasterize=True, width=800, height=600)

---

## AWS CLI

List content in bucket

In [None]:
# List the objects under one Level-1 scene prefix; `--request-payer requester` sends the listing as a requester-pays request
! aws s3 ls s3://usgs-landsat/collection02/level-1/standard/oli-tirs/2021/016/042/LC08_L1TP_016042_20211027_20211104_02_T1/ --request-payer requester

Download content in bucket to local

In [None]:
# Recursively copy every object under the scene prefix into the current directory.
# `--request-payer requester` is written out in full, matching the other AWS CLI cells.
! aws s3 cp s3://usgs-landsat/collection02/level-1/standard/oli-tirs/2021/016/042/LC08_L1TP_016042_20211027_20211104_02_T1/ . --recursive --request-payer requester

In [None]:
# List a single Level-3 GROUND_SNOW GeoTIFF object, again as a requester-pays request
! aws s3 ls --request-payer requester s3://usgs-landsat-level-3/collection02/SNOW/2020/CU/003/009/LC08_CU_003009_20200107_20210504_02_SNOW/LC08_CU_003009_20200107_20210504_02_GROUND_SNOW.TIF
