In [1]:
# Import required libraries
import os
import pandas as pd
import rasterio
import xarray as xr
import requests

from dask.distributed import Client
from dask.distributed import wait, progress

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Set environment variables for cloud-optimized-geotiffs efficiency
os.environ['GDAL_DISABLE_READDIR_ON_OPEN']='YES'
os.environ['CPL_VSIL_CURL_ALLOWED_EXTENSIONS']='TIF'

In [3]:
def query_cmr_landsat(collection='Landsat_8_OLI_TIRS_C1',tier='T1', path=47, row=27):
    """Query NASA CMR for Collection1, Tier1 Landsat scenes from a specific path and row."""
    
    data = [f'short_name={collection}',
            f'page_size=2000',
            f'attribute[]=string,CollectionCategory,{tier}',
            f'attribute[]=int,WRSPath,{path}',
            f'attribute[]=int,WRSRow,{row}',
           ]

    query = 'https://cmr.earthdata.nasa.gov/search/granules.json?' + '&'.join(data)

    r = requests.get(query, timeout=100)
    print(r.url)
    
    df = pd.DataFrame(r.json()['feed']['entry'])
    
    # Save results to a file
    #print('Saved results to cmr-result.json')
    #with open('cmr-result.json', 'w') as j:
    #    j.write(r.text)
    
    return df

In [4]:
df = query_cmr_landsat()

https://cmr.earthdata.nasa.gov/search/granules.json?short_name=Landsat_8_OLI_TIRS_C1&page_size=2000&attribute%5B%5D=string,CollectionCategory,T1&attribute%5B%5D=int,WRSPath,47&attribute%5B%5D=int,WRSRow,27


In [5]:
df.columns

RangeIndex(start=0, stop=0, step=1)

In [6]:
def make_google_archive(pids, bands):
    """Turn list of product_ids into pandas dataframe for NDVI analysis."""
    
    path =  pids[0].split('_')[2][1:3]
    row =  pids[0].split('_')[2][-2:]
    baseurl = f'https://storage.googleapis.com/gcp-public-data-landsat/LC07/01/0{path}/0{row}'
    
    dates = [pd.to_datetime(x.split('_')[3]) for x in pids]
    df = pd.DataFrame(dict(product_id=pids, date=dates))
    
    for band in bands:
        df[band] = [f'{baseurl}/{x}/{x}_{band}.TIF' for x in pids]
    
    return df

In [7]:
pids = df.title.tolist()
df = make_google_archive(pids, ['B4', 'B5'])

AttributeError: 'DataFrame' object has no attribute 'title'

In [24]:
from matplotlib.pyplot import imshow
import boto3
import rasterio as rio
%matplotlib inline
from rasterio.session import AWSSession

In [3]:
! aws s3 ls s3://usgs-landsat/collection02/


Unable to locate credentials. You can configure credentials by running "aws configure".


In [None]:
! aws s3api get-object --bucket usgs-landsat --key collection02/level-2/standard/oli-tirs/2020/026/027/LC08_L2SP_026027_20200827_20200906_02_T1/LC08_L2SP_026027_20200827_20200906_02_T1_MTL.txt LC08_L2SP_026027_20200827_20200906_02_T1_MTL.txt

In [25]:
aws_session = AWSSession(boto3.Session(), requester_pays=True)

In [26]:
cog = 's3://usgs-landsat/collection02/level-2/standard/oli-tirs/2020/026/027/LC08_L2SP_026027_20200827_20200906_02_T1/LC08_L2SP_026027_20200827_20200906_02_T1_SR_B2.TIF'

In [27]:
with rio.open(cog) as src:
    profile = src.profile
    arr = src.read(1)
imshow(arr)

RasterioIOError: '/vsis3/usgs-landsat/collection02/level-2/standard/oli-tirs/2020/026/027/LC08_L2SP_026027_20200827_20200906_02_T1/LC08_L2SP_026027_20200827_20200906_02_T1_SR_B2.TIF' does not exist in the file system, and is not recognized as a supported dataset name.

In [15]:
s3_scenes = pd.read_csv('https://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz', compression='gzip')

HTTPError: HTTP Error 404: Not Found

In [11]:
import pandas as pd
import geopandas as gpd
import folium
import os, shutil
from glob import glob

In [16]:
! aws s3 ls s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2020/026/027/LC08_L2SP_026027_20200827_20200906_02_T1/ --request-payer requester

2020-12-03 20:59:05     117122 LC08_L2SP_026027_20200827_20200906_02_T1_ANG.txt
2020-12-03 20:59:19      14549 LC08_L2SP_026027_20200827_20200906_02_T1_MTL.json
2020-12-03 20:59:06      15424 LC08_L2SP_026027_20200827_20200906_02_T1_MTL.txt
2020-12-03 20:59:06      22726 LC08_L2SP_026027_20200827_20200906_02_T1_MTL.xml
2020-12-03 20:59:06    2670023 LC08_L2SP_026027_20200827_20200906_02_T1_QA_PIXEL.TIF
2020-12-03 20:59:06     230417 LC08_L2SP_026027_20200827_20200906_02_T1_QA_RADSAT.TIF
2020-12-03 20:59:06   74920387 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B1.TIF
2020-12-03 20:59:07   72961917 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B2.TIF
2020-12-03 20:59:08   74604511 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B3.TIF
2020-12-03 20:59:08   73094885 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B4.TIF
2020-12-03 20:59:09   82065367 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B5.TIF
2020-12-03 20:59:10   77232505 LC08_L2SP_026027_20200827_20200906_02_T1_SR_B6.TIF
2020-12-03 20:59