## Query EMIT API and download

In [1]:
import pystac_client
import boto3
import requests
from pathlib import Path

In [2]:
# Detect whether this notebook runs on an EC2 instance in us-west-2.
# `aws_session` stays False whenever the check cannot be completed
# (tool missing, timeout, unexpected output), so the notebook falls back
# to plain HTTPS downloads.
import subprocess

aws_session = False

try:
    cmd = "ec2-metadata --availability-zone"
    # A timeout keeps the cell from hanging if the metadata tool stalls.
    result = subprocess.run(cmd.split(), stdout=subprocess.PIPE, timeout=10)
    output = result.stdout.decode("utf-8")
    # The second whitespace-separated token is used as the zone name
    # (presumably output looks like "placement: us-west-2a" - TODO confirm).
    region = output.strip().split()[1]
    if region.startswith('us-west-2'):
        aws_session = True
except (OSError, subprocess.SubprocessError, UnicodeDecodeError, IndexError):
    # Best effort only: not on EC2, tool not installed, or unparsable output.
    pass

In [3]:
aws_session

True

## AWS S3 set up 

**Direct download from the source `lp-prod-protected` bucket only works on a cloud compute instance running in AWS `us-west-2` (not tested).**

In [4]:
if aws_session:
    # Request S3 credentials for the source bucket.
    # This requires you to have an entry in .netrc for the NASA Earthdata Login.
    s3credentials_response = requests.get(
        'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of hitting a confusing JSON/KeyError below.
    s3credentials_response.raise_for_status()
    s3credentials = s3credentials_response.json()

    # AWS credentials for the source bucket
    source_credentials = {
        "aws_access_key_id": s3credentials['accessKeyId'],
        "aws_secret_access_key": s3credentials['secretAccessKey'],
        "aws_session_token": s3credentials['sessionToken'],
        #"region_name": "us-west-2",  # Source bucket region
    }

    # Source S3 client, authenticated with the credentials fetched above
    session = boto3.session.Session(**source_credentials)
    source_s3 = session.client("s3")

    # Source bucket details
    source_bucket = "lp-prod-protected"

    # Destination S3 session - requires your own AWS credentials to be set
    dest_session = boto3.session.Session(region_name="ap-southeast-2")
    dest_s3 = dest_session.client("s3")
    destination_bucket = "frontiersi-hyperspectral"

## Query for AUS data

In [5]:
# STAC collection 
# https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/collections/EMITL2ARFL_001

# LPCLOUD catalog
catalog_url = 'https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/'

In [6]:
# Define search parameters

# Keyword arguments passed straight to the STAC search.
search_params = dict(
    # Collection to search
    collections=["EMITL2ARFL_001"],
    # Bounding box as (swLon, swLat, neLon, neLat)
    bbox=(112, -44.0, 154, -9.0),
    # Date range
    datetime="2024-01-01T00:00:00Z/2024-12-31T23:59:59Z",
    # Extra filter: cloud cover less than 10%
    query={"eo:cloud_cover": dict(lt=10)},
)

In [7]:
# Query STAC catalog

# Open the STAC catalog at `catalog_url`
catalog = pystac_client.Client.open(catalog_url)

# Submit the search defined by `search_params`
query = catalog.search(**search_params)

# Materialize every item returned by the search into a list
items = [stac_item for stac_item in query.items()]

In [8]:
len(items)

2562

`data` assets are located in a private bucket (`lp-prod-protected`).

`browse` and `metadata` are in public locations

In [9]:
# E.g.

items[0].assets

{'browse': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 'thumbnail_0': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 'thumbnail_1': <Asset href=s3://lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 '001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.nc>,
 '001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_MASK_001_20240103T005606_2400301_004': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-prote

### Download data

In [None]:
def download_emit(url, local_filename, chunk_size=1024 * 1024, timeout=60):
    """Download `url` to `local_filename`, streaming the body to disk.

    The parent directory of `local_filename` is created if needed. The body is
    written in chunks to a sibling ``<name>.part`` file and renamed on success,
    so large files are never held in memory and a failed download does not
    leave a truncated file under the final name.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    local_filename : str or Path
        Destination path of the downloaded file.
    chunk_size : int, optional
        Number of bytes requested per streamed chunk.
    timeout : float, optional
        Timeout in seconds passed to `requests.get`.

    Returns
    -------
    bool
        True if the file was downloaded and saved, False otherwise. Failures
        are reported with `print` rather than raised.
    """
    local_file = Path(local_filename)
    # Create the directory and intermediate directories if needed
    local_file.parent.mkdir(parents=True, exist_ok=True)
    partial_file = local_file.with_name(local_file.name + ".part")

    try:
        # Perform the GET request (requests will use .netrc for credentials)
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Check if the request was successful
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                print(f"Response text: {response.text}")
                return False

            # Stream the body to disk chunk by chunk
            with open(partial_file, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)

        # Only expose the file under its final name once it is complete
        partial_file.replace(local_file)
        print(f"File downloaded successfully: {local_file}")
        return True

    except Exception as e:
        # Best effort: report the problem and remove any partial download
        if partial_file.exists():
            partial_file.unlink()
        print(f"An error occurred: {e}")
        return False


In [None]:
# Download data

# URL prefixes of the protected (data) and public (browse) asset locations
PROTECTED_PREFIX = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/'
PUBLIC_PREFIX = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/'
# Local directory that mirrors the bucket layout when not running in AWS
LOCAL_ROOT = 'data/nz/'

for item in items:
    for asset in item.assets.values():
        href = asset.href

        # Only hrefs with the expected https prefix are handled; anything else
        # (e.g. s3:// links) cannot be turned into a valid key or local path.
        if asset.roles == ['data'] and href.startswith(PROTECTED_PREFIX):
            object_key = href[len(PROTECTED_PREFIX):]
            if aws_session:
                # Read with the source credentials and write with the destination
                # credentials. A single copy_object call issued by the destination
                # client would need that client to have read access to the
                # protected source bucket.
                # NOTE(review): the source credentials are presumably short-lived;
                # a long run may need to refresh them - confirm.
                source_object = source_s3.get_object(Bucket=source_bucket, Key=object_key)
                dest_s3.upload_fileobj(source_object["Body"], destination_bucket, object_key)
            else:
                download_emit(href, LOCAL_ROOT + object_key)
        elif asset.roles == ['browse'] and not aws_session and href.startswith(PUBLIC_PREFIX):
            download_emit(href, LOCAL_ROOT + href[len(PUBLIC_PREFIX):])
