## Query EMIT API and download

In [None]:
import pystac_client
import boto3
import requests
from pathlib import Path


## AWS S3 setup

**Direct download from the source `lp-prod-protected` bucket only works on a cloud compute instance running in AWS us-west-2 (not tested).**

In [None]:
# Switch to True to read directly from the source bucket. Direct access is
# only expected to work from compute running in AWS us-west-2 (untested).
aws_session = False

if aws_session:
    # Temporary S3 credentials are issued by the LP DAAC endpoint below.
    # This requires you to have an entry in .netrc for the NASA Earthdata Login
    credentials_response = requests.get(
        'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
        timeout=30,
    )
    # Surface authentication/HTTP failures as a clear HTTP error instead of a
    # confusing JSON decode error on a non-JSON error body.
    credentials_response.raise_for_status()
    s3credentials = credentials_response.json()

    # AWS Credentials for Source

    source_credentials = {
        "aws_access_key_id": s3credentials['accessKeyId'],
        "aws_secret_access_key": s3credentials['secretAccessKey'],
        "aws_session_token": s3credentials['sessionToken'],
        #"region_name": "us-west-2",  # Source bucket region
    }

    # Initialize the S3 client for the source bucket
    session = boto3.session.Session(**source_credentials)
    source_s3 = session.client("s3")

    # Source bucket details
    source_bucket = "lp-prod-protected"

In [None]:
# Switch to True to create an S3 client for the destination bucket.
upload_to_aws = False

if upload_to_aws:
    # Destination bucket details
    destination_bucket = "frontiersi-hyperspectral"

    # AWS credentials for the destination. The key fields are blank
    # placeholders: fill them in locally and never commit real keys.
    destination_credentials = dict(
        aws_access_key_id="",
        aws_secret_access_key="",
        region_name="ap-southeast-2",  # Destination bucket region
    )

    # S3 client bound to the destination credentials
    destination_s3 = boto3.client("s3", **destination_credentials)

## Query for NZ data

In [None]:
# Root URL of the LPCLOUD STAC catalog.
# The STAC collection of interest is described at:
# https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/collections/EMITL2ARFL_001
catalog_url = "https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/"

In [None]:
# Keyword arguments handed to the STAC search
search_params = dict(
    # Collection to search
    collections=["EMITL2ARFL_001"],
    # Bounding box as (swLon, swLat, neLon, neLat)
    bbox=(166.0, -48.0, 179.0, -34.0),
    # Optional date range, currently disabled:
    # datetime="2023-01-01T00:00:00Z/2023-12-31T23:59:59Z",
    # Query parameter: cloud cover less than 10%
    query={"eo:cloud_cover": {"lt": 10}},
)

In [None]:
# Connect to the STAC catalog
catalog = pystac_client.Client.open(catalog_url)

# Submit the search using the parameters defined earlier
query = catalog.search(**search_params)

# Materialise every item returned by the search into a list
items = [*query.items()]

In [None]:
# Number of items returned by the query
len(items)

`data` assets are located in a private bucket (`lp-prod-protected`)

`browse` and `metadata` are in public locations

In [None]:
# E.g. inspect the assets attached to the first returned item

items[0].assets

### Download data

In [None]:
def download_emit(url, local_filename, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` to ``local_filename``, creating parent directories.

    The response body is streamed to disk in chunks so that large files are
    never held in memory in full. Data is first written to a sibling
    ``<name>.part`` file and renamed on success, so an interrupted transfer
    does not leave a truncated file under the final name.

    Failures are reported with ``print`` rather than raised, so a caller
    looping over many assets keeps going after a single bad download.

    Args:
        url: HTTP(S) URL to fetch. ``requests`` picks up credentials from
            ``.netrc`` when an entry for the host exists.
        local_filename: Destination path (``str`` or ``Path``).
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    local_file = Path(local_filename)
    # Create the directory and intermediate directories if needed
    local_file.parent.mkdir(parents=True, exist_ok=True)

    # Temporary name used while the transfer is in progress
    partial_file = local_file.with_name(local_file.name + ".part")

    try:
        # Perform the GET request (requests will use .netrc for credentials).
        # stream=True defers reading the body until it is iterated below.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Check if the request was successful
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                print(f"Response text: {response.text}")
                return

            # Save the file locally, one chunk at a time
            with open(partial_file, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)

        # Only expose the file under its final name once it is complete
        partial_file.replace(local_file)
        print(f"File downloaded successfully: {local_file}")

    except (requests.RequestException, OSError) as e:
        # Drop any incomplete data so it cannot be mistaken for a good file
        if partial_file.exists():
            partial_file.unlink()
        print(f"An error occurred: {e}")


In [None]:
# Download data

# Remote URL prefix expected for each asset role that should be downloaded.
# The prefix is swapped for ``local_root`` to build the local file path.
remote_prefix_by_role = {
    'data': 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/',
    'browse': 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/',
}
local_root = 'data/nz/'

for item in items:
    for asset in item.assets.values():
        # roles may be None; membership also matches assets with several roles
        roles = asset.roles or []
        for role, remote_prefix in remote_prefix_by_role.items():
            if role not in roles:
                continue
            if not asset.href.startswith(remote_prefix):
                # Without the expected prefix the href itself would become the
                # local path, so skip instead of writing to a URL-shaped path.
                print(f"Skipping {asset.href}: unexpected location for role '{role}'")
                continue
            local_file = local_root + asset.href[len(remote_prefix):]
            download_emit(asset.href, local_file)
    

### Direct Access (in progress)

Refer to Alex L's scripts

In [None]:
def load_emit_granule(granule: str, token: str) -> xr.Dataset:
    """
    Load an EMIT granule over HTTP using a bearer token.

    Args:
        granule: Location of the granule file to open. Presumably the HTTPS
            URL of the asset (e.g. an asset ``href``) -- TODO confirm.
        token: Token sent in the ``Authorization`` header of every request.

    Returns:
        Whatever ``emit_xarray`` returns for the opened file (annotated as
        ``xr.Dataset``).
    """
    # NOTE(review): xr, HTTPFileSystem and emit_xarray are not imported in the
    # visible part of this notebook; they must be in scope before this runs.

    # Use the caller-supplied granule rather than a loop variable left over
    # from another cell.
    fs = HTTPFileSystem(headers={"Authorization": f"bearer {token}"})

    return emit_xarray(fs.open(granule))