## Query EMIT API and download

In [1]:
import pystac_client
import boto3
import requests
from pathlib import Path

In [2]:
# Detect whether this notebook runs on an EC2 instance in us-west-2.
# `aws_session` stays False unless the probe positively confirms it.
import subprocess

aws_session = False

try:
    # Ask the `ec2-metadata` tool for the availability zone. The second
    # whitespace-separated token of its output is taken as the zone name
    # (expected to look like "placement: us-west-2a" -- TODO confirm).
    probe = subprocess.run(
        ["ec2-metadata", "--availability-zone"],
        stdout=subprocess.PIPE,
        timeout=10,  # do not hang the notebook if the metadata service is unreachable
    )
    tokens = probe.stdout.decode("utf-8").split()
    availability_zone = tokens[1] if len(tokens) > 1 else ""
    aws_session = availability_zone.startswith("us-west-2")
except Exception:
    # Best-effort probe: a missing tool, a timeout or undecodable output all
    # mean "not in AWS us-west-2". Unlike a bare `except:`, this still lets
    # KeyboardInterrupt / SystemExit propagate.
    aws_session = False

In [3]:
# Show the probe result: True only when the reported availability zone starts with "us-west-2"
aws_session

True

## AWS S3 set up 

**Direct download from the source `lp-prod-protected` bucket only works in a cloud compute instance running in AWS us-west-2 (not tested).**

In [4]:
if aws_session:
    # Request S3 credentials (access key, secret key, session token) from LP DAAC.
    # This requires you to have an entry in .netrc for the NASA Earthdata Login
    credentials_response = requests.get(
        'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
        timeout=60,
    )
    # Fail here with a clear HTTP error instead of a confusing JSON/KeyError
    # further down when the login did not succeed.
    credentials_response.raise_for_status()
    s3credentials = credentials_response.json()

    # AWS credentials for the source bucket

    source_credentials = {
        "aws_access_key_id": s3credentials['accessKeyId'],
        "aws_secret_access_key": s3credentials['secretAccessKey'],
        "aws_session_token": s3credentials['sessionToken'],
        #"region_name": "us-west-2",  # Source bucket region
    }

    # S3 client that reads from the source bucket with the credentials above
    session = boto3.session.Session(**source_credentials)
    source_s3 = session.client("s3")

    # Source bucket name
    source_bucket = "lp-prod-protected"

    # Destination S3 session - requires aws credentials set
    # (picked up by boto3 from the environment, not from `source_credentials`)
    dest_session = boto3.session.Session(region_name="ap-southeast-2")
    dest_s3 = dest_session.client("s3")
    destination_bucket = "frontiersi-hyperspectral"

## Query for AUS data

In [5]:
# STAC collection queried in this notebook:
# https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/collections/EMITL2ARFL_001

# Root URL of the LPCLOUD STAC catalog that contains that collection
catalog_url = 'https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/'

In [6]:
# Keyword arguments for the STAC search, unpacked into `catalog.search(...)`

search_params = dict(
    # Collection to search
    collections=["EMITL2ARFL_001"],
    # Bounding box as (swLon, swLat, neLon, neLat)
    bbox=(112, -44.0, 154, -9.0),
    # Closed date range covering the whole of 2024
    datetime="2024-01-01T00:00:00Z/2024-12-31T23:59:59Z",
    # Property filter: cloud cover strictly below 10
    query={"eo:cloud_cover": {"lt": 10}},
)

In [7]:
# Open the STAC catalog and run the search

catalog = pystac_client.Client.open(catalog_url)

# The search is described by `search_params`
query = catalog.search(**search_params)

# Collect every item yielded by the search into a list
items = [found_item for found_item in query.items()]

In [8]:
# Number of items returned by the search
len(items)

2562

`data` assets are located in a private bucket.

`browse` and `metadata` are in public locations

In [9]:
# E.g. the assets attached to the first returned item

items[0].assets

{'browse': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 'thumbnail_0': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 'thumbnail_1': <Asset href=s3://lp-prod-public/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.png>,
 '001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/EMITL2ARFL.001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_RFL_001_20240103T005606_2400301_004.nc>,
 '001/EMIT_L2A_RFL_001_20240103T005606_2400301_004/EMIT_L2A_MASK_001_20240103T005606_2400301_004': <Asset href=https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-prote

### Download data

In [None]:
def download_emit(url, local_filename, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` to ``local_filename``, creating parent folders as needed.

    The body is streamed to disk in chunks so that large files are never held
    in memory in full, and it is written to a ``.part`` file that is renamed
    only after the transfer finished, so an interrupted download does not leave
    a truncated file under the final name.

    Errors are reported with ``print`` rather than raised, so a loop over many
    files keeps going after a failure.

    Args:
        url: Address of the file to fetch (requests will use .netrc for credentials).
        local_filename: Destination path (str or Path).
        chunk_size: Number of bytes read from the response per iteration.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        True if the file was downloaded and saved, False otherwise.
    """
    local_file = Path(local_filename)
    # Create the directory and intermediate directories if needed
    local_file.parent.mkdir(parents=True, exist_ok=True)
    # Temporary name used while the transfer is in progress
    partial_file = local_file.with_name(local_file.name + ".part")

    try:
        # stream=True defers reading the body until iter_content is consumed
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                print(f"Response text: {response.text}")
                return False

            with open(partial_file, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)

        # Publish the finished file under its final name
        partial_file.replace(local_file)
        print(f"File downloaded successfully: {local_file}")
        return True

    except Exception as e:
        print(f"An error occurred: {e}")
        # Do not leave a half-written temporary file behind
        if partial_file.exists():
            partial_file.unlink()
        return False


In [None]:
# Download data
#
# Inside AWS us-west-2 each `data` asset is streamed from the source bucket into
# the destination bucket; elsewhere `data` and `browse` assets are downloaded
# over HTTPS to the local folder.

protected_prefix = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/'
public_prefix = 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-public/'
# NOTE(review): the folder is named 'nz' although the search targets AUS data -- confirm
local_root = 'data/nz/'

for item in items:
    for asset in item.assets.values():
        if asset.roles == ['data']:
            if aws_session:
                # The object key is the asset URL without the HTTPS prefix
                object_key = asset.href.replace(protected_prefix, '')
                # Read with the source credentials and write with the destination
                # credentials. A single `dest_s3.copy_object` call would have to
                # read the protected bucket with the destination credentials.
                # NOTE(review): the source credentials carry a session token and
                # presumably expire; long runs may need to refresh them.
                source_object = source_s3.get_object(Bucket=source_bucket, Key=object_key)
                dest_s3.upload_fileobj(source_object['Body'], destination_bucket, object_key)
            else:
                local_file = asset.href.replace(protected_prefix, local_root)
                download_emit(asset.href, local_file)
        if asset.roles == ['browse'] and not aws_session:
            local_file = asset.href.replace(public_prefix, local_root)
            download_emit(asset.href, local_file)


## Intersect with TERN FC Sites

In [None]:
import pandas as pd
import numpy as np
import datetime
import pystac_client
from datetime import datetime, timedelta

# Read the CSV file into a DataFrame
in_df = pd.read_csv('s2_fc/star_transects.csv')

# Get the relevant columns: FID, ref_x, ref_y, and obs_time
df = in_df[['FID', 'ref_x', 'ref_y', 'obs_time']]
# Sort the df by date descending
df = df.sort_values(by='obs_time', ascending=False)

# Search settings, defined once instead of inside the loop
SEARCH_WINDOW = timedelta(days=14)  # searched on each side of the observation date (±14 days)
BBOX_BUFFER_DEG = 0.005             # half-width of the search box: 0.01 degrees in total
MAX_CLOUD_COVER = 10                # keep items with cloud cover below this value

# LPCLOUD catalog
catalog_url = 'https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/'

# Initialize dictionary to store FID and corresponding STAC items
fid_to_items = {}

# Create a STAC client
catalog = pystac_client.Client.open(catalog_url)

# Run one STAC search per row of the dataframe
for _, row in df.iterrows():
    fid = row['FID']
    lon = row['ref_x']
    lat = row['ref_y']

    # Parse observation time and create the date range (±14 days, whole days)
    try:
        obs_date = datetime.strptime(row['obs_time'], '%Y-%m-%dT%H:%M:%S')
        print(f"Processing FID {fid} with observation date {obs_date}:", end=' ')
        start_date = (obs_date - SEARCH_WINDOW).strftime('%Y-%m-%dT00:00:00Z')
        end_date = (obs_date + SEARCH_WINDOW).strftime('%Y-%m-%dT23:59:59Z')
        date_range = f"{start_date}/{end_date}"
    except (ValueError, TypeError):
        # Skip this row if the date is invalid
        print(f"Invalid date format for FID {fid}: {row['obs_time']}")
        continue

    # Small box centred on the point, as (min lon, min lat, max lon, max lat)
    bbox = (
        lon - BBOX_BUFFER_DEG,
        lat - BBOX_BUFFER_DEG,
        lon + BBOX_BUFFER_DEG,
        lat + BBOX_BUFFER_DEG,
    )

    # Define search parameters for this specific location and time
    search_params = {
        "collections": ["EMITL2ARFL_001"],
        "bbox": bbox,
        "datetime": date_range,
        "query": {
            "eo:cloud_cover": {"lt": MAX_CLOUD_COVER}
        }
    }

    # Query STAC catalog for this location and time
    try:
        query = catalog.search(**search_params)
        items_for_fid = list(query.items())

        # Only FIDs with at least one match are stored
        if items_for_fid:
            print(f"Found {len(items_for_fid)} items")
            fid_to_items[fid] = items_for_fid
        else:
            print("No items found")
    except Exception as e:
        # A failed query is reported and the loop moves on to the next row
        print(f"Error querying: {e}")

# Print summary
print(f"\nFound EMIT data for {len(fid_to_items)} out of {len(df)} locations")



Processing FID star_transects.fid-35735e87_1942a663648_-6f3b with observation date 2024-05-02 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-71dd with observation date 2024-05-02 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74c1 with observation date 2024-05-01 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74c2 with observation date 2024-05-01 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74c3 with observation date 2024-05-01 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74c4 with observation date 2024-05-01 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74c0 with observation date 2024-05-01 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a663648_-74bb with observation date 2024-04-30 00:00:00: No items found
Processing FID star_transects.fid-35735e87_1942a

In [None]:
# URLs of the `data` assets belonging to the listed matches
assets = []

for fid, items_list in fid_to_items.items():
    # Only the first two items of each FID are listed, and only their
    # asset URLs are collected.
    # NOTE(review): confirm the two-item limit is meant to apply to the saved URLs too.
    for item in items_list[:2]:
        print(f"FID {fid} : {item.id}, Date: {item.datetime.strftime('%Y-%m-%d')}")
        assets.extend(
            asset.href
            for asset in item.assets.values()
            if asset.roles == ['data']
        )

# Write the collected asset URLs to CSV, one URL per row
assets_df = pd.DataFrame(assets, columns=['asset_url'])
assets_df.to_csv('s2_fc/emit_asset_urls.csv', index=False)

# Write the FID -> items mapping to CSV.
# NOTE(review): the 'items' column holds the item objects themselves, so the
# CSV stores their string form -- confirm that is what downstream code needs.
fid_items_df = pd.DataFrame(fid_to_items.items(), columns=['FID', 'items'])
fid_items_df.to_csv('s2_fc/fid_to_items.csv', index=False)


FID star_transects.fid-35735e87_1942a663648_-72fa : EMIT_L2A_RFL_001_20231022T055300_2329504_048, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-72fb : EMIT_L2A_RFL_001_20231022T055248_2329504_047, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-72fb : EMIT_L2A_RFL_001_20231022T055300_2329504_048, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-6fd1 : EMIT_L2A_RFL_001_20231022T055248_2329504_047, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-7332 : EMIT_L2A_RFL_001_20231022T055248_2329504_047, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-7067 : EMIT_L2A_RFL_001_20231022T055237_2329504_046, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-7068 : EMIT_L2A_RFL_001_20231022T055237_2329504_046, Date: 2023-10-22
FID star_transects.fid-35735e87_1942a663648_-754d : EMIT_L2A_RFL_001_20231030T024137_2330302_025, Date: 2023-10-30
FID star_transects.fid-35735e87_1942a663648_-754d : EMIT_L2A_RFL_001_20231103T01

In [26]:
import json
from shapely.geometry import box, mapping
import geopandas as gpd

def _bbox_feature(fid, item):
    """Return a GeoJSON Feature whose polygon is the rectangle given by ``item.bbox``.

    The first four bbox values are read as (min x, min y, max x, max y); the
    feature properties record the FID together with the item's id, date,
    cloud cover and raw ``datetime`` property.
    """
    extent = item.bbox
    min_x, min_y, max_x, max_y = extent[0], extent[1], extent[2], extent[3]

    # Ring runs bottom left -> bottom right -> top right -> top left and
    # repeats the first corner to close the polygon.
    ring = [
        [min_x, min_y],
        [max_x, min_y],
        [max_x, max_y],
        [min_x, max_y],
        [min_x, min_y],
    ]

    return {
        "type": "Feature",
        "geometry": {"type": "Polygon", "coordinates": [ring]},
        "properties": {
            "fid": fid,
            "item_id": item.id,
            "datetime": item.datetime.strftime('%Y-%m-%d'),
            "cloud_cover": item.properties.get("eo:cloud_cover"),
            "acquisition_date": item.properties.get("datetime"),
        },
    }


# One feature per (FID, item) pair; items without a bbox are skipped
features = [
    _bbox_feature(fid, item)
    for fid, items_list in fid_to_items.items()
    for item in items_list
    if getattr(item, 'bbox', None)
]

# Wrap the features in a FeatureCollection
feature_collection = {"type": "FeatureCollection", "features": features}

# Save to file
with open('s2_fc/emit_coverage.geojson', 'w') as geojson_file:
    json.dump(feature_collection, geojson_file)

print(f"Created GeoJSON with {len(features)} features")


Created GeoJSON with 17 features
