In [12]:
import requests

def query_neon_site(site_code='SJER'):
    """
    Queries NEON API for a specific site and returns detailed metadata.

    Parameters:
    -----------
    site_code : str
        NEON site code (e.g. 'SJER')

    Returns:
    --------
    dict
        A dictionary of selected metadata fields.
    """
    SERVER = 'http://data.neonscience.org/api/v0/'
    url = SERVER + 'sites/' + site_code

    response = requests.get(url)
    if response.status_code != 200:
        raise ValueError(f"Failed to get site metadata: {response.status_code}")

    site_json = response.json()
    site_data = site_json['data']

    return {
        'siteCode': site_data.get('siteCode'),
        'siteName': site_data.get('siteName'),
        'siteDescription': site_data.get('siteDescription'),
        'siteType': site_data.get('siteType'),
        'siteLatitude': site_data.get('siteLatitude'),
        'siteLongitude': site_data.get('siteLongitude'),
        'stateCode': site_data.get('stateCode'),
        'stateName': site_data.get('stateName'),
        'domainCode': site_data.get('domainCode'),
        'domainName': site_data.get('domainName')
    }

# Example usage:
metadata = query_neon_site('SJER')
print(metadata)


{'siteCode': 'SJER', 'siteName': 'San Joaquin Experimental Range NEON', 'siteDescription': 'San Joaquin Experimental Range NEON', 'siteType': 'CORE', 'siteLatitude': 37.10878, 'siteLongitude': -119.73228, 'stateCode': 'CA', 'stateName': 'California', 'domainCode': 'D17', 'domainName': 'Pacific Southwest'}


In [1]:
import requests
import pandas as pd
import time
import os
import json

def build_subplot_centroids(pa_matrix, cache_file="subplot_centroids_cache.json", pause_sec=0.5):
    """
    Build a DataFrame of subplot centroids using NEON's API.

    Args:
        pa_matrix: presence/absence matrix with 'namedLocation' and 'subplotID'
        cache_file: path to cache JSON file
        pause_sec: time to wait between API calls

    Returns:
        DataFrame with columns:
        ['namedLocation', 'subplotID', 'full_named_location', 'latitude', 'longitude', 'coordinateUncertainty']
    """
    # Step 1: Construct full namedLocation
    df = pa_matrix[["plotID", "subplotID", "eventID"]].copy()
    df["namedLocation"] = pa_matrix["plotID"].str.replace("_", ".basePlot.div", regex=False)
    df["full_named_location"] = df["namedLocation"] + "." + df["subplotID"]

    unique_locs = df["full_named_location"].unique()

    # Load or initialize cache
    if os.path.exists(cache_file):
        with open(cache_file, "r") as f:
            cache = json.load(f)
    else:
        cache = {}

    results = []

    for loc in unique_locs:
        if loc not in cache:
            url = f"https://data.neonscience.org/api/v0/locations/{loc}"
            r = requests.get(url)
            if r.status_code == 200:
                data = r.json()["data"]
                cache[loc] = {
                    "latitude": float(data["locationDecimalLatitude"]),
                    "longitude": float(data["locationDecimalLongitude"]),
                    "coordinateUncertainty": float(data["coordinateUncertainty"]),
                }
                time.sleep(pause_sec)  # avoid hammering API
            else:
                print(f"Failed to retrieve {loc}")
                continue

        latlon = cache[loc]
        results.append({
            "full_named_location": loc,
            "namedLocation": loc.rsplit(".", 1)[0],
            "subplotID": loc.rsplit(".", 1)[1],
            "latitude": latlon["latitude"],
            "longitude": latlon["longitude"],
            "coordinateUncertainty": latlon["coordinateUncertainty"],
        })

    # Save updated cache
    with open(cache_file, "w") as f:
        json.dump(cache, f, indent=2)

    return pd.DataFrame(results)


In [2]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import rasterio
import numpy as np
from shapely.geometry import box
import geopandas as gpd

def plot_presence_grid(pa_matrix, centroid_df, species_id, dem_path=None, bbox=None):
    """
    Plots presence/absence as square grid cells over hillshaded DEM.

    Args:
        pa_matrix: DataFrame with presence/absence data
        centroid_df: output of build_subplot_centroids
        species_id: taxonID to plot
        dem_path: optional path to GeoTIFF for hillshade
        bbox: optional extent (lon_min, lon_max, lat_min, lat_max)
    """

    # Join presence/absence with lat/lon
    geo_df = pa_matrix.merge(
        centroid_df,
        on=["subplotID"],
        how="left"
    ).dropna(subset=["latitude", "longitude", species_id])

    # Assign scale based on subplotID
    def get_size(subplot):
        if "_1_" in subplot:
            return 1
        elif "_10_" in subplot:
            return 10
        elif "_100" in subplot:
            return 100
        return None

    geo_df["subplot_size"] = geo_df["subplotID"].apply(get_size)

    # Build square polygons from centroid and size
    def make_square(row):
        s = row["subplot_size"]
        d = np.sqrt(s) / 111111  # meters to degrees approx.
        return box(
            row["longitude"] - d / 2,
            row["latitude"] - d / 2,
            row["longitude"] + d / 2,
            row["latitude"] + d / 2
        )

    geo_df["geometry"] = geo_df.apply(make_square, axis=1)
    gdf = gpd.GeoDataFrame(geo_df, geometry="geometry", crs="EPSG:4326")

    # Load hillshade from DEM
    if dem_path:
        with rasterio.open(dem_path) as src:
            dem = src.read(1, masked=True)
            bounds = src.bounds
            transform = src.transform

            # Hillshade
            x, y = np.gradient(dem)
            slope = np.pi/2 - np.arctan(np.hypot(x, y))
            aspect = np.arctan2(-x, y)
            az, alt = np.radians(315), np.radians(45)
            shaded = np.sin(alt) * np.sin(slope) + np.cos(alt) * np.cos(slope) * np.cos(az - aspect)
            hillshade = (shaded - shaded.min()) / (shaded.max() - shaded.min())

            extent = [bounds.left, bounds.right, bounds.bottom, bounds.top]

    # Plot
    proj = ccrs.PlateCarree()
    fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={"projection": proj})

    if dem_path:
        ax.imshow(hillshade, extent=extent, origin='upper', cmap='Greys', transform=proj)

    # Plot presence/absence
    present = gdf[gdf[species_id] == 1]
    absent = gdf[gdf[species_id] == 0]

    present.plot(ax=ax, facecolor="forestgreen", edgecolor="black", linewidth=0.2, label="Present", alpha=0.7, zorder=3)
    absent.plot(ax=ax, facecolor="lightgray", edgecolor="black", linewidth=0.2, label="Absent", alpha=0.5, zorder=2)

    if bbox:
        ax.set_extent(bbox, crs=proj)

    ax.set_title(f"Species Presence Grid: {species_id}")
    ax.legend()
    plt.tight_layout()
    plt.show()


In [14]:
stacked_10m_df = pd.read_csv('../data/neon_sjer_site/artifacts/stacked_10m.csv')
pa_matrix = pd.read_csv('../data/neon_sjer_site/artifacts/pa_df.csv')

In [5]:
pa_matrix

Unnamed: 0,plotID,subplotID,eventID,2PLANT,ACAM,ACMI2,AECA,AGGR,AGHE2,AGOSE,...,TRSU3,TRVA,TRWI3,URTICA,VERON,VULPI,VUMI,VUMI/VUMY,VUMIC,VUMY
0,SJER_001,31_100,SJER.1.2017,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,SJER_001,31_100,SJER.1.2018,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,SJER_001,31_100,SJER.1.2019,1,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
3,SJER_001,31_100,SJER.1.2021,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,SJER_001,31_100,SJER.1.2023,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547,SJER_047,41_100,SJER.1.2017,1,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
548,SJER_047,41_100,SJER.1.2018,0,0,0,0,0,0,0,...,0,0,1,0,0,1,0,0,0,0
549,SJER_047,41_100,SJER.1.2019,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
550,SJER_047,41_100,SJER.1.2021,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0


In [6]:
dem_path = "../data/neon_sjer_site/topo_map/output_be.tif"  # update to your DEM filepath


In [16]:
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box
import pandas as pd

coords_df = stacked_10m_df[['subplotID', 'decimalLatitude', 'decimalLongitude']].copy()

coords_df.rename({'decimalLatitude': 'latitude', 'decimalLongitude': 'longitude'}, axis=1, inplace=True)

# Merge
plot_df = pd.merge(coords_df, pa_matrix, on='subplotID')


In [None]:

# Create 10x10m polygons around each lat/lon point
def make_grid(row, size=10):
    half = size / 2 / 111320  # Convert meters to degrees
    return box(
        row['longitude'] - half, row['latitude'] - half,
        row['longitude'] + half, row['latitude'] + half
    )

# Build GeoDataFrame explicitly
polygons = plot_df.apply(make_grid, axis=1).tolist()
gdf = gpd.GeoDataFrame(plot_df, geometry=polygons, crs="EPSG:4326")

# Plot
fig, ax = plt.subplots(figsize=(8, 8))
gdf.plot(ax=ax, column='species_code', cmap='Greens', edgecolor='black', legend=True)
ax.set_title("Species Presence/Absence in 10x10m Grids")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.tight_layout()
plt.show()


In [7]:
# Step 1: Build cached subplot locations
centroids_df = build_subplot_centroids(pa_matrix)

# Step 2: Plot species grid
plot_presence_grid(
    pa_matrix,
    centroid_df=centroids_df,
    species_id="QUWI2",
    dem_path="../data/NASADEM_SJER.tif",
    bbox=[-119.75, -119.6, 37.0, 37.2]
)


Failed to retrieve SJER.basePlot.div001.31_100
Failed to retrieve SJER.basePlot.div001.32_100
Failed to retrieve SJER.basePlot.div001.40_100
Failed to retrieve SJER.basePlot.div001.41_100
Failed to retrieve SJER.basePlot.div002.31_100
Failed to retrieve SJER.basePlot.div002.32_100
Failed to retrieve SJER.basePlot.div002.40_100
Failed to retrieve SJER.basePlot.div002.41_100
Failed to retrieve SJER.basePlot.div003.31_100
Failed to retrieve SJER.basePlot.div003.32_100
Failed to retrieve SJER.basePlot.div003.40_100
Failed to retrieve SJER.basePlot.div003.41_100
Failed to retrieve SJER.basePlot.div004.31_100
Failed to retrieve SJER.basePlot.div004.32_100
Failed to retrieve SJER.basePlot.div004.40_100
Failed to retrieve SJER.basePlot.div004.41_100
Failed to retrieve SJER.basePlot.div005.31_100
Failed to retrieve SJER.basePlot.div005.32_100
Failed to retrieve SJER.basePlot.div005.40_100
Failed to retrieve SJER.basePlot.div005.41_100
Failed to retrieve SJER.basePlot.div006.31_100
Failed to ret

KeyError: 'subplotID'