In [None]:
"""
ECOSTRESS Daily Evapotranspiration (ET) Download for Iowa (2019-2023)

This notebook downloads ECOSTRESS L3 JET (Evapotranspiration) data for Iowa
using NASA's earthaccess library.

PURPOSE:
--------
Download ECOSTRESS actual ET observations to combine with NLDAS modeled ET
for irrigation detection using the Pierrat et al. (2025) Human ET methodology:
    Human ET = ECOSTRESS ET - NLDAS ET_baseline
    
Where ECOSTRESS ET > NLDAS ET, the excess represents human water inputs
(irrigation) beyond natural water availability.

Dataset: ECOSTRESS Tiled Evapotranspiration Instantaneous and Daily L3 Global 70 m V002
Short Name: ECO_L3T_JET
DOI: 10.5067/ECOSTRESS/ECO_L3T_JET.002
Key Variable: ETdaily (Daily integrated evapotranspiration)
Units: kg/m2/day (equivalent to mm/day)
Spatial Resolution: ~70 m

ECOSTRESS Characteristics:
- Irregular temporal sampling (not daily global coverage)
- Multiple overpass times capture diurnal variation
- Higher spatial resolution (~70m) than NLDAS (~12km)
- Cloud masking required

Requirements:
- earthaccess library
- NASA Earthdata account (~/.netrc)

Adapted for SIF-ET-Irrigation Analysis Project - Iowa 2019-2023
Following methodology from Pierrat et al. (2025)
"""

In [2]:
import importlib.util
import site
import subprocess
import sys

# Pinned to the release shown in this notebook's recorded output so that a
# re-run installs the same version instead of whatever is newest.
EARTHACCESS_REQUIREMENT = 'earthaccess==0.16.0'

# Per-user site-packages of the interpreter running this kernel. Derived from
# `site` rather than hard-coded, so the cell works for any user / Python version.
target_dir = site.getusersitepackages()

# Make the target importable first, so an earlier install into it is detected
# below and pip is not invoked again on every run.
if target_dir not in sys.path:
    sys.path.insert(0, target_dir)

if importlib.util.find_spec('earthaccess') is None:
    # Install with the kernel's own interpreter so the package lands where
    # this notebook can import it.
    subprocess.check_call([
        sys.executable, '-m', 'pip', 'install',
        '--target=' + target_dir,
        EARTHACCESS_REQUIREMENT
    ])
    # Let the import system notice the files pip just wrote.
    importlib.invalidate_caches()
    print(f"Installed to: {target_dir}")
    print("Now try: import earthaccess")
else:
    print("earthaccess is already installed; skipping pip install")

import earthaccess
print(f"earthaccess version: {earthaccess.__version__}")

Collecting earthaccess
  Using cached earthaccess-0.16.0-py3-none-any.whl.metadata (9.8 kB)
Collecting fsspec>=2025.2 (from earthaccess)
  Using cached fsspec-2026.2.0-py3-none-any.whl.metadata (10 kB)
Collecting importlib-resources>=6.3.2 (from earthaccess)
  Using cached importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)
Collecting multimethod>=1.8 (from earthaccess)
  Using cached multimethod-2.0.2-py3-none-any.whl.metadata (8.4 kB)
Collecting pqdm>=0.1 (from earthaccess)
  Using cached pqdm-0.2.0-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting python-cmr>=0.10.0 (from earthaccess)
  Using cached python_cmr-0.13.0-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.26 (from earthaccess)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting s3fs>=2025.2 (from earthaccess)
  Using cached s3fs-2026.2.0-py3-none-any.whl.metadata (1.2 kB)
Collecting tenacity>=8.0 (from earthaccess)
  Using cached tenacity-9.1.4-py3-none-any.whl.metadata (1.2 kB)
Co

[0m

Installed to: /home/jcoldiron/.local/lib/python3.12/site-packages
Now try: import earthaccess
earthaccess version: 0.16.0


In [8]:
# Install earthaccess if needed
# pip install earthaccess rioxarray geopandas

from collections import Counter
from datetime import datetime
from pathlib import Path

import earthaccess
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rioxarray as rxr
import xarray as xr

print(f"earthaccess version: {earthaccess.__version__}")

earthaccess version: 0.16.0


In [None]:
# =============================================================================
# CONFIGURATION
# =============================================================================

# Authenticate with NASA Earthdata
auth = earthaccess.login()

# Temporal range - Full 5-year period
start_year = 2019
end_year = 2023
temporal_range = (f"{start_year}-01-01", f"{end_year}-12-31")

# Iowa bounding box (west, south, east, north)
iowa_bbox = (-96.64, 40.38, -90.14, 43.50)

# Project paths (resolved relative to the kernel's working directory)
project_root = Path("../..").resolve()
local_path = project_root / "data" / "raw" / "ECOSTRESS_JET"
local_path.mkdir(parents=True, exist_ok=True)

print(f"Project root: {project_root}")
print(f"Download path: {local_path}")
print(f"Temporal range: {temporal_range}")
print(f"Bounding box (Iowa): {iowa_bbox}")

# =============================================================================
# SEARCH ECOSTRESS DATA
# =============================================================================

print(f"\nSearching ECOSTRESS L3 JET data for Iowa {start_year}-{end_year}...")

results = earthaccess.search_data(
    short_name='ECO_L3T_JET',
    version='002',
    bounding_box=iowa_bbox,
    temporal=temporal_range
)

print(f"Found {len(results)} granules")
print(f"\nNote: ECOSTRESS has irregular temporal sampling.")
print(f"Each granule contains multiple files (ETdaily, ETinst, cloud masks, etc.)")

if results:
    # Show first result structure
    first_granule = results[0]['umm']
    print(f"\nFirst granule info:")
    print(f"  Date: {first_granule['TemporalExtent']['RangeDateTime']['BeginningDateTime']}")
    print(f"  Files in granule: {len(first_granule['RelatedUrls'])}")

    # Count by year. A plain Counter is enough for this tally; the original
    # used pd.Series here although pandas was never imported in the notebook.
    dates = [
        granule['umm']['TemporalExtent']['RangeDateTime']['BeginningDateTime']
        for granule in results
    ]
    # The timestamps carry a trailing 'Z'; swap it for an explicit UTC offset
    # so datetime.fromisoformat accepts them.
    years = [datetime.fromisoformat(d.replace('Z', '+00:00')).year for d in dates]
    year_counts = Counter(years)
    print(f"\nGranules per year:")
    for year, count in sorted(year_counts.items()):
        print(f"  {year}: {count}")
else:
    print("\nNo granules matched the search; nothing to download.")

Enter your Earthdata Login username:  jcoldiron
Enter your Earthdata password:  ········


Project root: /home/jcoldiron/iowa-corn-project/code/SIF-Analysis
Download path: /home/jcoldiron/iowa-corn-project/code/SIF-Analysis/data/raw/ECOSTRESS_JET
Temporal range: ('2019-01-01', '2023-12-31')
Bounding box (Iowa): (-96.64, 40.38, -90.14, 43.5)

Searching ECOSTRESS L3 JET data for Iowa 2019-2023...


In [None]:
# =============================================================================
# DOWNLOAD ECOSTRESS GRANULES
# =============================================================================

granule_total = len(results)
print(f"\nDownloading {granule_total} granules...")
print("This may take a while. Each granule has multiple files.")

# Fetch every file of every granule found by the search into the raw-data folder.
downloaded_files = earthaccess.download(results, local_path=str(local_path))

file_total = len(downloaded_files)
print(f"\nDownloaded {file_total} total files")
print(f"Location: {local_path}")

# Bucket the downloaded paths by the layer name embedded in the file name.
etdaily_files = []
cloud_files = []
for downloaded in downloaded_files:
    as_text = str(downloaded)
    if 'ETdaily.tif' in as_text:
        etdaily_files.append(downloaded)
    if 'cloud.tif' in as_text:
        cloud_files.append(downloaded)

other_total = file_total - len(etdaily_files) - len(cloud_files)

print("\nFile breakdown:")
print(f"  ETdaily files: {len(etdaily_files)}")
print(f"  Cloud mask files: {len(cloud_files)}")
print(f"  Other files: {other_total}")

In [None]:
"""
Process and Clip ECOSTRESS ET Data to Iowa Boundary

This cell:
1. Loads each ECOSTRESS ETdaily.tif file
2. Applies the cloud mask stored next to it (when that file exists)
3. Clips to Iowa state boundary
4. Saves one clipped file per tile and overpass, and records metadata
5. Creates temporal summaries

ETdaily units: kg/m2/day (equivalent to mm/day)
Scale factor: ET_SCALE_FACTOR (0.1) is multiplied into every raster below.

NOTE(review): the 0.1 scale factor and the cloud-mask polarity (1 = clear) are
carried over unchanged from the original notebook and could not be verified
from this file. Confirm both against the ECO_L3T_JET v002 product user guide
before trusting absolute ET values. If the rasters are already stored in
mm/day, the factor should be 1.0; if the mask flags cloud with 1,
CLEAR_SKY_VALUE should be 0.
"""

# =============================================================================
# Processing constants
# =============================================================================

ET_SCALE_FACTOR = 0.1   # multiplier applied to raw ETdaily values (see NOTE above)
CLEAR_SKY_VALUE = 1     # cloud-mask value treated as usable (see NOTE above)

# =============================================================================
# File Paths
# =============================================================================

input_folder = project_root / "data" / "raw" / "ECOSTRESS_JET"
output_folder = project_root / "data" / "processed" / "ECOSTRESS_Iowa"
output_folder.mkdir(parents=True, exist_ok=True)

iowa_boundary = project_root / "data" / "aoi" / "iowa.geojson"

# Load Iowa boundary
gdf = gpd.read_file(iowa_boundary)
print(f"Iowa boundary CRS: {gdf.crs}")

# Find all ETdaily files
etdaily_files = sorted(input_folder.glob("**/*ETdaily.tif"))
print(f"\nFound {len(etdaily_files)} ETdaily files to process")

# =============================================================================
# Process Each ECOSTRESS File
# =============================================================================

processed_count = 0
skipped_count = 0
cloud_masked_count = 0
metadata = []

# Boundary re-projected once per raster CRS instead of once per file.
boundary_by_crs = {}

for etdaily_file in etdaily_files:
    try:
        # Extract tile and acquisition time from the filename
        # Format: ECOv002_L3T_JET_28282_005_15TVH_20230702T130331_0712_01_ETdaily.tif
        parts = etdaily_file.stem.split('_')
        tile_id = parts[5]   # e.g., '15TVH'
        date_str = parts[6]  # e.g., '20230702T130331'
        date = datetime.strptime(date_str[:8], '%Y%m%d')

        # Cloud mask sits next to the ET raster with 'cloud' in place of 'ETdaily'.
        # (The original built this as `Path / str + '.tif'`, which raises
        # TypeError, so every file was skipped by the except below.)
        cloud_file = etdaily_file.with_name(etdaily_file.name.replace('ETdaily', 'cloud'))

        # Load ET data
        et_data = rxr.open_rasterio(etdaily_file, masked=True)

        # Apply scale factor (see NOTE(review) in the cell docstring)
        et_data = et_data * ET_SCALE_FACTOR

        # Apply cloud mask if available
        if cloud_file.exists():
            cloud_mask = rxr.open_rasterio(cloud_file, masked=True)
            # Keep only pixels whose mask value equals CLEAR_SKY_VALUE
            et_data = et_data.where(cloud_mask == CLEAR_SKY_VALUE)
            cloud_masked_count += 1

        # Set CRS if not already set
        if et_data.rio.crs is None:
            et_data = et_data.rio.write_crs("EPSG:4326")

        # Clip to Iowa, re-using the boundary already projected to this CRS
        raster_crs = et_data.rio.crs
        crs_key = raster_crs.to_wkt()
        if crs_key not in boundary_by_crs:
            boundary_by_crs[crs_key] = gdf.to_crs(raster_crs)
        gdf_projected = boundary_by_crs[crs_key]
        clipped = et_data.rio.clip(gdf_projected.geometry, gdf_projected.crs, drop=True)

        # Save clipped file. The name carries the full acquisition timestamp
        # and the tile ID; a date-only name let different tiles and overpasses
        # from the same day overwrite one another.
        output_filename = f"ECOSTRESS_ET_{date_str}_{tile_id}_Iowa.tif"
        output_path = output_folder / output_filename

        clipped.rio.to_raster(
            output_path,
            compress='lzw',
            tiled=True,
            dtype='float32'
        )

        # Store metadata
        metadata.append({
            'date': date,
            'timestamp': date_str,
            'tile': tile_id,
            'filename': output_filename,
            'mean_et': float(clipped.mean()),
            'median_et': float(clipped.median()),
            'valid_pixels': int((~np.isnan(clipped.values[0])).sum())
        })

        processed_count += 1
        if processed_count % 50 == 0:
            print(f"Processed {processed_count}/{len(etdaily_files)} files...")

    except Exception as e:
        # Best effort per file: report the failure, count it, and keep going.
        print(f"Error processing {etdaily_file.name}: {e}")
        skipped_count += 1
        continue

print(f"\nProcessing complete!")
print(f"  Processed: {processed_count}")
print(f"  Skipped: {skipped_count}")
print(f"  Cloud mask applied: {cloud_masked_count}")

# =============================================================================
# Save Metadata
# =============================================================================

if metadata:
    df = pd.DataFrame(metadata)
    df = df.sort_values('date')
    metadata_path = output_folder / "ECOSTRESS_Iowa_metadata.csv"
    df.to_csv(metadata_path, index=False)
    print(f"\nSaved metadata to: {metadata_path}")

    # Summary statistics
    print(f"\nTemporal coverage:")
    print(f"  First observation: {df['date'].min()}")
    print(f"  Last observation: {df['date'].max()}")
    print(f"  Total observations: {len(df)}")

    print(f"\nET statistics (mm/day):")
    print(f"  Mean: {df['mean_et'].mean():.2f}")
    print(f"  Median: {df['median_et'].median():.2f}")
    print(f"  Range: {df['mean_et'].min():.2f} - {df['mean_et'].max():.2f}")

In [None]:
"""
Plot how many ECOSTRESS observations exist per month and how mean ET evolves
"""

if metadata:
    # Rebuild the per-file table with a proper datetime column, oldest first.
    df = (
        pd.DataFrame(metadata)
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .sort_values('date')
    )
    obs_month = df['date'].dt.to_period('M')

    fig, axes = plt.subplots(2, 1, figsize=(14, 10))
    ax1, ax2 = axes

    # Top panel: number of observations in each calendar month
    obs_per_month = df.groupby(obs_month).size()
    obs_per_month.plot.bar(ax=ax1, color='steelblue')
    ax1.set_title(
        'ECOSTRESS Observations per Month (Iowa, 2019-2023)',
        fontsize=14,
        fontweight='bold',
    )
    ax1.set_xlabel('Month')
    ax1.set_ylabel('Number of Observations')
    ax1.grid(axis='y', alpha=0.3)

    # Bottom panel: one point per processed file
    ax2.scatter(df['date'], df['mean_et'], alpha=0.5, s=20, color='darkgreen')
    ax2.set_title(
        'ECOSTRESS Mean Daily ET - Iowa (2019-2023)',
        fontsize=14,
        fontweight='bold',
    )
    ax2.set_xlabel('Date')
    ax2.set_ylabel('Mean ET (mm/day)')
    ax2.grid(True, alpha=0.3)

    # Overlay the monthly mean, positioned at the start of each month
    monthly_mean_et = df.groupby(obs_month)['mean_et'].mean().to_timestamp()
    ax2.plot(
        monthly_mean_et.index,
        monthly_mean_et.values,
        color='red',
        linewidth=2,
        alpha=0.7,
        label='Monthly Average',
    )
    ax2.legend()

    plt.tight_layout()
    figure_path = output_folder / "ECOSTRESS_temporal_coverage.png"
    fig.savefig(figure_path, dpi=300, bbox_inches='tight')
    plt.show()

    print("\nNote: ECOSTRESS has irregular temporal sampling.")
    print("You'll need to aggregate these observations to bi-weekly periods")
    print("to match your project timeline and combine with NLDAS data.")

In [None]:
"""
Map the average of all clipped ET rasters from June-August 2023
"""

if metadata:
    df = pd.DataFrame(metadata)
    df['date'] = pd.to_datetime(df['date'])

    # Keep only rows dated June, July or August of 2023
    in_2023 = df['date'].dt.year == 2023
    in_summer = df['date'].dt.month.isin([6, 7, 8])
    summer_2023 = df[in_2023 & in_summer]

    print(f"Found {len(summer_2023)} observations for Summer 2023")

    if not summer_2023.empty:
        # Open every clipped raster listed for that season
        summer_files = [output_folder / name for name in summer_2023['filename']]
        et_arrays = [rxr.open_rasterio(path, masked=True) for path in summer_files]

        # Stack along a new 'time' axis and average, ignoring missing pixels
        stacked = xr.concat(et_arrays, dim='time')
        et_mean = stacked.mean(dim='time', skipna=True)

        # Draw the seasonal-mean map
        fig, ax = plt.subplots(figsize=(10, 8))
        et_mean.plot(
            ax=ax,
            cmap='YlGnBu',
            vmin=0,
            vmax=8,
            cbar_kwargs={'label': 'Mean ET (mm/day)'},
        )
        ax.set_title(
            'ECOSTRESS Mean Daily ET - Iowa Summer 2023\n(June-August Average)',
            fontsize=14,
            fontweight='bold',
        )
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')

        plt.tight_layout()
        map_path = output_folder / "ECOSTRESS_summer2023_mean.png"
        fig.savefig(map_path, dpi=300, bbox_inches='tight')
        plt.show()

        print(f"\nSummer 2023 mean ET: {float(et_mean.mean()):.2f} mm/day")