# ECOSTRESS Evapotranspiration Visualization - Iowa 2019-2023

This notebook explores and visualizes the full ECOSTRESS (ECOsystem Spaceborne Thermal Radiometer Experiment on Space Station) evapotranspiration dataset for Iowa, spanning 2019-2023.

**Dataset**: ECOSTRESS L3T JET (Jet Propulsion Laboratory Evapotranspiration) - Daily ET estimates at ~70m resolution

**Data Source**: NASA AppEEARS (Application for Extracting and Exploring Analysis Ready Samples)

**Contents:**
1. Environment Setup
2. Data Loading and Inventory
3. Data Quality Assessment
4. Temporal Coverage Analysis
5. Monthly and Seasonal ET Patterns
6. Spatial Maps by Season
7. Year-over-Year Comparison
8. Distribution Analysis
9. Export Processed Data
10. Summary

---

## 1. Environment Setup

Import required libraries and configure the analysis environment.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from pathlib import Path
from datetime import datetime
import warnings

import rasterio
import xarray as xr
import rioxarray
import geopandas as gpd

# Suppress every warning category so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Start from matplotlib's default style, then apply the notebook-wide overrides
# in a single update instead of key-by-key assignment.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 11,
    'axes.titlesize': 13,
    'axes.labelsize': 11,
})

print("Libraries loaded successfully!")

---

## 2. Data Loading and Inventory

Scan all ECOSTRESS GeoTIFFs, parse dates from filenames, and build a file inventory.

**Filename convention:** Each filename embeds an acquisition timestamp such as `20230702T130422` (format `YYYYMMDDTHHMMSS`); the day of year (DOY) is derived from that timestamp.

In [None]:
# Project paths, resolved relative to the current working directory
# (the project root is two levels up).
project_root = Path("../..").resolve()
data_root = project_root / "data"

data_dir = data_root / "raw" / "ECOSTRESS"
output_dir = data_root / "processed" / "ECOSTRESS_Iowa"
figures_dir = project_root / "figures" / "ecostress"

# Create the output locations up front; directories that already exist are left alone.
for target_dir in (output_dir, figures_dir):
    target_dir.mkdir(parents=True, exist_ok=True)

# Iowa boundary for overlays
iowa_boundary = data_root / "aoi" / "iowa.geojson"

print(f"Data directory: {data_dir}")
print(f"Output directory: {output_dir}")

# Collect every GeoTIFF directly inside the data directory, in sorted path order.
tif_files = list(data_dir.glob("*.tif"))
tif_files.sort()
print(f"\nFound {len(tif_files)} GeoTIFF files")

In [None]:
# Parse dates from filenames and build inventory
# Filename pattern: ECOv002_L3T_JET_{orbit}_{scene}_{tile}_{YYYYMMDDTHHMMSS}_{build}_{ver}_ETdaily.tif
#
# Produces `inventory`, a DataFrame with one row per successfully parsed file
# (columns: file, filename, date, datetime, year, month, doy, tile), sorted by
# acquisition datetime. Files whose names cannot be parsed are reported and skipped.

file_records = []

for f in tif_files:
    parts = f.stem.split('_')
    try:
        # Find the datetime part (format: YYYYMMDDTHHMMSS): the first
        # 15-character field containing a 'T'.
        dt_str = next((p for p in parts if len(p) == 15 and 'T' in p), None)
        if dt_str is None:
            raise ValueError("no YYYYMMDDTHHMMSS field in filename")
        dt = datetime.strptime(dt_str, "%Y%m%dT%H%M%S")
        # The tile ID is the sixth underscore-separated field of the pattern
        # above; a shorter name raises IndexError and is reported below.
        tile_id = parts[5]
    except (IndexError, ValueError) as e:
        print(f"Could not parse: {f.name} ({e})")
        continue

    file_records.append({
        'file': f,
        'filename': f.name,
        'date': dt.date(),
        'datetime': dt,
        'year': dt.year,
        'month': dt.month,
        'doy': dt.timetuple().tm_yday,
        'tile': tile_id,
    })

# An empty record list would yield a column-less DataFrame and an opaque
# KeyError in sort_values below, so stop here with an actionable message.
if not file_records:
    raise FileNotFoundError(
        f"No ECOSTRESS GeoTIFFs could be parsed ({len(tif_files)} candidate "
        "file(s) found); check the data directory and the filename pattern."
    )

inventory = pd.DataFrame(file_records)
inventory = inventory.sort_values('datetime').reset_index(drop=True)

print(f"Successfully parsed {len(inventory)} files")
print(f"\nDate range: {inventory['date'].min()} to {inventory['date'].max()}")
print(f"Years: {sorted(inventory['year'].unique())}")
print(f"Tiles: {sorted(inventory['tile'].unique())}")
print(f"\nFiles per year:")
print(inventory.groupby('year').size().to_string())

---

## 3. Data Quality Assessment

Load each file, compute statistics, and flag quality issues. ECOSTRESS has irregular
temporal sampling (not every day), so understanding coverage is important.

In [None]:
# Load each file and compute per-scene statistics
#
# Produces `stats_df`, one row per readable scene, holding the mean/median/
# min/max/std of the valid (non-nodata) pixels plus valid-pixel coverage.
# Scenes that fail to load are reported and skipped (deliberately best-effort).
scene_stats = []

for _, row in inventory.iterrows():
    try:
        # Open inside a context manager so the raster's file handle is released
        # after each scene instead of accumulating across the whole loop.
        with rioxarray.open_rasterio(row['file']) as src:
            da = src.squeeze()
            nodata = da.rio.nodata
            # Pixels equal to the declared nodata value become NaN and are
            # therefore ignored by count() and the statistics below.
            masked = da.where(da != nodata)

            total_px = int(da.size)
            valid_px = int(masked.count().values)
            has_valid = valid_px > 0

            scene_stats.append({
                'date': row['date'],
                'datetime': row['datetime'],
                'year': row['year'],
                'month': row['month'],
                'tile': row['tile'],
                # Statistics are NaN for scenes without a single valid pixel.
                'mean_et': float(masked.mean()) if has_valid else np.nan,
                'median_et': float(masked.median()) if has_valid else np.nan,
                'min_et': float(masked.min()) if has_valid else np.nan,
                'max_et': float(masked.max()) if has_valid else np.nan,
                'std_et': float(masked.std()) if has_valid else np.nan,
                'valid_pixels': valid_px,
                'total_pixels': total_px,
                'coverage_pct': 100 * valid_px / total_px if total_px > 0 else 0,
            })
    except Exception as e:
        print(f"Error reading {row['filename']}: {e}")

# With no readable scene the DataFrame would have no columns and the sort
# below would fail with an opaque KeyError; fail with a clear message instead.
if not scene_stats:
    raise RuntimeError("No ECOSTRESS scene could be read; see the errors reported above.")

stats_df = pd.DataFrame(scene_stats)
stats_df = stats_df.sort_values('datetime').reset_index(drop=True)

print(f"Computed statistics for {len(stats_df)} scenes")
print(f"\nET value range across all scenes: {stats_df['min_et'].min():.2f} - {stats_df['max_et'].max():.2f} mm/day")
print(f"Mean ET across all scenes: {stats_df['mean_et'].mean():.2f} mm/day")
print(f"Mean valid pixel coverage: {stats_df['coverage_pct'].mean():.1f}%")

In [None]:
---

## 4. Temporal Coverage Analysis

ECOSTRESS is mounted on the International Space Station (ISS), whose orbit does not provide a fixed revisit schedule like Landsat's.
The heatmap below shows when observations are available across 2019-2023.

# Temporal coverage: observations per month heatmap
monthly_counts = stats_df.groupby(['year', 'month']).size().reset_index(name='n_scenes')
pivot = monthly_counts.pivot(index='year', columns='month', values='n_scenes')
pivot = pivot.fillna(0).astype(int)

# Ensure all months and years are represented (missing combinations become 0)
all_years = range(2019, 2024)
all_months = range(1, 13)
pivot = pivot.reindex(index=all_years, columns=all_months, fill_value=0)

month_abbr = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
counts = pivot.values

fig, ax = plt.subplots(figsize=(14, 4))
im = ax.imshow(counts, cmap='YlOrRd', aspect='auto', vmin=0)

ax.set_xticks(range(len(month_abbr)))
ax.set_xticklabels(month_abbr)
ax.set_yticks(range(len(pivot.index)))
ax.set_yticklabels(pivot.index)

# Annotate cells with counts; switch to white text on the darker half of the scale
half_max = counts.max() / 2
for (row_i, col_j), n_scenes in np.ndenumerate(counts):
    text_color = 'white' if n_scenes > half_max else 'black'
    ax.text(col_j, row_i, str(n_scenes), ha='center', va='center', fontsize=10,
            color=text_color)

cbar = plt.colorbar(im, ax=ax, shrink=0.8)
cbar.set_label('Number of Scenes')
ax.set_title('ECOSTRESS Temporal Coverage - Iowa 2019-2023\nScenes per Month', fontsize=14, fontweight='bold')
ax.set_xlabel('Month')
ax.set_ylabel('Year')

plt.tight_layout()
plt.savefig(figures_dir / 'ecostress_temporal_coverage.png', dpi=150, bbox_inches='tight')
plt.show()

yearly_totals = pivot.sum(axis=1)
print(f"\nTotal scenes by year:")
print(yearly_totals.to_string())

In [None]:
---

## 5. Monthly and Seasonal ET Patterns

Time series of mean daily ET from each ECOSTRESS scene, showing the seasonal cycle
and interannual variability.

In [None]:
# Time series of scene-level mean ET (top) and the monthly climatology (bottom)
fig, axes = plt.subplots(2, 1, figsize=(16, 10), gridspec_kw={'height_ratios': [2, 1]})
ax_series, ax_clim = axes

# Top: Full time series colored by year
colors_yr = {2019: '#1f77b4', 2020: '#ff7f0e', 2021: '#2ca02c', 2022: '#d62728', 2023: '#9467bd'}

for year, grp in stats_df.groupby('year'):
    # Years missing from the palette fall back to gray
    year_color = colors_yr.get(year, 'gray')
    ax_series.scatter(grp['datetime'], grp['mean_et'], s=25, alpha=0.7,
                      color=year_color, label=str(year))
    ax_series.plot(grp['datetime'], grp['mean_et'], alpha=0.3, color=year_color)

ax_series.set_ylabel('Mean Daily ET (mm/day)', fontsize=12)
ax_series.set_title('ECOSTRESS Daily ET Time Series - Iowa 2019-2023', fontsize=14, fontweight='bold')
ax_series.legend(title='Year', loc='upper right')
ax_series.grid(True, alpha=0.3)

# Bottom: Monthly mean ET (aggregated across years), with the spread of the
# scene means as error bars
monthly_mean = stats_df.groupby('month')['mean_et'].agg(['mean', 'std']).reset_index()
ax_clim.bar(monthly_mean['month'], monthly_mean['mean'], yerr=monthly_mean['std'],
            color='steelblue', alpha=0.7, capsize=3, edgecolor='black', linewidth=0.5)
ax_clim.set_xlabel('Month', fontsize=12)
ax_clim.set_ylabel('Mean ET (mm/day)', fontsize=12)
ax_clim.set_title('Mean Monthly ET Climatology (2019-2023 Average)', fontsize=13, fontweight='bold')
ax_clim.set_xticks(range(1, 13))
ax_clim.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax_clim.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig(figures_dir / 'ecostress_et_timeseries.png', dpi=150, bbox_inches='tight')
plt.show()

# Year-over-year seasonal cycle comparison (DOY on x-axis)
fig, ax = plt.subplots(figsize=(14, 6))

# stats_df carries no 'doy' column (only the file inventory does), so derive
# the day of year from each scene's acquisition timestamp before plotting.
doy_stats = stats_df.assign(doy=pd.to_datetime(stats_df['datetime']).dt.dayofyear)

for year, grp in doy_stats.groupby('year'):
    grp_sorted = grp.sort_values('doy')
    year_color = colors_yr.get(year, 'gray')  # gray for years outside the palette
    ax.scatter(grp_sorted['doy'], grp_sorted['mean_et'], s=20, alpha=0.6,
               color=year_color, label=str(year))
    # Smoothed line using a centered 5-observation rolling mean; only drawn
    # when the year has more than 5 scenes.
    if len(grp_sorted) > 5:
        rolling = grp_sorted.set_index('doy')['mean_et'].rolling(window=5, center=True, min_periods=2).mean()
        ax.plot(rolling.index, rolling.values, color=year_color, alpha=0.7, linewidth=1.5)

ax.set_xlabel('Day of Year', fontsize=12)
ax.set_ylabel('Mean Daily ET (mm/day)', fontsize=12)
ax.set_title('ECOSTRESS ET Seasonal Cycle by Year - Iowa', fontsize=14, fontweight='bold')
ax.legend(title='Year')
# Upper limit of 366 keeps the last day of a leap year (2020) inside the axes.
ax.set_xlim(1, 366)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(figures_dir / 'ecostress_et_seasonal_byyear.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
---

## 6. Spatial Maps by Season

Select representative scenes from different seasons to show spatial ET patterns.
Picks scenes with highest valid pixel coverage from winter, spring, summer, and fall.

In [None]:
# Define seasons and pick best scene from each (highest coverage)
season_map = {12: 'Winter', 1: 'Winter', 2: 'Winter',
              3: 'Spring', 4: 'Spring', 5: 'Spring',
              6: 'Summer', 7: 'Summer', 8: 'Summer',
              9: 'Fall', 10: 'Fall', 11: 'Fall'}

# Adds a 'season' column to stats_df; later cells rely on it.
stats_df['season'] = stats_df['month'].map(season_map)

# Pick the scene with highest coverage per season
season_order = ['Winter', 'Spring', 'Summer', 'Fall']
best_scenes = {}  # season name -> {'file', 'date', 'mean_et', 'coverage'}

for season in season_order:
    season_data = stats_df[stats_df['season'] == season]
    if season_data.empty:
        print(f"{season}: No data available")
        continue

    best_row = season_data.loc[season_data['coverage_pct'].idxmax()]

    # Match on datetime AND tile: several tiles can share one acquisition
    # timestamp, and a datetime-only lookup could return a different tile's
    # file than the one whose statistics are reported here.
    same_scene = (
        (inventory['datetime'] == best_row['datetime'])
        & (inventory['tile'] == best_row['tile'])
    )
    best_scenes[season] = {
        'file': inventory.loc[same_scene, 'file'].values[0],
        'date': best_row['date'],
        'mean_et': best_row['mean_et'],
        'coverage': best_row['coverage_pct'],
    }
    print(f"{season}: {best_row['date']} (coverage: {best_row['coverage_pct']:.1f}%, mean ET: {best_row['mean_et']:.2f} mm/day)")

# Plot seasonal spatial maps: one panel per season that has a selected scene
available_seasons = [s for s in season_order if s in best_scenes]
n_panels = len(available_seasons)

if n_panels == 0:
    print("No seasonal data available for mapping.")
else:
    fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 6))
    if n_panels == 1:
        # A single subplot comes back as a bare Axes; wrap it so the zip below works
        axes = [axes]

    # Load every selected scene with its nodata pixels masked out
    all_scene_data = []
    for season in available_seasons:
        da = rioxarray.open_rasterio(best_scenes[season]['file']).squeeze()
        all_scene_data.append(da.where(da != da.rio.nodata))

    # Find global vmin/vmax across all selected scenes for consistent colorbar
    # (2nd / 98th percentiles of each scene)
    low_cuts = [float(scene.quantile(0.02)) for scene in all_scene_data]
    high_cuts = [float(scene.quantile(0.98)) for scene in all_scene_data]
    vmin = min(low_cuts)
    vmax = max(high_cuts)

    for ax, season, scene in zip(axes, available_seasons, all_scene_data):
        im = scene.plot(ax=ax, cmap='YlGnBu', vmin=vmin, vmax=vmax, add_colorbar=False)
        info = best_scenes[season]
        panel_title = f"{season}\n{info['date']}\nMean: {info['mean_et']:.2f} mm/day"
        ax.set_title(panel_title, fontsize=11, fontweight='bold')
        ax.set_xlabel('Easting (m)')
        ax.set_ylabel('Northing (m)')

    # One shared colorbar; all panels use the same vmin/vmax, so the last image handle suffices
    cbar = fig.colorbar(im, ax=axes, orientation='horizontal', fraction=0.04, pad=0.12)
    cbar.set_label('Daily ET (mm/day)', fontsize=11)

    plt.suptitle('ECOSTRESS ET Spatial Patterns by Season - Iowa',
                 fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig(figures_dir / 'ecostress_et_seasonal_maps.png', dpi=150, bbox_inches='tight')
    plt.show()

In [None]:
---

## 7. Year-over-Year Comparison

Compare annual mean ET and scene counts to identify interannual variability.

In [None]:
# Annual statistics: one row per year, aggregated from the scene-level table
annual_stats = (
    stats_df.groupby('year')
    .agg(
        n_scenes=('mean_et', 'count'),
        mean_et=('mean_et', 'mean'),
        median_et=('median_et', 'median'),
        std_et=('mean_et', 'std'),
        max_et=('max_et', 'max'),
    )
    .reset_index()
)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_mean, ax_count = axes

# Both panels share the per-year palette (gray for years outside it)
bar_colors = [colors_yr.get(y, 'gray') for y in annual_stats['year']]

# Left: Annual mean ET with error bars (std of the scene means)
ax_mean.bar(annual_stats['year'], annual_stats['mean_et'], yerr=annual_stats['std_et'],
            color=bar_colors,
            capsize=5, edgecolor='black', linewidth=0.5, alpha=0.8)
ax_mean.set_xlabel('Year', fontsize=12)
ax_mean.set_ylabel('Mean Daily ET (mm/day)', fontsize=12)
ax_mean.set_title('Annual Mean ET by Year', fontsize=13, fontweight='bold')
ax_mean.grid(True, alpha=0.3, axis='y')

# Right: Number of scenes per year
ax_count.bar(annual_stats['year'], annual_stats['n_scenes'],
             color=bar_colors,
             edgecolor='black', linewidth=0.5, alpha=0.8)
ax_count.set_xlabel('Year', fontsize=12)
ax_count.set_ylabel('Number of Scenes', fontsize=12)
ax_count.set_title('ECOSTRESS Scene Availability', fontsize=13, fontweight='bold')
ax_count.grid(True, alpha=0.3, axis='y')

# Annotate counts just above each bar
for yr, n_scenes in zip(annual_stats['year'], annual_stats['n_scenes']):
    ax_count.text(yr, n_scenes + 0.5, str(int(n_scenes)),
                  ha='center', fontsize=11, fontweight='bold')

plt.suptitle('ECOSTRESS Interannual Comparison - Iowa', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(figures_dir / 'ecostress_annual_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print("\nAnnual Summary:")
print(annual_stats.to_string(index=False))

---

## 8. Distribution Analysis

Examine the distribution of scene-level mean ET values across all scenes, grouped by month and by season.

In [None]:
# Box plot of scene-level mean ET by month
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Left: Boxplot by month
month_labels_short = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# Drop NaN means (scenes without any valid pixel): a single NaN turns the
# box statistics for that month into NaN and the box is not drawn.
month_groups = [stats_df.loc[stats_df['month'] == m, 'mean_et'].dropna().values
                for m in range(1, 13)]
# Only include months that have data: (month number, values, label)
valid_months = [(i + 1, grp, month_labels_short[i])
                for i, grp in enumerate(month_groups) if len(grp) > 0]

season_colors = {'Winter': '#4393c3', 'Spring': '#66c2a5', 'Summer': '#fc8d62', 'Fall': '#e78ac3'}

# Guard against an empty selection, which boxplot cannot draw.
if valid_months:
    bp = axes[0].boxplot([g[1] for g in valid_months],
                         patch_artist=True, medianprops=dict(color='red', linewidth=2))
    # Label the ticks explicitly rather than through boxplot's `labels`
    # keyword, which newer matplotlib releases deprecate.
    axes[0].set_xticklabels([g[2] for g in valid_months])

    # Color boxes by season
    for box, (month_num, _, _) in zip(bp['boxes'], valid_months):
        box.set_facecolor(season_colors[season_map[month_num]])
        box.set_alpha(0.7)

axes[0].set_ylabel('Mean Daily ET (mm/day)', fontsize=11)
axes[0].set_title('ET Distribution by Month', fontsize=13, fontweight='bold')
axes[0].grid(True, alpha=0.3, axis='y')

# Right: Histogram of all scene means by season (NaN means excluded)
for season in season_order:
    season_means = stats_df.loc[stats_df['season'] == season, 'mean_et'].dropna()
    if not season_means.empty:
        axes[1].hist(season_means, bins=20, alpha=0.5, label=season,
                     color=season_colors[season], edgecolor='black', linewidth=0.5)

axes[1].set_xlabel('Mean Daily ET (mm/day)', fontsize=11)
axes[1].set_ylabel('Number of Scenes', fontsize=11)
axes[1].set_title('Scene-Level ET Distribution by Season', fontsize=13, fontweight='bold')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(figures_dir / 'ecostress_et_distributions.png', dpi=150, bbox_inches='tight')
plt.show()

---

## 9. Export Processed Data

Save the scene-level statistics as a CSV for downstream analysis.

In [None]:
# Save scene statistics CSV; the date columns are written out as plain strings
stats_export = stats_df.copy()
for date_col in ('date', 'datetime'):
    stats_export[date_col] = stats_export[date_col].astype(str)

csv_path = output_dir / 'ecostress_scene_statistics_2019_2023.csv'
stats_export.to_csv(csv_path, index=False)
print(f"Saved scene statistics: {csv_path}")

# Save annual summary
annual_path = output_dir / 'ecostress_annual_summary_2019_2023.csv'
annual_stats.to_csv(annual_path, index=False)
print(f"Saved annual summary: {annual_path}")

# List the figure files written by the earlier plotting cells
print(f"\nFigures saved to: {figures_dir}")
for figure_name in (
    'ecostress_temporal_coverage.png',
    'ecostress_et_timeseries.png',
    'ecostress_et_seasonal_byyear.png',
    'ecostress_et_seasonal_maps.png',
    'ecostress_annual_comparison.png',
    'ecostress_et_distributions.png',
):
    print(f"  - {figure_name}")

---

## 10. Summary

In [None]:
# Final text report summarizing the dataset, headline ET statistics, and outputs.
banner = "=" * 70
print(banner)
print("ECOSTRESS Analysis Summary - Iowa 2019-2023")
print(banner)

first_date = stats_df['date'].min()
last_date = stats_df['date'].max()
tile_list = ', '.join(sorted(stats_df['tile'].unique()))

print("\nDataset:")
print("  Product: ECO_L3T_JET.002 (Daily Evapotranspiration)")
print("  Resolution: ~70m")
print(f"  Total scenes: {len(stats_df)}")
print(f"  Date range: {first_date} to {last_date}")
print(f"  Tiles: {tile_list}")

print("\nET Statistics (all scenes):")
print(f"  Mean daily ET: {stats_df['mean_et'].mean():.2f} mm/day")
print(f"  Peak daily ET: {stats_df['max_et'].max():.2f} mm/day")
print(f"  Mean valid coverage: {stats_df['coverage_pct'].mean():.1f}%")

print("\nSeasonal Means:")
for season in season_order:
    s_data = stats_df[stats_df['season'] == season]
    if s_data.empty:
        # Seasons without any scene are left out of the report
        continue
    print(f"  {season}: {s_data['mean_et'].mean():.2f} mm/day ({len(s_data)} scenes)")

print("\nOutputs:")
print(f"  Statistics CSV: {csv_path.name}")
print(f"  Figures: {figures_dir}")
print("\nAnalysis complete!")