# Working with MODIS for Land Cover and LAI data

## Download MODIS data from AppEEARS

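MODIS provides satellite-derived data products, including evapotranspiration (ET) and leaf area index (LAI), among others.  Here we use AppEEARS to download the LAI and land cover (LULC) products.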

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each
# cell is executed, so edits to the library source are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Set up logging through watershed_workflow's UI helper before doing any other work.
# NOTE(review): the argument 1 is presumably a verbosity level -- confirm against setup_logging.
import watershed_workflow.ui
watershed_workflow.ui.setup_logging(1)

In [3]:
import logging
import os
import numpy as np
from matplotlib import pyplot as plt
import cftime, datetime

import watershed_workflow
import watershed_workflow.sources
import watershed_workflow.land_cover_properties
import watershed_workflow.io
import watershed_workflow.data



In [4]:
# Path to the shapefile that defines the watershed boundary (relative to the working directory).
watershed_shapefile = 'Coweeta/input_data/coweeta_basin.shp'
# Coordinate reference system used throughout this notebook: the package default.
crs = watershed_workflow.crs.default_crs

### Get the watershed

In [5]:
# Read the watershed shape(s) from the shapefile, requesting them in the working CRS.
shapefile_manager = watershed_workflow.sources.ManagerShapefile(watershed_shapefile)
watershed_shapes = shapefile_manager.getShapes(out_crs=crs)

# Wrap the shapes in a SplitHUCs object, then simplify it in place.
# NOTE(review): 60 is presumably a tolerance in CRS length units -- confirm against simplify.
watershed = watershed_workflow.SplitHUCs(watershed_shapes)
watershed_workflow.split_hucs.simplify(watershed, 60)

# Report the bounding box of the watershed exterior, followed by its x- and y-extent.
bounds = watershed.exterior.bounds
xmin, ymin, xmax, ymax = bounds
print(bounds)
print(xmax - xmin, ymax - ymin)

2025-08-29 14:11:48,704 - root - INFO: fixing column: geometry
2025-08-29 14:11:48,709 - root - INFO: Removing holes on 1 polygons
2025-08-29 14:11:48,709 - root - INFO:   -- removed interior
2025-08-29 14:11:48,710 - root - INFO:   -- union
2025-08-29 14:11:48,710 - root - INFO: Parsing 1 components for holes
2025-08-29 14:11:48,710 - root - INFO:   -- complete


(1129295.2795668324, 1404623.589635828, 1134445.5249643093, 1410000.8473600943)
5150.245397476945 5377.257724266266


### Download the data

In [6]:
# Create the manager object used below to request, poll, and fetch MODIS data via AppEEARS.
source = watershed_workflow.sources.ManagerMODISAppEEARS()

In [7]:
# First and last year of MODIS data to request; the request spans Jan 1 of start_year
# through Dec 31 of end_year.
start_year = 2010
end_year = 2020

startdate = f"{start_year}-1-1"
enddate = f"{end_year}-12-31"

# Note that AppEEARS typically takes a few minutes to prepare your data.  Therefore
# requestDataset returns a task, which can be re-checked later.  You can launch this call
# more-or-less immediately upon entering your workflow, continue the workflow, and only wait
# on the task (see waitForDataset) once all your other work is done.  This cell only submits
# the request; it does not wait for the result.
# The requested region is the watershed exterior buffered by 1000.
# NOTE(review): the buffer distance is presumably in the length units of watershed.crs -- confirm.
task = source.requestDataset(watershed.exterior.buffer(1000), watershed.crs, startdate, enddate) 

2025-08-29 14:11:48,736 - root - INFO: calling preprocess
2025-08-29 14:11:48,736 - root - INFO: prepocess called!
2025-08-29 14:11:48,737 - root - INFO: incoming shape area = 0.0035508368181677618
2025-08-29 14:11:48,738 - root - INFO: buffering incoming shape by = 0.0045000000000000005
2025-08-29 14:11:48,738 - root - INFO: buffered shape area = 0.004621760927923991
2025-08-29 14:11:48,740 - root - INFO: Building request for bounds: [np.float64(-83.494), np.float64(35.0139), np.float64(-83.4061), np.float64(35.0873)]
2025-08-29 14:11:48,740 - root - INFO: Requires files:
2025-08-29 14:11:48,740 - root - INFO:  ... /home/ecoon/code/watershed_workflow/data/land_cover/MODIS/modis_LAI_01-01-2010_12-31-2020_35.0873x-83.4940_35.0139x-83.4061.nc
2025-08-29 14:11:48,740 - root - INFO:  ... /home/ecoon/code/watershed_workflow/data/land_cover/MODIS/modis_LULC_01-01-2010_12-31-2020_35.0873x-83.4940_35.0139x-83.4061.nc
2025-08-29 14:11:48,741 - root - INFO: building request.
2025-08-29 14:11:49,

In [11]:
# The task_id is the key that identifies this job to AppEEARS.  It is used to check the
# status of the job, and to download the result when it is ready.
# Note that AppEEARS will also send you an email when the job is done!
print(task.task_id)

713e0136-e6d4-4b79-a605-6f5db9f7b694


In [12]:
# Do other stuff...
#

# Check whether the request is done.
# NOTE: `data` is assigned only if isReady(task) returns true; otherwise it stays undefined.
if source.isReady(task):
    # fetch the request
    data = source.fetchDataset(task)


2025-08-29 14:15:23,083 - root - INFO: Checking status of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:15:23,389 - root - INFO: ... http error
2025-08-29 14:15:23,391 - root - INFO: Checking for bundle of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:15:23,618 - root - INFO: ... HTTPError checking for bundle:
2025-08-29 14:15:23,619 - root - INFO: 404 Client Error: NOT FOUND for url: https://appeears.earthdatacloud.nasa.gov/api/bundle/713e0136-e6d4-4b79-a605-6f5db9f7b694


In [13]:
# Call this as many times as you want until it works!
# Each call re-checks the task; `data` is assigned only once isReady(task) returns true.
if source.isReady(task):
    # fetch the request
    data = source.fetchDataset(task)

2025-08-29 14:15:39,357 - root - INFO: Checking status of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:15:39,620 - root - INFO: ... http error
2025-08-29 14:15:39,622 - root - INFO: Checking for bundle of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:15:39,852 - root - INFO: ... HTTPError checking for bundle:
2025-08-29 14:15:39,853 - root - INFO: 404 Client Error: NOT FOUND for url: https://appeears.earthdatacloud.nasa.gov/api/bundle/713e0136-e6d4-4b79-a605-6f5db9f7b694


In [None]:
# Then, when you really need the data, you can just wait.  This simply calls isReady then
# sleeps, in a loop, until isReady returns true; the result is stored in `data`.
data = source.waitForDataset(task)

2025-08-29 14:16:27,287 - root - INFO: Checking status of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:16:27,562 - root - INFO: ... http error
2025-08-29 14:16:27,565 - root - INFO: Checking for bundle of task: 713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:16:27,818 - root - INFO: ... HTTPError checking for bundle:
2025-08-29 14:16:27,819 - root - INFO: 404 Client Error: NOT FOUND for url: https://appeears.earthdatacloud.nasa.gov/api/bundle/713e0136-e6d4-4b79-a605-6f5db9f7b694
2025-08-29 14:16:27,821 - root - INFO: MODIS request not ready, sleeping 120s...


Check for outliers in the data, and fix if needed.

In [None]:
# Print the minimum and maximum of each downloaded variable (LAI, then LULC) as a quick
# check for outliers.
for variable in ('LAI', 'LULC'):
    print(f'{variable}:', data[variable].min(), data[variable].max())


## Compute time series of LAI

MODIS is observational ("real") data, and so includes leap days.  Our runs use a noleap calendar, so we first convert the data to that calendar by dropping the leap-day data.

In [None]:
# The downloaded dataset is bound to `data` above; `modis_data` was previously never
# defined, so this cell failed with a NameError on a fresh kernel.  Work on a copy so that
# `data` is left untouched and this cell can be re-run safely.
# NOTE(review): assumes `data` provides .copy() (e.g. an xarray Dataset) -- confirm.
modis_data = data.copy()

# MODIS data comes with time-dependent LAI AND time-dependent LULC -- just take the mode
# along 'time_LULC' to find the most common LULC
modis_data['LULC'] = watershed_workflow.data.computeMode(modis_data['LULC'], 'time_LULC')

# now it is safe to have only one time
modis_data = modis_data.rename({'time_LAI':'time'})

# remove leap day (366th day of any leap year) to match our Noleap Calendar
modis_data = watershed_workflow.data.filterLeapDay(modis_data)

### Form the dynamic time-series.

The raw data is integrated in space by intersecting each LULC index with the LAI raster and summing over that mask.  This is done for each time of LAI observation to form a time series.

In [None]:
# Compute the dynamic LAI time series from the LAI and LULC data, restricted to the
# watershed exterior polygon.
lai_df = watershed_workflow.land_cover_properties.computeTimeSeries(
    modis_data['LAI'],
    modis_data['LULC'],
    polygon=watershed.exterior,
    polygon_crs=watershed.crs,
)
lai_df

In [None]:
# Plot the dynamic data: one curve per non-time column of lai_df, against time.
fig, ax = plt.subplots()

for series_name in lai_df:
    if series_name == 'time':
        continue
    ax.plot(lai_df['time'], lai_df[series_name], label=series_name)

ax.set_xlabel('time')
ax.set_ylabel('Leaf Area Index [-]')
ax.legend()
plt.show()


### Form data for a typical year

In addition to the dynamic data, we need a typical year for cyclic steady-state spinup purposes.

In [None]:
# Also compute a typical year of LAI from the dynamic time series, using 'time' as the
# time column.
# NOTE(review): output_nyears=10 and start_year=2000 presumably mean the typical year is
# repeated for 10 years beginning in 2000 -- confirm against computeAverageYear, and
# confirm that 2000 (rather than the 2010 start of the downloaded data) is intended.
typical_df = watershed_workflow.data.computeAverageYear(lai_df, 'time', output_nyears=10, 
                                                                  start_year=2000)


In [None]:
# Plot the typical-year data: one curve per non-time column of typical_df, against time.
fig, ax = plt.subplots()

for series_name in typical_df:
    if series_name == 'time':
        continue
    ax.plot(typical_df['time'], typical_df[series_name], label=series_name)

ax.set_xlabel('time')
ax.set_ylabel('Leaf Area Index [-]')
ax.legend()
plt.show()