In [1]:
from modis_tools.auth import ModisSession
from modis_tools.resources import CollectionApi, GranuleApi
from modis_tools.granule_handler import GranuleHandler

from modis_tools.auth import add_earthdata_netrc, remove_earthdata_netrc

from datetime import datetime, timedelta
from pathlib import Path
import yaml

### Use modis-tools to search and download MODIS granules

In [2]:
# Create an entry for Earthdata in the ~/.netrc file, only needs to be run once

#username = ""
#password = ""
#add_earthdata_netrc(username, password)

In [2]:
# Create a session
# No credentials are passed explicitly here.
# NOTE(review): presumably they are picked up from the Earthdata entry in
# ~/.netrc that the (commented-out) cell above creates — confirm.
session = ModisSession()

In [6]:
# Client used to search the MODIS catalog for collections
collection_client = CollectionApi(session=session)

# Alternative collections -- swap one of these in for the MOD44W query below:
#collections = collection_client.query(short_name="MYD02HKM", version="6.1") # Aqua 500m calibrated radiances
#collections = collection_client.query(short_name="MYD03", version="6.1") # Aqua geolocation fields
#collections = collection_client.query(short_name="MYD29", version="61") # Aqua sea ice
#collections = collection_client.query(short_name=["MYD02HKM", "MYD03"], version="6.1") # two short names in one query

# Currently selected: MODIS land/water mask
collections = collection_client.query(
    short_name="MOD44W",
    version="061",
)

# Show what the catalog returned
collections

[Collection(id='C2565805847-LPCLOUD', title='MODIS/Terra Land Water Mask Derived from MODIS and SRTM L3 Global 250m SIN Grid V061', dataset_id='MODIS/Terra Land Water Mask Derived from MODIS and SRTM L3 Global 250m SIN Grid V061', coordinate_system='CARTESIAN', time_start='2000-01-01T00:00:00.000Z', updated=datetime.datetime(2024, 2, 22, 0, 0, tzinfo=datetime.timezone.utc), links=[CollectionLink(hreflang='en-US', href=AnyUrl('https://e4ftl01.cr.usgs.gov/MOLT/MOD44W.061/', scheme='https', host='e4ftl01.cr.usgs.gov', tld='gov', host_type='domain', path='/MOLT/MOD44W.061/'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://search.earthdata.nasa.gov/search?q=C2565805847-LPCLOUD', scheme='https', host='search.earthdata.nasa.gov', tld='gov', host_type='domain', path='/search', query='q=C2565805847-LPCLOUD'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://doi.org/10.5067/MODIS/MOD44W.061', scheme='https', host='doi.org', tld='org', host_type='domain', path=

In [15]:
# Spatial and temporal criteria used to filter the granules of the selected collection.
#
# Example of a bounding-box search:
#   nigeria_bbox = [2.1448863675, 4.002583177, 15.289420717, 14.275061098]
#   nigeria_granules = granule_client.query(start_date="2016-01-01", end_date="2018-12-31", bounding_box=nigeria_bbox)
#
# More search options are described at
# https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html

# --- Date range ---
start_date, end_date = "2022-01-01", "2022-12-31"

# --- Spatial extent ---
# NB each point is a "lon,lat" string (the opposite order to the Earthdata web search!).
# A multi-point search defaults to AND, i.e. all points will be within each granule.

# bounding_box for Labrador Sea
labsea = [-65.0, 45.0, -40.0, 70.0]

# four approximate 'corners' of the Labrador Sea
points = [
    "-55.0,53.75",
    "-64.25,61.5",
    "-53.25,64.25",
    "-44.0,59.25",
]

# four corners of a larger area
points2 = [
    "-56.0,52.5",
    "-68.0,60.0",
    "-53.0,66.0",
    "-41.0,60.0",
]

# two ends of a line bisecting the Labrador Sea
points3 = [
    "-64.0,55.0",
    "-44.0,63.0",
]

In [None]:
# Keyword arguments that are passed on to each granule query
query_args = {
    "start_date": start_date,
    "end_date": end_date,
    "day_night_flag": "day",
    "point": points2,
}

In [None]:
# Query arguments for the MOD44W collection (rebinds `query_args`, replacing any earlier value).
# Search options, e.g. OR instead of the default AND for points, must be passed
# through a dict of kwargs, e.g.
# labsea2022_granules = granule_client.query(start_date=start_date, end_date=end_date,
#                                           **{"point":["-52.5,57.5", "-62.4,57.5"], "options[point][or]":"true",
#                                              "day_night_flag":"day"})

query_args = dict(
    start_date=start_date,
    end_date=end_date,
    day_night_flag="day",
    point=points2,
    # not a valid keyword name, so it is unpacked from a dict
    **{'options[point][or]': "true"},
)



In [17]:
# Granule client for the chosen collection (a client covers only one collection at a time)
granule_client_data = GranuleApi.from_collection(collections[0], session=session)

# Run the query and materialise the result immediately: the query hands back
# a generator, which can only be used once
data_granules = list(granule_client_data.query(**query_args))

# Number of granules matching the criteria -- check this before downloading!
len(data_granules)

4

In [15]:
# Get geolocation granules for the same time period.
# The geolocation collection is taken to be the second entry of `collections`,
# which only exists when the catalog query returned more than one collection
# (e.g. short_name=["MYD02HKM", "MYD03"]). With a single-collection query there
# is nothing to look up, so report it and continue with an empty list instead
# of failing with an IndexError.
if len(collections) > 1:
    granule_client_geo = GranuleApi.from_collection(collections[1], session=session)
    # materialise the generator so the granules can be used more than once
    geo_granules = list(granule_client_geo.query(**query_args))
else:
    print("Only one collection selected - no geolocation granules to query.")
    geo_granules = []

In [16]:
# Sanity check: the first 22 characters of the first granule's id should be
# the scene name, e.g. 'MYD02HKM.A2022352.1640'
first_granule_id = dict(data_granules[0])['producer_granule_id']
first_granule_id[:22]

'MYD02HKM.A2022352.1640'

In [17]:
# Make dictionary of scenes with data and geolocation filenames

def _acquisition_stamp(granule_id):
    """Return the second and third dot-separated fields of a granule id.

    For an id starting 'MYD02HKM.A2022352.1640' this is 'A2022352.1640'.
    """
    return ".".join(granule_id.split(".")[1:3])


# Index the geolocation files by acquisition stamp so that each data granule is
# paired by date/time rather than by list position: a positional zip silently
# pairs the wrong files as soon as one granule is missing from either list.
# NOTE(review): assumes geolocation ids carry the same '<product>.A<date>.<time>'
# leading fields as the data ids — confirm against a real geolocation granule.
geo_by_stamp = {}
for geogranule in geo_granules:
    geo_filename = dict(geogranule)['producer_granule_id']
    geo_by_stamp[_acquisition_stamp(geo_filename)] = geo_filename

scenes = {}
unmatched = []  # data files for which no geolocation file was found

for granule in data_granules:
    filename = dict(granule)['producer_granule_id']
    geo_filename = geo_by_stamp.get(_acquisition_stamp(filename))
    if geo_filename is None:
        # leave incomplete scenes out rather than recording a wrong pairing
        unmatched.append(filename)
        continue
    # scene id = first 22 characters of the data filename
    scenes[filename[:22]] = {'data': filename, 'geolocation': geo_filename}

if unmatched:
    print(f"No geolocation granule found for {len(unmatched)} data granule(s): {unmatched}")

In [12]:
# Download location: <MODIS data root>/<folder>/l1b_data
# NOTE: the root is an absolute, machine-specific path - adjust it for your system.
folder = "AquaHkmLabSea2022b"
filepath = Path(f"/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/{folder}/l1b_data")

# Create the folder (and any missing parents); no error if it already exists
filepath.mkdir(parents=True, exist_ok=True)

In [18]:
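# Alternative download location for the MOD44W water mask data (uncomment to use).
# Kept as a Path because other cells join sub-paths onto `filepath` with `/`.
#filepath = Path("/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask")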

In [13]:
# Save the scene ids and their filenames as YAML in the download folder
scenes_yaml = filepath / 'scene_ids.yaml'

with scenes_yaml.open('w') as yaml_file:
    yaml.dump(scenes, yaml_file)


In [19]:
# Fetch the data granules into the download folder
# (the call is the cell's last expression, so its return value is displayed)
GranuleHandler.download_from_granules(
    data_granules,
    session,
    path=str(filepath),
    threads=10,
)

Downloading:   0%|          | 0/4 [00:00<?, ?file/s]

[PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask/MOD44W.A2022001.h15v03.061.2024008141206.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask/MOD44W.A2022001.h14v02.061.2024008140430.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask/MOD44W.A2022001.h15v02.061.2024008141302.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask/MOD44W.A2022001.h14v03.061.2024008140319.hdf')]

In [None]:
# Download the geolocation granules into a "geoloc" sub-folder of the data folder.
# The sub-folder gets its own name instead of rebinding `filepath`: rebinding
# would make this cell non-idempotent (each re-run nests another geoloc/ level)
# and would change the folder seen by any other cell that is re-run afterwards.
geo_filepath = filepath / "geoloc"
geo_filepath.mkdir(exist_ok=True, parents=True)

GranuleHandler.download_from_granules(geo_granules, session, path=str(geo_filepath), threads=10)