In [1]:
from modis_tools.auth import ModisSession
from modis_tools.resources import CollectionApi, GranuleApi
from modis_tools.granule_handler import GranuleHandler

from modis_tools.auth import add_earthdata_netrc, remove_earthdata_netrc

from datetime import datetime, timedelta
from pathlib import Path
import yaml

### Use modis-tools to search and download MODIS granules

In [2]:
# Create an entry for Earthdata in the ~/.netrc file, only needs to be run once

#username = ""
#password = ""
#add_earthdata_netrc(username, password)

In [2]:
# Create the session object that is handed to every API client and download call
# in this notebook.
# NOTE(review): no credentials are passed here; presumably they are picked up from
# the Earthdata entry in ~/.netrc (see add_earthdata_netrc) — confirm.
session = ModisSession()

In [8]:
# Client used to search the MODIS catalogue for collections
collection_client = CollectionApi(session=session)

# Alternative single-collection queries (uncomment one to use it instead):
#collections = collection_client.query(short_name="MYD02HKM", version="6.1") # Aqua 500m calibrated radiances
#collections = collection_client.query(short_name="MYD03", version="6.1") # Aqua geolocation fields
#collections = collection_client.query(short_name="MYD29", version="61") # Aqua sea ice
#collections = collection_client.query(short_name="MYD06_L2", version="6.1") # Aqua clouds
#collections = collection_client.query(short_name=["MYD02HKM", "MYD03"], version="6.1")
#collections = collection_client.query(short_name="MOD44W", version='061') # MODIS land/water mask

# Active selection: the 1km calibrated radiances together with the geolocation fields.
# Later cells index the result as collections[0] (data) and collections[1] (geolocation),
# so check the displayed order before continuing.
collection_short_names = ["MYD021KM", "MYD03"]
collections = collection_client.query(short_name=collection_short_names, version="6.1")

collections

[Collection(id='C1379758607-LAADS', title='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', dataset_id='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='CARTESIAN', time_start='2002-07-04T00:00:00.000Z', updated=None, links=[CollectionLink(hreflang='en-US', href=AnyUrl('https://mcst.gsfc.nasa.gov/sites/default/files/file_attachments/M1054D_PUG_083112_final.pdf', scheme='https', host='mcst.gsfc.nasa.gov', tld='gov', host_type='domain', path='/sites/default/files/file_attachments/M1054D_PUG_083112_final.pdf'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://doi.org/10.5067/MODIS/MYD021KM.061', scheme='https', host='doi.org', tld='org', host_type='domain', path='/10.5067/MODIS/MYD021KM.061'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://ladsweb.modaps.eosdis.nasa.gov/search/order/2/MYD021KM--61', scheme='https', host='ladsweb.modaps.eosdis.nasa.gov', tld='gov', host_type='domain', path='/search/order/2/MYD021KM--61'), typ

In [57]:
# Spatial and temporal criteria used to filter the granule search.
# Example:
# nigeria_bbox = [2.1448863675, 4.002583177, 15.289420717, 14.275061098]
# nigeria_granules = granule_client.query(start_date="2016-01-01", end_date="2018-12-31", bounding_box=nigeria_bbox)
#
# More search options: https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html


def _as_csv(values):
    """Join the given values into one comma-separated string."""
    return ",".join(map(str, values))


def _bbox_from_corners(corners):
    """Build a bounding-box string from the first and third corners of a ring."""
    first, third = corners[0], corners[2]
    return _as_csv([first[0], first[1], third[0], third[1]])


def _polygon_from_corners(corners):
    """Flatten a ring of corner pairs into one comma-separated string."""
    return _as_csv(coord for corner in corners for coord in corner)


# --- Date range ---
start_date = "2022-03-01"
end_date = "2022-03-31"

# --- Spatial extent (test areas) ---
# Bounding box for the Labrador Sea
labsea = [-65.0, 45.0, -40.0, 70.0]
# Four approximate 'corners' of the Labrador Sea
points = ["-55.0,53.75", "-64.25,61.5", "-53.25,64.25", "-44.0,59.25"]
# Four corners of a larger area
points2 = ["-56.0,52.5", "-68.0,60.0", "-53.0,66.0", "-41.0,60.0"]
# Two ends of a line bisecting the Labrador Sea
points3 = ["-64.0,55.0", "-44.0,63.0"]
# Patch in the North Atlantic
north_at = ["-40.0,45.0", "-30.0,40.0", "-30.0,55.0", "-40.0,55.0"]

# NB point co-ordinates are given as lon, lat (the opposite of the earthdata web search!)
# The default for a point search is AND, i.e. every point must lie within each granule


# --- Study area: corner rings (closed, first corner repeated at the end) ---
aoi_corners = [[-65,45],[-10,45],[-10,60],[10,60],[10,65],[65,65],[65,82],[-30,82],[-30,70],[-65,70],[-65,45]]
bb1_corners = [[-65,45],[-10,45],[-10,70],[-65,70],[-65,45]]
bb2_corners = [[-30,60],[10,60],[10,82],[-30,82],[-30,60]]
bb3_corners = [[10,65],[65,65],[65,82],[10,82],[10,65]]

# Bounding-box strings taken from two opposite corners of each rectangle
bb1 = _bbox_from_corners(bb1_corners)
bb2 = _bbox_from_corners(bb2_corners)
bb3 = _bbox_from_corners(bb3_corners)

# Polygon strings listing every corner of each ring in order
poly_1 = _polygon_from_corners(bb1_corners)
poly_2 = _polygon_from_corners(bb2_corners)
poly_3 = _polygon_from_corners(bb3_corners)

aoi_poly = _polygon_from_corners(aoi_corners)

In [5]:
# Arguments forwarded to each granule query: point search over the North Atlantic patch.
# NOTE: every query_args cell assigns the same name, so whichever ran last is the one used.
query_args = {
    "start_date": start_date,
    "end_date": end_date,
    "day_night_flag": "day",
    "point": north_at,
}

In [6]:
# Bounding-box variant of the query arguments (single box: bb3)
query_args = {"start_date": start_date, "end_date": end_date, "day_night_flag": "day"}
# 'bounding_box[]' is not a valid Python keyword name, so it is set by subscript
query_args["bounding_box[]"] = bb3

In [34]:
# Polygon variant of the query arguments (whole area-of-interest outline)
query_args = dict(
    start_date=start_date,
    end_date=end_date,
    day_night_flag="day",
    polygon=aoi_poly,
)
                  

In [58]:
# Several bounding boxes in one query, with the OR option switched on for them
search_boxes = [bb1, bb2, bb3]
query_args = {
    "start_date": start_date,
    "end_date": end_date,
    # "day_night_flag": "day",   # currently disabled
    "bounding_box[]": search_boxes,
    "options[bounding_box][or]": "true",
}
                  

In [24]:
# Query args for the MOD44W collection
#
# Whenever an option is needed (e.g. OR instead of AND) the keyword arguments
# have to be supplied as a dict and unpacked, for example:
# labsea2022_granules = granule_client.query(start_date=start_date, end_date=end_date,
#                                           **{"point":["-52.5,57.5", "-62.4,57.5"], "options[point][or]":"true",
#                                              "day_night_flag":"day"})

query_args = dict(
    start_date=start_date,
    end_date=end_date,
    day_night_flag="day",
    point=points2,
)
# The option key contains brackets, so it cannot be written as a keyword argument
query_args["options[point][or]"] = "true"



In [59]:
# Granule client for the first selected collection (a client handles one collection only)
granule_client_data = GranuleApi.from_collection(collections[0], session=session)

# Run the search and materialise the result immediately: the query hands back a
# generator, which could otherwise be consumed only once.
data_granules = list(granule_client_data.query(**query_args))

# Number of matching granules - check this before downloading!
len(data_granules)

1210

In [60]:
# Geolocation granules: same search arguments, run against the second selected collection
granule_client_geo = GranuleApi.from_collection(collections[1], session=session)
geo_granules = list(granule_client_geo.query(**query_args))

# Number of matching geolocation granules
len(geo_granules)

56

In [60]:
# Preview only the first few granules: echoing the whole list prints every
# granule's full repr (links included) and swamps the notebook output.
data_granules[:3]

[Granule(id='G2245154233-LAADS', title='LAADS:6897275492', dataset_id='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='GEODETIC', time_start='2022-03-30T22:50:00.000Z', updated=datetime.datetime(2024, 8, 18, 12, 49, 46, 510000, tzinfo=datetime.timezone.utc), links=[GranuleLink(hreflang='en-US', href=AnyUrl('https://data.laadsdaac.earthdatacloud.nasa.gov/prod-lads/MYD021KM/MYD021KM.A2022089.2250.061.2022090152703.hdf', scheme='https', host='data.laadsdaac.earthdatacloud.nasa.gov', tld='gov', host_type='domain', path='/prod-lads/MYD021KM/MYD021KM.A2022089.2250.061.2022090152703.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('s3://prod-lads/MYD021KM/MYD021KM.A2022089.2250.061.2022090152703.hdf', scheme='s3', host='prod-lads', host_type='int_domain', path='/MYD021KM/MYD021KM.A2022089.2250.061.2022090152703.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('https://ladsweb.m

In [62]:
# Sanity check: the first 22 characters of the producer granule id should be the
# scene name, e.g. 'MYD02HKM.A2022352.1640'
first_granule = dict(data_granules[0])
first_granule['producer_granule_id'][:22]

'MYD02HKM.A2022360.1545'

In [7]:
# Collect one footprint polygon string per data granule, for mapping
# (the first entry of each granule's first polygon group is used)
polygons = [dict(granule)['polygons'][0][0] for granule in data_granules]
polygons

['58.572999 -25.022957 52.863673 -61.735239 36.450516 -49.960015 40.259408 -23.265827 58.572999 -25.022957',
 '57.286156 -17.606495 51.747089 -53.232965 35.230387 -41.982387 38.975908 -15.751327 57.286156 -17.606495',
 '57.881684 -20.150211 52.267172 -56.26474 35.797013 -44.777044 39.569795 -18.339548 57.881684 -20.150211',
 '58.169255 -15.222073 52.517591 -51.582381 36.07003 -39.977463 39.856055 -13.435504 58.169255 -15.222073',
 '59.219052 -10.246831 53.423016 -47.536701 37.065828 -35.486109 40.905216 -8.547749 59.219052 -10.246831',
 '57.965929 -26.621551 52.336067 -62.83823 35.875598 -51.315025 39.655421 -24.825186 57.965929 -26.621551',
 '57.180954 -19.17057 51.652788 -54.744067 35.130494 -43.535002 38.872661 -17.316979 57.180954 -19.17057',
 '58.741998 -14.164259 53.006827 -51.072345 36.61341 -39.22747 40.432709 -12.43555 58.741998 -14.164259',
 '57.83519 -23.026087 52.223924 -59.167607 35.755173 -47.699859 39.527535 -21.230831 57.83519 -23.026087',
 '57.616594 -15.552275 52.0344

In [63]:
# Make dictionary of scenes with data and geolocation filenames
#
# Data and geolocation granules are paired by acquisition stamp, not by list
# position: the two queries can return different numbers of granules, and a
# positional zip() would then silently attach the wrong geolocation file.


def _acquisition_stamp(filename):
    """Return the 'AYYYYDDD.HHMM' fields of a 'PRODUCT.AYYYYDDD.HHMM.…' filename."""
    return ".".join(filename.split(".")[1:3])


def _scene_id(filename):
    """Return 'PRODUCT.AYYYYDDD.HHMM', e.g. 'MYD02HKM.A2022352.1640'.

    Built from the dot-separated fields so it also works for product names
    that are not exactly eight characters long.
    """
    return ".".join(filename.split(".")[:3])


# Index the geolocation filenames by acquisition stamp for direct lookup
geo_by_stamp = {}
for geogranule in geo_granules:
    geo_filename = dict(geogranule)['producer_granule_id']
    geo_by_stamp[_acquisition_stamp(geo_filename)] = geo_filename

scenes = {}
unmatched_data_files = []  # data files with no geolocation granule for the same stamp

for granule in data_granules:
    filename = dict(granule)['producer_granule_id']
    geo_filename = geo_by_stamp.get(_acquisition_stamp(filename))
    if geo_filename is None:
        # Leave incomplete scenes out instead of pairing them with an unrelated file
        unmatched_data_files.append(filename)
        continue
    scenes[_scene_id(filename)] = {'data': filename, 'geolocation': geo_filename}

if unmatched_data_files:
    print(f"{len(unmatched_data_files)} data granule(s) have no matching geolocation "
          "granule and were left out of scenes (see unmatched_data_files)")

In [64]:
# Root folder that all downloads and metadata for this run are written under

folder = "AquaHkmNorthAt2022"
modis_root = Path("/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/")
filepath = modis_root / folder
# Create the folder (and any missing parents); no error if it already exists
filepath.mkdir(parents=True, exist_ok=True)

In [18]:
# File path for the MOD44W water mask data
#filepath = "/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/MODIS_WaterMask"

In [65]:
# Save the scene ids and their data/geolocation filenames as YAML metadata

meta_filepath = filepath / "meta"
meta_filepath.mkdir(parents=True, exist_ok=True)

scene_ids_file = meta_filepath / 'scene_ids.yaml'
with scene_ids_file.open('w') as stream:
    yaml.dump(scenes, stream)


In [8]:
# Optional: download the jpg files for the data granules into <filepath>/pics

data_filepath = filepath / "pics"
data_filepath.mkdir(parents=True, exist_ok=True)

pics_options = {"path": str(data_filepath), "threads": 10, "ext": 'jpg'}
GranuleHandler.download_from_granules(data_granules, session, **pics_options)

Downloading:   0%|          | 0/71 [00:00<?, ?file/s]

[PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022352.1640.061.2022355090005.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022343.1650.061.2022344180910.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022331.1630.061.2022332170548.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022322.1640.061.2022324011554.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022319.1610.061.2022321102137.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022316.1540.061.2022321092936.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022310.1620.061.2022312103754.jpg'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/pics/MYBRGB.A2022307.1550.061.2022309012310.jpg'),
 PosixPath('/hom

In [9]:
# Trial run: download only the first two data granules into <filepath>/test_downloads

test_filepath = filepath / "test_downloads"
test_filepath.mkdir(parents=True, exist_ok=True)

test_granules = data_granules[:2]
GranuleHandler.download_from_granules(
    test_granules,
    session,
    path=str(test_filepath),
    threads=10,
)

Downloading:   0%|          | 0/2 [00:00<?, ?file/s]

[PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/test_downloads/MYD02HKM.A2022352.1640.061.2022354222030.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/Aqua1kmLabSea2022/test_downloads/MYD02HKM.A2022343.1650.061.2022344163236.hdf')]

In [66]:
# Full download of the data granules into <filepath>/l1b_data

data_filepath = filepath / "l1b_data"
data_filepath.mkdir(parents=True, exist_ok=True)

data_download_options = {"path": str(data_filepath), "threads": 10}
GranuleHandler.download_from_granules(data_granules, session, **data_download_options)

Downloading:   0%|          | 0/56 [00:00<?, ?file/s]

[PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022360.1545.061.2022362084206.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022357.1515.061.2022358183542.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022348.1525.061.2022354193813.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022336.1505.061.2022337184058.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022324.1445.061.2022325173431.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022314.1550.061.2022321025014.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM.A2022311.1520.061.2022312182209.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/l1b_data/MYD02HKM

In [67]:
# Full download of the geolocation granules into <filepath>/geoloc

geo_filepath = filepath / "geoloc"
geo_filepath.mkdir(parents=True, exist_ok=True)

geo_download_options = {"path": str(geo_filepath), "threads": 10}
GranuleHandler.download_from_granules(geo_granules, session, **geo_download_options)

Downloading:   0%|          | 0/56 [00:00<?, ?file/s]

[PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022360.1545.061.2022362072424.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022357.1515.061.2022358173415.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022348.1525.061.2022349152454.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022336.1505.061.2022337170930.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022324.1445.061.2022325165746.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022314.1550.061.2022321015937.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022311.1520.061.2022312161044.hdf'),
 PosixPath('/home/eefjg/OneDrive/Leeds/PhD/Data/MODIS/AquaHkmNorthAt2022/geoloc/MYD03.A2022299.1500.061.2022300150928.hdf'),
