In [2]:
from modis_tools.auth import ModisSession
from modis_tools.resources import CollectionApi, GranuleApi
from modis_tools.granule_handler import GranuleHandler

from modis_tools.auth import add_earthdata_netrc, remove_earthdata_netrc

from datetime import datetime, timedelta
from pathlib import Path
import yaml

In [None]:
# Create an entry for Earthdata in the ~/.netrc file (this only needs to be run once)

#username = ""
#password = ""
#add_earthdata_netrc(username, password)

In [3]:
# Create the session object that is shared by the collection and granule
# clients in the cells below.
# NOTE(review): no credentials are passed here — presumably they are picked up
# from the Earthdata entry in ~/.netrc; confirm against the modis_tools docs.
session = ModisSession()

In [4]:
# Client used to search the MODIS catalogue for collections
collection_client = CollectionApi(session=session)

# Single-platform alternatives (uncomment one and comment out the combined query below):
#collections = collection_client.query(short_name=["MYD021KM", "MYD03"], version="6.1")      #Aqua
#collections = collection_client.query(short_name=["MOD021KM", "MOD03"], version="6.1")      #Terra

# Combined query: Aqua and Terra, radiance (021KM) and geolocation (03) products.
# NOTE(review): later cells pick collections by position (collections[0],
# collections[1]) — check the displayed order matches the products you expect.
collections = collection_client.query(
    short_name=["MYD021KM", "MYD03", "MOD021KM", "MOD03"],
    version="6.1",
)

collections

[Collection(id='C1378227407-LAADS', title='MODIS/Terra Calibrated Radiances 5-Min L1B Swath 1km', dataset_id='MODIS/Terra Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='CARTESIAN', time_start='2000-02-24T00:00:00.000Z', updated=None, links=[CollectionLink(hreflang='en-US', href=AnyUrl('https://mcst.gsfc.nasa.gov/content/l1b-documents', scheme='https', host='mcst.gsfc.nasa.gov', tld='gov', host_type='domain', path='/content/l1b-documents'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://doi.org/10.5067/MODIS/MOD021KM.061', scheme='https', host='doi.org', tld='org', host_type='domain', path='/10.5067/MODIS/MOD021KM.061'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://ladsweb.modaps.eosdis.nasa.gov/search/order/2/MOD021KM--61', scheme='https', host='ladsweb.modaps.eosdis.nasa.gov', tld='gov', host_type='domain', path='/search/order/2/MOD021KM--61'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://ladsweb.modaps.eosdis

In [9]:
# Spatial and temporal filters applied to the granule search.
# Further options: https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html

# --- Temporal extent ---
# NOTE(review): confirm whether end_date covers the whole of that day or only its start.
start_date = "2022-01-01"
end_date = "2022-01-31"

# --- Spatial extent ---
# NB co-ordinates are given as lon, lat (opposite to the earthdata web search!)
# Default option for point search is AND, i.e. all points will be within each granule


def _bbox_from_corners(corners):
    """Return 'lon0,lat0,lon2,lat2' built from the first and third corner of a closed ring."""
    first_corner, opposite_corner = corners[0], corners[2]
    return ",".join(map(str, [*first_corner, *opposite_corner]))


# Outline of the whole study area as a closed ring of [lon, lat] vertices,
# flattened into a single comma-separated string.
# NOTE(review): this outline has vertices at lon 10, whereas the boxes below
# meet at lon 15 — confirm which boundary is intended.
aoi_corners = [[-65,45],[-10,45],[-10,60],[10,60],[10,65],[65,65],[65,82],[-30,82],[-30,70],[-65,70],[-65,45]]
aoi_poly = ",".join(str(coord) for corner in aoi_corners for coord in corner)

# The study area split into four rectangles (closed rings of [lon, lat] vertices)
bb1_corners = [[-65,45],[-30,45],[-30,70],[-65,70],[-65,45]] # Labrador Sea West
bb2_corners = [[-30,45],[-10,45],[-10,60],[-30,60],[-30,45]] # Labrador Sea East
bb3_corners = [[-30,60],[15,60],[15,82],[-30,82],[-30,60]] # GIN Seas
bb4_corners = [[15,65],[65,65],[65,82],[15,82],[15,65]] # Barents Sea

# Bounding-box strings for the query, one per rectangle
bb1 = _bbox_from_corners(bb1_corners)
bb2 = _bbox_from_corners(bb2_corners)
bb3 = _bbox_from_corners(bb3_corners)
bb4 = _bbox_from_corners(bb4_corners)


In [10]:
# Search parameters shared by the data and geolocation granule queries.
# Several bounding boxes are supplied and OR-ed together, so a granule matches
# if it falls in any of them. Uncomment the day/night flag for daylight only.
query_args = {
    'start_date': start_date,
    'end_date': end_date,
    #'day_night_flag':'day',
    'bounding_box[]': [bb1, bb2, bb3, bb4],
    'options[bounding_box][or]': "true",
}


In [11]:
# Granule client for the data collection (granules are queried one collection at a time).
# NOTE(review): collections[0] depends on the order returned by the catalogue
# query — confirm it is the intended product before downloading.
granule_client_data = GranuleApi.from_collection(collections[0], session=session)

# Run the search and materialise the result immediately: the query returns a
# generator, which can only be consumed once.
data_granules = list(granule_client_data.query(**query_args))

# Number of matching granules — check this before downloading!
len(data_granules)

1229

In [12]:
# Preview the first ten granule records returned by the data query
data_granules[:10]

[Granule(id='G2209984661-LAADS', title='LAADS:6841975322', dataset_id='MODIS/Terra Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='GEODETIC', time_start='2022-01-31T00:00:00.000Z', updated=datetime.datetime(2024, 8, 8, 9, 27, 4, 410000, tzinfo=datetime.timezone.utc), links=[GranuleLink(hreflang='en-US', href=AnyUrl('https://data.laadsdaac.earthdatacloud.nasa.gov/prod-lads/MOD021KM/MOD021KM.A2022031.0000.061.2022031134316.hdf', scheme='https', host='data.laadsdaac.earthdatacloud.nasa.gov', tld='gov', host_type='domain', path='/prod-lads/MOD021KM/MOD021KM.A2022031.0000.061.2022031134316.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('s3://prod-lads/MOD021KM/MOD021KM.A2022031.0000.061.2022031134316.hdf', scheme='s3', host='prod-lads', host_type='int_domain', path='/MOD021KM/MOD021KM.A2022031.0000.061.2022031134316.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('https://ladsweb.mod

In [13]:
# Geolocation granules for the same search criteria as the data granules.
# NOTE(review): collections[1] depends on the order returned by the catalogue
# query — confirm it is the geolocation product matching the data collection.
granule_client_geo = GranuleApi.from_collection(collections[1], session=session)

# Materialise the query result so it can be reused in later cells
geo_granules = list(granule_client_geo.query(**query_args))

# Should equal the number of data granules if nothing is missing
len(geo_granules)

1229

In [14]:
# Sanity check: the first 22 characters of the producer granule id should be
# the scene name (product, date, time), e.g. 'MYD02HKM.A2022352.1640'
first_granule_id = dict(data_granules[0])['producer_granule_id']
first_granule_id[:22]

'MOD021KM.A2022031.0000'

In [15]:
# Make dictionary of scenes with data and geolocation filenames.
#
# Data and geolocation granules are paired on platform + acquisition date/time
# parsed from their file names, rather than on their position in the two lists.
# Pairing by position silently mispairs every later scene if one list has a
# missing granule or a different order.


def _acquisition_key(granule_filename):
    """Return (platform prefix, date token, time token) for a granule file name.

    For 'MOD021KM.A2022031.0000.061....hdf' this gives ('MOD', 'A2022031', '0000').
    Assumes the geolocation files follow the same '<product>.<AYYYYDDD>.<HHMM>.'
    naming as the data files — TODO confirm against a geolocation granule.
    """
    return (granule_filename[:3], *granule_filename.split(".")[1:3])


# Index geolocation file names by acquisition key for direct lookup
geo_by_acquisition = {}
for geogranule in geo_granules:
    geo_filename = dict(geogranule)['producer_granule_id']
    geo_by_acquisition[_acquisition_key(geo_filename)] = geo_filename

scenes = {}
unmatched_data_files = []  # data granules with no geolocation counterpart

for granule in data_granules:
    filename = dict(granule)['producer_granule_id']
    geo_filename = geo_by_acquisition.get(_acquisition_key(filename))
    if geo_filename is None:
        # Leave the scene out rather than pair it with the wrong geolocation file
        unmatched_data_files.append(filename)
        continue
    # Scene id is the first 22 characters: product, date and time
    scenes[filename[:22]] = {'data':filename, 'geolocation':geo_filename}

if unmatched_data_files:
    print(f"{len(unmatched_data_files)} data granule(s) had no matching geolocation file and were skipped:")
    for filename in unmatched_data_files:
        print(f"  {filename}")

In [18]:
# Root directory under which all downloads are stored
save_dir = "/gws/nopw/j04/sensecdt/users/flojo/data/"

# Sub-folder for this particular download
folder = "test_data"

# Create the download directory (and any missing parents) if it is not already there
filepath = Path(save_dir) / folder
filepath.mkdir(parents=True, exist_ok=True)

In [19]:
# Save the scene ids and their data/geolocation filenames as YAML

# Metadata lives in a "meta" sub-folder of the download directory
meta_filepath = filepath / "meta"
meta_filepath.mkdir(parents=True, exist_ok=True)

scene_ids_path = meta_filepath / 'scene_ids.yaml'
with scene_ids_path.open('w') as f:
    yaml.dump(scenes, f)