In [1]:
from modis_tools.auth import ModisSession
from modis_tools.resources import CollectionApi, GranuleApi
from modis_tools.granule_handler import GranuleHandler

from modis_tools.auth import add_earthdata_netrc, remove_earthdata_netrc

from datetime import datetime, timedelta
from pathlib import Path
import yaml
import datetime as dt

In [None]:
# Create an entry for Earthdata in the ~/.netrc file, only needs to be run once

#username = ""
#password = ""
#add_earthdata_netrc(username, password)

In [2]:
# Create a session
session = ModisSession()

In [3]:
# Query the MODIS catalog for collections
collection_client = CollectionApi(session=session)

# Define the collections to query

collections = collection_client.query(short_name=["MYD021KM", "MYD03"], version="6.1")      #Aqua
#collections = collection_client.query(short_name=["MOD021KM", "MOD03"], version="6.1")      #Terra

# Or maybe do them all at once?
#collections = collection_client.query(short_name=["MYD021KM", "MYD03", "MOD021KM", "MOD03"], version="6.1")

collections

[Collection(id='C1379758607-LAADS', title='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', dataset_id='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='CARTESIAN', time_start='2002-07-04T00:00:00.000Z', updated=None, links=[CollectionLink(hreflang='en-US', href=AnyUrl('https://mcst.gsfc.nasa.gov/sites/default/files/file_attachments/M1054D_PUG_083112_final.pdf', scheme='https', host='mcst.gsfc.nasa.gov', tld='gov', host_type='domain', path='/sites/default/files/file_attachments/M1054D_PUG_083112_final.pdf'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://doi.org/10.5067/MODIS/MYD021KM.061', scheme='https', host='doi.org', tld='org', host_type='domain', path='/10.5067/MODIS/MYD021KM.061'), type=None), CollectionLink(hreflang='en-US', href=AnyUrl('https://ladsweb.modaps.eosdis.nasa.gov/search/order/2/MYD021KM--61', scheme='https', host='ladsweb.modaps.eosdis.nasa.gov', tld='gov', host_type='domain', path='/search/order/2/MYD021KM--61'), typ

In [29]:
# Criteria to filter the selected granules via spatial and temporal parameters
# See https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html for more options

# Date range:
start_date = "2022-01-01"
end_date = "2023-01-01"


In [5]:
# Spatial extent

# NB point co-ordinate is in lon, lat (opposite to earthdata web search!)
# Default option for point search is AND, i.e. all points will be within each granule

# bounding boxes and polygons for study area:
aoi_corners = aoi_corners = [[-65,45],[-10,45],[-10,60],[15,60],[15,70],[65,70],[65,82],[-25,82], [-25,70],[-65,70],[-65,45]]
aoi_poly = ",".join([",".join([str(x) for x in y]) for y in aoi_corners])

bb1_corners = [[-65,45],[-25,45],[-25,70],[-65,70],[-65,45]] # Labrador Sea West
bb2_corners = [[-25,45],[-10,45],[-10,60],[-25,60],[-25,45]] # Labrador Sea East
bb3_corners = [[-25,60],[15,60],[15,82],[-25,82],[-25,60]] # GIN Seas
bb4_corners = [[15,70],[65,70],[65,82],[15,82],[15,70]] # Barents Sea

bb1 = ",".join([str(x) for x in [bb1_corners[0][0], bb1_corners[0][1], bb1_corners[2][0], bb1_corners[2][1]]])
bb2 = ",".join([str(x) for x in [bb2_corners[0][0], bb2_corners[0][1], bb2_corners[2][0], bb2_corners[2][1]]])
bb3 = ",".join([str(x) for x in [bb3_corners[0][0], bb3_corners[0][1], bb3_corners[2][0], bb3_corners[2][1]]])
bb4 = ",".join([str(x) for x in [bb4_corners[0][0], bb4_corners[0][1], bb4_corners[2][0], bb4_corners[2][1]]])

# Similar bounding boxes but with a buffer of 5-10 degrees to avoid granules with only tiny intersection:
buffered_bb1_corners = [[-55,50],[-30,50],[-30,65],[-55,65],[-55,50]] # Labrador Sea West
buffered_bb2_corners = [[-30,50],[-20,50],[-20,65],[-30,65],[-30,50]] # Labrador Sea East
buffered_bb3_corners = [[-15,65],[5,65],[5,80],[-15,80],[-15,65]] # GIN Seas
buffered_bb4_corners = [[5,75],[55,75],[55,80],[5,80],[5,75]] # Barents Sea

buffer_bb1 = ",".join([str(x) for x in [buffered_bb1_corners[0][0], buffered_bb1_corners[0][1], buffered_bb1_corners[2][0], buffered_bb1_corners[2][1]]])
buffer_bb2 = ",".join([str(x) for x in [buffered_bb2_corners[0][0], buffered_bb2_corners[0][1], buffered_bb2_corners[2][0], buffered_bb2_corners[2][1]]])
buffer_bb3 = ",".join([str(x) for x in [buffered_bb3_corners[0][0], buffered_bb3_corners[0][1], buffered_bb3_corners[2][0], buffered_bb3_corners[2][1]]])
buffer_bb4 = ",".join([str(x) for x in [buffered_bb4_corners[0][0], buffered_bb4_corners[0][1], buffered_bb4_corners[2][0], buffered_bb4_corners[2][1]]])


In [30]:
# Query args with multiple bounding boxes
# Switch on day flag for daylight only
query_args = {'start_date':start_date, 'end_date':end_date, 
                   #'day_night_flag':'day', 
                   #'bounding_box[]':[bb1,bb2,bb3,bb4], 
                   'bounding_box[]':[buffer_bb1,buffer_bb2,buffer_bb3,buffer_bb4],
                   'options[bounding_box][or]':"true"}


In [31]:
# Query the selected collection for granules (only one collection at a time)
granule_client_data = GranuleApi.from_collection(collections[0], session=session)

# Get list of granules fitting given criteria:
data_granules = granule_client_data.query(**query_args) 

# Check how many granules before downloading!
data_granules = list(data_granules) # make it a list as generator can only be used once
len(data_granules)

10281

In [23]:
data_granules

[Granule(id='G2245753964-LAADS', title='LAADS:6898192294', dataset_id='MODIS/Aqua Calibrated Radiances 5-Min L1B Swath 1km', coordinate_system='GEODETIC', time_start='2022-03-31T15:10:00.000Z', updated=datetime.datetime(2024, 8, 18, 12, 50, 10, 410000, tzinfo=datetime.timezone.utc), links=[GranuleLink(hreflang='en-US', href=AnyUrl('https://data.laadsdaac.earthdatacloud.nasa.gov/prod-lads/MYD021KM/MYD021KM.A2022090.1510.061.2022091150617.hdf', scheme='https', host='data.laadsdaac.earthdatacloud.nasa.gov', tld='gov', host_type='domain', path='/prod-lads/MYD021KM/MYD021KM.A2022090.1510.061.2022091150617.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('s3://prod-lads/MYD021KM/MYD021KM.A2022090.1510.061.2022091150617.hdf', scheme='s3', host='prod-lads', host_type='int_domain', path='/MYD021KM/MYD021KM.A2022090.1510.061.2022091150617.hdf'), type='application/x-hdfeos', inherited=None), GranuleLink(hreflang='en-US', href=AnyUrl('https://ladsweb.m

In [32]:
# Get geolocation granules for the same time period
granule_client_geo = GranuleApi.from_collection(collections[1], session=session)
geo_granules = granule_client_geo.query(**query_args)
geo_granules = list(geo_granules)
len(geo_granules) # Check same number of geolocation granules as data granules

10281

In [33]:
# Check name of scene/granule is parsed correctly! e.g. 'MYD02HKM.A2022352.1640'
dict(data_granules[0])['producer_granule_id'][:22]

'MYD021KM.A2022365.2250'

In [10]:
# Check time parsed correctly
time = dict(data_granules[0])['time_start']
time = dt.datetime.strptime(time, '%Y-%m-%dT%H:%M:%S.%fZ')
time

datetime.datetime(2022, 3, 31, 15, 10)

In [34]:
# MODIS files on CEDA - subdirectories like YYYY/MM/DD

aqua_data_dir = "/neodc/modis/data/MYD021KM/collection61/"
aqua_geoloc_dir = "/neodc/modis/data/MYD03/collection61/"

terra_data_dir = "/neodc/modis/data/MOD021KM/collection61/"
terra_geoloc_dir = "/neodc/modis/data/MOD03/collection61/"

# ERA5  surface level analysis parameter data - subdirectories like YYYY/MM/DD

era5_dir = "/badc/ecmwf-era5/data/oper/an_sfc/"

# Will need to parse granule names to get folders

In [35]:
# Make dictionary of scenes with data and geolocation filenames and locations on CEDA

# Also include path to relevant sea ice concentration file


scenes = {}

for granule, geogranule in zip(data_granules, geo_granules):
    granule_info = dict(granule)
    filename = granule_info['producer_granule_id']
    time = dt.datetime.strptime(granule_info['time_start'], '%Y-%m-%dT%H:%M:%S.%fZ')
    era_siconc_filename = f"ecmwf-era5_oper_an_sfc_{time:%Y%m%d%H}00.ci.nc"

    subdir = f"{time.year}/{time.month:02d}/{time.day:02d}/"
    if filename[1] == "Y": # Aqua
        data_dir = aqua_data_dir + subdir
        geoloc_dir = aqua_geoloc_dir + subdir
    elif filename[1] == "O": # Terra
        data_dir = terra_data_dir + subdir
        geoloc_dir = terra_geoloc_dir + subdir
    
    scenes[filename[:22]] = {                                                   # scene_id
        'data':data_dir + filename,                                             # data file  
        'geolocation':geoloc_dir + dict(geogranule)['producer_granule_id'],      # geolocation file
        'seaice':era5_dir + subdir + era_siconc_filename,                       # ERA5 sea ice concentration file
        'day_night_flag': granule_info['day_night_flag'],                       # day/night flag
        }

# Could put a check in here that data and geoloc dates match (any reason why they wouldn't?)

In [37]:
len(scenes)

10281

In [38]:
save_dir = "/gws/nopw/j04/sensecdt/users/flojo/data/"

# Specify save folder for current dataset
#folder = "test_data"
folder = "2022/aqua"
filepath = Path(save_dir+folder)
filepath.mkdir(exist_ok=True, parents=True)

In [39]:
# Write scene_ids and filenames to yaml file

meta_filepath = filepath / "meta"
meta_filepath.mkdir(exist_ok=True, parents=True)

with open(meta_filepath / 'scene_ids.yaml', 'w') as f:
    yaml.dump(scenes, f)

In [30]:
# optional...
# Get polygon corners for each granule for mapping

polygons = []
for granule in data_granules:
    polygons.append(dict(granule)['polygons'][0][0])
polygons

['62.701894 20.645219 56.888587 62.600547 69.265475 93.384348 80.776895 11.079099 62.701894 20.645219',
 '80.360472 13.960098 68.877821 93.702143 69.173032 146.386309 80.538654 -131.883933 80.360472 13.960098',
 '66.670594 45.105119 60.081395 92.158707 70.638846 129.299635 84.446975 23.933265 66.670594 45.105119',
 '82.953286 -30.803906 69.667017 -120.370827 58.5815 -85.811147 64.846505 -41.415666 82.953286 -30.803906',
 '65.32226 -42.39235 58.514522 -86.250366 42.804302 -71.002899 47.005119 -41.373433 65.32226 -42.39235',
 '82.360542 134.037909 69.691682 -137.153556 68.175887 -84.851073 78.534922 -11.049649 82.360542 134.037909',
 '79.104455 -12.534496 67.955178 -85.420141 55.260693 -56.756706 60.869197 -16.939512 79.104455 -12.534496',
 '61.318066 -17.746412 55.218148 -57.135056 39.057687 -44.117497 43.005353 -16.280522 61.318066 -17.746412',
 '85.958534 141.383497 70.700537 -100.775945 65.861032 -51.526599 74.631123 10.661454 85.958534 141.383497',
 '75.16902 9.384176 65.686757 -52.