In [1]:
!pip3 install -q --upgrade pip
!pip3 install -q pandas matplotlib seaborn openpyxl climateserv requests earthaccess

In [2]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import climateserv.api
import earthaccess
# Bare last expression: the installed earthaccess version is echoed as the cell output.
earthaccess.__version__

  from .autonotebook import tqdm as notebook_tqdm


'0.8.2'

In [6]:
# Authenticate and keep the returned auth object for later use.
# NOTE(review): strategy="environment" presumably reads the credentials from
# environment variables, and persist=True presumably stores them for later
# sessions — confirm against the earthaccess documentation.
auth = earthaccess.login(strategy="environment", persist=True)

In [7]:
# Start from an empty collection (dataset) query.
Query = earthaccess.collection_query()

# Narrow the query with chained filters: keyword, bounding box and time range.
(
    Query
    .keyword('elevation')
    .bounding_box(-134.7,58.9,-133.9,59.2)
    .temporal("2020-01-01","2020-02-01")
)

print(f'Collections found: {Query.hits()}')

# Restrict the returned UMM fields to ShortName and Version; leave out the
# fields() filter to get the full record. The "meta" section is always included.
collections = Query.fields(['ShortName','Version']).get(5)
# Inspect the first three results (ShortName and Version only).
collections[:3]

Collections found: 367


[{
   "meta": {
     "concept-id": "C1431413941-NSIDC_ECS",
     "granule-count": 61,
     "provider-id": "NSIDC_ECS"
   },
   "umm": {
     "ShortName": "RDEFT4",
     "Version": "1"
   }
 },
 {
   "meta": {
     "concept-id": "C1299783579-LPDAAC_ECS",
     "granule-count": 0,
     "provider-id": "LPDAAC_ECS"
   },
   "umm": {
     "ShortName": "AST14DEM",
     "Version": "003"
   }
 },
 {
   "meta": {
     "concept-id": "C1299783651-LPDAAC_ECS",
     "granule-count": 0,
     "provider-id": "LPDAAC_ECS"
   },
   "umm": {
     "ShortName": "AST14DMO",
     "Version": "003"
   }
 }]

In [8]:
# Search for collections served by a single DAAC, here "ASF".
Query = earthaccess.collection_query()
Query = Query.daac("ASF")

print(f'Collections found: {Query.hits()}')
# Fetch up to 10 records, keeping only the ShortName field.
collections = Query.fields(['ShortName']).get(10)
# Show the first collection.
collections[0]

Collections found: 130


{
  "meta": {
    "concept-id": "C1214470488-ASF",
    "granule-count": 1603112,
    "provider-id": "ASF"
  },
  "umm": {
    "ShortName": "SENTINEL-1A_SLC"
  }
}

In [9]:
# Same DAAC search, restricted to cloud-hosted collections.
Query = earthaccess.collection_query()
Query = Query.daac("ASF").cloud_hosted(True)

print(f'Collections found: {Query.hits()}')
# Fetch up to 10 records, keeping only the ShortName field.
collections = Query.fields(['ShortName']).get(10)
# Show the first collection.
collections[0]

Collections found: 50


{
  "meta": {
    "concept-id": "C1214470488-ASF",
    "granule-count": 1603112,
    "provider-id": "ASF"
  },
  "umm": {
    "ShortName": "SENTINEL-1A_SLC"
  }
}

In [10]:
# List the concept-id of every collection fetched above (up to 10).
[
    entry.concept_id()
    for entry in collections
]

['C1214470488-ASF',
 'C1327985661-ASF',
 'C1214470533-ASF',
 'C1595422627-ASF',
 'C1327985645-ASF',
 'C1214470576-ASF',
 'C1214470496-ASF',
 'C1214470532-ASF',
 'C1214472977-ASF',
 'C1327985617-ASF']

In [13]:
from pprint import pprint

# Granule query: HLSL30 granules that intersect the bounding box.
Query = earthaccess.granule_query()
Query = Query.short_name('HLSL30').bounding_box(-134.7,58.9,-133.9,59.2)
# Fetch a single metadata record.
granules = Query.get(1)

In [14]:
# Render each granule with its rich notebook representation.
# A plain loop is used because display() returns None: a list comprehension
# would build a throwaway list and echo it (e.g. `[None]`) as the cell output.
for granule in granules:
    display(granule)

[None]

In [15]:
# Query ATL06 version "005" granules for March 2020 inside the bounding box.
Query = (
    earthaccess.granule_query()
    .short_name("ATL06")
    .temporal("2020-03-01", "2020-03-30")
    .bounding_box(-134.7,58.9,-133.9,59.2)
    .version("005")
)
# Always inspect the hit count before retrieving the granule metadata, which is very verbose.
print(f"Granules found: {Query.hits()}")

Granules found: 8


In [16]:
# Fetch up to 5 granules and summarise each one with its data links and size,
# using the granule's built-in helper methods.
granules = Query.get(5)
data_links = [
    {
        'links': granule.data_links(access="on_prem"),
        'size (MB):': granule.size(),
    }
    for granule in granules
]
data_links

[{'links': ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/005/2020/03/06/ATL06_20200306122320_10810606_005_01.h5'],
  'size (MB):': 2.7875404357910156},
 {'links': ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/005/2020/03/08/ATL06_20200308234154_11190602_005_01.h5'],
  'size (MB):': 4.364532470703125},
 {'links': ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/005/2020/03/10/ATL06_20200310121504_11420606_005_01.h5'],
  'size (MB):': 2.6717844009399414},
 {'links': ['https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL06/005/2020/03/12/ATL06_20200312233336_11800602_005_01.h5'],
  'size (MB):': 14.138877868652344},
 {'links': ['https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL06.005/2020.03.06/ATL06_20200306122320_10810606_005_01.h5'],
  'size (MB):': 2.7875404358}]

In [17]:
# Collect one granule per year: the first search hit in the May window of each
# year from 1999 through 2018 (range() excludes 2019).
granules = []

for year in range(1999, 2019):
    results = earthaccess.search_data(
        doi="10.5067/SLREF-CDRV3",
        temporal=(f"{year}-05", f"{year}-06"),
    )
    # Keep only the first hit; a year with no hits contributes nothing.
    granules.extend(results[:1])

Granules found: 7
Granules found: 7
Granules found: 7
Granules found: 7
Granules found: 7
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 6
Granules found: 7
Granules found: 7
Granules found: 7


In [18]:
import earthaccess

earthaccess.login()

# Granule query by dataset DOI, restricted to a bounding box.
Query = (
    earthaccess.granule_query()
    .doi("10.5067/SLREF-CDRV3")
    .bounding_box(-134.7,54.9,-100.9,69.2)
)
print(f"Granule hits: {Query.hits()}")

# Fetching every matching metadata record would slow the demo down, so only get a few.
granules = Query.get(10)
# Does the first granule belong to a cloud-based collection?
granules[0].cloud_hosted

Granule hits: 2207


True

In [19]:
%%time
files = earthaccess.download(granules[0:2], "./data/C1972955240-PODAAC/")

 Getting 2 granules, approx download size: 0.02 GB


QUEUEING TASKS | : 100%|██████████| 2/2 [00:00<00:00, 346.68it/s]
PROCESSING TASKS | : 100%|██████████| 2/2 [00:06<00:00,  3.48s/it]


Error while downloading the file ssh_grids_v2205_1992101512.nc
Traceback (most recent call last):
  File "/Users/franciscofurey/00DataScience/OpenAi/venv/lib/python3.9/site-packages/earthaccess/store.py", line 595, in _download_file
    r.raise_for_status()
  File "/Users/franciscofurey/00DataScience/OpenAi/venv/lib/python3.9/site-packages/requests/models.py", line 1021, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 502 Server Error: Bad Gateway for url: https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/ssh_grids_v2205_1992101512.nc



COLLECTING RESULTS | : 100%|██████████| 2/2 [00:00<00:00, 10591.68it/s]

CPU times: user 114 ms, sys: 75.9 ms, total: 190 ms
Wall time: 7.14 s





In [20]:
import earthaccess

print(f"using earthaccess v{earthaccess.__version__}")

# Try the default login first.
auth = earthaccess.login()

# Not authenticated yet: ask for credentials interactively and persist them
# in a .netrc file.
if not auth.authenticated:
    auth.login(
        strategy="interactive",
        persist=True,
    )

using earthaccess v0.8.2


In [21]:
from pprint import pprint

# Get up to 4 cloud-hosted collections that match the keyword.
collections = earthaccess.search_datasets(
    keyword = "SEA SURFACE HEIGHT",
    cloud_hosted = True,
    count = 4
)

# Show the first 2 collections: summary, then abstract and DOI.
for collection in collections[0:2]:
    # pprint() writes to stdout and returns None, so it is called on its own;
    # nesting it inside print() would emit a stray "None" before the abstract.
    pprint(collection.summary())
    print(collection.abstract(), "\n", collection["umm"]["DOI"], "\n\n")

Datasets found: 250
{'cloud-info': {'Region': 'us-west-2',
                'S3BucketAndObjectPrefixNames': ['podaac-ops-cumulus-public/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/',
                                                 'podaac-ops-cumulus-protected/SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205/'],
                'S3CredentialsAPIDocumentationURL': 'https://archive.podaac.earthdata.nasa.gov/s3credentialsREADME',
                'S3CredentialsAPIEndpoint': 'https://archive.podaac.earthdata.nasa.gov/s3credentials'},
 'concept-id': 'C2270392799-POCLOUD',
 'file-type': "[{'Format': 'netCDF-4', 'FormatType': 'Native', "
              "'AverageFileSize': 9.7, 'AverageFileSizeUnit': 'MB'}]",
 'get-data': ['https://cmr.earthdata.nasa.gov/virtual-directory/collections/C2270392799-POCLOUD',
              'https://search.earthdata.nasa.gov/search/granules?p=C2270392799-POCLOUD'],
 'short-name': 'SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205'

In [23]:
import earthaccess

# Approximate bounding coordinates for The Gambia.
# NOTE(review): this dict is not used by the search below, and its values differ
# from bounding_box_gambia — confirm which extent is intended.
GAMBIA_BOUNDARIES = {
    "min_lat": 13.052113101601,  # Lower latitude
    "max_lat": 13.837920583180,  # Upper latitude
    "min_lon": -17.06499856438,  # More western longitude
    "max_lon": -13.77921401885   # More eastern longitude
}

# Order: (min_lon, min_lat, max_lon, max_lat)
bounding_box_gambia = (-16.8252, 13.1303, -13.8441, 13.8266)

# Authentication
auth = earthaccess.login(strategy="interactive", persist=True)

# Search one specific dataset for granules covering the bounding box.
results = earthaccess.search_data(
    short_name='MODIS_Terra_L3_SST_Thermal',  # example dataset short name
    bounding_box=bounding_box_gambia,
    temporal=("2023-01-01", "2023-12-31"),  # example time range
    count=10,
)

# Print every granule that was found.
for granule in results:
    print(granule)


Granules found: 0


In [33]:
import earthaccess

# Authentication
auth = earthaccess.login(strategy="interactive", persist=True)

# Build a collection query and filter it by the keyword "groundwater".
query = earthaccess.collection_query()
query.keyword('groundwater')

# Fetch the first 5 matching datasets.
collections = query.get(5)

# Print the short name, abstract and concept id of each dataset.
for collection in collections:
    print(f"Nombre: {collection['umm']['ShortName']}")
    print(f"Descripción: {collection['umm']['Abstract']}")
    print(f"ID del Concepto: {collection['meta']['concept-id']}")
    print("-----")


Nombre: GRACEDADM_CLSM025GL_7D
Descripción: Scientists at NASA Goddard Space Flight Center generate groundwater and soil moisture drought indicators each week. They are based on terrestrial water storage observations derived from GRACE-FO satellite data and integrated with other observations, using a sophisticated numerical model of land surface water and energy processes.

This data product is GRACE Data Assimilation for Drought Monitoring (GRACE-DA-DM) Global Version 3.0 from a global GRACE and GRACE-FO data assimilation and drought indicator product generation (Li et al., 2019). It varies from the other GRACE-DA-DM products which are from the U.S. GRACE-based drought indicator product generation (Houborg et al., 2012).
The GRACE-DA-DM Global V3.0 is similar to the GRACE-DA-DM U.S. V4.0 product. Both products are based on the Catchment Land Surface Model (CLSM) Fortuna 2.5 version simulation that was created within the Land Information System data assimilation framework (Kumar et al.

In [31]:
import earthaccess

# Authentication
auth = earthaccess.login(strategy="interactive", persist=True)

# Bounding coordinates for The Gambia
GAMBIA_BOUNDARIES = {
    "min_lat": 13.052113101601,
    "max_lat": 13.837920583180,
    "min_lon": -17.06499856438,
    "max_lon": -13.77921401885
}

# Date range of the search
fecha_inicio = "2015-01-01"
fecha_fin = "2022-12-31"

# concept_id of the chosen dataset (e.g. GPM_3DPRD); replace it to search another dataset
concept_id = 'C2179081590-GES_DISC'

# Search the precipitation dataset for granules inside the bounding box and date range.
results = earthaccess.search_data(
    concept_id=concept_id,
    # Assemble the box in (min_lon, min_lat, max_lon, max_lat) order.
    bounding_box=tuple(
        GAMBIA_BOUNDARIES[key]
        for key in ("min_lon", "min_lat", "max_lon", "max_lat")
    ),
    temporal=(fecha_inicio, fecha_fin),
    count=10,  # adjust as needed
)

# Print the results for inspection.
for granule in results:
    print(granule)



Granules found: 2922
Collection: {'ShortName': 'GPM_3DPRD', 'Version': '07'}
Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'BoundingRectangles': [{'WestBoundingCoordinate': -180.0, 'EastBoundingCoordinate': 180.0, 'NorthBoundingCoordinate': 67.0, 'SouthBoundingCoordinate': -67.0}]}}}
Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2015-01-01T00:00:00.000Z', 'EndingDateTime': '2015-01-01T23:59:59.999Z'}}
Size(MB): 6.759087562561035
Data: ['https://data.gesdisc.earthdata.nasa.gov/data/GPM_L3/GPM_3DPRD.07/2015/01/3A-DAY.GPM.DPR.V3-20210628.20150101-S000000-E235959.001.V07A.HDF5']
Collection: {'ShortName': 'GPM_3DPRD', 'Version': '07'}
Spatial coverage: {'HorizontalSpatialDomain': {'Geometry': {'BoundingRectangles': [{'WestBoundingCoordinate': -180.0, 'EastBoundingCoordinate': 180.0, 'NorthBoundingCoordinate': 67.0, 'SouthBoundingCoordinate': -67.0}]}}}
Temporal coverage: {'RangeDateTime': {'BeginningDateTime': '2015-01-02T00:00:00.000Z', 'EndingDateTime': '2015-0