In [None]:
import os
import requests
from pathlib import Path

from varinfo import VarInfoFromDmr
from cmr import CollectionQuery, GranuleQuery, ToolQuery, ServiceQuery, VariableQuery
import xarray as xr
import numpy as np
import pandas as pd

In [None]:
# Fetch every CMR collection record published by the chosen provider.
provider = 'GES_DISC'
api = CollectionQuery()
collections = api.provider(provider).get_all()

# Report how many collections will be examined.
n_collections = len(collections)
print('provider ', provider, ' has ', n_collections, ' to check', sep='')

provider GES_DISC has 1791 to check


In [None]:
# Unfortunately, because GES DISC didn't leverage the OPeNDAP flags, we have
# to identify those collections with the workaround used below.

# TODO: should we add a filter to check only level 3/4 products?
# For now, the results will be summarized by this information.

In [None]:
# Parallel lists: index k in each list describes the same collection.
opendap_base_urls, short_names, versions = [], [], []
processing_levels, native_ids = [], []

for record in collections:
    # The first link carrying the service `rel` marks the collection as one
    # to check; collections without such a link are skipped.
    service_link = next(
        (link for link in record['links']
         if link['rel'] == 'http://esipfed.org/ns/fedsearch/1.1/service#'),
        None,
    )
    if service_link is None:
        continue

    # this should return default url so can be on prem or cloud depending on granule
    opendap_base_urls.append(service_link['href'])
    processing_levels.append(record['processing_level_id'])
    versions.append(record['version_id'])
    short_names.append(record['short_name'])
    native_ids.append(record['id'])

n_opendap = len(short_names)
print('provider ', provider, ' has ', n_opendap, ' opendap collections to check', sep='')

provider GES_DISC has 1305 opendap collections to check


In [None]:
# get granule for each collection, try to open with xarray


In [None]:
# Query a single granule record for the first collection in the list.
api = GranuleQuery()
first_short_name, first_version = short_names[0], versions[0]
granule = api.short_name(first_short_name).version(first_version).get(1)

#### Check if dimension variables were read as dimension variables with xarray

In [None]:
def get_dmr(opendap_url: str, timeout: float = 60.0) -> str:
    ''' Given an OPeNDAP url use the requests library to save the
        `.dmr` file locally for `earthdata-varinfo`

        Parameters:
            opendap_url: OPeNDAP url of a granule; `.dmr.xml` is appended
                to it to build the request url.
            timeout: seconds to wait on the server before giving up, so a
                stalled request cannot hang the whole survey.

        Returns:
            Path of the saved file, `<cwd>/dmr_data/<url stem>.dmr`.

        Raises:
            Whatever `response.raise_for_status()` raises for a non-OK
            response. Nothing is written in that case, so the caller never
            receives a path to a missing or stale file.
    '''
    out_path = os.path.join(os.getcwd(), 'dmr_data')
    # `exist_ok` avoids the check-then-create race of `exists` + `mkdir`.
    os.makedirs(out_path, exist_ok=True)
    out_filename = os.path.join(out_path, Path(opendap_url).stem + '.dmr')
    dmr_opendap_url = opendap_url + '.dmr.xml'
    response = requests.get(url=dmr_opendap_url, timeout=timeout)
    # Fail loudly here instead of returning a filename that was never written.
    response.raise_for_status()
    with open(out_filename, 'wb') as f:
        f.write(response.content)
    return out_filename


def is_spatial_temporal_dimension(out_filename: str,
                                  var_name: str) -> str:
    ''' Use `VarInfoFromDmr` to check if a variable
        is a spatial or temporal variable

        Parameters:
            out_filename: path of a locally saved `.dmr` file.
            var_name: variable name without a leading '/'; it is looked
                up as '/' + var_name.

        Returns:
            'Dimension variable not mapped correctly' when the variable
            reports itself as temporal, geographic or a projection x/y
            variable; otherwise 'n/a'. 'n/a' is also returned when the
            lookup yields no variable.
    '''
    # Look the variable up once instead of once per predicate.
    variable = VarInfoFromDmr(out_filename).get_variable('/' + var_name)
    if variable is None:
        # NOTE(review): assumes `get_variable` returns None for a path that
        # is not in the `.dmr` - confirm against earthdata-varinfo.
        return 'n/a'
    if (variable.is_temporal()
            or variable.is_geographic()
            or variable.is_projection_x_or_y()):
        return 'Dimension variable not mapped correctly'
    return 'n/a'

In [None]:
# Open one granule per collection with xarray and record the outcome.
# Exactly one row is recorded per OPeNDAP link, so the five result lists
# always have the same length and the DataFrame can always be built.
message_out = []
short_names_out = []
versions_out = []
opendap_urls_out = []
processing_levels_out = []

opendap_link_title = 'The OPENDAP location for the granule. (GET DATA : OPENDAP DATA)'

for i in range(len(short_names)):
    print(i)
    # A fresh query per collection keeps this cell independent of whatever
    # `api` was last bound to in an earlier cell.
    granule = GranuleQuery().short_name(short_names[i]).version(versions[i]).get(1)
    # A collection may return no granule at all; treat that as "no links"
    # instead of failing on `granule[0]`.
    links = granule[0].get('links', []) if granule else []
    for element in links:
        if element.get('title') != opendap_link_title:
            continue
        # this should return default url so can be on prem or cloud depending on granule
        opendap_url = element['href']

        # Step 1: can xarray open the granule? This alone decides the message.
        try:
            # The context manager closes the dataset once the names are read.
            with xr.open_dataset(opendap_url) as dataset:
                data_vars = list(dataset.data_vars)
            opened = True
            message = 'success'
        except Exception as e:
            opened = False
            data_vars = []
            # Keep the text only; the exception object is not retained.
            message = str(e)

        opendap_urls_out.append(opendap_url)
        message_out.append(message)
        short_names_out.append(short_names[i])
        versions_out.append(versions[i])
        processing_levels_out.append(processing_levels[i])

        # Step 2: check if data variables were determined to be dimensions.
        # A failure here is reported but must not add a second result row.
        if opened:
            try:
                dmr_filename = get_dmr(opendap_url)
                for var_name in data_vars:
                    print(var_name, is_spatial_temporal_dimension(dmr_filename, var_name))
            except Exception as e:
                print('dimension check failed for ' + opendap_url + ': ' + str(e))

    # NOTE: collections without an OPeNDAP link are not recorded.
    # The results file is rewritten after every collection, as in the
    # original (presumably so partial results survive an interrupted run).
    df_out = pd.DataFrame({'short_name':short_names_out,'version':versions_out,'processing_level':processing_levels_out,'message':message_out,'opendap_url':opendap_urls_out})
    df_out.to_csv('results_' + provider +  '.csv')
