In [69]:
from elasticsearch import Elasticsearch
import yaml
import json
from collections import defaultdict

In [70]:
def get_config():
    """Read ``fastapi.yml`` and return its ``ELASTICSEARCH`` section.

    Returns:
        The value stored under the top-level ``"ELASTICSEARCH"`` key of the
        parsed YAML document, or ``None`` when that key is absent.
    """
    with open('fastapi.yml', encoding='utf-8') as config_file:
        settings = yaml.safe_load(config_file)

    return settings.get("ELASTICSEARCH")

In [71]:
def print_geoextent(extent):
    """Print the corner values of ``extent['coordinates']`` on three lines.

    The layout is: ``[0][1]`` indented on the first line, ``[0][0]`` and
    ``[1][0]`` tab-separated on the second, ``[1][1]`` indented on the third.
    Every value is shown with two decimals.
    (Presumably top / left-right / bottom of a bounding box - confirm against
    the index mapping.)
    """
    corners = extent['coordinates']
    print('   {:.2f}\t'.format(corners[0][1]))
    print('{:.2f}\t{:.2f}'.format(corners[0][0], corners[1][0]))
    print('   {:.2f}\t'.format(corners[1][1]))


In [72]:
def print_record(record):
    """Print a short, human-readable summary of one record.

    Shows the record's model, title and keywords, the first 100 characters of
    its abstract, and - when ``geographicExtent`` is truthy - the extent via
    ``print_geoextent``. A blank line terminates the summary.
    """
    for label, key in (('Record_type', 'model'), ('Title', 'title'), ('Keywords', 'keywords')):
        print(f'{label}: {record[key]}')

    preview = record["abstract"][:100]
    print(f'Abstract: {preview}...')

    extent = record['geographicExtent']
    if extent:
        print_geoextent(extent)
    print()

In [73]:
def generate_dict(dep=1):
    """Create an auto-vivifying nested mapping.

    Args:
        dep: Number of nesting levels that are created on first access.

    Returns:
        A ``defaultdict`` whose missing keys produce ``generate_dict(dep - 1)``
        when ``dep`` is greater than 1, a ``defaultdict(dict)`` when ``dep`` is
        exactly 1, and a plain empty ``dict`` otherwise.
    """
    if dep == 1:
        return defaultdict(dict)
    if dep > 1:
        # Each missing key lazily creates the next, shallower level.
        return defaultdict(lambda: generate_dict(dep - 1))
    return {}
    

In [74]:
def construct_query(bbox = '', bbox_relation = 'intersect', fields = [], source = True, size = '', match_phrase='', **terms):
    """Build an Elasticsearch query body and return it as a JSON string.

    Args:
        bbox: When truthy, used as the coordinates of an ``envelope`` shape in
            a ``geo_shape`` filter on ``geographicExtent``.
        bbox_relation: Value stored as the ``relation`` of that filter.
        fields: When truthy, stored under the top-level ``fields`` key.
        source: When falsy, stored under the top-level ``_source`` key.
        size: When truthy, stored under the top-level ``size`` key.
        match_phrase: When truthy, added to ``must`` as a ``query_string``
            clause.
        **terms: Each ``field=values`` pair adds a ``terms`` clause to
            ``must``; every value is lower-cased first.

    Returns:
        The query body serialised with ``json.dumps(..., indent=2)``.
    """
    # Auto-vivifying mapping: deep keys can be assigned without creating
    # every intermediate level by hand.
    body = generate_dict(7)
    bool_clause = body['query']['bool']

    # Every query starts from match_all; constraints are appended after it.
    must_clauses = [{'match_all': {}}]
    bool_clause['must'] = must_clauses
    for field, values in terms.items():
        must_clauses.append({'terms': {field: [value.lower() for value in values]}})

    if bbox:
        extent_filter = bool_clause['filter']['geo_shape']['geographicExtent']
        extent_filter['shape']['type'] = 'envelope'
        extent_filter['shape']['coordinates'] = bbox
        extent_filter['relation'] = bbox_relation

    if fields:
        body['fields'] = fields

    if not source:
        body['_source'] = source

    if size:
        body['size'] = size

    if match_phrase:
        must_clauses.append({'query_string': {'query': match_phrase}})

    return json.dumps(body, indent=2)

In [81]:
from datetime import datetime

# Load the ELASTICSEARCH section of the config file and create the client that
# search() uses; SESSION_KWARGS is expanded into the client's keyword arguments.
conf = get_config()
es = Elasticsearch(**conf['SESSION_KWARGS'])

def search(bbox = '', bbox_relation = 'intersect', fields = [], source = True, size = '', query_string='',  **terms):
    """Run a query against the ``stac-moles-test`` index and return its hits.

    The arguments are forwarded to ``construct_query`` (``query_string`` is
    passed as its ``match_phrase``). Every generated query is appended, with a
    timestamp, to ``my_query.json`` before it is sent.

    Returns:
        The list found under ``response['hits']['hits']``.
    """
    query_body = construct_query(
        bbox=bbox,
        bbox_relation=bbox_relation,
        fields=fields,
        source=source,
        size=size,
        match_phrase=query_string,
        **terms,
    )

    # Keep a running log of every query that was issued.
    with open('my_query.json', 'a') as query_log:
        query_log.write(f"\n{datetime.now()}\n")
        query_log.write(query_body)

    result = es.search(index="stac-moles-test", body=query_body)
    return result['hits']['hits']



In [125]:
def _as_uuid_list(value):
    """Normalise a field that may be None, a single value or a list into a list."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def _instrument_platform_uuids(acquisition_hit):
    """Return the instrument and platform uuids of every pair on one acquisition hit."""
    uuids = []
    # instrumentPlatformPair may be a single mapping or a list of them.
    for ipp in _as_uuid_list(acquisition_hit['_source']['instrumentPlatformPair']):
        uuids += [ipp['instrument'], ipp['platform']]
    return uuids


def get_related_objects_observation(my_record):
    """Collect the uuids of records related to an observation.

    Args:
        my_record: The observation's ``_source`` mapping. The keys
            ``procedureAcquisition``, ``procedureCompositeProcess``,
            ``projects`` and ``uuid`` are read.

    Returns:
        A list (possibly containing duplicates) made of, in order:
        instruments/platforms of the observation's acquisition; computations
        and instruments/platforms reached through its composite process; its
        projects; and the observation collections that list it as a member.
    """
    related_uuids = []

    # instruments and platforms through acquisition route
    acquisition_uuid = my_record['procedureAcquisition']
    if acquisition_uuid is not None:
        # [:1] keeps the "first hit only" behaviour without failing on no hits.
        for acquisition in search(uuid=[acquisition_uuid])[:1]:
            related_uuids += _instrument_platform_uuids(acquisition)

    # computations, instruments and platforms through composite process route
    composite_process_uuid = my_record['procedureCompositeProcess']
    if composite_process_uuid is not None:
        for composite_process in search(uuid=[composite_process_uuid])[:1]:
            composite_source = composite_process['_source']
            related_uuids += _as_uuid_list(composite_source['computationComponent'])

            acquisitions_uuid = _as_uuid_list(composite_source['acquisitionComponent'])
            if acquisitions_uuid:
                # Ask for as many hits as uuids, otherwise the server-side
                # default page size could silently drop some acquisitions.
                acquisitions = search(uuid=acquisitions_uuid, size=len(acquisitions_uuid))
                for acquisition in acquisitions:
                    related_uuids += _instrument_platform_uuids(acquisition)

    # projects
    related_uuids += _as_uuid_list(my_record['projects'])

    # collections that list this observation as a member
    collections = search(model=['observationcollection'], member=[my_record['uuid']], size=10000)
    related_uuids += [hit['_source']['uuid'] for hit in collections]

    return related_uuids



def get_related_objects_project(my_record):
    """Return the uuids of every record that mentions this project's uuid.

    Args:
        my_record: The project's ``_source`` mapping; only ``uuid`` is read.

    Returns:
        The ``uuid`` field of each hit of a free-text search for the project's
        uuid, excluding the project itself.
    """
    uuid = my_record['uuid']
    hits = search(query_string=uuid, fields=['uuid'], size=10000, source=False)
    hit_uuids = [hit['fields']['uuid'][0] for hit in hits]

    # Filter instead of list.remove(): remove() raises ValueError when the
    # project itself is not among the hits (for example on a truncated result).
    return [hit_uuid for hit_uuid in hit_uuids if hit_uuid != uuid]

def get_related_objects_collection(my_record):
    """Return the uuids related to an observation collection.

    Args:
        my_record: The collection's ``_source`` mapping; ``uuid`` and
            ``member`` are read.

    Returns:
        The uuids of project records that mention the collection's uuid,
        followed by the entries of the collection's ``member`` field.
    """
    collection_uuid = my_record['uuid']
    project_hits = search(query_string=collection_uuid, model=['project'], fields=['uuid'], size=10000, source=False)

    related = []
    for hit in project_hits:
        related.append(hit['fields']['uuid'][0])

    related.extend(my_record['member'])
    return related

def _search_referencing(uuids):
    """Return uuid/model hits for records mentioning any of ``uuids``.

    An empty ``uuids`` yields no hits without querying: an empty query string
    would otherwise be treated as "no text filter" and match every record.
    """
    if not uuids:
        return []
    query_string = ' OR '.join([f"({i})" for i in uuids])
    return search(query_string=query_string, fields=['uuid', 'model'], size=10000, source=False)


def _uuids_of_model(hits, model):
    """Return the uuid of every hit whose ``model`` field equals ``[model]``."""
    return [hit['fields']['uuid'][0] for hit in hits if hit['fields']['model'] == [model]]


def get_related_objects_instrument(my_record):
    """Collect the uuids of records related to an instrument.

    Args:
        my_record: The instrument's ``_source`` mapping; ``uuid`` and
            ``subInstrument`` are read.

    Returns:
        A list (possibly containing duplicates) made of, in order:
        observations mentioning an acquisition that mentions the instrument;
        observations mentioning a composite process that mentions such an
        acquisition; platforms paired with the instrument in those
        acquisitions; and the instrument's sub-instruments.
    """
    uuid = my_record['uuid']

    related_uuids = []

    # _source is kept for this search: instrumentPlatformPair is read from it below.
    acquisitions = search(query_string=uuid, model=['acquisition'], fields=['uuid', 'responsiblePartyInfo'], size=10000)
    acquisitions_uuid = [hit['fields']['uuid'][0] for hit in acquisitions]

    # observations that reference one of the acquisitions directly
    rel_to_acq = _search_referencing(acquisitions_uuid)
    related_uuids += _uuids_of_model(rel_to_acq, 'observation')

    # observations that reference a composite process built on those acquisitions
    comps_uuid = _uuids_of_model(rel_to_acq, 'compositeprocess')
    rel_to_comps = _search_referencing(comps_uuid)
    related_uuids += _uuids_of_model(rel_to_comps, 'observation')

    # platforms paired with this instrument
    for acquisition in acquisitions:
        ipp = acquisition['_source']['instrumentPlatformPair']
        # The field holds either one pair or a list of pairs.
        pairs = ipp if isinstance(ipp, list) else [ipp]
        for pair in pairs:
            if pair['instrument'] == uuid:
                related_uuids.append(pair['platform'])

    # A None subInstrument contributes nothing instead of raising TypeError.
    related_uuids += my_record['subInstrument'] or []

    return related_uuids


In [120]:
def get_related_objects(uuid):
    """Return the de-duplicated uuids of all records related to ``uuid``.

    The record is looked up by uuid and the lookup function matching its
    ``model`` is applied to its ``_source``.

    Args:
        uuid: uuid of the record to start from.

    Returns:
        A list of related uuids without duplicates, in first-seen order.

    Raises:
        KeyError: If a matching record has a model with no lookup function.
    """
    functions_map = {
        'observation': get_related_objects_observation,
        'project': get_related_objects_project,
        'observationcollection': get_related_objects_collection,
        'instrument': get_related_objects_instrument,
    }

    related_uuids = []
    hits = search(uuid=[uuid])

    for record in hits:
        source = record['_source']
        model = source['model']
        if model not in functions_map:
            # Same exception type as the plain dict lookup, but self-explanatory.
            raise KeyError(f"no related-object lookup implemented for model {model!r}")
        related_uuids += functions_map[model](source)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is the same on every run (list(set(...)) order varies between kernels).
    return list(dict.fromkeys(related_uuids))

    

In [123]:
# Resolve everything related to one hard-coded record; judging by the pairs
# printed below, this uuid belongs to an instrument.
rel = get_related_objects('c7fa005e2095425392b18adbd7b40617')

{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '922c7e6cc7d04fa78ca9b30cd4d646c8'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '1a354cb774d14dec9fe9a5cdc17b4507'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '99b435052c5a4547bfc03f31249d5bf3'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '76765687413a4a698a6c4b056de2e66c'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '9f680dcc8eb943ccbecbbe62f4b95752'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': '23ca6bfcca9342a18cc15e7b2f3e7e60'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': 'edbc618730c043a383b8fa9b8200cfb6'}
{'instrument': 'c7fa005e2095425392b18adbd7b40617', 'platform': 'c5a778cd1ebc4d928890ec1ffca83521'}
{'instrument': '3a58d003ac2f4236b17dd66c147b6f51', 'platform': 'edbc618730c043a383b8fa9b8200cfb6'}
{'instrument': '3539b96f18644d78aae71517fecfb7e2', 'platform': 'edbc618730c043a383b8fa9b8200cfb6'}
{'instrume

  from ipykernel import kernelapp as app


In [124]:
# Fetch only model and uuid for the related records (at most 100 hits are
# requested, so a longer `rel` would be truncated here).
hits = search(uuid = rel, fields=['model', 'uuid'], source=False, size = 100)

# Plain loop rather than a list comprehension: the prints are the point, and a
# comprehension would only build a throwaway list of None values.
for i in hits:
    print(i['fields'])
print('Number of records:', len(hits))

# Count the related records per model.
grouped_data = defaultdict(int)
for i in hits:
    model = i['fields']['model'][0]
    grouped_data[model] += 1

print(grouped_data)

{'uuid': ['edbc618730c043a383b8fa9b8200cfb6'], 'model': ['platform']}
{'uuid': ['23ca6bfcca9342a18cc15e7b2f3e7e60'], 'model': ['platform']}
{'uuid': ['922c7e6cc7d04fa78ca9b30cd4d646c8'], 'model': ['platform']}
{'uuid': ['1a354cb774d14dec9fe9a5cdc17b4507'], 'model': ['platform']}
{'uuid': ['99b435052c5a4547bfc03f31249d5bf3'], 'model': ['platform']}
{'uuid': ['76765687413a4a698a6c4b056de2e66c'], 'model': ['platform']}
{'uuid': ['9f680dcc8eb943ccbecbbe62f4b95752'], 'model': ['platform']}
{'uuid': ['c5a778cd1ebc4d928890ec1ffca83521'], 'model': ['platform']}
{'uuid': ['7e23b82ec3bdc8e5297c0b623697c559'], 'model': ['observation']}
{'uuid': ['7a222739bad13cc3e60b3d615335f2f5'], 'model': ['observation']}
{'uuid': ['da4da4fad5a74db486db3e4a13adbe9d'], 'model': ['observation']}
Number of records: 11
defaultdict(<class 'int'>, {'platform': 8, 'observation': 3})


  from ipykernel import kernelapp as app


In [34]:
# Request up to 1000 observation records, returning only the uuid and
# procedureCompositeProcess fields (source=False leaves _source out).
hits = search(model=['observation'], size=1000, fields=['procedureCompositeProcess', 'uuid'], source=False)


  if sys.path[0] == '':


In [35]:
# Keep only the hits for which a procedureCompositeProcess field was returned.
hits_filtered = [i for i in hits if 'procedureCompositeProcess' in i['fields']]
hits_filtered

[{'_index': 'stac-moles-test',
  '_type': '_doc',
  '_id': '4lbPUYgBwXMFoKp9sO-P',
  '_score': 2.0,
  '_ignored': ['abstract.keyword',
   'responsiblePartyInfo.party.description.keyword'],
  'fields': {'uuid': ['aaa8bc3bbc355e950b601f06e4f8cd47'],
   'procedureCompositeProcess': ['74e6cc08d687478ba039a5798b31998a']}},
 {'_index': 'stac-moles-test',
  '_type': '_doc',
  '_id': '6FbPUYgBwXMFoKp9se_l',
  '_score': 2.0,
  '_ignored': ['abstract.keyword',
   'responsiblePartyInfo.party.description.keyword'],
  'fields': {'uuid': ['768d0d9528c87b0cb406243564ea8dd6'],
   'procedureCompositeProcess': ['ee8de65a7a3a49d19d58efe9a5958c4f']}},
 {'_index': 'stac-moles-test',
  '_type': '_doc',
  '_id': '6VbPUYgBwXMFoKp9su8I',
  '_score': 2.0,
  '_ignored': ['abstract.keyword',
   'responsiblePartyInfo.party.description.keyword'],
  'fields': {'uuid': ['00e2ba205fd43a390c168841c3ca8aa3'],
   'procedureCompositeProcess': ['97a21cca08234186b64e3466643bcd96']}},
 {'_index': 'stac-moles-test',
  '_type'

In [26]:
# NOTE(review): `rec` is not assigned in any cell visible in this notebook -
# presumably left over from an earlier session; define it before re-running.
rec['_source']

{'title': 'TOVS data onboard POES (1978-1998)',
 'abstract': 'These data consist of sets of 3-dimensional gridpoint analyses of the stratosphere which are produced by the Met Office using data from the TIROS Operational Vertical Sounder (TOVS) instruments onboard the NOAA (National Ocean and Atmospheric Administration) operational polar orbiters. TOVS consists of 3 instruments, the Stratospheric Sounding Unit (SSU) the Microwave Sounding Unit (MSU) and the High Resolution Infrared Sounder (HIRS). Daily radiance and geopotential height data are available on a 5 degree latitude / longitude global grid from December 1978 to April 1997. Software is provided to derive potential vorticity. Access permission required so that PI can monitor usage of data.',
 'submissionUserID': None,
 'creationDate': '2022-07-22T09:15:57.183Z',
 'lastUpdatedDate': '2022-07-22T09:15:57.272Z',
 'latestDataUpdateTime': '2014-09-28T09:45:14Z',
 'updateFrequency': 'notPlanned',
 'dataLineage': 'VMS files originally

In [None]:
# Up to 20 observations whose geographicExtent relates to the envelope as 'within'.
# The two corners appear to be [[west, north], [east, south]], matching the
# layout print_geoextent uses - TODO confirm.
response = search(model=['observation'], bbox = [[-8, 59], [2, 49]], bbox_relation='within', size=20)
response