In [1]:
from elasticsearch import Elasticsearch
import yaml
import json
from collections import defaultdict

In [2]:
def get_config():
    """Return the ``ELASTICSEARCH`` section of ``fastapi.yml``.

    The file is opened relative to the current working directory, decoded as
    UTF-8 and parsed with ``yaml.safe_load``. The value stored under the
    top-level ``ELASTICSEARCH`` key is returned (``None`` if the key is absent).
    """
    with open('fastapi.yml', encoding='utf-8') as stream:
        settings = yaml.safe_load(stream)

    return settings.get("ELASTICSEARCH")

In [3]:
def print_geoextent(extent):
    """Print the four values of ``extent['coordinates']`` in a compass-like layout.

    Three lines are written to stdout, every number formatted with two decimals:
    ``coordinates[0][1]`` on top, ``coordinates[0][0]`` and ``coordinates[1][0]``
    separated by a tab in the middle, and ``coordinates[1][1]`` at the bottom.
    """
    corners = extent['coordinates']
    # NOTE(review): presumably corners is [[west, north], [east, south]] -- confirm.
    print('   {:.2f}\t'.format(corners[0][1]))
    print('{:.2f}\t{:.2f}'.format(corners[0][0], corners[1][0]))
    print('   {:.2f}\t'.format(corners[1][1]))


In [4]:
def print_record(record):
    """Print a short, human-readable summary of one record to stdout.

    Shows the ``model``, ``title`` and ``keywords`` values, the first 100
    characters of ``abstract`` followed by ``...``, and - when
    ``geographicExtent`` is truthy - its layout via ``print_geoextent``.
    A blank line is always printed last.
    """
    labelled_keys = (
        ('Record_type', 'model'),
        ('Title', 'title'),
        ('Keywords', 'keywords'),
    )
    for label, key in labelled_keys:
        print('{}: {}'.format(label, record[key]))

    # Only a preview of the abstract is shown; the ellipsis is added unconditionally.
    preview = record["abstract"][:100]
    print('Abstract: {}...'.format(preview))

    extent = record['geographicExtent']
    if extent:
        print_geoextent(extent)

    print()

In [5]:
def generate_dict(dep=1):
    """Create an auto-vivifying dictionary nested ``dep`` levels deep.

    * ``dep > 1``  -> a ``defaultdict`` whose missing keys are filled with
      ``generate_dict(dep - 1)``.
    * ``dep == 1`` -> a ``defaultdict`` whose missing keys are filled with
      plain ``dict`` objects.
    * otherwise    -> an ordinary empty ``dict``.
    """
    if dep == 1:
        return defaultdict(dict)

    if dep > 1:
        def _one_level_shallower():
            # Evaluated lazily, each time a missing key is accessed.
            return generate_dict(dep - 1)

        return defaultdict(_one_level_shallower)

    return {}
    

In [42]:
def construct_query(bbox='', bbox_relation='intersect', fields=None, source=True, size='', **terms):
    """Build an Elasticsearch search body and return it as a JSON string.

    Parameters
    ----------
    bbox : list, optional
        Coordinates of an ``envelope`` shape used to filter on the
        ``geographicExtent`` field. No spatial filter is added when falsy.
    bbox_relation : str, optional
        Spatial relation written into the ``geo_shape`` filter. The legacy
        spelling ``'intersect'`` (this function's default) is translated to
        ``'intersects'``, the name Elasticsearch documents; any other value is
        passed through unchanged.
    fields : list of str, optional
        Written to the top-level ``fields`` key when non-empty.
    source : bool, optional
        When falsy, the value is written to the top-level ``_source`` key;
        otherwise ``_source`` is omitted.
    size : int, optional
        Written to the top-level ``size`` key. ``''`` and ``None`` mean
        "not specified"; ``0`` is honoured.
    **terms
        ``field=values`` pairs. Each pair becomes a ``terms`` clause appended
        to ``bool.must``. String values are lower-cased; a single scalar
        (e.g. ``model='observation'``) is treated as a one-element list.

    Returns
    -------
    str
        The query serialised with ``json.dumps(..., indent=2)``.
    """
    # match_all keeps ``must`` non-empty when no term constraints are given.
    must_clauses = [{'match_all': {}}]

    for field, values in terms.items():
        # A bare string would otherwise be iterated character by character.
        if isinstance(values, str) or not hasattr(values, '__iter__'):
            values = [values]
        normalised = [v.lower() if isinstance(v, str) else v for v in values]
        must_clauses.append({'terms': {field: normalised}})

    bool_query = {'must': must_clauses}

    if bbox:
        # 'intersect' is not a relation name Elasticsearch documents; map it to
        # 'intersects' so the default argument yields a usable query.
        relation = 'intersects' if bbox_relation == 'intersect' else bbox_relation
        bool_query['filter'] = {
            'geo_shape': {
                'geographicExtent': {
                    'shape': {'type': 'envelope', 'coordinates': bbox},
                    'relation': relation,
                }
            }
        }

    my_query = {'query': {'bool': bool_query}}

    if fields:
        my_query['fields'] = list(fields)

    if not source:
        my_query['_source'] = source

    # Explicit sentinel check so that size=0 is not silently discarded.
    if size is not None and size != '':
        my_query['size'] = size

    return json.dumps(my_query, indent=2)

In [34]:
# Load the ELASTICSEARCH settings returned by get_config() and create the
# client, unpacking the SESSION_KWARGS entry as keyword arguments.
conf = get_config()
es = Elasticsearch(**conf['SESSION_KWARGS'])

In [38]:
# Example query: `model` term 'observation', geographicExtent 'within' the
# given envelope, size set to 20. The result is a JSON string.
# NOTE(review): bbox is presumably [[west, north], [east, south]] -- confirm.
my_query = construct_query(model=['observation'], bbox = [[-8, 59], [2, 49]], bbox_relation='within', size=20)

In [43]:
# Keep a copy of the generated query on disk for inspection.
with open('my_query.json', 'w') as query_file:
    query_file.write(my_query)

# Run the search against the test index.
response = es.search(index="stac-moles-test", body=my_query)

hits = response['hits']
print('Number of responses', hits['total']['value'])

# Dump every field of every returned document.
for hit in hits['hits']:
    for field_name, field_value in hit['_source'].items():
        print(field_name, field_value)

  after removing the cwd from sys.path.


Number of responses 1178
title Daily Mean, Minimum and Maximum Central England Temperature series
abstract The longest available instrumental record of temperature in the world is now available at the BADC. The daily data starts in 1772. 

The mean, minimum and maximum datasets are updated monthly, with data for a month usually available by the 3rd of the next month. A provisional CET value for the current month is calculated on a daily basis. The mean daily data series begins in 1772. Mean maximum and minimum daily and monthly data are also available, beginning in 1878. Yearly files are provided from 1998 onwards.

These historical temperature series are representative of the Midlands region in England, UK (a roughly triangular area of the United Kingdom enclosed by Bristol, Lancashire and London). 

The following stations are used by the Met Office to compile the CET data: Rothamsted, Malvern, Squires Gate and Ringway.

But in November 2004, the weather station Stonyhurst replaced Ri