Install dependencies

In [None]:
# Stanza - tokenizing, POS tagging, named entities (Apache v2 license)
!pip install stanza

In [None]:
import stanza
# Fetch the English models, then build the pipeline once; get_what and
# get_conditions further down call this shared `nlp` object.
stanza.download('en')       # This downloads the English models for the neural pipeline
nlp = stanza.Pipeline('en')

In [None]:
!pip install nltk

In [None]:
import nltk
# Download NLTK's 'popular' data collection; presumably this covers the
# tokenizer and POS-tagger data that get_what2 needs - TODO confirm.
nltk.download('popular')

In [None]:
# geotext - detect places (MIT license)
# geogapy is better, but has dependency issues
!pip install geotext

Saisir une requête en langue naturelle :

In [None]:
# Read the natural-language query text from the user.
# Example: Sentinel-2 over Ottawa from april to september 2020 with cloud cover less than 20%
query=input()

In [None]:
# Display the captured query to confirm what was entered.
query

In [None]:
# extract nouns and proper nouns (candidate "what" terms) using Stanza POS tags
import stanza

def get_what(text):
    """Collect the nouns and proper nouns found in ``text``.

    A period is appended to ``text`` before it is handed to the shared
    ``nlp`` pipeline. For every sentence a header row is printed, followed by
    one row per word showing its text, lemma, POS tag and dependency relation.

    Args:
        text: The query string to analyse.

    Returns:
        list: The text of each word whose POS tag is ``PROPN`` or ``NOUN``,
        in document order.
    """
    print("WHAT:")
    noun_tags = {"PROPN", "NOUN"}
    found = []
    parsed = nlp(text + ".")
    for sent in parsed.sentences:
        print("word\t\tlemma\t\tPOS\t\tdeprel")
        for tok in sent.words:
            # sep reproduces the " \t\t " spacing between the four columns
            print(tok.text, tok.lemma, tok.pos, tok.deprel, sep=" \t\t ")
            # keep only noun-like words
            if tok.pos in noun_tags:
                found.append(tok.text)
    return found

# Try it out on the query entered above.
get_what(query)

In [None]:
# extract noun chunks (a noun optionally followed by a number) with NLTK
import nltk

def get_what2(text):
    """Extract noun chunks from ``text`` using NLTK.

    Each sentence is tokenized and POS-tagged, then a regular-expression
    chunker picks out every token tagged ``NN`` or ``NNP`` together with an
    optional ``CD``-tagged token directly after it. Tokens, tags and matched
    chunks are printed along the way.

    Args:
        text: The query string to analyse.

    Returns:
        list: The token strings of all matched chunks, in order of appearance.
    """
    print("WHAT:")
    # The grammar never changes, so build the chunk parser once rather than
    # once per sentence.
    grammar = "CHUNK: {<NN|NNP><CD>?}"
    chunker = nltk.RegexpParser(grammar)

    chunks = []
    for sent in nltk.sent_tokenize(text):
        tokens = nltk.word_tokenize(sent)
        print("Tokens: ", tokens)
        tags = nltk.pos_tag(tokens)
        print("POS tags: ", tags)
        tree = chunker.parse(tags)
        print("RegEx grammar matches: ")
        for subtree in tree.subtrees():
            # subtrees() also yields the root; only CHUNK nodes are wanted
            if subtree.label() != 'CHUNK':
                continue
            print(subtree)
            # each leaf is a (token, tag) pair; keep the token text only
            chunks.extend(leaf[0] for leaf in subtree)
    return chunks

# Try it out on the query entered above.
get_what2(query)

In [None]:
# extract geographical named entities
from geotext import GeoText

def get_where(text):
    """Pick the first place name that GeoText detects in ``text``.

    The detected countries, ``country_mentions`` and cities are printed
    first. Cities take priority over countries.

    Args:
        text: The query string to analyse.

    Returns:
        The first detected city if there is one, otherwise the first detected
        country, otherwise ``None``.
    """
    print("WHERE:")
    geo = GeoText(text)
    print(f"Countries: {geo.countries!s} {geo.country_mentions!s}")
    print(f"Cities: {geo.cities!s}")
    # return bbox of place?
    for candidates in (geo.cities, geo.countries):
        if candidates:
            return candidates[0]
    return None

# Try it out on the query entered above.
get_where(query)

In [None]:

def get_when(text):
    """Placeholder for temporal extraction.

    No analysis of ``text`` is performed yet: the function prints its section
    header and always yields an empty string.

    Args:
        text: The query string (currently unused).

    Returns:
        str: Always ``""``.
    """
    print("WHEN:")
    no_result = ""
    return no_result

# Try it out on the query entered above.
get_when(query)

In [None]:
def get_conditions(text):
    """Collect the numeric-style named entities in ``text``.

    ``text`` is run through the shared ``nlp`` pipeline. For each sentence
    that has at least one entity, every ``(text, type)`` pair is printed, and
    entities typed PERCENT, CARDINAL, ORDINAL or QUANTITY are kept.

    Args:
        text: The query string to analyse.

    Returns:
        list: The text of each retained entity, in document order.
    """
    print("CONDITIONS:")
    wanted_types = {"PERCENT", "CARDINAL", "ORDINAL", "QUANTITY"}
    matched = []
    # create chunk matching rules?
    parsed = nlp(text)
    for sent in parsed.sentences:
        entities = sent.ents
        if not entities:
            # sentences without entities print nothing
            continue
        print([(e.text, e.type) for e in entities])
        matched.extend(e.text for e in entities if e.type in wanted_types)
    return matched

# Try it out on the query entered above.
get_conditions(query)

In [None]:
# process the query and return key-value dictionary with extracted parameters
def process_query(text):
    """Run every extractor on ``text`` and gather the results.

    The extractors are called in the order what, where, when, conditions, so
    their printed output appears in that order.

    Args:
        text: The query string to analyse.

    Returns:
        dict: Keys ``'what'``, ``'where'``, ``'when'`` and ``'conditions'``
        mapped to the return values of ``get_what``, ``get_where``,
        ``get_when`` and ``get_conditions`` respectively.
    """
    return {
        'what': get_what(text),              # What? - platform/collection
        'where': get_where(text),            # Where? - GeoNER
        'when': get_when(text),              # When? - detect time
        'conditions': get_conditions(text),  # Conditions? - other variables
    }
    
# Run the full extraction on the query entered above and display the result.
process_query(query) 