In [1]:
from luqum.parser import parser

In [2]:
# Short query, before any synonym expansion.
start_query = "usa AND uav"

# The same query with every term OR-ed with its alternatives, written out by
# hand: one parenthesised group per term, the groups joined with AND.
end_query = " AND ".join([
    '(usa OR "United States" OR US OR "United States of America")',
    '(drone OR UAV)',
])

In [3]:
# Parse the expanded query into a luqum tree; the trailing str() call displays
# the tree serialised back to query text.
end_tree = parser.parse(end_query)
str(end_tree)

'(usa OR "United States" OR US OR "United States of America") AND (drone OR UAV)'

In [4]:
# Display the repr of the parsed tree to inspect its node structure.
end_tree

AndOperation(Group(OrOperation(Word('usa'), Phrase('"United States"'), Word('US'), Phrase('"United States of America"'))), Group(OrOperation(Word('drone'), Word('UAV'))))

In [5]:
# Parse the short query; the trailing str() call displays the tree serialised
# back to query text.
start_tree = parser.parse(start_query)
str(start_tree)

'usa AND uav'

In [6]:
# Display the repr of the parsed tree to inspect its node structure.
start_tree

AndOperation(Word('usa'), Word('uav'))

In [7]:
# Synonym table: query term -> list of alternatives to OR it with.
# Multi-word alternatives are stored as plain text, without quotes.
syns = dict(
    usa=['United States', 'US', 'United States of America'],
    uav=['drone'],
)
syns

{'usa': ['United States', 'US', 'United States of America'], 'uav': ['drone']}

In [8]:
# Walk the direct children of the top-level AND node; printing a child renders
# it as query text (here, the two parenthesised OR groups).
for word in end_tree.children:
    print(word)

(usa OR "United States" OR US OR "United States of America")
(drone OR UAV)


In [9]:
from luqum.visitor import TreeTransformer
from luqum.tree import Item, Word, Phrase, OrOperation, Group, AndOperation

In [10]:
def get_word_node(word: str):
    """Build the luqum term node for a plain-text term.

    A term containing any whitespace is wrapped in double quotes and returned
    as a ``Phrase``; any other term is returned as a ``Word``.

    Args:
        word: Term text, without surrounding quotes. It is used verbatim, so it
            is assumed not to contain double quotes or other query syntax
            characters -- TODO confirm for the synonym sources in use.

    Returns:
        A newly created ``Phrase`` or ``Word`` node.
    """
    # Check for any whitespace, not only ' ': a term whose words are separated
    # by a tab, newline or non-breaking space must be quoted too, otherwise it
    # would be serialised as several separate tokens.
    if any(ch.isspace() for ch in word):
        return Phrase(f'"{word}"')
    return Word(word)

In [18]:
from luqum.auto_head_tail import auto_head_tail, AutoHeadTail

In [24]:
class MyTransformer(TreeTransformer):
    """Tree transformer that ORs every known term with its synonyms.

    A ``Word`` or ``Phrase`` whose text is a key of the synonym table is
    replaced by a ``Group`` wrapping an ``OrOperation`` of the synonyms followed
    by the original term. Every other node goes through ``generic_visit``.

    Args:
        synonyms: Optional mapping from term text (phrases without their
            quotes) to a list of alternative terms. When omitted, the
            notebook-level ``syns`` table is looked up at visit time.
        **kwargs: Forwarded to ``TreeTransformer``.
    """

    def __init__(self, synonyms=None, **kwargs):
        super().__init__(**kwargs)
        self._synonyms = synonyms

    def _expand(self, node: Item, key: str, original: Item, context):
        """Yield the synonym group for ``key``, or defer to ``generic_visit``.

        ``original`` is a fresh node for the term itself; it is appended after
        the synonyms so the term stays part of the expanded group.
        """
        table = syns if self._synonyms is None else self._synonyms
        alternatives = table.get(key)
        if not alternatives:
            # Unknown term, or a key with no alternatives: nothing to expand.
            yield from self.generic_visit(node, context)
            return
        new_nodes = [get_word_node(s) for s in alternatives]
        new_nodes.append(original)
        yield Group(OrOperation(*new_nodes))

    def visit_word(self, node: Item, context):
        """Expand a ``Word`` whose value is a key of the synonym table."""
        yield from self._expand(node, node.value, Word(node.value), context)

    def visit_phrase(self, node: Item, context):
        """Expand a ``Phrase`` whose unquoted text is a key of the synonym table.

        luqum selects the visitor method from the node's class name, so phrases
        need their own handler; ``visit_word`` is not called for them.
        """
        # A Phrase value includes its surrounding double quotes; drop them so
        # the lookup matches the plain-text keys of the synonym table.
        yield from self._expand(node, node.value[1:-1], Phrase(node.value), context)
            

# Expand the short query and print it at each stage.
transformer = MyTransformer()
print(start_tree)  # query as parsed

new_tree = transformer.visit(start_tree)
print(new_tree)  # expanded; the newly built nodes carry no spacing yet

new_tree = auto_head_tail(new_tree)
print(new_tree)  # spacing added by auto_head_tail

usa AND uav
("United States"ORUSOR"United States of America"ORusa)AND(droneORuav)
("United States" OR US OR "United States of America" OR usa) AND (drone OR uav)


In [12]:
# Build the expanded query by hand from tree nodes, one operand per line.
q = AndOperation(
    Group(
        OrOperation(
            Word('usa'),
            Phrase('"United States"'),
            Word('US'),
            Phrase('"United States of America"'),
        )
    ),
    Group(
        OrOperation(
            Word('drone'),
            Word('UAV'),
        )
    ),
)
# Hand-built nodes have no spacing, so the text below runs together.
str(q)

'(usaOR"United States"ORUSOR"United States of America")AND(droneORUAV)'

In [15]:
# Let auto_head_tail add the missing spacing to the hand-built tree, then
# display the resulting query text.
str(auto_head_tail(q))

'(usa OR "United States" OR US OR "United States of America") AND (drone OR UAV)'

In [None]:
# Print the hand-built query through luqum's prettify helper.
# NOTE(review): this cell has no recorded output -- run it to confirm the result.
from luqum.pretty import prettify
print(prettify(q))