## Load topics

In [1]:
import pandas as pd

#path = "G:/python/anserini/src/main/resources/topics-and-qrels/"
path = "C:/Users/Thijs-Jan-Luttikholt/Documents/Study/Master/Year 1/Information retrieval/anserini/src/main/resources/topics-and-qrels/"
topics_file = "topics.dl19-doc.txt"

topics = pd.read_csv(path+topics_file,sep="\t",names=["ID","Question"])
topics["ID"]= topics["ID"].astype(str)

topics.head()

Unnamed: 0,ID,Question
0,156493,do goldfish grow
1,1110199,what is wifi vs bluetooth
2,1063750,why did the us volunterilay enter ww1
3,130510,definition declaratory judgment
4,489204,right pelvic pain causes


## Experiments
Results file format from: http://www.rafaelglater.com/en/post/learn-how-to-use-trec_eval-to-evaluate-your-information-retrieval-system

In [2]:
from pyserini.search import SimpleSearcher

#searcher = SimpleSearcher('G:/python/anserini/lucene-index.robust04.pos+docvectors+rawdocs')
#searcher = SimpleSearcher('C:/msmarco-doc/lucene-index.msmarco-doc.pos+docvectors+rawdocs')
searcher = SimpleSearcher('C:/Users/Thijs-Jan-Luttikholt/Documents/Study/Master/Year 1/Information retrieval/Project/lucene-index.msmarco-doc.pos+docvectors+rawdocs/lucene-index.msmarco-doc.pos+docvectors+rawdocs')

### BM25

In [None]:
searcher.set_bm25(0.9, 0.4)

f = open("runs/bm25.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')   
f.close()

### BM25+RM3

In [21]:
searcher.set_bm25(0.9, 0.4)
searcher.set_rm3(10, 10, 0.5,rm3_output_query=True)

f = open("runs/bm25+rm3.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

searcher.unset_rm3()

### Word2Vec QE

In [53]:
import gensim 
from gensim.models import Word2Vec
import gensim.downloader as api
import spacy
from pyserini.analysis import Analyzer, get_lucene_analyzer

path = "G:/data/glove.6B.300d.w2v.txt"
model = gensim.models.KeyedVectors.load_word2vec_format(path, binary = False)
word_vectors = model.wv

nlp = spacy.load("en_core_web_sm")
analyzer = Analyzer(get_lucene_analyzer())

ModuleNotFoundError: No module named 'spacy'

In [75]:
from pyserini.search import querybuilder as qb

f = open("runs/word2vec.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # word2vec expansion
            if token in word_vectors.vocab and not token in nlp.Defaults.stop_words:
                token_lemma = nlp(token)[0].lemma_
                syns = model.most_similar(token)
                synonyms = [syn for (syn,score) in syns if nlp(syn)[0].lemma_ != token_lemma][:1]
                for synonym in synonyms:
                    if len(analyzer.analyze(synonym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(token), 0.1),should)
                        print("    "+token+" -> "+synonym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:
    goldfish -> koi
    grow -> growth


'what is wifi vs bluetooth' expansions:
    wifi -> wi-fi
    vs -> vs.
    bluetooth -> wifi


'why did the us volunterilay enter ww1' expansions:
    enter -> entry
    ww1 -> ww2


'definition declaratory judgment' expansions:
    definition -> defined
    declaratory -> injunctive
    judgment -> judgement


'right pelvic pain causes' expansions:
    right -> left
    pelvic -> abdominal
    pain -> discomfort
    causes -> disease


'what are the social determinants of health' expansions:
    social -> welfare
    determinants -> predictors
    health -> care


'how is the weather in jamaica' expansions:
    weather -> inclement
    jamaica -> barbados


'types of dysarthria from cerebral palsy' expansions:
    types -> kinds
    dysarthria -> ataxic
    cerebral -> palsy
    palsy -> cerebral


'who is robert gray' expansions:
    robert -> william
    gray -> grey


'what types of food can you cook sous vide

## Wordnet QE

In [3]:
import nltk
from nltk.corpus import wordnet as wn 
from nltk.tokenize import sent_tokenize, word_tokenize 
import warnings 
from nltk.corpus import words
from operator import itemgetter
from pyserini.search import querybuilder as qb
from pyserini.analysis import Analyzer, get_lucene_analyzer
import spacy

In [4]:
nltk.download('words')

[nltk_data] Downloading package words to C:\Users\Thijs-Jan-
[nltk_data]     Luttikholt\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [5]:
#The following function creates a set containing synonyms for each word in the input list.
def add_synonyms(myList):
    synonyms = []
    for word in myList:
        for syn in wn.synsets(word):
            for l in syn.lemmas():
                synonyms.append(l.name())
    return set(synonyms)

#The following function creates a set containing hypernyms for each word in the input list.
def add_hypernyms(myList):
    hypernyms = []
    for word in myList:
        for syn in wn.synsets(word):
            for h in syn.hypernyms():
                hypernyms.append(h.name().split('.')[0])
    return set(hypernyms)

#The following function creates a set containing hyponyms for each word in the input list.
def add_hyponyms(myList):
    hyponyms = []
    for word in myList:
        for syn in wn.synsets(word):
            for h in syn.hyponyms():
                hyponyms.append(h.name().split('.')[0])
    return set(hyponyms)

#The following function creates a set containing meronyms for each word in the input list.
def add_meronyms(myList):
    meronyms = []
    for word in myList:
        for syn in wn.synsets(word):
            for m in syn.part_meronyms():
                meronyms.append(m.name().split('.')[0])
    return set(meronyms)

#The following function creates a set containing holonyms for each word in the input list.
def add_holonyms(myList):
    holonyms = []
    for word in myList:
        for syn in wn.synsets(word):
            for m in syn.part_holonyms():
                holonyms.append(m.name().split('.')[0])
    return set(holonyms)

In [6]:
def best_similars(original, synonyms, max_len, min_sim):
    syns_scores = []
    orig_set = wn.synsets(original)[0]
    set_type = orig_set.name().split('.')[1]
    
    i = 1
    for word in synonyms:
        word_sets = wn.synsets(word)
        word_set = None
        for item in word_sets:
            if item.name().split('.')[1] is set_type:
                word_set = item
                break
        if word_set is None:
            continue
        score = orig_set.path_similarity(word_set) 
        if score is None:
            continue
        if score >= min_sim:
            if len(syns_scores) < max_len:
                syns_scores.append([word,score])
            elif len(syns_scores) == max_len and score > syns_scores[max_len-1][1]:
                syns_scores[max_len-1] = [word,score]
            syns_scores = sorted(syns_scores, key=itemgetter(1))
    final_syns = [word for word,score in syns_scores if word != original]
    return final_syns

def filter_words(myList):
    new_list = []
    for item in myList:
        if '_' not in item:
            new_list.append(item)
    return new_list


best_syns = best_similars('dog', add_synonyms(['dog']), 100, 0.2)
print(filter_words(best_syns))

['hound']


In [7]:
analyzer = Analyzer(get_lucene_analyzer())
nlp = spacy.load("en_core_web_sm")

##### Only adding synonyms

In [13]:
f = open("runs/wordnet.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # wordnet expansion
            if token not in nlp.Defaults.stop_words and token in words.words():
                token_lemma = nlp(token)[0].lemma_
                
                wordnet_synonyms = add_synonyms([token])
                synonyms = best_similars(token, wordnet_synonyms, 3, 0.2)
                synonyms = filter_words(synonyms)
                for synonym in synonyms:
                    if len(analyzer.analyze(synonym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(synonym), 0.1),should)
                        print("    "+token+" -> "+synonym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:
    grow -> uprise
    grow -> rise


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:


'definition declaratory judgment' expansions:
    declaratory -> declarative
    declaratory -> asserting
    judgment -> opinion


'right pelvic pain causes' expansions:
    pain -> hurting


'what are the social determinants of health' expansions:
    social -> sociable
    social -> mixer
    health -> wellness


'how is the weather in jamaica' expansions:


'types of dysarthria from cerebral palsy' expansions:
    palsy -> paralysis


'who is robert gray' expansions:
    gray -> Gray
    gray -> grayness


'what types of food can you cook sous vide' expansions:
    food -> nutrient
    cook -> Cook


'how long is life cycle of flea' expansions:
    long -> hanker
    long -> yearn
    life -> aliveness
    life -> animation


'what can contour plowing reduce' expansions:
    plowing -> ploughing
    reduce -> slenderize

##### Only adding hypernyms

In [15]:
f = open("runs/wordnet_hypernyms.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # wordnet expansion
            if token not in nlp.Defaults.stop_words and token in words.words():
                token_lemma = nlp(token)[0].lemma_
                
                wordnet_hypernyms = add_hypernyms([token])
                hypernyms = best_similars(token, wordnet_hypernyms, 3, 0.2)
                hypernyms = filter_words(hypernyms)
                for hypernym in hypernyms:
                    if len(analyzer.analyze(hypernym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(hypernym), 0.1),should)
                        print("    "+token+" -> "+hypernym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:
    goldfish -> cyprinid
    grow -> develop
    grow -> increase
    grow -> become


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:
    enter -> save


'definition declaratory judgment' expansions:
    definition -> explanation
    judgment -> wisdom
    judgment -> opinion


'right pelvic pain causes' expansions:
    right -> abstraction
    pain -> symptom


'what are the social determinants of health' expansions:
    health -> wellbeing


'how is the weather in jamaica' expansions:


'types of dysarthria from cerebral palsy' expansions:
    palsy -> dysfunction


'who is robert gray' expansions:
    gray -> color


'what types of food can you cook sous vide' expansions:
    food -> solid
    food -> substance


'how long is life cycle of flea' expansions:
    long -> desire
    life -> animation
    life -> being
    flea -> ectoparasite
    flea -> insect


'what can contour plowing reduce' expansions:


##### Only adding hyponyms

In [17]:
f = open("runs/wordnet_hyponyms.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # wordnet expansion
            if token not in nlp.Defaults.stop_words and token in words.words():
                token_lemma = nlp(token)[0].lemma_
                
                wordnet_hyponyms = add_hyponyms([token])
                hyponyms = best_similars(token, wordnet_hyponyms, 3, 0.2)
                hyponyms = filter_words(hyponyms)
                for hyponym in hyponyms:
                    if len(analyzer.analyze(hyponym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(hyponym), 0.1),should)
                        print("    "+token+" -> "+hyponym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:
    grow -> fledge
    grow -> come
    grow -> bald


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:
    enter -> score
    enter -> re-enter
    enter -> intrude


'definition declaratory judgment' expansions:
    definition -> redefinition
    judgment -> opinion


'right pelvic pain causes' expansions:
    right -> access
    pain -> neuralgia
    pain -> photalgia


'what are the social determinants of health' expansions:


'how is the weather in jamaica' expansions:
    weather -> elements


'types of dysarthria from cerebral palsy' expansions:
    palsy -> alalia
    palsy -> paresis
    palsy -> hemiplegia


'who is robert gray' expansions:
    gray -> iron-grey
    gray -> dapple-grey


'what types of food can you cook sous vide' expansions:
    food -> chocolate
    food -> pabulum
    food -> beverage
    cook -> preserver
    cook -> seasoner


'how long is life cycle of flea' expansions:
    long

##### Only adding meronyms

In [21]:
f = open("runs/wordnet_meronyms.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # wordnet expansion
            if token not in nlp.Defaults.stop_words and token in words.words():
                token_lemma = nlp(token)[0].lemma_
                
                wordnet_meronyms = add_meronyms([token])
                meronyms = best_similars(token, wordnet_meronyms, 3, 0.1)
                meronyms = filter_words(meronyms)
                for meronym in meronyms:
                    if len(analyzer.analyze(meronym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(meronym), 0.1),should)
                        print("    "+token+" -> "+meronym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:


'definition declaratory judgment' expansions:


'right pelvic pain causes' expansions:


'what are the social determinants of health' expansions:


'how is the weather in jamaica' expansions:


'types of dysarthria from cerebral palsy' expansions:


'who is robert gray' expansions:


'what types of food can you cook sous vide' expansions:


'how long is life cycle of flea' expansions:
    life -> age
    life -> past
    cycle -> sprocket
    cycle -> chain
    cycle -> phase


'what can contour plowing reduce' expansions:


'when was the salvation army founded' expansions:


'what is a active margin' expansions:


'difference between rn and bsn' expansions:


'medicare's definition of mechanical ventilation' expansions:


'how to find the midsegment of a trapezoid' expansions:


'what is an aml surveillance analyst' expansions:


'what is the daily life of t

##### Only adding holonyms

In [20]:
f = open("runs/wordnet_holonyms.txt", "w") 
for (topicid,question) in zip(topics["ID"], topics["Question"]):
    # Build the query
    builder = qb.get_boolean_query_builder()
    should = qb.JBooleanClauseOccur['should'].value # should occur
    print("'"+question+"' expansions:")
    for token in question.split(" "):
        if len(analyzer.analyze(token))>0:
            # add question token
            builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
            
            # wordnet expansion
            if token not in nlp.Defaults.stop_words and token in words.words():
                token_lemma = nlp(token)[0].lemma_
                
                wordnet_holonyms = add_holonyms([token])
                holonyms = best_similars(token, wordnet_holonyms, 3, 0.1)
                holonyms = filter_words(holonyms)
                for holonym in holonyms:
                    if len(analyzer.analyze(holonym))>0:
                        # add query term
                        builder.add(qb.get_boost_query(qb.get_term_query(holonym), 0.1),should)
                        print("    "+token+" -> "+holonym)
            
    question = builder.build()
    print("\n")

    hits = searcher.search(question)
    for i in range(0, 10):
        f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
f.close()

'do goldfish grow' expansions:


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:


'definition declaratory judgment' expansions:


'right pelvic pain causes' expansions:


'what are the social determinants of health' expansions:


'how is the weather in jamaica' expansions:


'types of dysarthria from cerebral palsy' expansions:


'who is robert gray' expansions:


'what types of food can you cook sous vide' expansions:


'how long is life cycle of flea' expansions:
    cycle -> kilohertz


'what can contour plowing reduce' expansions:


'when was the salvation army founded' expansions:


'what is a active margin' expansions:


'difference between rn and bsn' expansions:


'medicare's definition of mechanical ventilation' expansions:


'how to find the midsegment of a trapezoid' expansions:


'what is an aml surveillance analyst' expansions:


'what is the daily life of thai people' expansions:


'definition of a sigmet' expansions:


'cost

##### Generic: possibility of using multiple wordnet extensions: Version 1

This version requires separate min and max values for each extension type.

In [57]:
def extend_syn(token, expansions, max_size, min_score):
    wordnet_synonyms = add_synonyms([token])
    synonyms = best_similars(token, wordnet_synonyms, max_size, min_score)
    synonyms = filter_words(synonyms)
    expansions.extend(synonyms)
    return expansions

def extend_hyper(token, expansions, max_size, min_score):
    wordnet_hypernyms = add_hypernyms([token])
    hypernyms = best_similars(token, wordnet_hypernyms, max_size, min_score)
    hypernyms = filter_words(hypernyms)
    expansions.extend(hypernyms)
    return expansions

def extend_hypo(token, expansions, max_size, min_score):
    wordnet_hyponyms = add_hyponyms([token])
    hyponyms = best_similars(token, wordnet_hyponyms, max_size, min_score)
    hyponyms = filter_words(hyponyms)
    expansions.extend(hyponyms)
    return expansions

def extend_mero(token, expansions, max_size, min_score):
    wordnet_meronyms = add_meronyms([token])
    meronyms = best_similars(token, wordnet_meronyms, max_size, min_score)
    meronyms = filter_words(meronyms)
    expansions.extend(meronyms)
    return expansions

def extend_holo(token,expansions, max_size, min_score):
    wordnet_holonyms = add_holonyms([token])
    holonyms = best_similars(token, wordnet_holonyms, max_size, min_score)
    holonyms = filter_words(holonyms)
    expansions.extend(holonyms)
    return expansions

def extend_index(index, token, expansions, max_size, min_score):
    switch = {
        0: extend_syn,
        1: extend_hyper,
        2: extend_hypo,
        3: extend_mero,
        4: extend_holo
    }
    func = switch.get(index, "invalid index")
    return func(token, expansions,max_size, min_score)

In [58]:

#Note that in the below function, the 'to_add' parameter should contain 5 3-tuples. 
#The tuples represent synonyms, hypernyms, hyponyms, meronyms and holonyms in that order.
#Each 3-tuple should have: Boolean (should this type be added), max_size (how many 
#words may be added for this type), and min_score (what should the additions have as minimum similarity score)
def wordnet_expansions(filename, topics, to_add):#syn, hyper, hypo, mero, holo):
    f = open(filename, "w") 
    for (topicid,question) in zip(topics["ID"], topics["Question"]):
        # Build the query
        builder = qb.get_boolean_query_builder()
        should = qb.JBooleanClauseOccur['should'].value # should occur
        print("'"+question+"' expansions:")
        for token in question.split(" "):
            if len(analyzer.analyze(token))>0:
                # add question token
                builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
                
                # wordnet expansion
                if token not in nlp.Defaults.stop_words and token in words.words():
                    token_lemma = nlp(token)[0].lemma_
                    
                    expansions = []
                    for ind, item in enumerate(to_add):
                        if item[0]:
                            expansions = extend_index(ind, token, expansions, item[1], item[2])
                    
                    for word in set(expansions):
                        if len(analyzer.analyze(word))>0:
                            # add query term
                            builder.add(qb.get_boost_query(qb.get_term_query(word), 0.1),should)
                            print("    "+token+" -> "+word)
            
        question = builder.build()
        print("\n")

        hits = searcher.search(question)
        for i in range(0, 10):
            f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
    f.close()

In [59]:
wordnet_expansions('runs/wordnet_all.txt', topics, [(True,3,0.2),(True,3,0.2),(True,3,0.2),(True,3,0.1),(True,3,0.1)])

'do goldfish grow' expansions:
    goldfish -> cyprinid
    grow -> originate
    grow -> bald
    grow -> change
    grow -> swell
    grow -> cultivate
    grow -> rise
    grow -> develop


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:
    enter -> intrude
    enter -> succeed
    enter -> embark
    enter -> save


'definition declaratory judgment' expansions:
    definition -> explanation
    declaratory -> declarative
    declaratory -> asserting
    judgment -> opinion
    judgment -> wisdom


'right pelvic pain causes' expansions:
    right -> prerogative
    right -> abstraction
    pain -> smart
    pain -> hurting
    pain -> symptom
    pain -> odynophagia


'what are the social determinants of health' expansions:
    social -> mixer
    social -> sociable
    health -> wellness
    health -> wellbeing


'how is the weather in jamaica' expansions:


'types of dysarthria from cerebral palsy' expansions:
    palsy -> dysfunction

##### Generic: possibility of using multiple wordnet extensions: Version 2

This version requires one min and max value for all extension types together.

In [10]:
def extend_syn2(token, expansions):
    synonyms = add_synonyms([token])
    expansions.extend(synonyms)
    return expansions

def extend_hyper2(token, expansions):
    hypernyms = add_hypernyms([token])
    expansions.extend(hypernyms)
    return expansions

def extend_hypo2(token, expansions):
    hyponyms = add_hyponyms([token])
    expansions.extend(hyponyms)
    return expansions

def extend_mero2(token, expansions):
    meronyms = add_meronyms([token])
    expansions.extend(meronyms)
    return expansions

def extend_holo2(token,expansions):
    holonyms = add_holonyms([token])
    expansions.extend(holonyms)
    return expansions

def extend_index2(index, token, expansions):
    switch = {
        0: extend_syn2,
        1: extend_hyper2,
        2: extend_hypo2,
        3: extend_mero2,
        4: extend_holo2
    }
    func = switch.get(index, "invalid index")
    return func(token, expansions)

In [13]:
#Note that in the below function, the 'to_add' parameter should contain 5 booleans. 
#The tuples represent synonyms, hypernyms, hyponyms, meronyms and holonyms in that order.
def wordnet_expansions2(filename, topics, to_add, max_size, min_score):
    f = open(filename, "w") 
    for (topicid,question) in zip(topics["ID"], topics["Question"]):
        # Build the query
        builder = qb.get_boolean_query_builder()
        should = qb.JBooleanClauseOccur['should'].value # should occur
        print("'"+question+"' expansions:")
        for token in question.split(" "):
            if len(analyzer.analyze(token))>0:
                # add question token
                builder.add(qb.get_boost_query(qb.get_term_query(token), 1),should)
                
                # wordnet expansion
                if token not in nlp.Defaults.stop_words and token in words.words():
                    token_lemma = nlp(token)[0].lemma_
                    
                    expansions = []
                    for ind, item in enumerate(to_add):
                        if item:
                            expansions = extend_index2(ind, token, expansions)
                    
                    expansions = best_similars(token, expansions, max_size, min_score)
                    expansions = filter_words(expansions)
                    
                    for word in set(expansions):
                        if len(analyzer.analyze(word))>0:
                            # add query term
                            builder.add(qb.get_boost_query(qb.get_term_query(word), 0.1),should)
                            print("    "+token+" -> "+word)
            
        question = builder.build()
        print("\n")

        hits = searcher.search(question)
        for i in range(0, 10):
            f.write(f'{topicid}\tQ0\t{hits[i].docid}\t{i+1:2}\t{hits[i].score:.5f}\tSTANDARD\n')
    f.close()

In [14]:
wordnet_expansions2('runs/wordnet_all_v2.txt', topics, [True,True,True,True,True], 10, 0.1)

'do goldfish grow' expansions:
    goldfish -> cyprinid
    grow -> arise
    grow -> turn
    grow -> uprise
    grow -> raise
    grow -> rise
    grow -> farm
    grow -> originate
    grow -> maturate


'what is wifi vs bluetooth' expansions:


'why did the us volunterilay enter ww1' expansions:
    enter -> recruit
    enter -> inscribe
    enter -> enroll
    enter -> embark
    enter -> insert
    enter -> record


'definition declaratory judgment' expansions:
    definition -> redefinition
    definition -> distinctness
    definition -> explanation
    declaratory -> declarative
    declaratory -> asserting
    judgment -> discernment
    judgment -> sagacity
    judgment -> opinion
    judgment -> sagaciousness
    judgment -> perspicacity
    judgment -> mind


'right pelvic pain causes' expansions:
    right -> change
    right -> preemption
    right -> due
    right -> abstraction
    pain -> somesthesia
    pain -> hurting
    pain -> nuisance
    pain -> trouble
    pai

    hypertrophy -> giantism
    hypertrophy -> splenomegaly
    hypertrophy -> elephantiasis
    hypertrophy -> acromegaly
    hypertrophy -> adenomegaly
    hypertrophy -> dysplasia
    hypertrophy -> dactylomegaly


'lps laws definition' expansions:
    definition -> redefinition
    definition -> distinctness
    definition -> explanation


'causes of military suicide' expansions:
    suicide -> harakiri
    suicide -> suttee
    suicide -> self-annihilation
    suicide -> self-destruction


'what is theraderm used for' expansions:


'what is famvir prescribed for' expansions:


'anthropological definition of environment' expansions:
    definition -> redefinition
    definition -> distinctness
    definition -> explanation
    environment -> context
    environment -> sphere
    environment -> surroundings
    environment -> ambiance
    environment -> milieu
    environment -> situation
    environment -> ecology


'axon terminals or synaptic knob definition' expansions:
    axon 