In [8]:
from pyserini.index import IndexReader
from pyserini.search import SimpleSearcher
from pyserini.search import get_topics

# Where the generated index lives (path is relative to the working directory)
index_loc = "../anserini/indexes/msmarco-passage/lucene-index-msmarco"

# Open the index for searching and switch the active scorer to BM25
searcher = SimpleSearcher(index_loc)
searcher.set_bm25(k1=0.9, b=0.4)

# Ask for the 3 best matches of a small test query
results = searcher.search('this is a test query', k=3)

# One line per match: "<docid> <score>"
for result in results:
    print(f"{result.docid} {result.score}")
    # The raw text of the matched document is available as well.
    #print(result.raw)

5578280 8.106900215148926
2016011 7.880499839782715
7004677 7.646299839019775


In [9]:
# An IndexReader exposes information about the index itself
indexer = IndexReader(index_loc)

# Index-wide statistics (number of terms/documents/etc.)
index_stats = indexer.stats()
print(index_stats)

# Document vector of the best-scoring hit from the test search
top_docid = results[0].docid
print(indexer.get_document_vector(top_docid))


{'total_terms': 352316036, 'documents': 8841823, 'non_empty_documents': 8841823, 'unique_terms': -1}
{'code': 1, 'string': 2, 'paramet': 2, 'construct': 1, 'manual': 1, 'can': 2, 'either': 1, 'queri': 4, "you'r": 3, 'tag': 2, 'write': 3, 'us': 3, 'you': 2, 'add': 2, 'parameter:if': 2, 'setparamet': 2, 'apexpages.currentpag': 2, 'set': 2, 'method': 4, 'test': 2, 'edit': 1, 'cleaner': 1, 'custom': 2, 'control': 2, 'url': 1, '1': 1, '2': 1, '3': 1, '4': 1, 'exampl': 1, 'page': 1, 'child': 1}


In [27]:
# Fetch the dev queries of msmarco (a mapping of topic id -> topic fields)
topics = get_topics('msmarco_passage_dev_subset')
# Take the first topic's query text without copying every key into a list
first_query = next(iter(topics.values()))['title']
print(first_query)
# Search for the single most relevant item in the index
hits = searcher.search(first_query, k=1)
if len(hits) > 0:
    doc = hits[0]
    print(doc.raw)
    # Print the document vector of the matching document
    print(indexer.get_document_vector(doc.docid))
else:
    # Indexing an empty hit list would fail with an opaque IndexError,
    # so report the miss explicitly instead.
    print(f'No document matched the query: {first_query!r}')

why do people grind teeth in sleep
{
  "id" : "7867446",
  "contents" : "Do you suffer from frequent headaches and jaw pain? Do you wake up with a dull headache and sore jaw? Do you grind your teeth at night? It could be Bruxism or teeth grinding. Why do people grind their teeth? Teeth grinding often occurs during sleep and it might be caused by an abnormal bite or missing teeth. Teeth grinding may also occur due to a sleep disorder, such as sleep apnea or due to stress and anxiety. How do I know if I am grinding my teeth at night?"
}
{'anxieti': 1, 'headach': 2, 'abnorm': 1, 'frequent': 1, 'why': 1, 'do': 5, 'your': 1, 'miss': 1, 'peopl': 1, 'sleep': 3, 'how': 1, 'caus': 1, 'suffer': 1, 'wake': 1, 'mai': 1, 'bite': 1, 'from': 1, 'teeth': 7, 'up': 1, 'you': 3, 'jaw': 2, 'stress': 1, 'pain': 1, 'bruxism': 1, 'could': 1, 'might': 1, 'occur': 2, 'night': 2, 'i': 2, 'also': 1, 'often': 1, 'dure': 1, 'am': 1, 'my': 1, 'apnea': 1, 'disord': 1, 'grind': 6, 'due': 2, 'know': 1, 'sore': 1, 'dul

In [28]:
# Build a query from scratch with pyserini's own tokenizer/stemmer/etc.
query = "This is a test query in which things are tested. Found using www.google.com of course!"
# pyserini calls tokenizing "analyzing"; the bare last expression displays the tokens
query_tokens = indexer.analyze(query)
query_tokens

['test',
 'queri',
 'which',
 'thing',
 'test',
 'found',
 'us',
 'www.google.com',
 'cours']