In [1]:
from pathlib import Path
import json

from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser

In [2]:
INDEX_DIR = Path('./indexes')
if not INDEX_DIR.exists():
    INDEX_DIR.mkdir()

In [3]:
DOC_DIR = Path('./data/documents.json')
documents = json.loads(DOC_DIR.read_text())

In [4]:
# Creating a Whoosh index to store the dataset 
schema = Schema(
    doc_id = ID(stored=True, unique=True),
    title=TEXT(stored=True),
    content=TEXT,
)
index = create_in(INDEX_DIR, schema)

In [5]:
#  Index the dataset
writer = index.writer()
for doc in documents:
    writer.add_document(doc_id=doc['doc_id'], title=doc['title'], content=doc['content'])

In [6]:
writer.commit()

In [7]:
# Creating the parser 
parser = QueryParser("title", index.schema)


In [8]:
# Adding some Query plugins
from whoosh.qparser import FuzzyTermPlugin, PrefixPlugin
parser.add_plugin(FuzzyTermPlugin())
parser.add_plugin(PrefixPlugin())

In [15]:
# Creating the query
query_str = "Bi*"
query = parser.parse(query_str)

In [16]:
# Search the index
with index.searcher() as searcher:
    results = searcher.search(query, terms=True)
    for hit in results:
        print(hit["title"], ":")
        print(
            documents[
                int(hit['doc_id'])
            ]['content']
        )

Bing Search Engine :
A privacy-focused search engine that doesn't track users' online activities, providing a secure search experience.
