In [8]:
import os, time
from whoosh.fields import Schema, TEXT, ID
from whoosh.index import create_in
from whoosh.qparser import QueryParser, AndGroup, OrGroup

In [9]:
# ---------- Step 1: Schema & index ----------
# Two fields per document: `title` (an ID field whose value is stored so it can
# be read back from search hits) and `content` (a TEXT field that is searched).
schema = Schema(
    title=ID(stored=True),
    content=TEXT,
)

# The directory name embeds the current Unix time in whole seconds, so each
# run of this cell creates its index in a new folder.
index_dir = "indexdir_{}".format(int(time.time()))
os.mkdir(index_dir)
ix = create_in(index_dir, schema)

In [10]:
# ---------- Step 2: Add documents ----------
# The writer is used as a context manager: it commits when the block finishes
# normally and cancels if anything inside raises (for example a missing input
# file). Cancelling releases the index write lock, so the cell can simply be
# re-run after fixing the problem instead of failing on a stale lock.
with ix.writer() as writer:
    for doc_name in ("doc1.txt", "doc2.txt"):
        # Open each file in its own `with` so the handle is closed promptly
        # rather than left for the garbage collector.
        with open(doc_name) as doc_file:
            writer.add_document(title=doc_name, content=doc_file.read())
print("Documents indexed successfully!\n")

Documents indexed successfully!



In [11]:
# ---------- Step 3: Search function ----------
def search_docs(query_str, use_or=False):
    """Search the `content` field of the notebook-level index and print hits.

    Args:
        query_str: Query text in Whoosh's query language (explicit operators
            such as ``AND`` / ``OR`` are allowed).
        use_or: When true, the parser is built with ``OrGroup.factory(0.9)``
            as its grouping; otherwise ``AndGroup`` is used.

    Returns:
        None. Prints a header line, then either one ``- <title>`` line per hit
        or a "no matches" message, followed by a blank line.
    """
    with ix.searcher() as searcher:
        # Pick the grouping class handed to the query parser.
        if use_or:
            group = OrGroup.factory(0.9)
        else:
            group = AndGroup

        parser = QueryParser("content", ix.schema, group=group)
        parsed_query = parser.parse(query_str)
        # No `limit` is passed, so Whoosh's default result limit applies.
        hits = searcher.search(parsed_query)

        print(f"Results for query: '{query_str}'")
        if not hits:
            print("No matching documents found.")
        else:
            # Hits must be read while the searcher is still open.
            for hit in hits:
                print("-", hit["title"])
        print()

In [12]:
# ---------- Step 4: Run Boolean search ----------
search_docs("python AND web")       # AND query
search_docs("python OR web", True)  # OR query

Results for query: 'python AND web'
- doc1.txt

Results for query: 'python OR web'
- doc1.txt
- doc2.txt

