In [13]:
import whoosh.searching
import whoosh.index as index
from whoosh.qparser import QueryParser

# directory of the on-disk Whoosh index that is opened below
indexdir = "pubmed_index"
ix = index.open_dir(indexdir)
# maximum number of hits requested per search (passed as limit= to searcher.search)
limit = 3

# standard query
def run_squery(text):
    """Run a single-field query against the index and print the top hits.

    The query string is parsed with a ``QueryParser`` whose default field is
    ``title``; other fields can still be addressed explicitly in the query
    (e.g. ``abstract:accident``).

    Prints the total number of matching documents, followed by the pmid,
    title and abstract of at most ``limit`` hits. Returns nothing.
    """
    query = QueryParser("title", schema=ix.schema).parse(text)

    with ix.searcher() as searcher:
        hits = searcher.search(query, limit=limit)
        # len() reports every matching document, not just the `limit` returned
        print(f"hits: {len(hits)}")
        for doc in hits:
            print(f'pmid: {doc["pmid"]}')
            print(f'title: {doc["title"]}')
            print(f'abstract: {doc["abstract"]}')

In [14]:
# relatively uncommon search term
run_squery("title:penis")

hits: 67
pmid: 318755
title: lymphedema of the penis .
abstract: lymphangiectomy with primary closure or skin grafting appears to be the treatment of choice for lymphedema of the penis. the method offers good cosmetic and functional results .
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 520038
title: keratoacanthoma of the glans penis .
abstract: a patient with a solitary keratoacanthoma of the glans penis is presented. since this area is devoid of hair, this keratoacanthoma could not have arisen from a hair follicle as has been suggested as the cause of keratoacanthoma. keratoacanthoma should be included in the differential

In [15]:
# unsurprisingly, the scope is widened with OR
run_squery("title:penis OR abstract:accident")

hits: 411
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 521159
title: roadside medical care in cambridgeshire .
abstract: the mid-anglia general practitioner accident service (magpas), established in 1972 to provide medical care at accident sites before the arrival of the ambulance and to assist ambulance crews with the severely injured, is discussed. analysis of accident report forms returned by magpas doctors is given .
pmid: 318755
title: lymphedema of the penis .
abstract: lymphangiectomy with primary closure or skin grafting appears to be the treatment of choice for lymphedema of the penis. the method offers good cosmeti

In [16]:
# AND narrows it to a single result
run_squery("title:penis AND abstract:accident")

hits: 1
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .


In [17]:
from whoosh.qparser import MultifieldParser

# multi field query
def run_mquery(text):
    """Run a query over both ``title`` and ``abstract`` and print the top hits.

    The query string is parsed with a ``MultifieldParser`` configured for the
    ``title`` and ``abstract`` fields, so terms without a field prefix are
    searched in both.

    Prints the total number of matching documents, followed by the pmid,
    title and abstract of at most ``limit`` hits. Returns nothing.
    """
    query = MultifieldParser(["title", "abstract"], schema=ix.schema).parse(text)
    with ix.searcher() as searcher:
        hits = searcher.search(query, limit=limit)
        # len() reports every matching document, not just the `limit` returned
        print(f"hits: {len(hits)}")
        for doc in hits:
            print(f'pmid: {doc["pmid"]}')
            print(f'title: {doc["title"]}')
            print(f'abstract: {doc["abstract"]}')

In [18]:
# the multifield query reveals an additional result we missed earlier
run_mquery("penis accident")

hits: 2
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 600738
title: traumatic rupture of the corpus cavernosum. three cases (author's transl) .
abstract: "fracture of the penis" by damage to the erectile structures is rare. this accident usually occurs when the erect penis is subjected to trauma, usually during intercourse. the diagnosis is easy in the presence of a haematoma spreading from the fraenulum to the scrotum, with sometimes an obvious breach in the albuginosa of one corpus cavernosum. in the three cases reported here, immediate surgery was possible in two. early operation, with repair of the ruptured albuginosa, of

In [19]:
# the same behavior can be achieved with a combined (OR) AND (OR) query,
# although it would get tedious with more than two fields
run_squery("(title:penis OR abstract:penis) AND (title:accident OR abstract:accident)")

hits: 2
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 600738
title: traumatic rupture of the corpus cavernosum. three cases (author's transl) .
abstract: "fracture of the penis" by damage to the erectile structures is rare. this accident usually occurs when the erect penis is subjected to trauma, usually during intercourse. the diagnosis is easy in the presence of a haematoma spreading from the fraenulum to the scrotum, with sometimes an obvious breach in the albuginosa of one corpus cavernosum. in the three cases reported here, immediate surgery was possible in two. early operation, with repair of the ruptured albuginosa, of

In [20]:
# the phrase query returns the same 67 hits as the plain title:penis query,
# so "is" is apparently dropped from the phrase (presumably filtered as a stop
# word by the title field's analyzer -- TODO confirm against the index schema)
run_squery('title:"penis is"')

hits: 67
pmid: 318755
title: lymphedema of the penis .
abstract: lymphangiectomy with primary closure or skin grafting appears to be the treatment of choice for lymphedema of the penis. the method offers good cosmetic and functional results .
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 520038
title: keratoacanthoma of the glans penis .
abstract: a patient with a solitary keratoacanthoma of the glans penis is presented. since this area is devoid of hair, this keratoacanthoma could not have arisen from a hair follicle as has been suggested as the cause of keratoacanthoma. keratoacanthoma should be included in the differential

In [21]:
# same 67 hits as the plain title:penis query: the trailing "~" has no visible effect.
# NOTE(review): Whoosh's QueryParser only treats "~" as a fuzzy operator when
# FuzzyTermPlugin has been added; run_squery does not add it, so this is
# probably not a fuzzy search at all -- confirm
run_squery("title:penis~")

hits: 67
pmid: 318755
title: lymphedema of the penis .
abstract: lymphangiectomy with primary closure or skin grafting appears to be the treatment of choice for lymphedema of the penis. the method offers good cosmetic and functional results .
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 520038
title: keratoacanthoma of the glans penis .
abstract: a patient with a solitary keratoacanthoma of the glans penis is presented. since this area is devoid of hair, this keratoacanthoma could not have arisen from a hair follicle as has been suggested as the cause of keratoacanthoma. keratoacanthoma should be included in the differential

In [22]:
from whoosh import scoring

# multi field query with scoring parameter
def run_query(text, weighting=None):
    """Run a title/abstract query with an optional scoring model and print the top hits.

    Args:
        text: Query string, parsed by a ``MultifieldParser`` over the
            ``title`` and ``abstract`` fields.
        weighting: Optional Whoosh weighting object (e.g. ``scoring.TF_IDF()``
            or ``scoring.BM25F()``) used to rank the hits. When omitted, the
            searcher is opened without a ``weighting`` argument so that
            Whoosh applies its own default scoring.

    Prints the total number of matching documents, followed by the pmid,
    title and abstract of at most ``limit`` hits. Returns nothing.
    """
    parser = MultifieldParser(["title", "abstract"], schema=ix.schema)
    query = parser.parse(text)

    # Forward `weighting` only when the caller supplied one: passing
    # weighting=None explicitly would replace the searcher's default scoring
    # model with None instead of leaving the default in place.
    searcher_kwargs = {} if weighting is None else {"weighting": weighting}

    with ix.searcher(**searcher_kwargs) as searcher:
        hits = searcher.search(query, limit=limit)
        # len() reports every matching document, not just the `limit` returned
        print(f"hits: {len(hits)}")
        for doc in hits:
            print(f'pmid: {doc["pmid"]}')
            print(f'title: {doc["title"]}')
            print(f'abstract: {doc["abstract"]}')


In [23]:
# Whoosh supports wildcard terms: "*ture" matches terms ending in "ture"
run_query("penis *ture", weighting=scoring.TF_IDF())

hits: 27
pmid: 546277
title: os penis of the rat. ii. morphology of the mature bone .
abstract: a morphological description of the mature os penis and its related structures in the rat is presented. the description aims at being a natural introduction to studies of histomorphological and histochemical events during the development of the bone. the causal interest of this topic is due to a recently published observation of a rare reaction for presence of alkaline phosphatase in the proximally positioned growth cartilage of os penis. the study, thus, presents a systematical description of the position of os penis in relation to the soft tissue structures of glans penis, of gross morphology of the bone, of its periosteal covering and of the distribution of the bone types, which together form the mature bone. the observations result in an introduction of a systematical terminology, based on the latin language. furthermore, certain phenomena concerning a distally positioned cartilagenous pr

In [24]:
# the top result for BM25F is the same result as without partial search
run_query("penis *ture", weighting=scoring.BM25F())

hits: 27
pmid: 507742
title: rupture of the penis .
abstract: traumatic rupture of the penis is a rare accident. one or both corpora cavernosa may be affected. approximately one third of the cases are associated with rupture of the urethra. in view of previous studies, and the present case reports both of which occurred during the same night, the authors propose primary surgical repair of the corpora cavernosa .
pmid: 546277
title: os penis of the rat. ii. morphology of the mature bone .
abstract: a morphological description of the mature os penis and its related structures in the rat is presented. the description aims at being a natural introduction to studies of histomorphological and histochemical events during the development of the bone. the causal interest of this topic is due to a recently published observation of a rare reaction for presence of alkaline phosphatase in the proximally positioned growth cartilage of os penis. the study, thus, presents a systematical description of