In [1]:
#!pip install sentence_transformers
from sentence_transformers import models, SentenceTransformer

word_embedding_model = models.Transformer('michiyasunaga/BioLinkBERT-base')
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())

embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])

Downloading:   0%|          | 0.00/559 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/379 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/225k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/447k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

max_seq_length  512


In [5]:
import torch
from sentence_transformers import util
# Corpus with example sentences
corpus = [
    '(1) Host-derived factors alter gut microenvironment, and changes in gut microbiota also affect biological functions of host.',
    '(1) lterations of gut microbiota have been reported in a wide variety of diseases, but the whole picture of alterations in pancreatic diseases remains to be clarified.',
    '(2) Exposure to chiral pesticides poses many potential health risks. In this study, we examined the impacts of exposure to penconazole and its enantiomers on gut microbiota and metabolic profiles in mice.',
    '(2) In this study, we examined the impacts of exposure to penconazole and its enantiomers on gut microbiota and metabolic profiles in mice.',
    '(3) The objective of this study was to identify associations between fecal menaquinone profiles, gut microbiota composition, and biomarkers of cardiometabolic health.',
    '(3) The menaquinone profile and gut microbiota structure were periodically measured in fecal samples collected from 77 overweight Chinese adults who consumed a prescribed diet previously shown to alter gut microbiota composition and to improve cardiometabolic biomarkers.'
]
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

# Query sentences:
queries = [
    'Differences in Gut Microbiota Profiles between Autoimmune Pancreatitis and Chronic Pancreatitis.',
    'Impacts of Penconazole and Its Enantiomers Exposure on Gut Microbiota and Metabolic Profiles in Mice.',
    'Fecal menaquinone profiles of overweight adults are associated with gut microbiota composition during a gut microbiota-targeted dietary intervention.'
]


# Find the closest 5 sentences of the corpus for each query sentence based on cosine similarity
top_k = min(5, len(corpus))
for query in queries:
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    # We use cosine-similarity and torch.topk to find the highest 5 scores
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_results = torch.topk(cos_scores, k=top_k)

    print("\n\n======================\n\n")
    print("Query:", query)
    print("\nTop 5 most similar sentences in corpus:")

    for score, idx in zip(top_results[0], top_results[1]):
        print(corpus[idx], "(Score: {:.4f})".format(score))

    """
    # Alternatively, we can also use util.semantic_search to perform cosine similarty + topk
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
    hits = hits[0]      #Get the hits for the first query
    for hit in hits:
        print(corpus[hit['corpus_id']], "(Score: {:.4f})".format(hit['score']))
    """





Query: Differences in Gut Microbiota Profiles between Autoimmune Pancreatitis and Chronic Pancreatitis.

Top 5 most similar sentences in corpus:
(1) Host-derived factors alter gut microenvironment, and changes in gut microbiota also affect biological functions of host. (Score: 0.8919)
(1) lterations of gut microbiota have been reported in a wide variety of diseases, but the whole picture of alterations in pancreatic diseases remains to be clarified. (Score: 0.8782)
(2) In this study, we examined the impacts of exposure to penconazole and its enantiomers on gut microbiota and metabolic profiles in mice. (Score: 0.8750)
(3) The menaquinone profile and gut microbiota structure were periodically measured in fecal samples collected from 77 overweight Chinese adults who consumed a prescribed diet previously shown to alter gut microbiota composition and to improve cardiometabolic biomarkers. (Score: 0.8725)
(2) Exposure to chiral pesticides poses many potential health risks. In this study