In [None]:
!pip install requests sentence-transformers faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [None]:
import xml.etree.ElementTree as ET

import faiss
import numpy as np
import requests
from sentence_transformers import SentenceTransformer

# STEP 1: Retrieve PubMed Articles
def get_pubmed_articles(query, max_results=10, timeout=30):
    """Search PubMed through the NCBI ESearch endpoint and return article IDs.

    Args:
        query: Search term sent as the ESearch ``term`` parameter.
        max_results: Maximum number of IDs to request (sent as ``retmax``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of PubMed ID strings (possibly empty). On any failure
        (network error, non-200 status, unparseable XML) an error-message
        string is returned instead, so callers must check
        ``isinstance(result, str)`` before using the value as a list.
    """
    # PubMed search endpoint URL
    pubmed_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    # Search parameters (query, max results, and output format)
    params = {
        "db": "pubmed",              # PubMed database
        "term": query,               # Search term (the medical query)
        "retmax": str(max_results),  # Limit the number of results
        "usehistory": "y",           # Allow ESearch history
        "retmode": "xml",            # Output format: XML
    }

    # A timeout keeps the notebook from hanging forever on a stalled
    # connection; network failures are reported through the same
    # error-string channel as HTTP errors.
    try:
        response = requests.get(pubmed_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        return f"Error fetching data from PubMed: {exc}"

    if response.status_code != 200:
        return f"Error fetching data from PubMed: {response.status_code}"

    # Parse the raw bytes with a real XML parser instead of splitting on
    # "<Id>", so only IDs inside <IdList> are returned.
    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        return f"Error fetching data from PubMed: invalid XML ({exc})"

    return [
        id_node.text.strip()
        for id_node in root.iterfind(".//IdList/Id")
        if id_node.text and id_node.text.strip()
    ]

# STEP 2: Retrieve Abstracts for the Articles
def get_abstracts_from_pubmed(article_ids, timeout=30):
    """Fetch abstracts for the given PubMed IDs through the NCBI EFetch endpoint.

    Args:
        article_ids: Iterable of PubMed IDs (joined with commas for the request).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list with one plain-text abstract per article that has one;
        articles without an abstract are skipped, so the list may be shorter
        than ``article_ids``. On any failure (network error, non-200 status,
        unparseable XML) an error-message string is returned instead, so
        callers must check ``isinstance(result, str)``.
    """
    # Nothing to fetch: avoid sending a request with an empty "id" parameter.
    if not article_ids:
        return []

    # PubMed E-utilities fetch abstracts
    pubmed_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

    params = {
        "db": "pubmed",                           # PubMed database
        "id": ",".join(map(str, article_ids)),    # List of article IDs
        "retmode": "xml",                         # Output format: XML
        "rettype": "abstract",                    # We want abstracts
    }

    try:
        response = requests.get(pubmed_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        return f"Error fetching abstracts from PubMed: {exc}"

    if response.status_code != 200:
        return f"Error fetching abstracts from PubMed: {response.status_code}"

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as exc:
        return f"Error fetching abstracts from PubMed: invalid XML ({exc})"

    abstracts = []
    for article in root.iter("PubmedArticle"):
        # An abstract may be split into several <AbstractText> sections whose
        # tags carry attributes; a literal "<AbstractText>" search misses those.
        # itertext() also decodes character entities (e.g. &#x3b1;) and drops
        # inline markup such as <i>/<sup> while keeping the text inside it.
        sections = []
        for node in article.iterfind(".//Abstract/AbstractText"):
            text = "".join(node.itertext()).strip()
            if text:
                sections.append(text)

        if sections:
            abstracts.append(" ".join(sections))

    return abstracts

# STEP 3: Create embeddings for abstracts using SentenceTransformers
def create_embeddings(abstracts, model=None):
    """Encode abstracts into embedding vectors.

    Args:
        abstracts: List of abstract strings to encode.
        model: Optional already-loaded encoder exposing
            ``encode(texts, show_progress_bar=...)``. When omitted, the
            'all-MiniLM-L6-v2' SentenceTransformer is loaded, as before.
            Passing a model lets callers reuse one instance for both
            indexing and querying instead of loading it twice.

    Returns:
        Whatever ``model.encode`` returns for ``abstracts``.
    """
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')  # Default model
    return model.encode(abstracts, show_progress_bar=True)

# STEP 4: Create FAISS Index for fast similarity search
def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding vectors.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim).

    Returns:
        A ``faiss.IndexFlatL2`` containing all vectors.

    Raises:
        ValueError: If ``embeddings`` is not 2-D (for example the result of
            encoding an empty list), so the dimension cannot be determined.
    """
    # FAISS expects C-contiguous float32 input; np.array() alone guarantees
    # neither, so convert explicitly (no copy is made when already suitable).
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            "embeddings must be a 2-D array of shape (n_vectors, dim); "
            f"got shape {vectors.shape}"
        )

    dim = vectors.shape[1]          # Dimension of embeddings
    index = faiss.IndexFlatL2(dim)  # L2 distance for similarity search
    index.add(vectors)              # Add embeddings to FAISS index
    return index

# STEP 5: Query the FAISS Index
def query_faiss(query, faiss_index, abstracts, model, k=3):
    """Return the abstracts closest to ``query`` in the FAISS index.

    Args:
        query: Query text to embed and search for.
        faiss_index: Index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``.
        abstracts: Abstract strings, positionally aligned with the vectors
            stored in ``faiss_index``.
        model: Encoder exposing ``encode(list_of_texts)``; should be the
            same model that produced the indexed vectors.
        k: Maximum number of results to return.

    Returns:
        A list of at most ``k`` dicts with keys ``"abstract"`` (str) and
        ``"distance"`` (float), in the order returned by the index.
    """
    # Never ask for more neighbours than there are abstracts.
    k = min(int(k), len(abstracts))
    if k <= 0:
        return []

    # Use the same model for querying; FAISS expects float32 input.
    query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
    distances, indices = faiss_index.search(query_embedding, k)

    # Return the most relevant abstracts
    results = []
    for idx, distance in zip(indices[0], distances[0]):
        idx = int(idx)
        # FAISS pads missing neighbours with index -1; without this check
        # abstracts[-1] would silently return the last abstract as a hit.
        if idx < 0:
            continue
        results.append({
            "abstract": abstracts[idx],
            "distance": float(distance)
        })

    return results

# Main function for user input
def main():
    """Prompt for a medical query and print the closest PubMed abstracts.

    Pipeline: search PubMed for article IDs, fetch their abstracts, embed
    the abstracts, index them with FAISS, and print up to three abstracts
    nearest to the query. Errors reported by the fetch helpers (as strings)
    and empty intermediate results are printed and end the run early.
    """
    query = input("Enter your medical query: ").strip()
    if not query:
        print("No query entered.")
        return

    # STEP 1: Get PubMed Articles
    article_ids = get_pubmed_articles(query, max_results=5)

    if isinstance(article_ids, str):  # helper signals errors with a string
        print(article_ids)
        return

    if not article_ids:
        print("No PubMed articles found for this query.")
        return

    # STEP 2: Get Abstracts for the Articles
    abstracts = get_abstracts_from_pubmed(article_ids)

    if isinstance(abstracts, str):  # helper signals errors with a string
        print(abstracts)
        return

    # Without this guard an empty list would fail while building the index.
    if not abstracts:
        print("None of the retrieved articles has an abstract.")
        return

    # STEP 3: Create embeddings
    embeddings = create_embeddings(abstracts)

    # STEP 4: Create the FAISS index
    faiss_index = create_faiss_index(embeddings)

    # STEP 5: Query for relevant answers. The query must be embedded with
    # the same model that produced the abstract embeddings.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Never request more neighbours than there are indexed abstracts.
    top_k = min(3, len(abstracts))
    results = query_faiss(query, faiss_index, abstracts, model, k=top_k)

    # Display the results; the header reports how many were actually found.
    print(f"\nTop {len(results)} Relevant Articles and Abstracts:")
    for result in results:
        print(f"Distance: {result['distance']:.4f}")
        print(f"Abstract: {result['abstract']}\n")

# Call the main function
if __name__ == "__main__":
    main()


Enter your medical query: What are the causes of cancer? 


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Top 3 Relevant Articles and Abstracts:
Distance: 1.4855
Abstract: The microRNA miR-126 supports endothelial cells and blood vessel integrity. Recent research has shown that it also serves as a key link between exercise and cancer. This article delves into how exercise affects the expression of miR-126, impacting cardiovascular well-being and metabolic control. The article also examines the various contributions of miR-126 in cancer, acting as both a suppressor and an enhancer depending on the particular context. Regular aerobic exercises, including HIIT, consistently increase levels of miR-126, leading to enhanced angiogenesis, endothelial repair, and improved vascular function through mechanisms involving VEGF, HIF-1&#x3b1;, and EPC mobilization. Resistance training affects similar pathways, but does not cause a significant change in miR-126 levels.MiR-126 involves in cancer by suppressing tumor growth and controlling key pathways such as PI3K/Akt, ERK/MAPK, and EMT. Lower levels are