<a href="https://colab.research.google.com/github/Tryphy18/SDClab/blob/main/medicalchatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install requests beautifulsoup4



In [2]:
import requests
import json
from typing import List, Dict, Optional
import time

class PubMedChatbot:
    """
    Console chatbot that answers a free-text question with PubMed articles.

    The bot queries the NCBI E-utilities API: ``esearch`` turns the question
    into a list of PMIDs, ``efetch`` returns the matching records as XML, and
    the records are parsed and rendered as a plain-text summary.
    """

    # Maximum number of abstract characters shown per article in the summary.
    ABSTRACT_PREVIEW_CHARS = 200

    def __init__(self):
        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        self.max_retries = 3
        self.retry_delay = 2  # seconds
        # Without a timeout a stalled connection would block the chat forever.
        self.request_timeout = 10  # seconds

    def _request_with_retries(self, url: str, params: Dict, handler, action: str):
        """
        GET ``url`` and pass the response to ``handler``, retrying on failure.

        Args:
            url: Endpoint to request.
            params: Query-string parameters for the request.
            handler: Callable taking the response and returning the decoded
                payload. It runs inside the retry loop so an undecodable body
                is retried just like a transport error.
            action: Short description used in the failure message.

        Returns:
            Whatever ``handler`` returns, or None once every attempt failed.
        """
        for attempt in range(self.max_retries):
            try:
                response = requests.get(url, params=params, timeout=self.request_timeout)
                response.raise_for_status()
                return handler(response)
            except (requests.RequestException, ValueError) as e:
                # ValueError also covers JSON decoding errors.
                if attempt == self.max_retries - 1:
                    print(f"Failed to {action} after {self.max_retries} attempts: {e}")
                    return None
                # Only wait when another attempt is still to come.
                time.sleep(self.retry_delay)
        return None

    def search_pubmed(self, query: str, max_results: int = 5) -> Optional[List[Dict]]:
        """
        Search PubMed for articles related to the query

        Args:
            query: Free-text search term.
            max_results: Maximum number of articles to retrieve.

        Returns:
            A list of article dictionaries (see ``parse_pubmed_xml``), or None
            when the query is blank, nothing matched, or the request failed.
        """
        if not query or not query.strip():
            return None

        search_url = f"{self.base_url}esearch.fcgi"
        params = {
            'db': 'pubmed',
            'term': query,
            'retmax': max_results,
            'retmode': 'json',
            'sort': 'relevance'
        }

        def extract_ids(response):
            data = response.json()
            return data.get('esearchresult', {}).get('idlist', [])

        id_list = self._request_with_retries(search_url, params, extract_ids, "search PubMed")
        if id_list is None:
            return None
        # The detail fetch has its own retry loop, so it is kept outside the
        # search retries to avoid multiplying attempts.
        return self.fetch_article_details(id_list)

    def fetch_article_details(self, pmid_list: List[str]) -> Optional[List[Dict]]:
        """
        Fetch details for PubMed articles given their PMIDs

        Args:
            pmid_list: PubMed identifiers to look up.

        Returns:
            A list of article dictionaries, or None when ``pmid_list`` is
            empty or the request failed after all retries.
        """
        if not pmid_list:
            return None

        fetch_url = f"{self.base_url}efetch.fcgi"
        params = {
            'db': 'pubmed',
            'id': ','.join(pmid_list),
            'retmode': 'xml',
            'rettype': 'abstract'
        }

        return self._request_with_retries(
            fetch_url,
            params,
            lambda response: self.parse_pubmed_xml(response.text),
            "fetch PubMed details",
        )

    @staticmethod
    def _element_text(element) -> str:
        """Return the text of ``element`` including inline markup, or ''."""
        if element is None:
            return ""
        # itertext() also yields text nested in tags such as <i> or <sup>.
        return ''.join(element.itertext()).strip()

    def _parse_article(self, article) -> Dict:
        """
        Convert one ``PubmedArticle`` element into an article dictionary.

        Raises:
            ValueError: If the record has no ArticleTitle or no PMID.
        """
        text = self._element_text

        title_el = article.find('.//ArticleTitle')
        pmid_el = article.find('.//PMID')
        if title_el is None or pmid_el is None:
            raise ValueError("record is missing ArticleTitle or PMID")

        # Structured abstracts are split into several AbstractText sections;
        # join them all instead of keeping only the first one.
        sections = article.findall('.//Abstract/AbstractText')
        if not sections:
            first = article.find('.//AbstractText')
            sections = [] if first is None else [first]
        abstract_text = ' '.join(filter(None, map(text, sections)))

        authors = []
        for author in article.iter('Author'):
            name = f"{text(author.find('ForeName'))} {text(author.find('LastName'))}".strip()
            # Consortia are listed under CollectiveName rather than personal names.
            name = name or text(author.find('CollectiveName'))
            if name:
                authors.append(name)

        journal = article.find('.//Journal')
        journal_title = ""
        year = ""
        if journal is not None:
            journal_title = text(journal.find('Title'))
            pub_date = journal.find('.//PubDate')
            if pub_date is not None:
                year = text(pub_date.find('Year'))
                if not year:
                    # Some records carry a free-form MedlineDate such as
                    # "2021 Jan-Feb" instead of a Year element.
                    candidate = text(pub_date.find('MedlineDate'))[:4]
                    year = candidate if candidate.isdigit() else ""

        doi = article.find(".//ELocationID[@EIdType='doi']")
        if doi is None:
            # Exact path on purpose: ArticleId elements nested in the
            # reference list belong to cited papers, not this record.
            doi = article.find("PubmedData/ArticleIdList/ArticleId[@IdType='doi']")

        return {
            'pmid': text(pmid_el),
            'title': text(title_el),
            'abstract': abstract_text or "No abstract available",
            'authors': authors,
            'journal': journal_title,
            'year': year,
            'doi': text(doi)
        }

    def parse_pubmed_xml(self, xml_content: str) -> List[Dict]:
        """
        Parse PubMed XML response into structured data

        Args:
            xml_content: XML document returned by ``efetch``.

        Returns:
            One dictionary per parsable article with the keys ``pmid``,
            ``title``, ``abstract``, ``authors`` (list of names), ``journal``,
            ``year`` and ``doi``. Malformed documents and records are reported
            on stdout and skipped, so the result may be an empty list.
        """
        # The standard-library parser avoids depending on lxml, which the
        # BeautifulSoup 'xml' feature requires.
        import xml.etree.ElementTree as ET

        if not xml_content or not xml_content.strip():
            return []

        try:
            root = ET.fromstring(xml_content)
        except ET.ParseError as e:
            print(f"Error parsing PubMed XML: {e}")
            return []

        articles = []
        for article in root.iter('PubmedArticle'):
            try:
                articles.append(self._parse_article(article))
            except (AttributeError, ValueError) as e:
                # Skip a broken record but keep the rest of the result set.
                print(f"Error parsing article: {e}")
        return articles

    def format_response(self, articles: List[Dict]) -> str:
        """
        Format the PubMed results into a user-friendly response

        Args:
            articles: Article dictionaries as produced by ``parse_pubmed_xml``;
                None or an empty list yields a "nothing found" message.

        Returns:
            The text shown to the user, ending with a medical disclaimer.
        """
        if not articles:
            return "I couldn't find any relevant medical research on that topic."

        limit = self.ABSTRACT_PREVIEW_CHARS
        parts = ["Here are some recent medical research articles that might be relevant:\n\n"]

        for i, article in enumerate(articles, 1):
            names = article['authors']
            authors = ', '.join(names[:3]) + (' et al.' if len(names) > 3 else '')

            abstract = article['abstract']
            # Only mark the abstract as truncated when text was actually cut.
            if len(abstract) > limit:
                abstract = abstract[:limit] + "..."

            parts.append(
                f"{i}. {article['title']}\n"
                f"   - Authors: {authors}\n"
                f"   - Journal: {article['journal']} ({article['year']})\n"
                f"   - Abstract: {abstract}\n"
                f"   - PMID: {article['pmid']}\n"
                f"   - DOI: {article['doi']}\n\n"
            )

        parts.append(
            "Note: This information is for educational purposes only and not medical advice. "
            "Always consult a healthcare professional for medical concerns."
        )

        return ''.join(parts)

    def chat(self):
        """
        Interactive chat interface for the PubMed chatbot

        Reads questions from stdin until the user types quit/exit/bye, or
        closes the input stream or interrupts the prompt.
        """
        print("Medical Research Chatbot (Powered by PubMed)")
        print("Type 'quit' to exit.\n")
        print("Please describe your medical question or topic of interest:")

        while True:
            try:
                user_input = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                # End the session cleanly instead of showing a traceback.
                print("\nGoodbye! Stay healthy!")
                break

            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("Goodbye! Stay healthy!")
                break

            if not user_input:
                print("Please enter a medical question or topic.")
                continue

            print("\nSearching PubMed for relevant research...\n")
            articles = self.search_pubmed(user_input)
            response = self.format_response(articles)
            print(f"Bot: {response}\n")
            print("Do you have another medical question?")


# Start the interactive chat only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    bot = PubMedChatbot()
    bot.chat()

Medical Research Chatbot (Powered by PubMed)
Type 'quit' to exit.

Please describe your medical question or topic of interest:
You: how to avoid cancer

Searching PubMed for relevant research...

Bot: Here are some recent medical research articles that might be relevant:

1. Epidemiology of Cancer.
   - Authors: Stephen M Schwartz
   - Journal: Clinical chemistry (2024)
   - Abstract: Cancers are a large and heterogeneous group of malignant tumors that collectively accounted for approximately 600 000 US deaths in 2020; only heart disease claimed more lives. A large amount of knowle...
   - PMID: 38175589
   - DOI: 10.1093/clinchem/hvad202

2. Colorectal Cancer in Inflammatory Bowel Disease: Mechanisms and Management.
   - Authors: Shailja C Shah, Steven H Itzkowitz
   - Journal: Gastroenterology (2022)
   - Abstract: Patients with inflammatory bowel disease (IBD) are at increased risk of developing colorectal cancer (CRC), despite decreases in CRC incidence in recent years. Chronic inf