In [None]:
!pip install xmltodict
!pip install requests




In [None]:

import time
import xml

import requests
import xmltodict

class PubMedFetcher:
    """Small client for the NCBI E-utilities ``esearch`` and ``esummary`` endpoints.

    Every request carries the caller's e-mail address in the ``email`` query
    parameter. A request answered with HTTP 429 is re-sent up to
    ``MAX_RETRIES`` times, pausing longer before each attempt, before the
    failure is raised to the caller.
    """

    BASE_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    BASE_SUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

    # Seconds a single HTTP request may take before requests raises a timeout.
    REQUEST_TIMEOUT = 10
    # Number of extra attempts made after an HTTP 429 response.
    MAX_RETRIES = 3
    # Pause in seconds before the first retry; doubled before each later retry.
    RETRY_BACKOFF_SECONDS = 1.0

    def __init__(self, email: str):
        """Store the e-mail address that is sent with every API request."""
        self.email = email

    def _get_parsed(self, url: str, params: dict) -> dict:
        """Send a GET request and return the XML body parsed by xmltodict.

        Args:
            url: Endpoint to query.
            params: Query-string parameters for the request.

        Returns:
            The mapping produced by ``xmltodict.parse`` for the response text.

        Raises:
            Exception: if the last response received has a status other than 200.
        """
        retries_left = self.MAX_RETRIES
        pause = self.RETRY_BACKOFF_SECONDS
        while True:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            # Only HTTP 429 is retried; any other status is final.
            if response.status_code != 429 or retries_left <= 0:
                break
            retries_left -= 1
            time.sleep(pause)
            pause *= 2

        if response.status_code != 200:
            raise Exception(f"Failed to fetch data: {response.status_code}")
        return xmltodict.parse(response.text)

    def fetch_papers(self, query: str, max_results: int = 5) -> list:
        """Search PubMed and return the IDs of the matching papers.

        Args:
            query: Search term passed to the API as ``term``.
            max_results: Upper bound on returned IDs, passed as ``retmax``.

        Returns:
            A list of PubMed ID strings; empty when the search matched nothing.

        Raises:
            Exception: if the request does not end with HTTP 200.
        """
        params = {
            "db": "pubmed",          # database to search
            "term": query,           # search query
            "retmax": max_results,   # maximum number of results to retrieve
            "retmode": "xml",        # ask for an XML response
            "email": self.email,     # contact address sent with the request
        }
        data = self._get_parsed(self.BASE_SEARCH_URL, params)

        # xmltodict represents an empty <IdList/> as None and a lone <Id> child
        # as a plain string, so both shapes are normalised to a list here.
        id_list = data["eSearchResult"].get("IdList") or {}
        pubmed_ids = id_list.get("Id") or []
        if not isinstance(pubmed_ids, list):
            pubmed_ids = [pubmed_ids]
        return pubmed_ids

    def fetch_paper_details(self, pubmed_id: str) -> dict:
        """Fetch the title and publication date of one paper.

        Args:
            pubmed_id: PubMed ID passed to the API as ``id``.

        Returns:
            A dict with the keys ``"PubmedID"``, ``"Title"`` and
            ``"Publication Date"``. Fields missing from the response are
            reported as ``"N/A"``.

        Raises:
            Exception: if the request does not end with HTTP 200.
        """
        params = {
            "db": "pubmed",
            "id": pubmed_id,
            "retmode": "xml",        # ask for an XML response
            "email": self.email,
        }
        data = self._get_parsed(self.BASE_SUMMARY_URL, params)

        title = "N/A"
        pub_date = "N/A"

        # DocSum is the document-summary element of the esummary response.
        # When it is absent the loop below is skipped and the "N/A" defaults
        # are returned under the same keys as a successful lookup.
        docsum = data["eSummaryResult"].get("DocSum") or {}
        items = docsum.get("Item") or []
        if not isinstance(items, list):  # a single Item is not wrapped in a list
            items = [items]

        for item in items:
            name = item.get("@Name")
            if name == "Title":
                title = item.get("#text", "N/A")
            elif name == "PubDate":
                pub_date = item.get("#text", "N/A")

        return {"PubmedID": pubmed_id, "Title": title, "Publication Date": pub_date}

import os
import sys
import csv

def main(query: str = "cancer AND vaccine", max_results: int = 5,
         csv_filename: str = "pubmed_papers.csv", request_delay: float = 0.34):
    """Search PubMed, look up each hit and write the results to a CSV file.

    Args:
        query: Search term handed to ``PubMedFetcher.fetch_papers``.
        max_results: Maximum number of papers to look up.
        csv_filename: Path of the CSV file to write.
        request_delay: Seconds to pause before each detail request. The
            default keeps the loop at roughly three requests per second so
            the API is less likely to answer with HTTP 429.

    Errors are printed rather than raised. A paper whose lookup fails is
    reported and left out of the CSV.
    """
    # NOTE(review): the domain in this address looks mistyped ("examole.com");
    # confirm the intended contact address.
    fetcher = PubMedFetcher(email="armaan0sharma7@examole.com")

    try:
        # Fetch the IDs of the papers matching the query.
        pubmed_ids = fetcher.fetch_papers(query=query, max_results=max_results)

        print(f"Fetched {len(pubmed_ids)} PubMed IDs:")
        for pubmed_id in pubmed_ids:
            print(pubmed_id)

        # Fetch details for each paper, pacing the requests.
        paper_details = []
        for pubmed_id in pubmed_ids:
            print(f"Fetching details for PubMed ID: {pubmed_id}...")
            time.sleep(request_delay)
            try:
                details = fetcher.fetch_paper_details(pubmed_id=pubmed_id)
                paper_details.append(details)
            except Exception as e:
                print(f"Error fetching details for PubMed ID {pubmed_id}: {e}")

        # Save the paper details; utf-8 keeps non-ASCII characters in titles intact.
        with open(csv_filename, mode="w", newline="", encoding="utf-8") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=["PubmedID", "Title", "Publication Date"])
            writer.writeheader()
            writer.writerows(paper_details)

        print(f"Paper details saved to {csv_filename}")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()



Fetched 5 PubMed IDs:
39781294
39780958
39780583
39780444
39780285
Fetching details for PubMed ID: 39781294...
Fetching details for PubMed ID: 39780958...
Fetching details for PubMed ID: 39780583...
Error fetching details for PubMed ID 39780583: Failed to fetch data: 429
Fetching details for PubMed ID: 39780444...
Error fetching details for PubMed ID 39780444: Failed to fetch data: 429
Fetching details for PubMed ID: 39780285...
Error fetching details for PubMed ID 39780285: Failed to fetch data: 429
Paper details saved to pubmed_papers.csv


In [None]:
# Offer the generated CSV as a browser download when running on Google Colab.
# The google.colab package is provided by the Colab runtime, so the import is
# guarded to keep this cell from failing on other Jupyter kernels.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; pubmed_papers.csv was not downloaded.")
else:
    files.download('pubmed_papers.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>