<a href="https://colab.research.google.com/github/ankitojha07/assignment-python/blob/main/PubMed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pandas



In [None]:
pip install requests



In [None]:
pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.2


In [None]:
import requests
import csv
import re
from typing import List, Dict, Optional

def fetch_pubmed_papers(query: str, retmax: int = 10, timeout: float = 30.0) -> List[Dict[str, str]]:
    """
    Fetch research papers from PubMed based on a user-specified query.

    Sends an ESearch request for ``query`` and hands the returned ID list to
    ``get_paper_details`` to build the per-paper records.

    Args:
        query: Search term forwarded as the ESearch ``term`` parameter.
        retmax: Maximum number of paper IDs to request. Defaults to 10, the
            value that was previously hard-coded for testing.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without it ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The list of paper dictionaries produced by ``get_paper_details``
        (empty when the search yields no IDs).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": retmax,
    }
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    # A missing "esearchresult" or "idlist" key is treated as "no matches".
    paper_ids = data.get("esearchresult", {}).get("idlist", [])
    return get_paper_details(paper_ids)

def get_paper_details(paper_ids: List[str], timeout: float = 30.0) -> List[Dict[str, str]]:
    """
    Retrieve paper details including title, authors, and affiliations.

    Issues a single ESummary request for all ``paper_ids`` and converts each
    entry into a flat dictionary suitable for CSV export.

    Args:
        paper_ids: PubMed IDs to look up. An empty list short-circuits
            without any network call.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without it ``requests`` waits indefinitely on a stalled connection.

    Returns:
        One dictionary per ID, in the order of ``paper_ids``, with the keys
        "PubmedID", "Title", "Publication Date", "Non-academic Author(s)",
        "Company Affiliation(s)" and "Corresponding Author Email". Fields
        missing from the response fall back to "Unknown" / "None".

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    if not paper_ids:
        return []

    details_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    params = {
        "db": "pubmed",
        "id": ",".join(paper_ids),
        "retmode": "json",
    }
    response = requests.get(details_url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Look the summary table up once instead of on every loop iteration.
    summaries = data.get("result", {})

    results = []
    for paper_id in paper_ids:
        paper_data = summaries.get(paper_id, {})
        title = paper_data.get("title", "Unknown")
        pub_date = paper_data.get("pubdate", "Unknown")
        # NOTE(review): sample runs show every author field coming back as
        # "None"/"Unknown", which suggests the ESummary author entries do not
        # carry "affiliation"/"email" keys -- confirm against the API; a
        # different endpoint may be needed to obtain affiliations.
        authors = extract_authors(paper_data.get("authors", []))

        results.append({
            "PubmedID": paper_id,
            "Title": title,
            "Publication Date": pub_date,
            "Non-academic Author(s)": authors["non_academic"],
            "Company Affiliation(s)": authors["companies"],
            "Corresponding Author Email": authors["email"]
        })

    return results

def extract_authors(authors_list: List[Dict[str, str]]) -> Dict[str, Optional[str]]:
    """
    Summarise the non-academic authors found in an author list.

    An author counts as non-academic when it has a non-empty "affiliation"
    that does not contain "university", "college" or "institute"
    (case-insensitive). The first non-empty "email" encountered is reported
    as the corresponding author's address.

    Returns a dict with three string values:
        "non_academic": comma-separated author names, or "None".
        "companies":    comma-separated affiliations, or "None".
        "email":        the first email found, or "Unknown".
    """
    academic_words = re.compile(r"university|college|institute", re.I)
    people = list(authors_list)

    # (name, affiliation) pairs for every author outside academia.
    outsiders = [
        (person.get("name", "Unknown"), person["affiliation"])
        for person in people
        if person.get("affiliation", "")
        and academic_words.search(person["affiliation"]) is None
    ]
    first_email = next(
        (person["email"] for person in people if person.get("email", "")),
        None,
    )

    if outsiders:
        names_text = ", ".join(name for name, _ in outsiders)
        companies_text = ", ".join(company for _, company in outsiders)
    else:
        names_text = "None"
        companies_text = "None"

    return {
        "non_academic": names_text,
        "companies": companies_text,
        "email": first_email if first_email else "Unknown",
    }

def save_to_csv(papers: List[Dict[str, str]], filename: str) -> None:
    """
    Save the fetched papers to a CSV file.

    Args:
        papers: Row dictionaries; the keys of the first row define the
            column order of the header.
        filename: Destination path. The file is created or overwritten,
            encoded as UTF-8.

    When ``papers`` is empty there is no row to derive a header from, so the
    file is written empty instead of raising ``IndexError``.
    """
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        if not papers:
            # A search with no hits is a normal outcome, not an error.
            return
        writer = csv.DictWriter(file, fieldnames=list(papers[0]))
        writer.writeheader()
        writer.writerows(papers)

def run_colab(query: str, filename: str = "pubmed_results.csv"):
    """
    Notebook entry point: search, export, and report.

    Fetches the papers matching ``query``, writes them to ``filename`` as
    CSV, prints a confirmation line, and returns the fetched records so the
    caller can inspect them.
    """
    fetched = fetch_pubmed_papers(query)
    save_to_csv(fetched, filename)
    print(f"Results saved to {filename}")
    return fetched

# Demo run: search PubMed, write the default CSV, then echo the records.
papers = run_colab(query="cancer research")
print(papers)

Results saved to pubmed_results.csv
[{'PubmedID': '39955607', 'Title': 'Tumour-derived exosomal miR-205 promotes ovarian cancer cell progression through M2 macrophage polarization via the PI3K/Akt/mTOR pathway.', 'Publication Date': '2025 Feb 15', 'Non-academic Author(s)': 'None', 'Company Affiliation(s)': 'None', 'Corresponding Author Email': 'Unknown'}, {'PubmedID': '39955585', 'Title': 'Mcl-1 downregulation enhances BCG treatment efficacy in bladder cancer by promoting macrophage polarization.', 'Publication Date': '2025 Feb 15', 'Non-academic Author(s)': 'None', 'Company Affiliation(s)': 'None', 'Corresponding Author Email': 'Unknown'}, {'PubmedID': '39955575', 'Title': 'Computational development of mushroom-6-glucan/paclitaxel as a synergistic complementary medicine for breast cancer therapy.', 'Publication Date': '2025 Feb 15', 'Non-academic Author(s)': 'None', 'Company Affiliation(s)': 'None', 'Corresponding Author Email': 'Unknown'}, {'PubmedID': '39955573', 'Title': 'Mechanist