<a href="https://colab.research.google.com/github/harshaguntuku/fetches-research-papers/blob/main/fetches_research_papers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import requests
import csv
import re
from typing import List, Dict, Any, Optional

def fetch_pubmed_papers(
    query: str, max_results: int = 10, timeout: float = 30.0
) -> List[Dict[str, Any]]:
    """
    Fetch research papers from PubMed based on a query.

    Runs an ESearch request to collect matching PubMed IDs, then hands the
    IDs to ``fetch_paper_details`` to build the per-paper records.

    Args:
        query: Search term sent to ESearch as the ``term`` parameter.
        max_results: Upper bound on the number of IDs requested (``retmax``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        Whatever ``fetch_paper_details`` returns for the matching IDs; an
        empty list when the query is blank.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error
            status (via ``raise_for_status``).
    """
    # A blank term cannot select any papers, so skip the network round-trip.
    if not query or not query.strip():
        return []

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmode": "json",
        "retmax": max_results,
    }
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    # Missing keys (e.g. an error payload) degrade to "no results".
    pmids = data.get("esearchresult", {}).get("idlist", [])
    return fetch_paper_details(pmids)

def fetch_paper_details(
    pmids: List[str], timeout: float = 30.0
) -> List[Dict[str, Any]]:
    """
    Fetch detailed information for a list of PubMed IDs.

    Sends one ESummary request covering every ID and passes the decoded JSON
    to ``parse_paper_details``.

    Args:
        pmids: PubMed IDs to look up. Non-string IDs are converted with
            ``str`` before being joined.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The list produced by ``parse_paper_details``; an empty list when
        ``pmids`` is empty (no request is made in that case).

    Raises:
        requests.HTTPError: If the summary endpoint answers with an error
            status (via ``raise_for_status``).
    """
    if not pmids:
        return []

    details_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    params = {
        "db": "pubmed",
        # str() keeps the join from raising TypeError on integer IDs.
        "id": ",".join(str(pmid) for pmid in pmids),
        "retmode": "json",
    }
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(details_url, params=params, timeout=timeout)
    response.raise_for_status()
    details_data = response.json()
    return parse_paper_details(details_data)

def parse_paper_details(data: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Turn a decoded PubMed summary payload into flat, CSV-ready rows.

    Every entry under ``data["result"]`` except the ``"uids"`` index becomes
    one row. Title and publication date fall back to ``"N/A"`` when absent;
    the author list is handed to ``identify_non_academic_authors`` (defined
    outside this block), whose three return values fill the remaining
    columns.
    """
    summaries = data.get("result", {})
    rows = []

    for uid, record in summaries.items():
        # "uids" is the index of IDs, not a paper record.
        if uid != "uids":
            author_info = identify_non_academic_authors(record.get("authors", []))
            names, companies, email = author_info

            row = {
                "PubmedID": uid,
                "Title": record.get("title", "N/A"),
                "Publication Date": record.get("pubdate", "N/A"),
                "Non-academic Author(s)": ", ".join(names),
                "Company Affiliation(s)": ", ".join(companies),
                "Corresponding Author Email": email,
            }
            rows.append(row)

    return rows

def save_to_csv(papers: List[Dict[str, Any]], filename: str):
    """
    Write paper rows to ``filename`` as UTF-8 CSV with a header line.

    The column order comes from the keys of the first row. When ``papers``
    is empty nothing is written; a notice is printed instead.
    """
    if papers:
        header = list(papers[0])
        with open(filename, "w", encoding="utf-8", newline="") as handle:
            out = csv.DictWriter(handle, fieldnames=header)
            out.writeheader()
            out.writerows(papers)
        print(f"Results saved to {filename}")
    else:
        print("No papers found.")

def main():
    """
    Run a fixed PubMed search, save the results to CSV, and offer a download.

    The download step uses ``google.colab.files`` and is skipped when that
    package cannot be imported or when the search produced no papers.
    """
    # Manually specify the search query
    query = "cancer treatment biotech"

    # Fetch research papers
    papers = fetch_pubmed_papers(query)

    # Save to CSV in the working directory (Colab's filesystem when run there)
    csv_filename = "pubmed_results.csv"
    save_to_csv(papers, csv_filename)

    # save_to_csv writes nothing for an empty result, so there is no fresh
    # file to hand to the browser; stop before attempting the download.
    if not papers:
        return

    # Download the CSV file; the helper only exists inside Colab, so treat a
    # failed import as "not running in Colab" rather than an error.
    try:
        from google.colab import files
    except ImportError:
        return
    files.download(csv_filename)


# Run only when executed as a script or notebook cell, not when imported.
if __name__ == "__main__":
    main()

Results saved to pubmed_results.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>