In [None]:
library(rentrez)
library(XML)

#' Retrieve papers from one or more journals via the PubMed API
#'
#' For every journal in `journal_names`, PubMed is searched for articles whose
#' publication date falls inside the requested window; title, journal name,
#' DOI link and abstract are then collected for each hit.
#'
#' The window is given EITHER as exactly one look-back argument
#' (`years_back`, `months_back`, `weeks_back`, counted back from today) OR as
#' an explicit `start_date` / `end_date` pair. The two styles are exclusive.
#'
#' @param journal_names Character vector of journal names, each used verbatim
#'   in a `[Journal]` search term.
#' @param years_back,months_back,weeks_back Single number; how far back from
#'   `Sys.Date()` the window starts. A year is approximated as 365 days and a
#'   month as 30 days.
#' @param start_date,end_date Window boundaries; anything `as.Date()` accepts
#'   without a format (e.g. "2024-12-01" or "2024/12/01"). Must be supplied
#'   together.
#' @param retmax Maximum number of PubMed ids requested per journal. A warning
#'   is raised when a journal has more hits than were retrieved.
#' @return A tibble with character columns `subject` (full journal name),
#'   `title`, `url` (DOI link or "No DOI available") and `abstract`, carrying
#'   class "preprints" and the attributes `start_date` / `end_date` (strings
#'   formatted as "%Y/%m/%d").
get_recent_papers <- function(journal_names,
                              years_back = NULL,
                              months_back = NULL,
                              weeks_back = NULL,
                              start_date = NULL,
                              end_date = NULL,
                              retmax = 100) {

  # ---- Argument validation --------------------------------------------------

  # Count the look-back arguments one by one: c() silently drops NULLs, so
  # testing the combined vector cannot tell "one supplied" from "three supplied".
  back_args <- list(
    years_back = years_back,
    months_back = months_back,
    weeks_back = weeks_back
  )
  back_given <- !vapply(back_args, is.null, logical(1))
  has_start <- !is.null(start_date)
  has_end <- !is.null(end_date)

  if (has_start || has_end) {
    if (any(back_given)) {
      stop("Error: Specify either 'start_date' and 'end_date' or one of 'years_back', 'months_back', or 'weeks_back', but not both.")
    }
    if (!(has_start && has_end)) {
      stop("Error: 'start_date' and 'end_date' must be provided together.")
    }
  } else if (sum(back_given) != 1) {
    stop("Error: You must specify exactly one of 'years_back', 'months_back', or 'weeks_back' if 'start_date' and 'end_date' are not provided.")
  }

  if (!is.numeric(retmax) || length(retmax) != 1 || is.na(retmax) || retmax < 1) {
    stop("Error: 'retmax' must be a single positive number.")
  }

  # ---- Local helpers --------------------------------------------------------

  # Parse one date; yields NA instead of throwing on unparseable input so the
  # caller can report a single, consistent validation error.
  parse_date <- function(x) {
    if (length(x) != 1) {
      return(as.Date(NA))
    }
    tryCatch(as.Date(x), error = function(e) as.Date(NA))
  }

  # Collapse a possibly missing summary field to exactly one string.
  as_field <- function(x) {
    if (is.null(x) || length(x) == 0) {
      return(NA_character_)
    }
    paste(as.character(x), collapse = " ")
  }

  # Build a DOI link from an 'elocationid' value. The field may be absent,
  # empty, or carry other identifiers next to the DOI (e.g. "pii: ... doi: ..."),
  # so only the token following "doi: " is used.
  extract_doi_url <- function(elocationid) {
    if (is.null(elocationid) || length(elocationid) != 1 || is.na(elocationid)) {
      return("No DOI available")
    }
    doi <- regmatches(
      elocationid,
      regexpr("(?<=doi: )\\S+", elocationid, perl = TRUE)
    )
    if (length(doi) == 0) {
      return("No DOI available")
    }
    paste0("https://doi.org/", doi)
  }

  # Fetch the full record of one paper and return its abstract as one string
  # (multi-section abstracts are joined with a space).
  fetch_abstract <- function(paper_id) {
    paper_xml <- rentrez::entrez_fetch(db = "pubmed", id = paper_id, rettype = "xml", parsed = TRUE)
    abstract <- XML::xpathSApply(paper_xml, "//AbstractText", XML::xmlValue)
    if (length(abstract) == 0) {
      return("No abstract available")
    }
    paste(abstract, collapse = " ")
  }

  # Collect journal name, title, DOI link and abstract for one PubMed id.
  fetch_paper <- function(paper_id) {
    paper_summary <- rentrez::entrez_summary(db = "pubmed", id = paper_id)
    list(
      fulljournalname = as_field(paper_summary$fulljournalname),
      title = as_field(paper_summary$title),
      abstract = fetch_abstract(paper_id),
      doi = extract_doi_url(paper_summary$elocationid)
    )
  }

  # ---- Resolve the date window ----------------------------------------------

  if (has_start) {
    range_start <- parse_date(start_date)
    range_end <- parse_date(end_date)
    if (is.na(range_start) || is.na(range_end)) {
      stop("Error: 'start_date' and 'end_date' must be valid dates in 'YYYY-MM-DD' format.")
    }
  } else {
    back_name <- names(back_args)[back_given]
    back_value <- back_args[[back_name]]
    if (!is.numeric(back_value) || length(back_value) != 1 || is.na(back_value)) {
      stop("Error: '", back_name, "' must be a single number.")
    }
    # Calendar approximations: 365 days per year, 30 days per month.
    days_per_unit <- c(years_back = 365, months_back = 30, weeks_back = 7)
    range_end <- Sys.Date()
    range_start <- range_end - back_value * days_per_unit[[back_name]]
  }

  if (range_start > range_end) {
    stop("Error: 'start_date' must not be later than 'end_date'.")
  }

  # PubMed date format
  start_date <- format(range_start, "%Y/%m/%d")
  end_date <- format(range_end, "%Y/%m/%d")

  # ---- Query PubMed journal by journal --------------------------------------

  all_papers <- list()

  for (journal_name in journal_names) {
    search_term <- paste0(journal_name, "[Journal] AND (", start_date, "[Date - Publication] : ", end_date, "[Date - Publication])")
    search_results <- rentrez::entrez_search(db = "pubmed", term = search_term, retmax = retmax)
    paper_ids <- search_results$ids

    if (length(paper_ids) == 0) {
      message("No papers found for ", journal_name)
      next
    }

    # Make truncation visible instead of silently dropping papers.
    total_hits <- search_results$count
    if (is.numeric(total_hits) && length(total_hits) == 1 && total_hits > length(paper_ids)) {
      warning(
        "Only ", length(paper_ids), " of ", total_hits,
        " papers retrieved for ", journal_name,
        "; increase 'retmax' to fetch more.",
        call. = FALSE
      )
    }

    paper_data <- lapply(paper_ids, fetch_paper)

    # vapply guarantees character columns whatever the number of papers.
    all_papers[[journal_name]] <- tibble::tibble(
      subject = vapply(paper_data, function(x) x$fulljournalname, character(1)),
      title = vapply(paper_data, function(x) x$title, character(1)),
      url = vapply(paper_data, function(x) x$doi, character(1)),
      abstract = vapply(paper_data, function(x) x$abstract, character(1))
    )
  }

  # ---- Assemble the result --------------------------------------------------

  if (length(all_papers) == 0) {
    # Keep the column layout stable even when nothing was found.
    all_papers_tibble <- tibble::tibble(
      subject = character(),
      title = character(),
      url = character(),
      abstract = character()
    )
  } else {
    all_papers_tibble <- dplyr::bind_rows(all_papers)
  }

  # Record the window that was actually queried
  attr(all_papers_tibble, "start_date") <- start_date
  attr(all_papers_tibble, "end_date") <- end_date

  # Same output as the biorecap::get_preprints output
  class(all_papers_tibble) <- c("preprints", class(all_papers_tibble))

  all_papers_tibble
}


In [None]:
# Fetch papers for several journals over an explicit publication-date window.
# Alternative using a relative look-back instead of fixed dates:
# recent_papers <- get_recent_papers(c("Cell", "Nature" , "Science"), months_back = 1)
recent_papers <- get_recent_papers(
  c("Nature Biomedical Engineering", "Nature Immunology", "Science Immunology"),
  start_date = "2024/12/01",
  end_date = "2024/12/31"
)

In [None]:
# Name of the LLM passed to add_summary() and recorded on the result below.
model_llm <- "llama3.2"

library(biorecap)

# Attach a prompt to every paper, then have the model summarise it.
paper_summaries <- add_summary(add_prompt(recent_papers), model = model_llm)

# Papers without an abstract get the placeholder as their summary.
paper_summaries[
  paper_summaries$abstract == "No abstract available",
  "summary"
] <- "No abstract available"

# Record which model produced the summaries.
attr(paper_summaries, "model") <- model_llm

paper_summaries

In [None]:
# Render the HTML report from the R Markdown skeleton; the path of a CSV with
# the same base name is passed to the template as a parameter.
skeleton <- "templates/skeleton.Rmd"
output_dir <- normalizePath(".")

# Timestamped report name, e.g. pubmed-report-2024-12-31-235959.html
output_file <- sprintf(
  "pubmed-report-%s.html",
  format(Sys.time(), "%Y-%m-%d-%H%M%S")
)
# The CSV path lives next to the report and differs only in its extension.
output_csv <- file.path(output_dir, sub("\\.html$", ".csv", output_file))

if (tools::file_ext(output_file) != "html") {
  stop("Output file must have an .html extension.") #nocov
}

rmarkdown::render(
  input = skeleton,
  output_file = output_file,
  output_dir = output_dir,
  params = list(
    paper_summaries = paper_summaries,
    output_csv = output_csv
  )
)