# Plot the median acceptance delays per year for select journals

In [None]:
library(dplyr, warn=F)
library(ggplot2)

In [None]:
# Load the per-article delay records from data/delays.tsv.gz, parsing the
# `date` column as a Date, then derive the calendar year and the decimal
# (fractional-year) representation of each date.
delay_df = readr::read_tsv(
  file.path('data', 'delays.tsv.gz'),
  col_types = list(date = readr::col_date())
) %>%
  dplyr::mutate(
    year = lubridate::year(date),
    date_decimal = lubridate::decimal_date(date)
  )

In [None]:
# For every journal / delay type / year combination, count the articles and
# compute their median delay. The result is ungrouped so that later verbs
# operate on the whole table.
median_df = dplyr::group_by(delay_df, journal_nlm_id, delay_type, year) %>%
  dplyr::summarise(
    n_articles = n(),
    median_delay = median(delay)
  ) %>%
  dplyr::ungroup()

In [None]:
# Load the journal catalog from data/pubmed-journals.tsv and keep only the
# `NlmId` and `MedAbbr` columns, renamed to the names used in this script.
journal_df = readr::read_tsv(file.path('data', 'pubmed-journals.tsv')) %>%
  dplyr::select(journal_nlm_id = NlmId, journal_abbrev = MedAbbr)

In [None]:
# Line colors keyed by journal abbreviation. The names double as the list of
# journals to plot: they are matched against `journal_abbrev` below.
journals = c('Nature'='#801819', 'Cell'='#00558B', 'PLoS One'='#F8AF2D')

# Line plot of the yearly median acceptance delay for the selected journals.
gg = median_df %>%
  # Name the join key explicitly instead of letting dplyr guess it from the
  # shared column names; this keeps the join stable if either table gains
  # columns and avoids the "Joining, by = ..." message.
  dplyr::inner_join(journal_df, by = 'journal_nlm_id') %>%
  dplyr::filter(
    journal_abbrev %in% names(journals),
    delay_type == 'Acceptance',
    # Keep only journal-years with more than 35 articles
    # (presumably to avoid unstable medians from small samples).
    n_articles > 35
  ) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = year, y = median_delay, color = journal_abbrev)
  ) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept here for compatibility with older ggplot2 releases.
  ggplot2::geom_line(size = 1.5) +
  ggplot2::scale_color_manual(name = NULL, values = journals) +
  # Force the y axis to include zero.
  ggplot2::expand_limits(y = 0) +
  ggplot2::scale_x_continuous(breaks = seq(1950, 2016, 2)) +
  ggplot2::scale_y_continuous(breaks = seq(0, 1000, 25)) +
  ggplot2::xlab('Year of acceptance') +
  ggplot2::ylab('Median acceptance delay (days)') +
  ggplot2::theme_bw() +
  # Theme tweaks are applied after theme_bw() so they are not overwritten:
  # tight plot margins and a legend anchored in the bottom-right corner of
  # the panel, with wider keys, no key background, and italic labels.
  ggplot2::theme(
    plot.margin = grid::unit(c(2, 2, 2, 2), 'points'),
    legend.justification = c(1, 0),
    legend.position = c(1, 0),
    legend.key.width = grid::unit(2, 'lines'),
    legend.key = ggplot2::element_blank(),
    legend.text = ggplot2::element_text(face = 'italic')
  )

# Save the figure as viz/specific-journals.pdf with a width of 5.5 and a
# height of 4 (in ggsave's default units).
ggplot2::ggsave(
  filename = file.path('viz', 'specific-journals.pdf'),
  plot = gg,
  width = 5.5,
  height = 4
)