# Plot the median acceptance delays per year for select journals

In [1]:
# Attach dplyr (for %>% and n()) without the masking notes, and ggplot2.
# The argument is spelled out in full: `warn=F` depended on partial argument
# matching and on `F`, which is an ordinary variable that can be reassigned.
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

In [2]:
# Read history dates for all articles, parsing `date` as a Date column, and
# derive both the calendar year and the decimal-year form of each date
delay_df = readr::read_tsv(
    file.path('data', 'delays.tsv.gz'),
    col_types = list(date = readr::col_date())
  ) %>%
  dplyr::mutate(
    year = lubridate::year(date),
    date_decimal = lubridate::decimal_date(date)
  )

In [4]:
# One row per (journal_nlm_id, delay_type, year) combination holding the
# number of rows in that group and the median of their `delay` values
median_df = delay_df %>%
  dplyr::group_by(journal_nlm_id, delay_type, year) %>%
  dplyr::summarise(n_articles = n(), median_delay = median(delay)) %>%
  dplyr::ungroup()

In [3]:
# Journal lookup table: keep only the NLM ID and the abbreviation columns,
# renamed to the column names used by the delay tables
journal_df = readr::read_tsv(file.path('data', 'pubmed-journals.tsv')) %>%
  dplyr::transmute(
    journal_nlm_id = NlmId,
    journal_abbrev = MedAbbr
  )

In [None]:
# Journals to plot and their line colors; the names must match values of
# `journal_abbrev` because they drive both the row filter and the color scale
journals = c('Nature'='#801819', 'Cell'='#00558B', 'PLoS One'='#F8AF2D')

# Median acceptance delay per year for the selected journals.
# The join key is stated explicitly so the join cannot silently change if
# either table later gains another same-named column, and so that no
# "Joining by" message is emitted. Journal-years with 35 or fewer rows are
# dropped before plotting.
gg = median_df %>%
  dplyr::inner_join(journal_df, by = 'journal_nlm_id') %>%
  dplyr::filter(
    journal_abbrev %in% names(journals),
    delay_type == 'Acceptance',
    n_articles > 35
  ) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = year, y = median_delay, color = journal_abbrev)
  ) +
    # NOTE(review): newer ggplot2 releases prefer `linewidth` for lines and
    # `legend.position.inside` for in-panel legends; the older spellings are
    # kept here so the cell still runs on older ggplot2 versions.
    ggplot2::geom_line(size = 1.5) +
    ggplot2::scale_color_manual(name = NULL, values = journals) +
    # Force the y axis to include zero
    ggplot2::expand_limits(y = 0) +
    ggplot2::theme_bw() +
    ggplot2::xlab('Year of acceptance') +
    ggplot2::ylab('Median acceptance delay (days)') +
    ggplot2::scale_x_continuous(breaks = seq(1950, 2016, 2)) +
    ggplot2::scale_y_continuous(breaks = seq(0, 1000, 25)) +
    # Tight outer margin; legend anchored in the bottom-right corner of the
    # panel with italic labels and no key background
    ggplot2::theme(
      plot.margin = grid::unit(c(2, 2, 2, 2), 'points'),
      legend.justification = c(1, 0),
      legend.position = c(1, 0),
      legend.key.width = grid::unit(2, 'lines'),
      legend.key = ggplot2::element_blank(),
      legend.text = ggplot2::element_text(face = 'italic')
    )

# Write the figure to viz/specific-journals.pdf (5.5 x 4, ggsave default units)
ggplot2::ggsave(
  filename = file.path('viz', 'specific-journals.pdf'),
  plot = gg,
  width = 5.5,
  height = 4
)

## Diagnose difference between dhimmel/plostime and dhimmel/delays medians

In [38]:
# PubMed IDs of Nature rows with delay_type 'Acceptance' and year 2015
accept_2015_pmids = delay_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    journal_abbrev == 'Nature',
    year == 2015,
    delay_type == 'Acceptance'
  ) %>%
  .[['pubmed_id']]

# How many such articles there are
length(accept_2015_pmids)

Joining by: "journal_nlm_id"


In [28]:
# PubMed IDs of every Nature row with delay_type 'Publication', in any year
all_publish_pmids = delay_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    journal_abbrev == 'Nature',
    delay_type == 'Publication'
  ) %>%
  .[['pubmed_id']]

Joining by: "journal_nlm_id"


In [40]:
# Nature articles accepted in 2015 that have an acceptance delay but no
# publication delay. The filter matches on pubmed_id only, so every row of
# delay_df belonging to those articles is kept.
accept_only_df = dplyr::filter(
  delay_df,
  pubmed_id %in% setdiff(accept_2015_pmids, all_publish_pmids)
)
nrow(accept_only_df)

In [35]:
# Median delay across the rows of accept_only_df
median(accept_only_df[['delay']])

In [36]:
# Display the rows of accept_only_df for inspection
accept_only_df

Unnamed: 0,journal_nlm_id,pubmed_id,delay_type,date,delay,year,date_decimal
1,410462,25693563,Acceptance,2015-01-21,286,2015,2015.055
2,410462,25693564,Acceptance,2015-01-12,413,2015,2015.03
3,410462,25693565,Acceptance,2015-01-14,405,2015,2015.036
4,410462,25693566,Acceptance,2015-01-07,408,2015,2015.016
5,410462,25693567,Acceptance,2015-01-07,397,2015,2015.016
6,410462,25693568,Acceptance,2015-01-22,380,2015,2015.058
7,410462,25693571,Acceptance,2015-01-15,169,2015,2015.038
8,410462,25719667,Acceptance,2015-01-15,212,2015,2015.038
9,410462,25719668,Acceptance,2015-01-15,107,2015,2015.038
10,410462,25719670,Acceptance,2015-01-16,190,2015,2015.041
