# Plot the median acceptance delays per year for select journals

In [1]:
library(dplyr, warn=F)
library(ggplot2)

In [2]:
# Load the delay record of every article, parsing `date` as a Date, and
# derive both the calendar year and the decimal date from it
delay_df = readr::read_tsv(
  file.path('data', 'delays.tsv.gz'),
  col_types = list(date = readr::col_date())
) %>%
  dplyr::mutate(
    year = lubridate::year(date),
    date_decimal = lubridate::decimal_date(date)
  )

In [6]:
# Count the articles and take the median delay for every
# journal / delay type / year combination
median_df = dplyr::group_by(delay_df, journal_nlm_id, delay_type, year) %>%
  dplyr::summarise(n_articles = n(), median_delay = median(delay)) %>%
  dplyr::ungroup()

In [11]:
# Journal lookup table: keep only the NLM ID and the abbreviated title,
# renaming the ID column to journal_nlm_id so it can serve as the join key
journal_df = readr::read_tsv(file.path('data', 'pubmed-journals.tsv')) %>%
  dplyr::select(journal_nlm_id = NlmId, journal_abbrev = MedAbbr)

In [47]:
# Journals to plot, keyed by abbreviated title, each mapped to its line color
journals = c('Nature'='#801819', 'Cell'='#00558B', 'PLoS One'='#F8AF2D')

# Line chart of the yearly median acceptance delay for the selected journals.
# Only journal-years with more than 35 articles are drawn.
gg = median_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    journal_abbrev %in% names(journals),
    delay_type == 'Acceptance',
    n_articles > 35
  ) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = year, y = median_delay, color = journal_abbrev)
  ) +
    ggplot2::geom_line(size = 1.5) +
    # Force the y-axis to include zero
    ggplot2::expand_limits(y = 0) +
    ggplot2::scale_x_continuous(breaks = seq(1950, 2016, 2)) +
    ggplot2::scale_y_continuous(breaks = seq(0, 1000, 25)) +
    ggplot2::scale_color_manual(name = NULL, values = journals) +
    ggplot2::labs(
      x = 'Year of acceptance',
      y = 'Median acceptance delay (days)'
    ) +
    ggplot2::theme_bw() +
    # Tight outer margin; legend anchored inside the bottom-right corner
    ggplot2::theme(
      plot.margin = grid::unit(c(2, 2, 2, 2), 'points'),
      legend.justification = c(1, 0),
      legend.position = c(1, 0),
      legend.key.width = grid::unit(2, 'lines'),
      legend.key = ggplot2::element_blank(),
      legend.text = ggplot2::element_text(face = 'italic')
    )

# Write the figure to viz/specific-journals.pdf
ggplot2::ggsave(
  filename = file.path('viz', 'specific-journals.pdf'),
  plot = gg,
  width = 5.5,
  height = 4
)

Joining by: "journal_nlm_id"


# Identify the cause of the 2000 drop in acceptance delays

#### [Acta Crystallogr C (crystal structure communications)](https://raw.githubusercontent.com/dhimmel/delays/881cf6ce2207c1d62b79219206bc28660f83f57c/viz/journal/accept/8305826.png) starts with a blob of short acceptance times
+ http://doi.org/10.1107/s0108270100005576
+ http://doi.org/10.1107/s0108270100002791
+ http://doi.org/10.1107/s0108270100003292


#### [AJHG](https://raw.githubusercontent.com/dhimmel/delays/881cf6ce2207c1d62b79219206bc28660f83f57c/viz/journal/accept/0370475.png) starts with quick acceptance times

#### [Environ Pollut](https://raw.githubusercontent.com/dhimmel/delays/881cf6ce2207c1d62b79219206bc28660f83f57c/viz/journal/accept/8804476.png), generally slow, takes a break from depositing

#### [Astrophysical journal](https://raw.githubusercontent.com/dhimmel/delays/881cf6ce2207c1d62b79219206bc28660f83f57c/viz/journal/accept/9890633.png), which is quick, only deposited in 2000.

In [55]:
# Journal-level median acceptance delays for 1999 and 2000,
# listing the journal-years with the most articles first
median_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    delay_type == 'Acceptance',
    year %in% c(1999, 2000)
  ) %>%
  dplyr::arrange(dplyr::desc(n_articles))
  # Disabled: reshape into one median_delay column per year
  #dplyr::select(-n_articles) %>%
  #tidyr::spread(year, median_delay) %>% na.omit

Joining by: "journal_nlm_id"


Unnamed: 0,journal_nlm_id,delay_type,year,n_articles,median_delay,journal_abbrev
1,8305826,Acceptance,2000,618,52.5,Acta Crystallogr C
2,2984816R,Acceptance,2000,320,122.0,Talanta
3,0370475,Acceptance,2000,310,57.0,Am J Hum Genet
4,2984816R,Acceptance,1999,306,121.0,Talanta
5,9305878,Acceptance,2000,250,92.0,Acta Crystallogr D Biol Crystallogr
6,100888800,Acceptance,1999,249,71.0,J Biosci Bioeng
7,8804476,Acceptance,1999,245,164.0,Environ Pollut
8,100888800,Acceptance,2000,243,72.0,J Biosci Bioeng
9,8806653,Acceptance,2000,222,97.0,Surg Endosc
10,101160862,Acceptance,1999,168,126.5,Meat Sci


In [57]:
# Acta Crystallogr C (NLM ID 8305826) articles from 2000 whose
# acceptance delay was under 20 days
delay_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    delay_type == 'Acceptance',
    journal_nlm_id == '8305826',
    year == 2000,
    delay < 20
  )

Joining by: "journal_nlm_id"


Unnamed: 0,journal_nlm_id,pubmed_id,delay_type,date,delay,year,date_decimal,journal_abbrev
1,8305826,10851629,Acceptance,2000-02-02,15,2000,2000.087,Acta Crystallogr C
2,8305826,10851631,Acceptance,2000-02-10,14,2000,2000.109,Acta Crystallogr C
3,8305826,10851636,Acceptance,2000-02-03,13,2000,2000.09,Acta Crystallogr C
4,8305826,10851637,Acceptance,2000-02-18,18,2000,2000.131,Acta Crystallogr C
5,8305826,10851638,Acceptance,2000-02-18,17,2000,2000.131,Acta Crystallogr C
6,8305826,10851639,Acceptance,2000-03-02,6,2000,2000.167,Acta Crystallogr C
7,8305826,10902008,Acceptance,2000-03-07,19,2000,2000.18,Acta Crystallogr C
8,8305826,10902009,Acceptance,2000-03-10,18,2000,2000.189,Acta Crystallogr C
9,8305826,10902010,Acceptance,2000-03-10,14,2000,2000.189,Acta Crystallogr C
10,8305826,10902022,Acceptance,2000-02-29,19,2000,2000.161,Acta Crystallogr C


In [60]:
# Articles from 2000 with an acceptance delay strictly between 30 and 50 days,
# shortest delays first
delay_df %>%
  dplyr::inner_join(journal_df) %>%
  dplyr::filter(
    delay_type == 'Acceptance',
    year == 2000,
    delay > 30,
    delay < 50
  ) %>%
  dplyr::arrange(delay)

Joining by: "journal_nlm_id"


Unnamed: 0,journal_nlm_id,pubmed_id,delay_type,date,delay,year,date_decimal,journal_abbrev
1,0370475,11083946,Acceptance,2000-10-27,31,2000,2000.82,Am J Hum Genet
2,0410462,11206549,Acceptance,2000-11-20,31,2000,2000.885,Nature
3,100886622,11326317,Acceptance,2000-11-23,31,2000,2000.893,Neoplasia
4,100886622,11420748,Acceptance,2000-12-15,31,2000,2000.954,Neoplasia
5,100913255,11178118,Acceptance,2000-12-15,31,2000,2000.954,Arthritis Res
6,100963049,11256630,Acceptance,2000-04-27,31,2000,2000.32,EMBO Rep
7,100965523,11920212,Acceptance,2000-05-05,31,2000,2000.342,Hematol J
8,100968052,11714430,Acceptance,2000-11-10,31,2000,2000.858,Curr Control Trials Cardiovasc Med
9,101090633,11667982,Acceptance,2000-11-10,31,2000,2000.858,Respir Res
10,7808448,11400051,Acceptance,2000-10-02,31,2000,2000.751,Curr Microbiol
