# Investigating associations between journal prestige and delays

In [1]:
# Attach dplyr (for %>% and n()) and ggplot2 (for aes() and the element_*
# helpers). The argument is spelled out in full: `warn` only worked through
# partial matching of `warn.conflicts`, and `F` is an ordinary variable that
# can be reassigned, unlike FALSE.
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

In [2]:
# Journal lookup table: keep only the NLM identifier and the abbreviated
# title, renamed to the snake_case column names used by the joins below.
journal_df <- readr::read_tsv('data/pubmed-journals.tsv') %>%
  dplyr::transmute(
    journal_nlm_id = NlmId,
    journal_abbrev = MedAbbr
  )

In [3]:
# Read the journal metrics table (pinned to a specific commit), spread the
# `metric` key into one column per metric, and keep only the 2014 rows.
metric_df <- readr::read_tsv(
  'https://github.com/dhimmel/journalmetrics/raw/fb6797b680ff6d119ef21de8791c0573d44cf6c0/data/pubmed-metrics.tsv.gz'
) %>%
  tidyr::spread(key = metric, value = value) %>%
  dplyr::filter(year == 2014) %>%
  dplyr::select(-year)

# Attach the journal abbreviations; the join key is left implicit, so dplyr
# reports the shared column it joined on.
metric_df <- dplyr::inner_join(journal_df, metric_df)

head(metric_df, 2)

Joining by: "journal_nlm_id"


Unnamed: 0,journal_nlm_id,journal_abbrev,IPP,SJR,SNIP
1,0431420,AANA J,0.719,0.247,0.6
2,14510400R,Acarologia,0.806,0.433,0.944


In [4]:
# Per-journal delay slopes: one row per journal and delay type, with the
# article count and slope columns kept alongside the identifiers.
slope_df <- readr::read_tsv('data/slopes.tsv') %>%
  dplyr::select(journal_nlm_id, delay_type, articles, slope)

# Keep only journals present in both tables (implicit natural join).
metric_df <- dplyr::inner_join(metric_df, slope_df)

head(metric_df, 2)

Joining by: "journal_nlm_id"


Unnamed: 0,journal_nlm_id,journal_abbrev,IPP,SJR,SNIP,delay_type,articles,slope
1,370270,Acta Anaesthesiol Scand,2.506,1.065,1.44,publish,545,-4.45365
2,370270,Acta Anaesthesiol Scand,2.506,1.065,1.44,accept,140,170.5078


In [5]:
# Median delays per journal, reshaped from one column per delay type to one
# row per (journal, delay type) so they line up with the rows of metric_df.
delay_df <- readr::read_tsv('data/journal-summaries.tsv') %>%
  dplyr::select(
    journal_nlm_id,
    median_acceptance_delay,
    median_publication_delay
  ) %>%
  tidyr::gather(
    key = delay_type, value = delay,
    median_acceptance_delay, median_publication_delay
  ) %>%
  # Translate the gathered column names into the 'accept' / 'publish' codes
  # that appear in the delay_type column of the slope table.
  dplyr::mutate(
    delay_type = c(
      median_acceptance_delay = 'accept',
      median_publication_delay = 'publish'
    )[delay_type]
  )

# Implicit natural join on the columns the two tables share.
metric_df <- dplyr::inner_join(metric_df, delay_df)

head(metric_df, 2)

Joining by: c("journal_nlm_id", "delay_type")


Unnamed: 0,journal_nlm_id,journal_abbrev,IPP,SJR,SNIP,delay_type,articles,slope,delay
1,370270,Acta Anaesthesiol Scand,2.506,1.065,1.44,publish,545,-4.45365,42.0
2,370270,Acta Anaesthesiol Scand,2.506,1.065,1.44,accept,140,170.5078,56.5


In [6]:
# Persist the combined table as TSV, writing missing values as empty fields.
readr::write_tsv(
  metric_df,
  file.path('data', 'journal-metrics.tsv'),
  na = ''
)

In [7]:
# Number of rows (one per journal) with a non-missing SJR, per delay type
metric_df %>%
  dplyr::filter(!is.na(SJR)) %>%
  dplyr::group_by(delay_type) %>%
  dplyr::summarise(journals = n())

Unnamed: 0,delay_type,journals
1,accept,2577
2,publish,2319


In [10]:
# Lower and upper bounds of the one-sample t-test confidence interval for the
# mean of `x`, at t.test()'s default confidence level. Returns a plain
# two-element numeric vector (subsetting drops the conf.level attribute).
# NOTE(review): not referenced by any other code visible in this notebook.
mean_CI <- function(x) {
  interval <- t.test(x)$conf.int
  interval[1:2]
}

# Number of equal-count bins each prestige metric is cut into for plotting
n_quantiles <- 20

# Apply the styling shared by every figure in this notebook.
#
# gg: a ggplot object whose data contains a `delay_type` column.
#
# Adds one facet column per delay_type (free x scales), an error bar per x
# value summarised with ggplot2's 'mean_cl_normal', the black-and-white theme
# with 2-point plot margins, hidden x-axis ticks and labels, a tinted facet
# strip, and an untitled discrete x scale. Returns the modified plot.
format_gg <- function(gg) {
  gg +
    ggplot2::facet_grid(. ~ delay_type, scales = 'free_x') +
    ggplot2::stat_summary(
      fun.data = 'mean_cl_normal',
      geom = 'errorbar',
      width = 0.0,
      color = '#80a5f9'
    ) +
    ggplot2::theme_bw() +
    ggplot2::theme(
      plot.margin = grid::unit(c(2, 2, 2, 2), 'points'),
      axis.ticks.x = ggplot2::element_blank(),
      axis.text.x = ggplot2::element_blank(),
      strip.background = ggplot2::element_rect(fill = '#fef2e2')
    ) +
    ggplot2::scale_x_discrete(name = NULL)
}

# Facet-strip labels keyed by the delay_type codes used in metric_df
delay_converter <- c(
  accept = 'Acceptance',
  publish = 'Publication'
)

# Plotting data shared by both SJR panels: relabel the delay types, drop rows
# without an SJR, and bin the remaining rows into n_quantiles SJR bins.
# NOTE(review): ntile() runs over the accept and publish rows pooled together,
# whereas the all-metrics figure bins within each delay_type -- confirm which
# of the two is intended.
sjr_df <- metric_df %>%
  dplyr::mutate(delay_type = delay_converter[delay_type]) %>%
  dplyr::filter(!is.na(SJR)) %>%
  dplyr::mutate(sjr_quantile = dplyr::ntile(SJR, n = n_quantiles))

# Left panel: median delay by SJR bin
gg_delay <- format_gg(
  ggplot2::ggplot(sjr_df, ggplot2::aes(x = sjr_quantile, y = delay))
) +
  ggplot2::ylab('Median delay')

# Right panel: delay slope by SJR bin, with a dashed reference line at zero
# drawn underneath the shared styling layers
gg_slope <- format_gg(
  ggplot2::ggplot(sjr_df, ggplot2::aes(x = sjr_quantile, y = slope)) +
    ggplot2::geom_hline(yintercept = 0, linetype = 'dashed')
) +
  ggplot2::ylab('Δ days per year')

# Lay the two panels out side by side under a shared x-axis caption and save
# the combined figure as a PNG.
gg <- gridExtra::arrangeGrob(
  gg_delay, gg_slope,
  nrow = 1,
  bottom = 'SJR quantile (journal prestige in 2014, low to high)'
)
path <- file.path('viz', 'journal-SJR.png')
# ggsave() does not create missing directories, so make sure the output
# directory exists first (a no-op when it is already there).
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
ggplot2::ggsave(filename = path, plot = gg, width = 5.5, height = 2.0)

In [9]:
# Long-format plotting data covering all three metrics (IPP, SJR, SNIP):
# one row per journal, delay type and metric, with missing metric values
# dropped. Bins are computed separately within each delay type / metric pair.
plot_df <- metric_df %>%
  dplyr::mutate(delay_type = delay_converter[delay_type]) %>%
  tidyr::gather(key = 'metric', value = 'value', IPP, SJR, SNIP) %>%
  dplyr::filter(!is.na(value)) %>%
  dplyr::group_by(delay_type, metric) %>%
  dplyr::mutate(
    metric_quantile = dplyr::ntile(value, n = n_quantiles)
  ) %>%
  dplyr::ungroup()

# Left panel: median delay by metric bin. The facet_grid() added here replaces
# the one set inside format_gg(), giving one row per metric and one column per
# delay type.
gg_delay <- format_gg(
  ggplot2::ggplot(plot_df, ggplot2::aes(x = metric_quantile, y = delay))
) +
  ggplot2::facet_grid(metric ~ delay_type) +
  ggplot2::ylab('Median delay')

# Right panel: delay slope by metric bin, with a dashed reference line at zero
# drawn underneath the shared styling layers
gg_slope <- format_gg(
  ggplot2::ggplot(plot_df, ggplot2::aes(x = metric_quantile, y = slope)) +
    ggplot2::geom_hline(yintercept = 0, linetype = 'dashed')
) +
  ggplot2::facet_grid(metric ~ delay_type) +
  ggplot2::ylab('Δ days per year')

# Lay the two panels out side by side under a shared x-axis caption and save
# the combined figure as a PNG.
gg <- gridExtra::arrangeGrob(
  gg_delay, gg_slope,
  nrow = 1,
  bottom = 'Prestige quantile (2014 journal metrics, low to high)'
)
path <- file.path('viz', 'journal-metrics.png')
# ggsave() does not create missing directories, so make sure the output
# directory exists first (a no-op when it is already there).
dir.create(dirname(path), showWarnings = FALSE, recursive = TRUE)
ggplot2::ggsave(filename = path, plot = gg, width = 5.5, height = 3.5)