# Map journalmetrics to NLM IDs for PubMed compatibility

In [1]:
# Attach dplyr without the attach-time report of masked names. The argument
# is spelled out in full (no partial matching) and uses FALSE rather than the
# reassignable alias F.
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

In [2]:
# Read journalmetrics data.
# scopus_id is read as character so the identifier is never parsed as a number.
col_types = list(scopus_id = readr::col_character())
metric_df = file.path('data', 'metrics.tsv.gz') %>%
  readr::read_tsv(col_types = col_types)

# Read the scopus_id-to-ISSN table; both identifier columns are kept as
# character for the same reason.
col_types = list(scopus_id = readr::col_character(), issn = readr::col_character())
issn_df = file.path('data', 'issn.tsv') %>%
  readr::read_tsv(col_types = col_types)

# Attach the ISSN rows to the metric rows that share a scopus_id. The key is
# named explicitly so the join cannot silently start matching on any other
# column the two tables might come to share.
metric_df = issn_df %>%
  dplyr::inner_join(metric_df, by = 'scopus_id')

# Preview the first two joined rows
head(metric_df, 2)

Joining by: "scopus_id"


Unnamed: 0,scopus_id,issn_type,issn,year,metric,value
1,18500162600,print,15343219,2004,IPP,0.0
2,18500162600,print,15343219,2004,SJR,0.102


In [3]:
# Read PubMed Journals
journal_df = 'https://raw.githubusercontent.com/dhimmel/delays/756ffebf309499a500ec1f83d68803c044ec8729/data/pubmed-journals.tsv' %>%
  readr::read_tsv()

# Re-key the metrics from ISSN to NLM journal ID:
#   1. stack the print and online ISSN columns into one `issn` column,
#   2. keep only the (journal_nlm_id, issn) pairs,
#   3. join them to the ISSN-keyed metrics,
#   4. average the values that land on the same journal, year, and metric.
metric_df = journal_df %>%
  tidyr::gather(key = 'issn_type', value = 'issn', `ISSN (Print)`, `ISSN (Online)`) %>%
  dplyr::transmute(journal_nlm_id = NlmId, issn) %>%
  # A journal without a print or online ISSN produces an NA row here. dplyr
  # joins treat NA as equal to NA by default, so drop these rows before joining.
  dplyr::filter(!is.na(issn)) %>%
  # Strip the hyphen (literal match) to get the hyphen-less form used by metric_df
  dplyr::mutate(issn = sub('-', '', issn, fixed = TRUE)) %>%
  # Deduplicate only after issn_type is gone, so a journal whose print and
  # online ISSN are identical contributes a single pair.
  dplyr::distinct() %>%
  dplyr::inner_join(metric_df, by = 'issn') %>%
  dplyr::group_by(journal_nlm_id, year, metric) %>%
  dplyr::summarize(
    value = mean(value)
  ) %>% dplyr::ungroup()

# Preview the first two rows of the NLM-keyed metrics
head(metric_df, 2)

Joining by: "issn"


Unnamed: 0,journal_nlm_id,year,metric,value
1,266,1999,IPP,3.578
2,266,1999,SJR,2.389


In [4]:
# Save pubmed metrics to a gzipped TSV
path = file.path('data', 'pubmed-metrics.tsv')
metric_df %>%
  readr::write_tsv(path)

# Compress with the external gzip program; --force overwrites an existing
# archive. system2 returns the exit status, so check it instead of letting a
# missing binary or failed compression pass unnoticed.
status = system2('gzip', c('--force', path))
if (status != 0) {
  stop('gzip exited with status ', status, ' while compressing ', path)
}