# Map journalmetrics to NLM IDs for PubMed compatibility

In [1]:
# Attach dplyr (for the %>% pipe used throughout) without the
# masking-conflict report, and ggplot2.
# The argument is spelled out in full: partial matching (`warn`) and the
# reassignable shorthand `F` are both fragile.
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

In [2]:
# Load the Scopus-to-ISSN table. Both identifier columns are forced to
# character so they are never reinterpreted as numbers.
col_types <- list(
  scopus_id = readr::col_character(),
  issn = readr::col_character()
)
issn_df <- readr::read_tsv(
  file.path('data', 'issn.tsv'),
  col_types = col_types
)

# Load the PubMed journal catalog from a fixed revision of dhimmel/delays.
# NOTE(review): column types are guessed by readr here; confirm that NlmId
# is parsed the way downstream consumers expect.
journal_df <- readr::read_tsv(
  'https://raw.githubusercontent.com/dhimmel/delays/756ffebf309499a500ec1f83d68803c044ec8729/data/pubmed-journals.tsv'
)

# Preview the first two rows of the ISSN table
head(issn_df, 2)

Unnamed: 0,scopus_id,issn_type,issn
1,18500162600,print,15343219
2,19700200922,print,1285157


In [3]:
# Create a pubmed-scopus mapping by matching journals on ISSN.
# Each PubMed journal contributes up to two candidate ISSNs (print, online);
# a journal is mapped to every Scopus ID that shares either of them.
pubmed_map_df <- journal_df %>%
  dplyr::select(journal_nlm_id = NlmId, `ISSN (Print)`, `ISSN (Online)`) %>%
  tidyr::gather(
    key = 'issn_type', value = 'issn',
    `ISSN (Print)`, `ISSN (Online)`
  ) %>%
  # Drop absent ISSNs before joining: dplyr joins match NA to NA by default,
  # which would pair ISSN-less journals with any Scopus row lacking an ISSN.
  dplyr::filter(!is.na(issn)) %>%
  # The ISSNs shown from issn.tsv carry no hyphen, so strip it here too.
  # NOTE(review): the issn.tsv preview includes a 7-character ISSN (1285157);
  # if leading zeros were stripped upstream, those ISSNs will not match the
  # 8-character PubMed values -- confirm against the source data.
  dplyr::transmute(journal_nlm_id, issn = sub('-', '', issn, fixed = TRUE)) %>%
  # Name the key explicitly so the join cannot silently pick up any other
  # column the two tables might come to share.
  dplyr::inner_join(issn_df, by = 'issn') %>%
  dplyr::select(journal_nlm_id, scopus_id) %>%
  dplyr::distinct()

# Persist the mapping as a TSV
pubmed_map_df %>%
  readr::write_tsv(file.path('data', 'pubmed-map.tsv'))

head(pubmed_map_df, 2)

Joining by: "issn"


Unnamed: 0,journal_nlm_id,scopus_id
1,431420,26729
2,1251052,70264


In [4]:
# Load the gzipped journalmetrics table, forcing scopus_id to character;
# the remaining column types are left for readr to infer.
col_types <- list(scopus_id = readr::col_character())
metric_df <- readr::read_tsv(
  file.path('data', 'metrics.tsv.gz'),
  col_types = col_types
)

In [5]:
# Transform metrics to pubmed: re-key each Scopus metric by NLM journal ID.
# When one NLM ID maps to several Scopus IDs, their values are averaged per
# (journal, year, metric).
# NOTE: this overwrites metric_df with a table that no longer has scopus_id,
# so this cell cannot be re-run without first re-reading the metrics.
metric_df <- pubmed_map_df %>%
  # Explicit key: avoids relying on whichever column names happen to overlap.
  dplyr::inner_join(metric_df, by = 'scopus_id') %>%
  dplyr::select(-scopus_id) %>%
  dplyr::group_by(journal_nlm_id, year, metric) %>%
  dplyr::summarize(
    value = mean(value)
  ) %>%
  dplyr::ungroup()

head(metric_df, 2)

Joining by: "scopus_id"


Unnamed: 0,journal_nlm_id,year,metric,value
1,266,1999,IPP,3.578
2,266,1999,SJR,2.389


In [6]:
# Save pubmed metrics to a gzipped TSV.
# The table is written uncompressed first, then compressed in place by the
# external `gzip` program; `--force` overwrites an existing .gz file.
path <- file.path('data', 'pubmed-metrics.tsv')
metric_df %>%
  readr::write_tsv(path)
gzip_status <- system2('gzip', c('--force', path))
# system2() reports failure only through its return value, so check it
# instead of silently leaving an uncompressed (or stale compressed) file.
if (gzip_status != 0) {
  stop(
    'gzip failed with exit status ', gzip_status, ' for ', path,
    call. = FALSE
  )
}