# Map journalmetrics to NLM IDs for PubMed compatibility

In [1]:
# Load the Scopus ID <-> ISSN table from data/issn.tsv.
# scopus_id and issn are parsed as character so the identifiers stay verbatim text.
issn_path = file.path('data', 'issn.tsv')
col_types = list(
  scopus_id = readr::col_character(),
  issn = readr::col_character()
)
issn_df = readr::read_tsv(issn_path, col_types = col_types)

head(issn_df, 2)

scopus_id,issn_type,issn
<chr>,<chr>,<chr>
12000,print,1527-6228
12001,print,0022-5002


In [2]:
# Read PubMed Journals
# The table is fetched from a URL pinned to a specific commit of dhimmel/delays.
# NlmId and both ISSN columns are declared as character, matching how the
# identifier columns of the other input tables in this notebook are read, so the
# join keys never depend on readr's type guessing. Remaining columns are guessed.
journal_url = 'https://raw.githubusercontent.com/dhimmel/delays/83577d4bb774bb90533d2cfe0db7032b70fdbbc1/data/pubmed-journals.tsv'
journal_col_types = readr::cols(
  NlmId = readr::col_character(),
  `ISSN (Print)` = readr::col_character(),
  `ISSN (Online)` = readr::col_character()
)
journal_df = readr::read_tsv(journal_url, col_types = journal_col_types)
head(journal_df, 2)

[33m![39m [34m[34mcurl[34m[39m package not installed, falling back to using [30m[47m[30m[47m`url()`[47m[30m[49m[39m


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  JrId = [32mcol_double()[39m,
  JournalTitle = [31mcol_character()[39m,
  NlmId = [31mcol_character()[39m,
  MedAbbr = [31mcol_character()[39m,
  IsoAbbr = [31mcol_character()[39m,
  `ISSN (Print)` = [31mcol_character()[39m,
  `ISSN (Online)` = [31mcol_character()[39m
)




JrId,JournalTitle,NlmId,MedAbbr,IsoAbbr,ISSN (Print),ISSN (Online)
<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
198,"Acta radiologica: therapy, physics, biology",201,Acta Radiol Ther Phys Biol,Acta Radiol Ther Phys Biol,0567-8064,
159,Acta paediatrica Scandinavica,211,Acta Paediatr Scand,Acta Paediatr Scand,0001-656X,


In [3]:
# Create a pubmed-scopus mapping
# The print and online ISSN columns are stacked into a single `issn` column, so a
# journal is linked to every Scopus source that shares either of its ISSNs.
# The result is the distinct (journal_nlm_id, scopus_id) pairs, sorted, and is
# also written to data/pubmed-map.tsv.
pubmed_map_df = journal_df |>
  tidyr::gather(key = 'issn_type', value = 'issn', `ISSN (Print)`, `ISSN (Online)`) |>
  dplyr::transmute(journal_nlm_id = NlmId, issn) |>
  # Journals lacking a print or online ISSN produce NA here. dplyr joins match
  # NA to NA by default, so drop them instead of relying on issn_df being NA-free.
  dplyr::filter(!is.na(issn)) |>
  dplyr::distinct() |>
  # Explicit key: the join must be on ISSN only, whatever other columns issn_df has.
  dplyr::inner_join(issn_df, by = 'issn') |>
  dplyr::select(journal_nlm_id, scopus_id) |>
  dplyr::distinct() |>
  dplyr::arrange(journal_nlm_id, scopus_id)

pubmed_map_df |>
  readr::write_tsv(file.path('data', 'pubmed-map.tsv'))

head(pubmed_map_df, 2)

Joining, by = "issn"



journal_nlm_id,scopus_id
<chr>,<chr>
201,72678
211,40958


In [4]:
# Load the journalmetrics table from data/metrics.tsv.gz.
# scopus_id is parsed as character; the other columns are left to readr's guessing.
metrics_path = file.path('data', 'metrics.tsv.gz')
col_types = list(scopus_id = readr::col_character())
metric_df = readr::read_tsv(metrics_path, col_types = col_types)

In [5]:
# Transform metrics to pubmed
# Each Scopus metric row is attached to the NLM journal(s) its scopus_id maps to.
# When several Scopus sources map to the same journal, their values are averaged
# per (journal_nlm_id, year, metric).
#
# NOTE: this cell overwrites metric_df, so it is not idempotent. The explicit
# `by = 'scopus_id'` makes an accidental re-run fail with an error (scopus_id has
# been dropped) instead of silently joining on whatever columns happen to match.
# Re-run the cell that reads metrics.tsv.gz first.
metric_df = pubmed_map_df |>
  dplyr::inner_join(metric_df, by = 'scopus_id') |>
  dplyr::select(-scopus_id) |>
  dplyr::group_by(journal_nlm_id, year, metric) |>
  # .groups = 'drop' returns an ungrouped result directly.
  dplyr::summarize(value = mean(value), .groups = 'drop')

head(metric_df, 2)

Joining, by = "scopus_id"

`summarise()` has grouped output by 'journal_nlm_id', 'year'. You can override using the `.groups` argument.



journal_nlm_id,year,metric,value
<chr>,<dbl>,<chr>,<dbl>
266,2011,CiteScore,4.5
266,2011,SJR,1.36


In [6]:
# Save pubmed metrics to a gzipped TSV
# The table is first written as plain TSV, then compressed in place by the
# external `gzip` program, which replaces the file with data/pubmed-metrics.tsv.gz.
path = file.path('data', 'pubmed-metrics.tsv')
metric_df |>
  readr::write_tsv(path)
# --force overwrites an existing .gz; --no-name leaves the original file name and
# timestamp out of the gzip header.
gzip_status = system2('gzip', c('--force', '--no-name', path))
# system2 returns the command's exit status; without this check a missing or
# failing gzip would leave an uncompressed file behind with no error raised.
if (gzip_status != 0) {
  stop('gzip exited with status ', gzip_status, ' while compressing ', path)
}