From 4a0ff680aac194489612860ade95ccaf01f7fcea Mon Sep 17 00:00:00 2001 From: Jake Hughey Date: Tue, 23 May 2023 18:03:17 -0500 Subject: [PATCH] updated parsing of empty investigator fields. --- CRAN-SUBMISSION | 6 +++--- DESCRIPTION | 2 +- NEWS.md | 3 +++ R/parse_element.R | 3 ++- R/parse_person.R | 4 ++-- cran-comments.md | 6 +++--- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index d656ba1..733938c 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 1.0.15 -Date: 2022-12-13 21:38:16 UTC -SHA: 4b014e60c1e04e8711dd6613ac5e63a90888fdd6 +Version: 1.0.16 +Date: 2023-02-12 15:41:49 UTC +SHA: b0cda50ec63986ee6160581dc70e904cf88a3dec diff --git a/DESCRIPTION b/DESCRIPTION index 7931a46..966af44 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pmparser Title: Create and Maintain a Relational Database of Data from PubMed/MEDLINE -Version: 1.0.16 +Version: 1.0.17 Authors@R: c(person('Jake', 'Hughey', , 'jakejhughey@gmail.com', c('aut', 'cre')), person('Josh', 'Schoenbachler', , 'josh.schoenbachler@gmail.com', 'aut'), person('Elliot', 'Outland', , 'ehoutland@gmail.com', 'aut')) diff --git a/NEWS.md b/NEWS.md index f157ec1..8f6ceec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# pmparser 1.0.17 +* Updated parsing of empty investigator fields. + # pmparser 1.0.16 * Updated readme table in test standards. diff --git a/R/parse_element.R b/R/parse_element.R index 49f9739..b299d70 100644 --- a/R/parse_element.R +++ b/R/parse_element.R @@ -325,7 +325,8 @@ parseMesh = function(pmXml, dPmid, con = NULL, tableSuffix = NULL) { x5 = xml_find_all(x4, './/QualifierName', flatten = FALSE) nQualPerDesc = lengths(x5) - descPos = unlist(lapply(nDescPerPmid[nDescPerPmid > 0], function(n) 1:n)) + descPos = unlist(lapply( + nDescPerPmid[nDescPerPmid > 0], function(n) seq_len(n))) if (length(nQualPerDesc) > 0 && sum(nQualPerDesc) > 0) { x6 = data.table( diff --git a/R/parse_person.R b/R/parse_person.R index 6331218..0af1177 100644 --- a/R/parse_person.R +++ b/R/parse_person.R @@ -108,8 +108,8 @@ parsePerson = function(pmXml, dPmid, con = NULL, tableSuffix = NULL, x10[is.na(n_affil_ids), n_affil_ids := 0] x10[, n_person_ids := n_total_ids - n_affil_ids] - if (nrow(x10) > 0) { - x11 = x10[n_person_ids > 0, .(id_pos = 1:n_person_ids), by = cols] + if (nrow(x10) > 0 && any(x10$n_person_ids > 0)) { + x11 = x10[n_person_ids > 0, .(id_pos = seq_len(n_person_ids)), by = cols] dAllId[, id_pos := seq_len(.N), by = cols] dPersonId = merge(dAllId, x11, by = c(cols, 'id_pos')) diff --git a/cran-comments.md b/cran-comments.md index 84634f1..0b046e6 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -16,14 +16,14 @@ Found the following files/directories: 'lastMiKTeXException' -See results for [Windows](https://builder.r-hub.io/status/pmparser_1.0.16.tar.gz-7c295fe76dd64afdac4541030567c42f), [Ubuntu](https://builder.r-hub.io/status/pmparser_1.0.16.tar.gz-aa5fdb1f25ae49aabe4188b7cffba9ab), and [Fedora](https://builder.r-hub.io/status/pmparser_1.0.16.tar.gz-bf42f4c7ae324c94bcd7a0fb8ca9b8cc). +See results for [Windows](https://builder.r-hub.io/status/pmparser_1.0.17.tar.gz-aac25aa5452a4d71849cc4b1efc67ced), [Ubuntu](https://builder.r-hub.io/status/pmparser_1.0.17.tar.gz-a892eda3b5a542dc9a28e7a330f41b06), and [Fedora](https://builder.r-hub.io/status/pmparser_1.0.17.tar.gz-b9631972a174459a833eebd5f0744c95). ### GitHub Actions 0 errors ✓ | 0 warnings ✓ | 0 notes ✓ -See results for Mac, Windows, and Ubuntu [here](https://github.com/hugheylab/pmparser/actions/runs/4156795437). +See results for Mac, Windows, and Ubuntu [here](). ## Changes from current CRAN release -The test standards have been updated to include the latest version of the readme file on PubMed, which was the source of the test failures. +* Updated parsing of empty investigator fields.