diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/CommonTaxonService.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/CommonTaxonService.java index 46a1b4d8..db71bc3a 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/CommonTaxonService.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/CommonTaxonService.java @@ -423,10 +423,10 @@ private void resolveHierarchyIfNeeded( Taxon resolvedTaxon ) { if (shouldResolveHierarchy(childParent, resolvedTaxon)) { - List pathNames = new ArrayList<>(); - List pathIds = new ArrayList<>(); List path = new ArrayList<>(); - + List pathIds = new ArrayList<>(); + List pathNames = new ArrayList<>(); + List pathAuthorships = new ArrayList<>(); path.add(StringUtils.defaultIfBlank(resolvedTaxon.getName(), "")); @@ -434,6 +434,8 @@ private void resolveHierarchyIfNeeded( pathNames.add(StringUtils.defaultIfBlank(resolvedTaxon.getRank(), "")); + pathAuthorships.add(StringUtils.defaultIfBlank(resolvedTaxon.getAuthorship(), "")); + T parent = childParent.get(focalTaxonKey); List visitedParents = new ArrayList(); visitedParents.add(focalTaxonKey); @@ -444,20 +446,23 @@ private void resolveHierarchyIfNeeded( if (parentTaxonProperties != null) { Taxon parentTaxon = TaxonUtil.mapToTaxon(parentTaxonProperties); path.add(StringUtils.defaultIfBlank(parentTaxon.getName(), "")); - pathNames.add(StringUtils.defaultIfBlank(parentTaxon.getRank(), "")); pathIds.add(parentTaxon.getExternalId()); + pathNames.add(StringUtils.defaultIfBlank(parentTaxon.getRank(), "")); + pathAuthorships.add(StringUtils.defaultIfBlank(parentTaxon.getAuthorship(), "")); } visitedParents.add(parent); parent = childParent.get(parent); } - Collections.reverse(pathNames); - Collections.reverse(pathIds); Collections.reverse(path); + Collections.reverse(pathIds); + Collections.reverse(pathNames); + Collections.reverse(pathAuthorships); resolvedTaxon.setPath(StringUtils.join(path, CharsetConstant.SEPARATOR)); resolvedTaxon.setPathIds(StringUtils.join(pathIds, CharsetConstant.SEPARATOR)); resolvedTaxon.setPathNames(StringUtils.join(pathNames, CharsetConstant.SEPARATOR)); + resolvedTaxon.setPathAuthorships(StringUtils.join(pathAuthorships, CharsetConstant.SEPARATOR)); } } diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonService.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonService.java index ba25f416..ba7a61ca 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonService.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonService.java @@ -99,7 +99,12 @@ private void parseRecord( return value; }); + Stream pathAuthorshipStream = RANKS + .stream() + .map(rank -> ""); + taxon.setPath(pathStream.collect(Collectors.joining(CharsetConstant.SEPARATOR))); + taxon.setPathAuthorships(pathAuthorshipStream.collect(Collectors.joining(CharsetConstant.SEPARATOR))); String rankNames = RANKS .stream() .map(rank -> StringUtils.remove(rank, '_')) diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonService.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonService.java index 2dda3407..8d7d4b7d 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonService.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonService.java @@ -27,6 +27,8 @@ import java.io.InputStreamReader; import java.net.URI; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; public class IndexFungorumTaxonService extends CommonLongTaxonService { private static final Logger LOG = LoggerFactory.getLogger(IndexFungorumTaxonService.class); @@ -74,7 +76,9 @@ private void parseNodes(Map> nodes, taxon.setAuthorship(authorship); } taxon.setPath(StringUtils.join(new String[]{kingdomName, phylumName, subphylumName, className, subclassName, orderName, familyName, completeName}, CharsetConstant.SEPARATOR)); - taxon.setPathNames(StringUtils.join(new String[]{"kingdom", "phylum", "subphylum", "class", "subclass", "order", "family", ""}, CharsetConstant.SEPARATOR)); + String[] ranks = {"kingdom", "phylum", "subphylum", "class", "subclass", "order", "family", ""}; + taxon.setPathNames(StringUtils.join(ranks, CharsetConstant.SEPARATOR)); + taxon.setPathAuthorships(Stream.of(ranks).map(r -> "").collect(Collectors.joining(CharsetConstant.SEPARATOR))); if (NumberUtils.isCreatable(taxId)) { Long taxonKey = Long.parseLong(taxId); registerIdForName(taxonKey, taxon, name2nodeIds); diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/MDDTaxonService.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/MDDTaxonService.java index 3c0e73b5..f09211c9 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/MDDTaxonService.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/MDDTaxonService.java @@ -95,8 +95,15 @@ void parseNodes(Map> taxonMap, : StringUtils.defaultIfBlank(value, ""); }); + Stream pathAuthorshipStream = RANKS + .stream() + .map(rank -> { + return ""; + }); + taxon.setPath(pathStream.collect(Collectors.joining(CharsetConstant.SEPARATOR))); taxon.setPathNames(String.join(CharsetConstant.SEPARATOR, RANKS)); + taxon.setPathAuthorships(pathAuthorshipStream.collect(Collectors.joining(CharsetConstant.SEPARATOR))); String id = "https://www.mammaldiversity.org/explore.html#genus=" + genus + "&species=" + specificEpithet + "&id=" + taxonId; @@ -124,19 +131,19 @@ void parseNodes(Map> taxonMap, Stream subspeciesTaxa = subspecies .stream() .map(subspecificEpithetAndAuthor -> { - String[] s = subspecificEpithetAndAuthor.split(" "); - String subspecificEpithet = s[0]; - String subspecificAuthorship = StringUtils.trim(RegExUtils.replaceFirst(subspecificEpithetAndAuthor, subspecificEpithet, "")); - Taxon subspecificTaxon = TaxonUtil.copy(taxon); - subspecificTaxon.setName(taxon.getName() + " " + subspecificEpithet); - subspecificTaxon.setPath(taxon.getPath() + CharsetConstant.SEPARATOR + subspecificEpithet); - subspecificTaxon.setPathNames(taxon.getPathNames() + CharsetConstant.SEPARATOR + "subspecificEpithet"); - subspecificTaxon.setAuthorship(subspecificAuthorship); - String suspecificId = taxon.getExternalId() + "&subspecies=" + subspecificEpithet; - subspecificTaxon.setExternalId(suspecificId); - subspecificTaxon.setExternalUrl(suspecificId); - return subspecificTaxon; - }); + String[] s = subspecificEpithetAndAuthor.split(" "); + String subspecificEpithet = s[0]; + String subspecificAuthorship = StringUtils.trim(RegExUtils.replaceFirst(subspecificEpithetAndAuthor, subspecificEpithet, "")); + Taxon subspecificTaxon = TaxonUtil.copy(taxon); + subspecificTaxon.setName(taxon.getName() + " " + subspecificEpithet); + subspecificTaxon.setPath(taxon.getPath() + CharsetConstant.SEPARATOR + subspecificEpithet); + subspecificTaxon.setPathNames(taxon.getPathNames() + CharsetConstant.SEPARATOR + "subspecificEpithet"); + subspecificTaxon.setAuthorship(subspecificAuthorship); + String suspecificId = taxon.getExternalId() + "&subspecies=" + subspecificEpithet; + subspecificTaxon.setExternalId(suspecificId); + subspecificTaxon.setExternalUrl(suspecificId); + return subspecificTaxon; + }); subspeciesTaxa.forEach(t -> { registerTaxon(taxonMap, name2nodeIds, t.getExternalId(), t); diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/NCBITaxonService.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/NCBITaxonService.java index c8c3333e..56b1f741 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/NCBITaxonService.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/NCBITaxonService.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; public class NCBITaxonService extends PropertyEnricherSimple implements TermMatcher { @@ -47,6 +48,7 @@ public class NCBITaxonService extends PropertyEnricherSimple implements TermMatc private static final String NAME_IDS = "nameIds"; private static final String SYNONYM_IDS = "synonymIds"; private static final String COMMON_NAME_IDS = "commonNamesIds"; + private static final String AUTHORITY_IDS = "authorityIds"; private final TermMatcherContext ctx; @@ -54,6 +56,7 @@ public class NCBITaxonService extends PropertyEnricherSimple implements TermMatc private BTreeMap> nameIds = null; private BTreeMap> synonymIds = null; private BTreeMap> commonNameIds = null; + private BTreeMap> authoritiesIds = null; private BTreeMap mergedNodes = null; private BTreeMap> ncbiDenormalizedNodes = null; @@ -224,6 +227,7 @@ private void lazyInit() throws PropertyEnricherException { nameIds = db.getTreeMap(NAME_IDS); synonymIds = db.getTreeMap(SYNONYM_IDS); commonNameIds = db.getTreeMap(COMMON_NAME_IDS); + authoritiesIds = db.getTreeMap(AUTHORITY_IDS); } else { LOG.info("NCBI taxonomy importing..."); StopWatch watch = new StopWatch(); @@ -271,6 +275,12 @@ private void lazyInit() throws PropertyEnricherException { .valueSerializer(Serializer.JAVA) .make(); + authoritiesIds = db + .createTreeMap(AUTHORITY_IDS) + .keySerializer(BTreeKeySerializer.STRING) + .valueSerializer(Serializer.JAVA) + .make(); + try { parseMerged(mergedNodes, ctx.retrieve(getMergedNodesUrl())); @@ -285,7 +295,14 @@ private void lazyInit() throws PropertyEnricherException { .make(); try { - parseNames(ctx.retrieve(getNamesUrl()), ncbiNames, nameIds, commonNameIds, synonymIds); + parseNames( + ctx.retrieve(getNamesUrl()), + ncbiNames, + nameIds, + commonNameIds, + synonymIds, + authoritiesIds + ); } catch (IOException e) { throw new PropertyEnricherException("failed to parse NCBI nodes", e); } @@ -296,7 +313,7 @@ private void lazyInit() throws PropertyEnricherException { .keySerializer(BTreeKeySerializer.STRING) .valueSerializer(Serializer.JAVA) .make(); - denormalizeTaxa(ncbiNodes, ncbiDenormalizedNodes, childParent, ncbiNames); + denormalizeTaxa(ncbiNodes, ncbiDenormalizedNodes, childParent, ncbiNames, authoritiesIds); watch.stop(); TaxonCacheService.logCacheLoadStats(watch.getTime(), ncbiNodes.size(), LOG); @@ -353,24 +370,37 @@ static void parseNodes(Map> taxonMap, Map> taxonMap, Map> taxonMapDenormalized, Map childParent, Map taxonNames) { + static void denormalizeTaxa( + Map> taxonMap, + Map> taxonMapDenormalized, + Map childParent, + Map taxonNames, + Map> authoritiesIds) { Set>> taxa = taxonMap.entrySet(); for (Map.Entry> taxon : taxa) { - denormalizeTaxa(taxonMap, taxonMapDenormalized, childParent, taxonNames, taxon); + denormalizeTaxa(taxonMap, taxonMapDenormalized, childParent, taxonNames, taxon, authoritiesIds); } } - private static void denormalizeTaxa(Map> taxonMap, Map> taxonEnrichMap, Map childParent, Map names, Map.Entry> taxon) { + private static void denormalizeTaxa( + Map> taxonMap, + Map> taxonEnrichMap, + Map childParent, + Map names, + Map.Entry> taxon, + Map> authorityIds) { Map childTaxon = taxon.getValue(); - List pathNames = new ArrayList<>(); - List pathIds = new ArrayList<>(); List path = new ArrayList<>(); + List pathIds = new ArrayList<>(); + List pathNames = new ArrayList<>(); + List pathAuthorships = new ArrayList<>(); Taxon origTaxon = TaxonUtil.mapToTaxon(childTaxon); - String str = names.get(origTaxon.getExternalId()); - origTaxon.setName(str); - path.add(StringUtils.defaultIfBlank(str, "")); + String name = names.get(origTaxon.getExternalId()); + origTaxon.setName(name); + path.add(StringUtils.defaultIfBlank(name, "")); + String externalId = origTaxon.getExternalId(); origTaxon.setExternalId(externalId); pathIds.add(StringUtils.defaultIfBlank(externalId, "")); @@ -378,33 +408,56 @@ private static void denormalizeTaxa(Map> taxonMap, M origTaxon.setRank(origTaxon.getRank()); pathNames.add(StringUtils.defaultIfBlank(origTaxon.getRank(), "")); + String authorship = getAuthorshipById(authorityIds, name, origTaxon.getExternalId()); + origTaxon.setAuthorship(authorship); + pathAuthorships.add(StringUtils.defaultIfBlank(origTaxon.getAuthorship(), "")); + String parent = childParent.get(taxon.getKey()); while (StringUtils.isNotBlank(parent) && !pathIds.contains(parent)) { Map stringStringMap = taxonMap.get(parent); if (stringStringMap != null) { Taxon parentTaxon = TaxonUtil.mapToTaxon(stringStringMap); - pathNames.add(StringUtils.defaultIfBlank(parentTaxon.getRank(), "")); + String parentName = names.get(parentTaxon.getExternalId()); + path.add(StringUtils.defaultIfBlank(parentName, "")); + + String parentAuthorship = getAuthorshipById(authorityIds, parentName, parentTaxon.getExternalId()); + pathIds.add(StringUtils.defaultIfBlank(parentTaxon.getExternalId(), "")); - path.add(StringUtils.defaultIfBlank(names.get(parentTaxon.getExternalId()), "")); + pathNames.add(StringUtils.defaultIfBlank(parentTaxon.getRank(), "")); + pathAuthorships.add(StringUtils.defaultIfBlank(parentAuthorship, "")); } parent = childParent.get(parent); } - Collections.reverse(pathNames); - Collections.reverse(pathIds); Collections.reverse(path); + Collections.reverse(pathIds); + Collections.reverse(pathNames); + Collections.reverse(pathAuthorships); origTaxon.setPath(StringUtils.join(path, CharsetConstant.SEPARATOR)); origTaxon.setPathIds(StringUtils.join(pathIds, CharsetConstant.SEPARATOR)); origTaxon.setPathNames(StringUtils.join(pathNames, CharsetConstant.SEPARATOR)); + origTaxon.setPathAuthorships(StringUtils.join(pathAuthorships, CharsetConstant.SEPARATOR)); taxonEnrichMap.put(taxon.getKey(), TaxonUtil.taxonToMap(origTaxon)); } + private static String getAuthorshipById(Map> authorityIds, String name, String externalId) { + List authorships = authorityIds.get(externalId); + List collect = authorships == null + ? Collections.emptyList() + : authorships.stream().filter(auth -> StringUtils.startsWith(auth, name)).collect(Collectors.toList()); + + return collect.size() == 0 + ? "" + : StringUtils.defaultIfBlank(StringUtils.trim(StringUtils.replace(collect.get(0), name, "")), ""); + } + static void parseNames(InputStream resourceAsStream, Map nameMap, Map> nameIds, Map> commonNameIds, - Map> synonymIds) throws PropertyEnricherException { + Map> synonymIds, + Map> authorityIds) throws PropertyEnricherException { BufferedReader reader = new BufferedReader(new InputStreamReader(resourceAsStream)); String line; @@ -434,15 +487,15 @@ static void parseNames(InputStream resourceAsStream, Map nameMap "teleomorph", "type material"); + String ncbiTaxonId = TaxonomyProvider.ID_PREFIX_NCBI + taxId; if (StringUtils.equals("scientific name", taxonNameClass)) { - String ncbiTaxonId = TaxonomyProvider.ID_PREFIX_NCBI + taxId; nameMap.put(ncbiTaxonId, taxonName); addIdMapEntry(nameIds, taxonName, ncbiTaxonId); + } else if (StringUtils.equals("authority", taxonNameClass)) { + addIdMapEntry(authorityIds, ncbiTaxonId, taxonName); } else if (StringUtils.equals("synonym", taxonNameClass)) { - String ncbiTaxonId = TaxonomyProvider.ID_PREFIX_NCBI + taxId; addIdMapEntry(synonymIds, taxonName, ncbiTaxonId); } else if (Arrays.asList("genbank common name", "common name").contains(taxonNameClass)) { - String ncbiTaxonId = TaxonomyProvider.ID_PREFIX_NCBI + taxId; addIdMapEntry(commonNameIds, taxonName, ncbiTaxonId); } @@ -453,17 +506,17 @@ static void parseNames(InputStream resourceAsStream, Map nameMap } } - private static void addIdMapEntry(Map> nameIds, - String taxonName, - String key) { - List ids = nameIds.get(taxonName); - if (ids == null) { - ids = new ArrayList<>(); + private static void addIdMapEntry(Map> lookupTable, + String key, + String value) { + List values = lookupTable.get(key); + if (values == null) { + values = new ArrayList<>(); } - if (!ids.contains(key)) { - ids.add(key); + if (!values.contains(value)) { + values.add(value); } - nameIds.put(taxonName, ids); + lookupTable.put(key, values); } static void parseMerged(Map mergedMap, InputStream resourceAsStream) throws PropertyEnricherException { diff --git a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/TabularTaxonUtil.java b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/TabularTaxonUtil.java index 7b1689f3..4f5b1072 100644 --- a/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/TabularTaxonUtil.java +++ b/nomer-taxon-resolver/src/main/java/org/globalbioticinteractions/nomer/match/TabularTaxonUtil.java @@ -22,6 +22,7 @@ import static org.eol.globi.domain.PropertyAndValueDictionary.EXTERNAL_ID; import static org.eol.globi.domain.PropertyAndValueDictionary.NAME_SOURCE; import static org.eol.globi.domain.PropertyAndValueDictionary.PATH; +import static org.eol.globi.domain.PropertyAndValueDictionary.PATH_AUTHORSHIPS; import static org.eol.globi.domain.PropertyAndValueDictionary.PATH_NAMES; public class TabularTaxonUtil { @@ -81,6 +82,7 @@ public static Triple parseNameRelations(LabeledCSVParser Map taxonMap = new TreeMap<>(); List path = new ArrayList<>(); List pathNames = new ArrayList<>(); + List pathAuthorships = new ArrayList<>(); for (TabularColumn orderedRank : ORDERED_RANKS) { String value = labeledCSVParser.getValueByLabel(orderedRank.getColumnName()); @@ -88,6 +90,7 @@ public static Triple parseNameRelations(LabeledCSVParser && !StringUtils.equals(value, "NA")) { path.add(value); pathNames.add(orderedRank.getColumnName()); + pathAuthorships.add(""); } } @@ -104,6 +107,7 @@ public static Triple parseNameRelations(LabeledCSVParser taxonMap.put(PATH, StringUtils.join(path, CharsetConstant.SEPARATOR)); taxonMap.put(PATH_NAMES, StringUtils.join(pathNames, CharsetConstant.SEPARATOR)); + taxonMap.put(PATH_AUTHORSHIPS, StringUtils.join(pathAuthorships, CharsetConstant.SEPARATOR)); List collect = ORDERED_RANKS .stream() diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/CatalogueOfLifeTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/CatalogueOfLifeTaxonServiceTest.java index f4493a9a..414f282a 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/CatalogueOfLifeTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/CatalogueOfLifeTaxonServiceTest.java @@ -30,6 +30,7 @@ public void enrichById() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Phryganellidae | Phryganella")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("COL:625ZT | COL:63MJH")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("family | genus")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is(" | ")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/GBIFTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/GBIFTaxonServiceTest.java index 0a771c87..5ffc2d3e 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/GBIFTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/GBIFTaxonServiceTest.java @@ -32,6 +32,7 @@ public void enrichById() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("species")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("GBIF:3220631")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("Lien & Beeder, 1997")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonServiceTest.java index 78df516a..5d63acbf 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/HesperomysTaxonServiceTest.java @@ -46,6 +46,7 @@ private void assertRhinolophusSinicus(Map enriched) { assertThat(enrichedTaxon.getAuthorship(), is("Andersen, 1905")); assertThat(enrichedTaxon.getPath(), is("Mammalia | Chiroptera | Rhinolophidae | Rhinolophus | sinicus | sinicus")); assertThat(enrichedTaxon.getPathNames(), is("class | order | family | genus | specificEpithet | subspecificEpithet")); + assertThat(enrichedTaxon.getPathAuthorships(), is(" | | | | | ")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/ITISTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/ITISTaxonServiceTest.java index 97aed942..b40ffb38 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/ITISTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/ITISTaxonServiceTest.java @@ -37,6 +37,7 @@ public void enrichById() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getAuthorship(), is("ITIS:AUTHORSHIP:177805")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("ITIS:956340 | ITIS:57")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("family | genus")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("ITIS:AUTHORSHIP:184763 | ITIS:AUTHORSHIP:177805")); } @Test @@ -53,6 +54,7 @@ public void enrichById2() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Ariopsis felis")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("ITIS:680665")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("(Linnaeus, 1766)")); } @Test @@ -68,6 +70,7 @@ public void enrichByName() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("genus")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("ITIS:956340 | ITIS:57")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("family | genus")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("ITIS:AUTHORSHIP:184763 | ITIS:AUTHORSHIP:177805")); } @Test @@ -83,6 +86,7 @@ public void enrichMerged() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("genus")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("ITIS:956340 | ITIS:57")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("family | genus")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("ITIS:AUTHORSHIP:184763 | ITIS:AUTHORSHIP:177805")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonServiceTest.java index 387b4a4c..1db637ec 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/IndexFungorumTaxonServiceTest.java @@ -43,6 +43,7 @@ private void assertIF808518(Map enriched) { assertThat(TaxonUtil.mapToTaxon(enriched).getAuthorship(), is("(Pers.) Vizzini, P. Alvarado, G. Moreno & Consiglio, 2015")); assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Fungi | Basidiomycota | Agaricomycotina | Agaricomycetes | Agaricomycetidae | Agaricales | Incertae sedis | Leucocybe candicans")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("kingdom | phylum | subphylum | class | subclass | order | family | ")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is(" | | | | | | | ")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/MDDTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/MDDTaxonServiceTest.java index 174c939c..dda455f9 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/MDDTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/MDDTaxonServiceTest.java @@ -47,6 +47,7 @@ private void assertRhinolophusSinicus(Map enriched) { assertThat(enrichedTaxon.getAuthorship(), is("K. Andersen, 1905")); assertThat(enrichedTaxon.getPath(), is("Theria | Placentalia | Boreoeutheria | Laurasiatheria | Chiroptera | Pteropodiformes | | | Rhinolophoidea | Rhinolophidae | | | Rhinolophus | | sinicus")); assertThat(enrichedTaxon.getPathNames(), is("subclass | infraclass | magnorder | superorder | order | suborder | infraorder | parvorder | superfamily | family | subfamily | tribe | genus | subgenus | specificEpithet")); + assertThat(enrichedTaxon.getPathAuthorships(), is(" | | | | | | | | | | | | | | ")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/NCBITaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/NCBITaxonServiceTest.java index 5e0d1daa..86a2fb2c 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/NCBITaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/NCBITaxonServiceTest.java @@ -8,13 +8,10 @@ import org.eol.globi.service.TaxonUtil; import org.eol.globi.taxon.TermMatchListener; import org.globalbioticinteractions.nomer.cmd.OutputFormat; -import org.globalbioticinteractions.nomer.util.TermMatcherContext; import org.junit.Test; import java.io.File; -import java.io.IOException; import java.io.InputStream; -import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -33,18 +30,37 @@ public class NCBITaxonServiceTest { @Test - public void enrich() throws PropertyEnricherException { + public void enrichSuperkingdom() throws PropertyEnricherException { NCBITaxonService service = createService(); String externalId = "NCBI:2"; Map enriched = service.enrich(TaxonUtil.taxonToMap(new TaxonImpl(null, externalId))); - assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Bacteria")); - assertThat(TaxonUtil.mapToTaxon(enriched).getExternalId(), is("NCBI:2")); assertThat(TaxonUtil.mapToTaxon(enriched).getName(), is("Bacteria")); + assertThat(TaxonUtil.mapToTaxon(enriched).getExternalId(), is("NCBI:2")); assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("superkingdom")); + assertThat(TaxonUtil.mapToTaxon(enriched).getAuthorship(), is("")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Bacteria")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("NCBI:2")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("superkingdom")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("")); + } + + @Test + public void enrichSpecies() throws PropertyEnricherException { + NCBITaxonService service = createService(); + + String externalId = "NCBI:385028"; + Map enriched = service.enrich(TaxonUtil.taxonToMap(new TaxonImpl(null, externalId))); + + assertThat(TaxonUtil.mapToTaxon(enriched).getName(), is("Anteholosticha manca")); + assertThat(TaxonUtil.mapToTaxon(enriched).getExternalId(), is("NCBI:385028")); + assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getAuthorship(), is("(Kahl, 1932) Berger, 2003")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("NCBI:584654 | NCBI:385028")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is(" | Anteholosticha manca")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("genus | species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is(" | (Kahl, 1932) Berger, 2003")); } @Test @@ -298,14 +314,21 @@ public void denormalizeTaxa() throws PropertyEnricherException { put("2", "two name"); }}; + Map> authorityNames = new TreeMap>() {{ + put("1", Arrays.asList("one name Doe 2021")); + put("2", Arrays.asList("two name Doe 1758")); + }}; - NCBITaxonService.denormalizeTaxa(taxonMap, taxonMapDenormalized, childParent, taxonNames); + + NCBITaxonService.denormalizeTaxa(taxonMap, taxonMapDenormalized, childParent, taxonNames, authorityNames); Taxon actual = TaxonUtil.mapToTaxon(taxonMapDenormalized.get("1")); assertThat(actual.getPath(), is("two name | one name")); assertThat(actual.getPathIds(), is("2 | 1")); assertThat(actual.getPathNames(), is("rank two | rank one")); + assertThat(actual.getPathAuthorships(), is("Doe 1758 | Doe 2021")); assertThat(actual.getRank(), is("rank one")); + assertThat(actual.getAuthorship(), is("Doe 2021")); assertThat(actual.getName(), is("one name")); assertThat(actual.getExternalId(), is("1")); @@ -313,9 +336,11 @@ public void denormalizeTaxa() throws PropertyEnricherException { assertThat(two.getPath(), is("two name")); assertThat(two.getPathIds(), is("2")); assertThat(two.getPathNames(), is("rank two")); - assertThat(two.getRank(), is("rank two")); + assertThat(two.getPathAuthorships(), is("Doe 1758")); assertThat(two.getName(), is("two name")); assertThat(two.getExternalId(), is("2")); + assertThat(two.getRank(), is("rank two")); + assertThat(two.getAuthorship(), is("Doe 1758")); } @@ -325,11 +350,12 @@ public void parseNames() throws PropertyEnricherException { Map nameMap = new TreeMap<>(); Map> nameIds = new TreeMap<>(); Map> commonNameIds = new TreeMap<>(); + Map> nameAuthorities = new TreeMap<>(); Map> synonymIds = new TreeMap<>(); InputStream namesStream = getClass().getResourceAsStream("/org/globalbioticinteractions/nomer/match/ncbi/names.dmp"); - NCBITaxonService.parseNames(namesStream, nameMap, nameIds, commonNameIds, synonymIds); + NCBITaxonService.parseNames(namesStream, nameMap, nameIds, commonNameIds, synonymIds, nameAuthorities); assertThat(nameMap.size(), is(3)); assertThat(nameMap.get("NCBI:1"), is("root")); @@ -339,6 +365,9 @@ public void parseNames() throws PropertyEnricherException { assertThat(nameIds.get("Anteholosticha manca"), hasItem("NCBI:385028")); assertThat(synonymIds.get("Holosticha manca"), hasItem("NCBI:385028")); assertThat(commonNameIds.get("eubacteria"), hasItem("NCBI:2")); + assertThat(nameAuthorities.get("NCBI:385028").size(), is(2)); + assertThat(nameAuthorities.get("NCBI:385028"), hasItem("Holosticha manca Kahl, 1932")); + assertThat(nameAuthorities.get("NCBI:385028"), hasItem("Anteholosticha manca (Kahl, 1932) Berger, 2003")); } diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/PBDBTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/PBDBTaxonServiceTest.java index bda520c4..d192873a 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/PBDBTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/PBDBTaxonServiceTest.java @@ -40,6 +40,7 @@ private void assertTruncatedHomoSapiens(Map enriched) { assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Hominini | Homo | Homo sapiens")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("PBDB:91486 | PBDB:40901 | PBDB:83088")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("tribe | genus | species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is(" | C. Linnaeus 1758 | C. Linnaeus 1758")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/TPTTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/TPTTaxonServiceTest.java index b36702b7..59776452 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/TPTTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/TPTTaxonServiceTest.java @@ -44,6 +44,7 @@ private void assertGardineri(Map enriched) { assertThat(mapToTaxon(enriched).getRank(), is("species")); assertThat(mapToTaxon(enriched).getPath(), is("Animalia | Arthropoda | Arachnida | Holothyrida | Holothyridae | Dicrogonatus | gardineri")); assertThat(mapToTaxon(enriched).getPathNames(), is("kingdom | phylum | class | order | family | genus | specificEpithet")); + assertThat(mapToTaxon(enriched).getPathAuthorships(), is(" | | | | | | ")); } @Test diff --git a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/WorldOfFloraOnlineTaxonServiceTest.java b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/WorldOfFloraOnlineTaxonServiceTest.java index ed2b11a3..b8ca9335 100644 --- a/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/WorldOfFloraOnlineTaxonServiceTest.java +++ b/nomer-taxon-resolver/src/test/java/org/globalbioticinteractions/nomer/match/WorldOfFloraOnlineTaxonServiceTest.java @@ -44,6 +44,7 @@ public void assertEnrichById(PropertyEnricherSimple service) throws PropertyEnri assertThat(TaxonUtil.mapToTaxon(enriched).getPath(), is("Syneilesis | Syneilesis palmata")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathIds(), is("WFO:4000037295 | WFO:0000000100")); assertThat(TaxonUtil.mapToTaxon(enriched).getPathNames(), is("genus | species")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("Maxim. | Maxim.")); } @Test @@ -140,6 +141,7 @@ public void ensureTrimmingOfDoubleQuotes() throws PropertyEnricherException { assertThat(TaxonUtil.mapToTaxon(enriched).getAuthorship(), is("Rodriguez & Alfonso")); assertThat(TaxonUtil.mapToTaxon(enriched).getRank(), is("species")); assertThat(TaxonUtil.mapToTaxon(enriched).getName(), is("Cyperus violifolia")); + assertThat(TaxonUtil.mapToTaxon(enriched).getPathAuthorships(), is("Rodriguez & Alfonso")); } @Test diff --git a/nomer/src/main/java/org/globalbioticinteractions/nomer/util/AppenderUtil.java b/nomer/src/main/java/org/globalbioticinteractions/nomer/util/AppenderUtil.java index 93ff88fc..618cd79b 100644 --- a/nomer/src/main/java/org/globalbioticinteractions/nomer/util/AppenderUtil.java +++ b/nomer/src/main/java/org/globalbioticinteractions/nomer/util/AppenderUtil.java @@ -55,6 +55,8 @@ public static String valueForTaxonProperty(Taxon taxon, colValue = taxon.getPath(); } else if (StringUtils.equalsIgnoreCase(taxonPropertyName, "path.rank")) { colValue = taxon.getPathNames(); + } else if (StringUtils.equalsIgnoreCase(taxonPropertyName, "path.authorship")) { + colValue = taxon.getPathAuthorships(); } else if (StringUtils.startsWith(taxonPropertyName, "path.") && ranks.size() > 0 && ranks.size() == ids.size() diff --git a/nomer/src/test/java/org/globalbioticinteractions/nomer/util/AppenderTSVTest.java b/nomer/src/test/java/org/globalbioticinteractions/nomer/util/AppenderTSVTest.java index dcf4b765..ef678bee 100644 --- a/nomer/src/test/java/org/globalbioticinteractions/nomer/util/AppenderTSVTest.java +++ b/nomer/src/test/java/org/globalbioticinteractions/nomer/util/AppenderTSVTest.java @@ -148,6 +148,18 @@ public void appendWithFullPath() { assertThat(out.toString(), is("col1\tcol2\tSAME_AS\tpath1 | path2\tpathId1 | pathId2\tpathName1 | pathName2\n")); } + @Test + public void appendWithFullPathAndAuthorities() { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + appendTo(new AppenderTSV(new HashMap() {{ + put(0, "path.name"); + put(1, "path.id"); + put(2, "path.rank"); + put(3, "path.authorship"); + }}), out); + assertThat(out.toString(), is("col1\tcol2\tSAME_AS\tpath1 | path2\tpathId1 | pathId2\tpathName1 | pathName2\tauth1 | auth2\n")); + } + private void appendTo(Appender appender, ByteArrayOutputStream out) { String[] row = {"col1", "col2"}; TaxonImpl provided = new TaxonImpl("providedName", "providedId"); @@ -155,6 +167,7 @@ private void appendTo(Appender appender, ByteArrayOutputStream out) { resolved.setPath("path1 | path2"); resolved.setPathIds("pathId1 | pathId2"); resolved.setPathNames("pathName1 | pathName2"); + resolved.setPathAuthorships("auth1 | auth2"); resolved.setRank("resolvedRank"); resolved.setNameSource("resolvedCatalog"); resolved.setNameSourceURL("resolvedCatalogURL");