-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SO-2127 snomed rf2 importer issues #122
Changes from 1 commit
54b1ff8
3fe1824
0a46423
c44cee9
df2aa25
ce44373
fb31a71
7ebd046
9613dd5
923c222
d38dcf8
0f9b4a1
6de7228
51b9bc3
0331d77
c59ecd7
9b96893
a01efba
59e33b5
35eefec
f7a3f56
8934bf7
07770db
d2f5bce
d3e48b6
4105458
e3e04ed
efecf53
1b80174
b780da1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,14 +16,15 @@ | |
package com.b2international.snowowl.snomed.importer.rf2.validation; | ||
|
||
import static com.b2international.snowowl.snomed.common.ContentSubType.SNAPSHOT; | ||
import static com.google.common.collect.Sets.newHashSet; | ||
import static com.google.common.collect.Lists.newArrayListWithExpectedSize; | ||
import static java.util.Collections.emptySet; | ||
import static org.slf4j.LoggerFactory.getLogger; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
|
@@ -36,12 +37,14 @@ | |
import com.b2international.snowowl.snomed.SnomedConstants.Concepts; | ||
import com.b2international.snowowl.snomed.datastore.index.entry.SnomedRelationshipIndexEntry; | ||
import com.b2international.snowowl.snomed.datastore.taxonomy.InvalidRelationship; | ||
import com.b2international.snowowl.snomed.datastore.taxonomy.InvalidRelationship.MissingConcept; | ||
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyBuilder; | ||
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyBuilderResult; | ||
import com.b2international.snowowl.snomed.importer.net4j.DefectType; | ||
import com.b2international.snowowl.snomed.importer.net4j.ImportConfiguration; | ||
import com.b2international.snowowl.snomed.importer.net4j.SnomedIncompleteTaxonomyValidationDefect; | ||
import com.b2international.snowowl.snomed.importer.net4j.SnomedValidationDefect; | ||
import com.b2international.snowowl.snomed.importer.net4j.TaxonomyDefect; | ||
import com.b2international.snowowl.snomed.importer.rf2.RepositoryState; | ||
import com.b2international.snowowl.snomed.importer.rf2.util.Rf2FileModifier; | ||
import com.google.common.collect.ArrayListMultimap; | ||
|
@@ -56,6 +59,19 @@ | |
public class SnomedTaxonomyValidator { | ||
|
||
private static final Logger LOGGER = getLogger(SnomedTaxonomyValidator.class); | ||
|
||
// Orders effective time keys for the non-snapshot validation loop. | ||
// Non-empty keys are compared with String.compareTo; the empty string sorts after every non-empty key. | ||
// Two empty keys compare as equal through the final compareTo call. | ||
// Arguments are not null-checked, so a null key fails with a NullPointerException on isEmpty(). | ||
// NOTE(review): presumably the empty key stands for rows without a published effective time - confirm against Rf2FileModifier.split. | ||
private static final Comparator<String> EFFECTIVE_TIME_COMPARATOR = new Comparator<String>() { | ||
@Override | ||
public int compare(String o1, String o2) { | ||
// consider empty greater than non-empty | ||
if (o1.isEmpty() && !o2.isEmpty()) { | ||
return 1; | ||
} else if (!o1.isEmpty() && o2.isEmpty()) { | ||
return -1; | ||
} | ||
return o1.compareTo(o2); | ||
} | ||
}; | ||
|
||
// new RF2 state | ||
private final File conceptsFile; | ||
|
@@ -112,105 +128,17 @@ public Collection<SnomedValidationDefect> validate() { | |
*/ | ||
private Collection<SnomedValidationDefect> doValidate() { | ||
try { | ||
final Rf2BasedSnomedTaxonomyBuilder builder = Rf2BasedSnomedTaxonomyBuilder.newInstance(new SnomedTaxonomyBuilder(conceptIds, statements), characteristicType); | ||
|
||
final Multimap<String, InvalidRelationship> invalidRelationships = ArrayListMultimap.create(); | ||
|
||
if (snapshot) { | ||
|
||
LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files..."); | ||
|
||
|
||
if (hasConceptImport()) { | ||
final String conceptFilePath = removeConceptHeader(); | ||
builder.applyNodeChanges(conceptFilePath); | ||
} | ||
|
||
if (hasRelationshipImport()) { | ||
final String relationshipFilePath = removeRelationshipHeader(); | ||
builder.applyEdgeChanges(relationshipFilePath); | ||
} | ||
|
||
final SnomedTaxonomyBuilderResult result = builder.build(); | ||
if (!result.getStatus().isOK()) { | ||
invalidRelationships.putAll("", result.getInvalidRelationships()); | ||
} | ||
} else { | ||
|
||
LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files..."); | ||
|
||
final Map<String, File> conceptFiles = hasConceptImport() ? Rf2FileModifier.split(conceptsFile) : ImmutableMap.<String, File>of(); | ||
final Map<String, File> relationshipFiles = hasRelationshipImport() ? Rf2FileModifier.split(relationshipsFile) : ImmutableMap.<String, File>of(); | ||
|
||
final List<String> effectiveTimes = ImmutableSortedSet.<String>naturalOrder() | ||
.addAll(conceptFiles.keySet()) | ||
.addAll(relationshipFiles.keySet()) | ||
.build() | ||
.asList(); | ||
|
||
|
||
for (final String effectiveTime : effectiveTimes) { | ||
LOGGER.info("Validating concepts and relationships from '" + effectiveTime + "'..."); | ||
|
||
final File conceptFile = conceptFiles.get(effectiveTime); | ||
final File relationshipFile = relationshipFiles.get(effectiveTime); | ||
|
||
builder.applyNodeChanges(getFilePath(conceptFile)); | ||
builder.applyEdgeChanges(getFilePath(relationshipFile)); | ||
final SnomedTaxonomyBuilderResult result = builder.build(); | ||
if (!result.getStatus().isOK()) { | ||
invalidRelationships.putAll(effectiveTime, result.getInvalidRelationships()); | ||
} | ||
} | ||
} | ||
|
||
final Multimap<String, InvalidRelationship> invalidRelationships = processTaxonomy(); | ||
if (!invalidRelationships.isEmpty()) { | ||
final Collection<String> defects = newHashSet(); | ||
final Collection<String> conceptIdsToInactivate = newHashSet(); | ||
|
||
final Collection<TaxonomyDefect> defects = newArrayListWithExpectedSize(invalidRelationships.size()); | ||
for (final String effectiveTime : invalidRelationships.keySet()) { | ||
for (final InvalidRelationship invalidRelationship: invalidRelationships.get(effectiveTime)) { | ||
final String sourceId = Long.toString(invalidRelationship.getSourceId()); | ||
final String destinationId = Long.toString(invalidRelationship.getDestinationId()); | ||
|
||
if (conceptIds.contains(invalidRelationship.getDestinationId())) { | ||
conceptIdsToInactivate.add(destinationId); | ||
} | ||
|
||
if (conceptIds.contains(invalidRelationship.getSourceId())) { | ||
conceptIdsToInactivate.add(sourceId); | ||
} | ||
|
||
final StringBuilder sb = new StringBuilder(); | ||
sb.append("IS A relationship"); | ||
sb.append(" '" + invalidRelationship.getRelationshipId() + "'"); | ||
sb.append(" has a missing or inactive "); | ||
|
||
switch (invalidRelationship.getMissingConcept()) { | ||
case DESTINATION: | ||
sb.append("destination concept"); | ||
sb.append(" '" + destinationId); | ||
break; | ||
case SOURCE: | ||
sb.append("source concept"); | ||
sb.append(" '" + sourceId); | ||
break; | ||
default: | ||
throw new IllegalStateException("Unexpected missing concept type '" + invalidRelationship.getMissingConcept() + "'."); | ||
} | ||
|
||
sb.append("' in effectiveTime "); | ||
sb.append("".equals(effectiveTime) ? "Unpublished/Undefined" : effectiveTime); | ||
sb.append("'."); | ||
|
||
defects.add(sb.toString()); | ||
defects.add(new TaxonomyDefect(invalidRelationship.getRelationshipId(), effectiveTime, invalidRelationship.getMissingConcept() == MissingConcept.DESTINATION ? TaxonomyDefect.Type.MISSING_DESTINATION : TaxonomyDefect.Type.MISSING_SOURCE, invalidRelationship.getMissingConceptId())); | ||
} | ||
} | ||
|
||
final SnomedValidationDefect defect = new SnomedIncompleteTaxonomyValidationDefect(relationshipsFile.getName(), defects, conceptIdsToInactivate); | ||
return Collections.<SnomedValidationDefect>singleton(defect); | ||
return Collections.singleton(new SnomedIncompleteTaxonomyValidationDefect(relationshipsFile.getName(), defects)); | ||
} | ||
|
||
} catch (final IOException e) { | ||
LOGGER.error("Validation failed.", e); | ||
return Collections.<SnomedValidationDefect>singleton(new SnomedValidationDefect(relationshipsFile.getName(), DefectType.IO_PROBLEM, Collections.<String>emptySet())); | ||
|
@@ -220,6 +148,59 @@ private Collection<SnomedValidationDefect> doValidate() { | |
return emptySet(); | ||
} | ||
|
||
// Feeds the RF2 concept and relationship files into an Rf2BasedSnomedTaxonomyBuilder and collects every invalid relationship the builder reports. | ||
// Snapshot mode: concept and relationship changes are applied once, build() runs once, and failures are stored under the empty string key. | ||
// Otherwise: both files are split into per-key files via Rf2FileModifier.split and the union of keys is sorted with EFFECTIVE_TIME_COMPARATOR. | ||
// The same builder instance is then updated and rebuilt once per key (presumably accumulating earlier changes - confirm in the builder). | ||
// Failures are stored under the key that was being processed when build() reported a non-OK status. | ||
// Returns the collected multimap, which is empty when every build() was OK; declares IOException, the throwing callee is not visible here. | ||
// NOTE(review): ArrayListMultimap is not documented to keep keys in insertion order, so keySet() iteration may differ from the order used here - confirm callers do not rely on it. | ||
private Multimap<String, InvalidRelationship> processTaxonomy() throws IOException { | ||
final Rf2BasedSnomedTaxonomyBuilder builder = Rf2BasedSnomedTaxonomyBuilder.newInstance(new SnomedTaxonomyBuilder(conceptIds, statements), characteristicType); | ||
final Multimap<String, InvalidRelationship> invalidRelationships = ArrayListMultimap.create(); | ||
if (snapshot) { | ||
|
||
// Snapshot branch; note that the non-snapshot branch below logs the identical message. | ||
LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files..."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This output could be changed slightly, so that we know immediately that one or the other branch was hit (snapshot or non-snapshot validation); in this case, we don't have to check whether additional lines appear after this one saying |
||
|
||
|
||
if (hasConceptImport()) { | ||
final String conceptFilePath = removeConceptHeader(); | ||
builder.applyNodeChanges(conceptFilePath); | ||
} | ||
|
||
if (hasRelationshipImport()) { | ||
final String relationshipFilePath = removeRelationshipHeader(); | ||
builder.applyEdgeChanges(relationshipFilePath); | ||
} | ||
|
||
final SnomedTaxonomyBuilderResult result = builder.build(); | ||
if (!result.getStatus().isOK()) { | ||
invalidRelationships.putAll("", result.getInvalidRelationships()); | ||
} | ||
} else { | ||
|
||
LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files..."); | ||
|
||
final Map<String, File> conceptFiles = hasConceptImport() ? Rf2FileModifier.split(conceptsFile) : ImmutableMap.<String, File>of(); | ||
final Map<String, File> relationshipFiles = hasRelationshipImport() ? Rf2FileModifier.split(relationshipsFile) : ImmutableMap.<String, File>of(); | ||
|
||
final List<String> effectiveTimes = ImmutableSortedSet.orderedBy(EFFECTIVE_TIME_COMPARATOR) | ||
.addAll(conceptFiles.keySet()) | ||
.addAll(relationshipFiles.keySet()) | ||
.build() | ||
.asList(); | ||
|
||
|
||
for (final String effectiveTime : effectiveTimes) { | ||
LOGGER.info("Validating taxonomy in '{}'...", effectiveTime); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some extra formatting for the "unpublished" effective time would be nice ✨ |
||
|
||
final File conceptFile = conceptFiles.get(effectiveTime); | ||
final File relationshipFile = relationshipFiles.get(effectiveTime); | ||
|
||
// Either file may be null when the key occurs in only one of the two maps; getFilePath then passes a null path to the builder. | ||
builder.applyNodeChanges(getFilePath(conceptFile)); | ||
builder.applyEdgeChanges(getFilePath(relationshipFile)); | ||
final SnomedTaxonomyBuilderResult result = builder.build(); | ||
if (!result.getStatus().isOK()) { | ||
invalidRelationships.putAll(effectiveTime, result.getInvalidRelationships()); | ||
} | ||
} | ||
} | ||
return invalidRelationships; | ||
} | ||
|
||
// Null-safe accessor for File.getPath(): returns the path of the given file, or null when the file itself is null. | ||
// processTaxonomy passes the result of Map.get here, which is null for a key that is missing from the map. | ||
private String getFilePath(@Nullable final File file) { | ||
return null == file ? null : file.getPath(); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that `ArrayListMultimap`'s key set might not preserve the ordering used in `processTaxonomy`. (Not sure if we are relying on it here.)