Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SO-2127 snomed rf2 importer issues #122

Merged
merged 30 commits into from
Jan 6, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
54b1ff8
SO-2127: fixing issues with RF2 importer
cmark Nov 24, 2016
3fe1824
SO-1782: store taxonomy defect information in data objects...
cmark Nov 25, 2016
0a46423
SO-2127: fix NPE when getting datatype for cd member rows
cmark Dec 6, 2016
c44cee9
SO-2127: skip cycle detection during import
cmark Dec 6, 2016
df2aa25
SO-2127: improve performance of in-memory ID service
cmark Dec 6, 2016
ce44373
SO-2127: extract relevant taxonomy information for nodes and edges...
cmark Dec 6, 2016
fb31a71
SO-2127: support bulk methods in import time component lookup impls
cmark Dec 6, 2016
7ebd046
SO-2127: refactor RF2 importer to support batch processing
cmark Dec 6, 2016
9613dd5
SO-2127: disable error reporting functionality
cmark Dec 6, 2016
923c222
SO-2127: batch RF2 processing improvements
cmark Dec 6, 2016
d38dcf8
SO-2127: get temp CDOResource outside the for loop (lang.refset import)
cmark Dec 6, 2016
0f9b4a1
SO-2127: remove unnecessary setId calls from member importers
cmark Dec 6, 2016
6de7228
Merge remote-tracking branch 'origin/develop' into issue/SO-2127-snom…
cmark Dec 7, 2016
51b9bc3
SO-2127: fix concrete domain member index entry building
cmark Dec 7, 2016
0331d77
SO-2127: use log message arguments instead of string concatenation
cmark Dec 7, 2016
c59ecd7
SO-2127: run index purge/optimize before creating version...
cmark Dec 7, 2016
9b96893
SO-2127: remove commit notification from notification queue
cmark Dec 8, 2016
a01efba
SO-2127: enqueue commit notifications when enabled in tx context
cmark Dec 8, 2016
59e33b5
SO-2127: remove temporary resource form lang. refset importer
cmark Dec 8, 2016
35eefec
SO-2127: change importer logger names to snomed.importer.rf2
cmark Dec 8, 2016
f7a3f56
SO-2127: reduce document load times during import
cmark Dec 9, 2016
8934bf7
SO-2127: rename constant from ON to TAB_SPLITTER
cmark Jan 4, 2017
07770db
SO-2127: fix comments in import time lookup services
cmark Jan 5, 2017
d2f5bce
SO-2127: include stack trace in log messages
cmark Jan 5, 2017
d3e48b6
SO-2127: use rows.clear instead of recreating List instance
cmark Jan 5, 2017
4105458
SO-2127: simplify processing of rows
cmark Jan 5, 2017
e3e04ed
SO-2127: rename component creation template methods
cmark Jan 5, 2017
efecf53
SO-2127: generate available identifiers in bulk load method for symmetry
cmark Jan 5, 2017
1b80174
SO-2127: avoid setting core component identifier twice
cmark Jan 5, 2017
b780da1
SO-2127: rename field extended to hasMapTargetDescription
cmark Jan 5, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
import static com.b2international.snowowl.snomed.importer.rf2.util.RF2ReleaseRefSetFileCollector.collectUrlFromRelease;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Maps.newHashMap;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
Expand All @@ -40,6 +42,7 @@
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
Expand Down Expand Up @@ -111,6 +114,7 @@
import com.b2international.snowowl.snomed.importer.rf2.terminology.SnomedRelationshipImporter;
import com.b2international.snowowl.snomed.importer.rf2.validation.SnomedValidationContext;
import com.b2international.snowowl.terminologyregistry.core.request.CodeSystemRequests;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
Expand Down Expand Up @@ -515,23 +519,36 @@ private boolean isContentValid(final RepositoryState repositoryState,
@Override
public Boolean execute(RevisionSearcher index) throws IOException {
final SnomedValidationContext validator = new SnomedValidationContext(index, requestingUserId, configuration, IMPORT_LOGGER, repositoryState);
result.getValidationDefects().addAll(validator.validate(subMonitor.newChild(1)));
final Set<SnomedValidationDefect> defects = result.getValidationDefects();
defects.addAll(validator.validate(subMonitor.newChild(1)));

if (!isEmpty(result.getValidationDefects())) {
if (!isEmpty(defects)) {
// If only header differences exist, continue the import
final FluentIterable<String> defects = FluentIterable.from(result.getValidationDefects()).transformAndConcat(new Function<SnomedValidationDefect, Iterable<? extends String>>() {
@Override
public Iterable<? extends String> apply(SnomedValidationDefect input) {
return input.getDefects();
}
});

final String message = String.format("Validation encountered %s issue(s).", defects.size());
LogUtils.logImportActivity(IMPORT_LOGGER, requestingUserId, branchPath, message);
for (String defect : defects) {
LogUtils.logImportActivity(IMPORT_LOGGER, requestingUserId, branchPath, defect);

final String lineSeparator = System.getProperty("line.separator");
final Map<String, BufferedWriter> defectWriters = newHashMap();
try {
for (SnomedValidationDefect defect : defects) {
final String filePath = defect.getFileName();
final String defectsFile = String.format("d:/%s_%s_defects.txt", configuration.getArchiveFile().getName(), filePath);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded file location above 👓

if (!defectWriters.containsKey(defectsFile)) {
defectWriters.put(defectsFile, Files.newWriter(new File(defectsFile), Charsets.UTF_8));
}
for (String lineDefect : defect.getDefects()) {
defectWriters.get(defectsFile).write(defect.getDefectType().name() + "\t" + lineDefect);
defectWriters.get(defectsFile).write(lineSeparator);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BufferedWriter exposes a newLine method, which uses the line.separator system property as well, so lineSeparator could be removed if it's not used elsewhere.

}
}
} finally {
for (BufferedWriter writer : defectWriters.values()) {
writer.close();
}
}

return !Iterables.tryFind(result.getValidationDefects(), new Predicate<SnomedValidationDefect>() {
return !Iterables.tryFind(defects, new Predicate<SnomedValidationDefect>() {
@Override
public boolean apply(SnomedValidationDefect input) {
return input.getDefectType().isCritical();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
*/
public class Rf2FileModifier {

private static final Splitter ON = Splitter.on('\t');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename this constant so that it indicates that it's a Splitter.


/**
* Splits the given RF2 files into RF2 file segments based on distinct effective times. The result will be a map
* where keys are the effective times and values are the RF2 files. These files are being created to the temporary
Expand Down Expand Up @@ -66,7 +68,7 @@ public static Map<String, File> split(final File toSplit) throws IOException {
continue;
}

final List<String> values = Splitter.on('\t').splitToList(line);
final List<String> values = ON.splitToList(line);
final String effectiveTime = values.get(1);

final PrintWriter effectiveTimeWriter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,9 @@ private Path getEffectiveTimeFile(String effectiveTimeKey) {
* @param monitor the SubMonitor instance to report progress on
*/
protected void doValidate(final String effectiveTime, IProgressMonitor monitor) {
if (!effectiveTimes.contains(effectiveTime)) return;

if (!effectiveTimes.contains(effectiveTime)) {
return;
}
final Stopwatch watch = Stopwatch.createStarted();
final String effectiveTimeMessage = effectiveTime.length() == 0 ? "Unpublished" : effectiveTime;
final String message = String.format("Validating %s file in '%s'...", importType.getDisplayName(), effectiveTimeMessage);
Expand Down Expand Up @@ -249,7 +250,7 @@ protected void postValidate(final SubMonitor monitor) {
}

protected void addDefect(final DefectType type, String...defects) {
this.validationContext.addDefect(type, defects);
this.validationContext.addDefect(releaseFileName, type, defects);
}

/**
Expand All @@ -258,7 +259,7 @@ protected void addDefect(final DefectType type, String...defects) {
* @param validationDefect the defect to be added
*/
protected void addDefect(final DefectType type, Iterable<String> defects) {
this.validationContext.addDefect(type, defects);
this.validationContext.addDefect(releaseFileName, type, defects);
}

/**
Expand Down Expand Up @@ -348,7 +349,7 @@ protected boolean isComponentActive(final String componentId) {
}

protected void registerComponent(ComponentCategory category, String componentId, boolean status) throws AlreadyExistsException {
validationContext.registerComponent(category, componentId, status);
validationContext.registerComponent(releaseFileName, category, componentId, status);
}

/**
Expand Down Expand Up @@ -422,7 +423,7 @@ private void validateModuleId(final List<String> row, final int lineNumber) {
}

private void validateEffectiveTime(String effectiveTime, final int lineNumber) {
if (ContentSubType.DELTA.equals(configuration.getVersion()) && effectiveTime.isEmpty()) {
if (effectiveTime.isEmpty()) {
return;
}
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ protected void doValidate(String effectiveTime, IProgressMonitor monitor) {
addDefect(DefectType.DESCRIPTION_TYPE_NOT_EXIST, typeConceptNotExist);
addDefect(DefectType.DESCRIPTION_CASE_SIGNIFICANCE_NOT_EXIST, caseSignificanceConceptNotExist);

descriptionIdsWithEffectivetimeStatus.clear();
fullySpecifiedNames.clear();
descriptionIdNotUnique.clear();
fullySpecifiedNameNotUnique.clear();
descriptionConceptNotExist.clear();
typeConceptNotExist.clear();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ protected void doValidate(String effectiveTime, IProgressMonitor monitor) {
addDefect(DefectType.REFSET_MEMBER_COMPONENT_NOT_EXIST, referencedComponentNotExist);
uuidInvalid.clear();
referencedComponentNotExist.clear();
memberDataByUuid.clear();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uuidNotUnique is not cleared here. Do all fields need to be cleared in all validators after checking an effectiveTime layer? If not, please revisit doValidate methods and check if the appropriate fields are cleared.

Copy link
Member Author

@cmark cmark Jan 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It depends on the field whether it needs to be cleared or not after doValidate(). uuidNotUnique seems to be used to report ID uniqueness issues independent of the effective time.

}

/**returns with the proper import component type based on the component ID argument.*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.b2international.snowowl.snomed.importer.rf2.validation;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
Expand Down Expand Up @@ -43,8 +44,8 @@ public class SnomedRelationshipValidator extends AbstractSnomedValidator {
private Collection<String> relationshipSourceAndDestinationAreEqual = Sets.newHashSet();
private Collection<String> missingReferencedConcepts = Sets.newHashSet();

public SnomedRelationshipValidator(final ImportConfiguration configuration, final SnomedValidationContext context) throws IOException {
super(configuration, configuration.toURL(configuration.getRelationshipsFile()), ComponentImportType.RELATIONSHIP, context, SnomedRf2Headers.RELATIONSHIP_HEADER);
public SnomedRelationshipValidator(final ImportConfiguration configuration, final SnomedValidationContext context, final File relationshipsFile) throws IOException {
super(configuration, configuration.toURL(relationshipsFile), ComponentImportType.RELATIONSHIP, context, SnomedRf2Headers.RELATIONSHIP_HEADER);
}

@Override
Expand Down Expand Up @@ -87,6 +88,8 @@ protected void doValidate(String effectiveTime, IProgressMonitor monitor) {
addDefect(DefectType.RELATIONSHIP_SOURCE_DESTINATION_EQUALS, relationshipSourceAndDestinationAreEqual);
addDefect(DefectType.RELATIONSHIP_REFERENCED_INVALID_CONCEPT, missingReferencedConcepts);

relationshipIdsWithEffectivetimeStatus.clear();
relationshipIdNotUnique.clear();
relationshipSourceAndDestinationAreEqual.clear();
missingReferencedConcepts.clear();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import com.b2international.snowowl.datastore.server.snomed.index.init.Rf2BasedSnomedTaxonomyBuilder;
import com.b2international.snowowl.snomed.SnomedConstants.Concepts;
import com.b2international.snowowl.snomed.datastore.index.entry.SnomedRelationshipIndexEntry;
import com.b2international.snowowl.snomed.datastore.taxonomy.IncompleteTaxonomyException;
import com.b2international.snowowl.snomed.datastore.taxonomy.InvalidRelationship;
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyBuilder;
import com.b2international.snowowl.snomed.datastore.taxonomy.SnomedTaxonomyBuilderResult;
Expand All @@ -45,9 +44,10 @@
import com.b2international.snowowl.snomed.importer.net4j.SnomedValidationDefect;
import com.b2international.snowowl.snomed.importer.rf2.RepositoryState;
import com.b2international.snowowl.snomed.importer.rf2.util.Rf2FileModifier;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;

/**
* Class for validating the taxonomy of active concepts and active IS_A relationships.
Expand Down Expand Up @@ -113,6 +113,9 @@ public Collection<SnomedValidationDefect> validate() {
private Collection<SnomedValidationDefect> doValidate() {
try {
final Rf2BasedSnomedTaxonomyBuilder builder = Rf2BasedSnomedTaxonomyBuilder.newInstance(new SnomedTaxonomyBuilder(conceptIds, statements), characteristicType);

final Multimap<String, InvalidRelationship> invalidRelationships = ArrayListMultimap.create();

if (snapshot) {

LOGGER.info("Validating SNOMED CT ontology based on the given RF2 release files...");
Expand All @@ -130,7 +133,7 @@ private Collection<SnomedValidationDefect> doValidate() {

final SnomedTaxonomyBuilderResult result = builder.build();
if (!result.getStatus().isOK()) {
throw new IncompleteTaxonomyException(Lists.newArrayList(result.getInvalidRelationships()));
invalidRelationships.putAll("", result.getInvalidRelationships());
}
} else {

Expand All @@ -145,6 +148,7 @@ private Collection<SnomedValidationDefect> doValidate() {
.build()
.asList();


for (final String effectiveTime : effectiveTimes) {
LOGGER.info("Validating concepts and relationships from '" + effectiveTime + "'...");

Expand All @@ -153,54 +157,63 @@ private Collection<SnomedValidationDefect> doValidate() {

builder.applyNodeChanges(getFilePath(conceptFile));
builder.applyEdgeChanges(getFilePath(relationshipFile));
builder.build();
final SnomedTaxonomyBuilderResult result = builder.build();
if (!result.getStatus().isOK()) {
invalidRelationships.putAll(effectiveTime, result.getInvalidRelationships());
}
}
}

} catch (final IOException e) {
LOGGER.error("Validation failed with an IOException.", e);
return Collections.<SnomedValidationDefect>singleton(new SnomedValidationDefect(DefectType.IO_PROBLEM, Collections.<String>emptySet()));
} catch (final IncompleteTaxonomyException e) {
LOGGER.error("Validation failed.");
final Collection<String> defects = newHashSet();
final Collection<String> conceptIdsToInactivate = newHashSet();
for (final InvalidRelationship invalidRelationship: e.getInvalidRelationships()) {
final String sourceId = Long.toString(invalidRelationship.getSourceId());
final String destinationId = Long.toString(invalidRelationship.getDestinationId());

if (conceptIds.contains(invalidRelationship.getDestinationId())) {
conceptIdsToInactivate.add(destinationId);
}
if (!invalidRelationships.isEmpty()) {
final Collection<String> defects = newHashSet();
final Collection<String> conceptIdsToInactivate = newHashSet();

if (conceptIds.contains(invalidRelationship.getSourceId())) {
conceptIdsToInactivate.add(sourceId);
for (final String effectiveTime : invalidRelationships.keySet()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that ArrayListMultimap's key set might not preserve the ordering used in processTaxonomy. (Not sure if we are relying on it here.)

for (final InvalidRelationship invalidRelationship: invalidRelationships.get(effectiveTime)) {
final String sourceId = Long.toString(invalidRelationship.getSourceId());
final String destinationId = Long.toString(invalidRelationship.getDestinationId());

if (conceptIds.contains(invalidRelationship.getDestinationId())) {
conceptIdsToInactivate.add(destinationId);
}

if (conceptIds.contains(invalidRelationship.getSourceId())) {
conceptIdsToInactivate.add(sourceId);
}

final StringBuilder sb = new StringBuilder();
sb.append("IS A relationship");
sb.append(" '" + invalidRelationship.getRelationshipId() + "'");
sb.append(" has a missing or inactive ");

switch (invalidRelationship.getMissingConcept()) {
case DESTINATION:
sb.append("destination concept");
sb.append(" '" + destinationId);
break;
case SOURCE:
sb.append("source concept");
sb.append(" '" + sourceId);
break;
default:
throw new IllegalStateException("Unexpected missing concept type '" + invalidRelationship.getMissingConcept() + "'.");
}

sb.append("' in effectiveTime ");
sb.append("".equals(effectiveTime) ? "Unpublished/Undefined" : effectiveTime);
sb.append("'.");

defects.add(sb.toString());
}
}

final StringBuilder sb = new StringBuilder();
sb.append("IS A relationship");
sb.append(" '" + invalidRelationship.getRelationshipId() + "'");
sb.append(" has a missing or inactive ");

switch (invalidRelationship.getMissingConcept()) {
case DESTINATION:
sb.append("destination concept");
sb.append(" '" + destinationId);
break;
case SOURCE:
sb.append("source concept");
sb.append(" '" + sourceId);
break;
default:
throw new IllegalStateException("Unexpected missing concept type '" + invalidRelationship.getMissingConcept() + "'.");
}

sb.append("'.");

defects.add(sb.toString());
final SnomedValidationDefect defect = new SnomedIncompleteTaxonomyValidationDefect(relationshipsFile.getName(), defects, conceptIdsToInactivate);
return Collections.<SnomedValidationDefect>singleton(defect);
}

final SnomedValidationDefect defect = new SnomedIncompleteTaxonomyValidationDefect(defects, conceptIdsToInactivate);
return Collections.<SnomedValidationDefect>singleton(defect);
} catch (final IOException e) {
LOGGER.error("Validation failed.", e);
return Collections.<SnomedValidationDefect>singleton(new SnomedValidationDefect(relationshipsFile.getName(), DefectType.IO_PROBLEM, Collections.<String>emptySet()));
}

LOGGER.info("SNOMED CT ontology validation successfully finished. No errors were found.");
Expand Down