Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SO-2127 snomed rf2 importer issues #122

Merged
merged 30 commits into from
Jan 6, 2017
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
54b1ff8
SO-2127: fixing issues with RF2 importer
cmark Nov 24, 2016
3fe1824
SO-1782: store taxonomy defect information in data objects...
cmark Nov 25, 2016
0a46423
SO-2127: fix NPE when getting datatype for cd member rows
cmark Dec 6, 2016
c44cee9
SO-2127: skip cycle detection during import
cmark Dec 6, 2016
df2aa25
SO-2127: improve performance of in-memory ID service
cmark Dec 6, 2016
ce44373
SO-2127: extract relevant taxonomy information for nodes and edges...
cmark Dec 6, 2016
fb31a71
SO-2127: support bulk methods in import time component lookup impls
cmark Dec 6, 2016
7ebd046
SO-2127: refactor RF2 importer to support batch processing
cmark Dec 6, 2016
9613dd5
SO-2127: disable error reporting functionality
cmark Dec 6, 2016
923c222
SO-2127: batch RF2 processing improvements
cmark Dec 6, 2016
d38dcf8
SO-2127: get temp CDOResource outside the for loop (lang.refset import)
cmark Dec 6, 2016
0f9b4a1
SO-2127: remove unnecessary setId calls from member importers
cmark Dec 6, 2016
6de7228
Merge remote-tracking branch 'origin/develop' into issue/SO-2127-snom…
cmark Dec 7, 2016
51b9bc3
SO-2127: fix concrete domain member index entry building
cmark Dec 7, 2016
0331d77
SO-2127: use log message arguments instead of string concatenation
cmark Dec 7, 2016
c59ecd7
SO-2127: run index purge/optimize before creating version...
cmark Dec 7, 2016
9b96893
SO-2127: remove commit notification from notification queue
cmark Dec 8, 2016
a01efba
SO-2127: enqueue commit notifications when enabled in tx context
cmark Dec 8, 2016
59e33b5
SO-2127: remove temporary resource form lang. refset importer
cmark Dec 8, 2016
35eefec
SO-2127: change importer logger names to snomed.importer.rf2
cmark Dec 8, 2016
f7a3f56
SO-2127: reduce document load times during import
cmark Dec 9, 2016
8934bf7
SO-2127: rename constant from ON to TAB_SPLITTER
cmark Jan 4, 2017
07770db
SO-2127: fix comments in import time lookup services
cmark Jan 5, 2017
d2f5bce
SO-2127: include stack trace in log messages
cmark Jan 5, 2017
d3e48b6
SO-2127: use rows.clear instead of recreating List instance
cmark Jan 5, 2017
4105458
SO-2127: simplify processing of rows
cmark Jan 5, 2017
e3e04ed
SO-2127: rename component creation template methods
cmark Jan 5, 2017
efecf53
SO-2127: generate available identifiers in bulk load method for symmetry
cmark Jan 5, 2017
1b80174
SO-2127: avoid setting core component identifier twice
cmark Jan 5, 2017
b780da1
SO-2127: rename field extended to hasMapTargetDescription
cmark Jan 5, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@
package com.b2international.snowowl.snomed.datastore.id.memory;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Lists.newArrayListWithExpectedSize;
import static com.google.common.collect.Sets.newHashSet;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.eclipse.emf.cdo.spi.server.Store;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -213,18 +216,13 @@ public void register(final Collection<String> componentIds) {
LOGGER.debug(String.format("Registering %d component IDs.", componentIds.size()));

try {
final Collection<String> existingIds = getSctIds(componentIds).stream().filter(id -> id.matches(IdentifierStatus.AVAILABLE, IdentifierStatus.RESERVED)).map(SctId::getSctid).collect(Collectors.toSet());

for (final String componentId : componentIds) {
if (!existingIds.contains(componentId)) {
final SctId sctId = buildSctId(componentId, IdentifierStatus.ASSIGNED);
sctIds.put(componentId, sctId);
registeredComponentIds.add(componentId);
for (final SctId sctId : getSctIds(componentIds)) {
if (!sctId.matches(IdentifierStatus.AVAILABLE, IdentifierStatus.RESERVED)) {
LOGGER.warn("Cannot register ID {} as it is already present with status {}.", sctId.getSctid(), sctId.getStatus());
} else {
LOGGER.warn("Cannot register ID {} as it is already taken.", componentId);
sctId.setStatus(IdentifierStatus.ASSIGNED.getSerializedName());
}
}

putSctIds(sctIds);
} catch (Exception e) {
// remove the registered component IDs
Expand Down Expand Up @@ -307,7 +305,20 @@ public void publish(final Collection<String> componentIds) {

@Override
public Collection<SctId> getSctIds(final Collection<String> componentIds) {
return store.get().read(index -> index.search(Query.select(SctId.class).where(Expressions.matchAny(DocumentMapping._ID, componentIds)).limit(componentIds.size()).build()).getHits());
final List<SctId> existingIds = store.get().read(index -> index.search(Query.select(SctId.class).where(Expressions.matchAny(DocumentMapping._ID, componentIds)).limit(componentIds.size()).build()).getHits());
if (existingIds.size() == componentIds.size()) {
return existingIds;
} else {
final Set<String> existingIdentifiers = existingIds.stream().map(SctId::getSctid).collect(Collectors.toSet());
final List<SctId> ids = newArrayListWithExpectedSize(componentIds.size());
ids.addAll(existingIds);
for (String componentId : componentIds) {
if (!existingIdentifiers.contains(componentId)) {
ids.add(buildSctId(componentId, IdentifierStatus.AVAILABLE));
}
}
return ids;
}
}

private String generateId(final String namespace, final ComponentCategory category) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ private CellProcessor[] createValidatingCellProcessors() {
}

private ImportAction handlePreImportException(final SuperCSVException e) {
log("Exception caught while reading release file. Continuing with next row. {}", e.getMessage());
log("Exception caught while reading release file. Continuing with next row.", e);
importContext.getLogger().warn("Exception caught while reading release file. Continuing with next row.", e);
return ImportAction.CONTINUE;
}
Expand Down Expand Up @@ -437,7 +437,7 @@ private ComponentImportEntry getOrCreateImportEntry(final Map<String, ComponentI
try {
sliceStream = new FileOutputStream(sliceFile);
} catch (final FileNotFoundException e) {
log("SNOMED CT import failed. Couldn't open output file '{}' for writing. {}", sliceFile.getAbsolutePath(), e.getMessage());
log("SNOMED CT import failed. Couldn't open output file '{}' for writing.", sliceFile.getAbsolutePath(), e);
throw new ImportException("Couldn't open output file '" + sliceFile.getAbsolutePath() + "' for writing.", e);
}

Expand Down Expand Up @@ -534,7 +534,7 @@ public final void doImport(final SubMonitor subMonitor, final AbstractImportUnit
// process batch loaded rows and commit them
importRows(rows);
// reinit rows array
rows = Lists.newArrayListWithExpectedSize(COMMIT_EVERY_NUM_ELEMENTS);
rows.clear();
if (ImportAction.BREAK.equals(commit(subMonitor, effectiveTimeKey))) {
break;
}
Expand All @@ -557,8 +557,9 @@ protected int getImportWorkUnits(final int recordCount) {
}

private ImportAction handleImportException(final Throwable e) {
log("Exception caught while importing row from release file. Continuing with next row. {}", e.getMessage());
importContext.getLogger().warn("Exception caught while importing row from release file. Continuing with next row.", e);
final String message = "Exception caught while importing row from release file. Continuing with next row.";
log(message, e);
importContext.getLogger().warn(message, e);
return ImportAction.CONTINUE;
}

Expand Down Expand Up @@ -633,13 +634,13 @@ protected void preCommit(final InternalCDOTransaction transaction) throws Snowow
}

private ImportAction checkCommitException(final SnowowlServiceException e) {
log("SNOMED CT import failed. Caught exception while import, aborting. {}", e.getMessage());
log("SNOMED CT import failed. Caught exception while import, aborting. {}", e);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The placeholder should be removed from this format string.

importContext.getLogger().warn("Caught exception while import, aborting.", e);
return ImportAction.BREAK;
}

private ImportAction checkCommitException(final CommitException e) {
log("SNOMED CT import failed. Caught exception while import, aborting. {}", e.getMessage());
log("SNOMED CT import failed. Caught exception while import, aborting. {}", e);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(also here)

importContext.getLogger().warn("Caught exception while import, aborting.", e);
handleCommitException();
return ImportAction.BREAK;
Expand All @@ -665,10 +666,9 @@ private final void importRows(List<T> rows) {
final Map<String, C> existingComponents = loadComponents(rowsToImport.keySet()).stream().collect(Collectors.toMap(getComponentIdMapper(), c -> c));
// create or update components
final Collection<C> componentsToAttach = newHashSet();
for (Entry<String, T> rowToImport : rowsToImport.entrySet()) {
final String componentId = rowToImport.getKey();
for (final String componentId : rowsToImport.keySet()) {
final T row = rowsToImport.get(componentId);
C component = existingComponents.get(componentId);
final T row = rowToImport.getValue();
if (component == null) {
// XXX some RF2 rows might already introduced the component with just the ID
component = getOrCreateNew(componentId, componentsToAttach);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,12 @@ protected void registerNewComponent(M component) {

@Override
protected final M createComponent(String memberId) {
final M member = createComponent();
final M member = createMember();
member.setUuid(memberId);
return member;
}

protected abstract M createComponent();
protected abstract M createMember();

protected SnomedRefSet getOrCreateRefSet(final String refSetSctId, final String referencedComponentId) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ private LongValueMap<String> getStorageKeys(final Collection<String> componentId
@Override
public LongValueMap<String> execute(RevisionSearcher index) throws IOException {
final LongValueMap<String> map = PrimitiveMaps.newObjectKeyLongOpenHashMapWithExpectedSize(componentIds.size());
// index componentIds by their category
final Query<SnomedRefSetMemberIndexEntry> query = Query.select(SnomedRefSetMemberIndexEntry.class)
.where(SnomedRefSetMemberIndexEntry.Expressions.ids(componentIds))
.limit(componentIds.size())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ protected SnomedRefSetType getRefSetType() {
}

@Override
protected SnomedAssociationRefSetMember createComponent() {
protected SnomedAssociationRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedAssociationRefSetMember();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedAttributeValueRefSetMember createComponent() {
protected SnomedAttributeValueRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedAttributeValueRefSetMember();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedComplexMapRefSetMember createComponent() {
protected SnomedComplexMapRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedComplexMapRefSetMember();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedConcreteDataTypeRefSetMember createComponent() {
protected SnomedConcreteDataTypeRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedConcreteDataTypeRefSetMember();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedDescriptionTypeRefSetMember createComponent() {
protected SnomedDescriptionTypeRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedDescriptionTypeRefSetMember();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedLanguageRefSetMember createComponent() {
protected SnomedLanguageRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedLanguageRefSetMember();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ protected String getIdentifierParentConceptId(String refSetId) {
}

@Override
protected SnomedModuleDependencyRefSetMember createComponent() {
protected SnomedModuleDependencyRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedModuleDependencyRefSetMember();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedQueryRefSetMember createComponent() {
protected SnomedQueryRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedQueryRefSetMember();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ public class SnomedSimpleMapTypeRefSetImporter extends AbstractSnomedMapTypeRefS
.add(new IndexConfiguration("SNOMEDREFSET_SNOMEDSIMPLEMAPREFSETMEMBER_IDX1003", "SNOMEDREFSET_SNOMEDSIMPLEMAPREFSETMEMBER", "UUID/*!(255)*/", "CDO_BRANCH", "CDO_VERSION"))
.add(new IndexConfiguration("SNOMEDREFSET_SNOMEDSIMPLEMAPREFSETMEMBER_IDX1004", "SNOMEDREFSET_SNOMEDSIMPLEMAPREFSETMEMBER", "MAPTARGETCOMPONENTID/*!(255)*/", "CDO_BRANCH", "CDO_VERSION"))
.build();
private boolean extended;
private boolean hasMapTargetDescription;

public SnomedSimpleMapTypeRefSetImporter(final SnomedImportContext importContext, final InputStream releaseFileStream, boolean extended, final String releaseFileIdentifier) {
super(createImportConfiguration(extended), importContext, releaseFileStream, releaseFileIdentifier);
this.extended = extended;
public SnomedSimpleMapTypeRefSetImporter(final SnomedImportContext importContext, final InputStream releaseFileStream, boolean hasMapTargetDescription, final String releaseFileIdentifier) {
super(createImportConfiguration(hasMapTargetDescription), importContext, releaseFileStream, releaseFileIdentifier);
this.hasMapTargetDescription = hasMapTargetDescription;
}

private static SnomedImportConfiguration<SimpleMapRefSetRow> createImportConfiguration(boolean extended) {
Expand Down Expand Up @@ -109,7 +109,7 @@ protected void applyRow(SnomedSimpleMapRefSetMember member, SimpleMapRefSetRow r
member.setReferencedComponentId(row.getReferencedComponentId());
member.setMapTargetComponentId(row.getAssociatedComponentId());

if (extended) {
if (hasMapTargetDescription) {
member.setMapTargetComponentDescription(((SimpleMapRefSetRow) row).getMapTargetDescription());
}
}
Expand All @@ -120,7 +120,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedSimpleMapRefSetMember createComponent() {
protected SnomedSimpleMapRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedSimpleMapRefSetMember();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ protected String getIdentifierParentConceptId(final String refSetId) {
}

@Override
protected SnomedRefSetMember createComponent() {
protected SnomedRefSetMember createMember() {
return SnomedRefSetFactory.eINSTANCE.createSnomedRefSetMember();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ protected final Function<C, String> getComponentIdMapper() {

@Override
protected C createComponent(String componentId) {
final C component = createComponent();
final C component = createCoreComponent();
component.setId(componentId);
return component;
}

protected abstract C createComponent();
protected abstract C createCoreComponent();

protected final Concept getConceptSafe(final String conceptId, final String conceptField, final String componentId) {
final Concept result = (Concept) Iterables.getOnlyElement(getComponents(Collections.singleton(conceptId)), null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ public LongValueMap<String> execute(RevisionSearcher index) throws IOException {
// index componentIds by their category
final Multimap<Class<? extends SnomedDocument>, String> idsByType = Multimaps.index(componentIds, id -> SnomedDocument.getType(SnomedIdentifiers.getComponentCategory(id)));
for (Class<? extends SnomedDocument> type : idsByType.keySet()) {
// execute queries for each type and based on the current clazz extract either refset or concept storage keys
final Query<? extends SnomedDocument> query = Query.select(type).where(SnomedDocument.Expressions.ids(componentIds)).limit(componentIds.size()).build();
final Hits<? extends SnomedDocument> hits = index.search(query);
for (SnomedDocument doc : hits) {
Expand All @@ -181,7 +182,6 @@ public LongValueMap<String> execute(RevisionSearcher index) throws IOException {
}
}
}
// execute queries for each type
return map;
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ protected Class<? extends SnomedDocument> getType() {
}

@Override
protected Concept createComponent() {
protected Concept createCoreComponent() {
return SnomedFactory.eINSTANCE.createConcept();
}

Expand All @@ -81,7 +81,6 @@ protected void attach(Collection<Concept> componentsToAttach) {

@Override
protected void applyRow(Concept component, ConceptRow row, Collection<Concept> componentsToAttach) {
component.setId(row.getId());
if (row.getEffectiveTime() != null) {
component.setEffectiveTime(row.getEffectiveTime());
component.setReleased(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ protected Class<? extends SnomedDocument> getType() {

@Override
protected void applyRow(Description component, DescriptionRow row, Collection<Description> componentsToAttach) {
component.setId(row.getId());
if (row.getEffectiveTime() != null) {
component.setEffectiveTime(row.getEffectiveTime());
component.setReleased(true);
Expand All @@ -103,7 +102,7 @@ protected void attach(Collection<Description> componentsToAttach) {
}

@Override
protected Description createComponent() {
protected Description createCoreComponent() {
return SnomedFactory.eINSTANCE.createDescription();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ protected Class<? extends SnomedDocument> getType() {

@Override
protected void applyRow(Relationship component, RelationshipRow row, Collection<Relationship> componentsToAttach) {
component.setId(row.getId());
if (row.getEffectiveTime() != null) {
component.setEffectiveTime(row.getEffectiveTime());
component.setReleased(true);
Expand Down Expand Up @@ -122,7 +121,7 @@ protected void attach(Collection<Relationship> componentsToAttach) {
}

@Override
protected Relationship createComponent() {
protected Relationship createCoreComponent() {
return SnomedFactory.eINSTANCE.createRelationship();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
*/
public class Rf2FileModifier {

private static final Splitter ON = Splitter.on('\t');
private static final Splitter TAB_SPLITTER = Splitter.on('\t');

/**
* Splits the given RF2 files into RF2 file segments based on distinct effective times. The result will be a map
Expand Down Expand Up @@ -68,7 +68,7 @@ public static Map<String, File> split(final File toSplit) throws IOException {
continue;
}

final List<String> values = ON.splitToList(line);
final List<String> values = TAB_SPLITTER.splitToList(line);
final String effectiveTime = values.get(1);

final PrintWriter effectiveTimeWriter;
Expand Down