Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SO-2127 snomed rf2 importer issues #122

Merged
merged 30 commits into from
Jan 6, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
54b1ff8
SO-2127: fixing issues with RF2 importer
cmark Nov 24, 2016
3fe1824
SO-1782: store taxonomy defect information in data objects...
cmark Nov 25, 2016
0a46423
SO-2127: fix NPE when getting datatype for cd member rows
cmark Dec 6, 2016
c44cee9
SO-2127: skip cycle detection during import
cmark Dec 6, 2016
df2aa25
SO-2127: improve performance of in-memory ID service
cmark Dec 6, 2016
ce44373
SO-2127: extract relevant taxonomy information for nodes and edges...
cmark Dec 6, 2016
fb31a71
SO-2127: support bulk methods in import time component lookup impls
cmark Dec 6, 2016
7ebd046
SO-2127: refactor RF2 importer to support batch processing
cmark Dec 6, 2016
9613dd5
SO-2127: disable error reporting functionality
cmark Dec 6, 2016
923c222
SO-2127: batch RF2 processing improvements
cmark Dec 6, 2016
d38dcf8
SO-2127: get temp CDOResource outside the for loop (lang.refset import)
cmark Dec 6, 2016
0f9b4a1
SO-2127: remove unnecessary setId calls from member importers
cmark Dec 6, 2016
6de7228
Merge remote-tracking branch 'origin/develop' into issue/SO-2127-snom…
cmark Dec 7, 2016
51b9bc3
SO-2127: fix concrete domain member index entry building
cmark Dec 7, 2016
0331d77
SO-2127: use log message arguments instead of string concatenation
cmark Dec 7, 2016
c59ecd7
SO-2127: run index purge/optimize before creating version...
cmark Dec 7, 2016
9b96893
SO-2127: remove commit notification from notification queue
cmark Dec 8, 2016
a01efba
SO-2127: enqueue commit notifications when enabled in tx context
cmark Dec 8, 2016
59e33b5
SO-2127: remove temporary resource from lang. refset importer
cmark Dec 8, 2016
35eefec
SO-2127: change importer logger names to snomed.importer.rf2
cmark Dec 8, 2016
f7a3f56
SO-2127: reduce document load times during import
cmark Dec 9, 2016
8934bf7
SO-2127: rename constant from ON to TAB_SPLITTER
cmark Jan 4, 2017
07770db
SO-2127: fix comments in import time lookup services
cmark Jan 5, 2017
d2f5bce
SO-2127: include stack trace in log messages
cmark Jan 5, 2017
d3e48b6
SO-2127: use rows.clear instead of recreating List instance
cmark Jan 5, 2017
4105458
SO-2127: simplify processing of rows
cmark Jan 5, 2017
e3e04ed
SO-2127: rename component creation template methods
cmark Jan 5, 2017
efecf53
SO-2127: generate available identifiers in bulk load method for symmetry
cmark Jan 5, 2017
1b80174
SO-2127: avoid setting core component identifier twice
cmark Jan 5, 2017
b780da1
SO-2127: rename field extended to hasMapTargetDescription
cmark Jan 5, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.b2international.snowowl.snomed.importer.rf2.model;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Sets.newHashSet;

import java.io.File;
import java.io.FileInputStream;
Expand All @@ -29,11 +30,16 @@
import java.sql.SQLException;
import java.text.MessageFormat;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.eclipse.core.runtime.SubMonitor;
import org.eclipse.emf.cdo.CDOObject;
Expand Down Expand Up @@ -81,12 +87,7 @@
import com.b2international.snowowl.importer.AbstractLoggingImporter;
import com.b2international.snowowl.importer.ImportAction;
import com.b2international.snowowl.importer.ImportException;
import com.b2international.snowowl.snomed.Annotatable;
import com.b2international.snowowl.snomed.Component;
import com.b2international.snowowl.snomed.Concept;
import com.b2international.snowowl.snomed.Description;
import com.b2international.snowowl.snomed.Inactivatable;
import com.b2international.snowowl.snomed.Relationship;
import com.b2international.snowowl.snomed.SnomedConstants;
import com.b2international.snowowl.snomed.SnomedPackage;
import com.b2international.snowowl.snomed.common.ContentSubType;
Expand All @@ -102,6 +103,7 @@
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;

Expand Down Expand Up @@ -188,44 +190,10 @@ protected File getComponentStagingDirectory() {
return componentStagingDirectory;
}

private Component getComponent(final String componentId) {
return importContext.getComponentLookup().getComponent(componentId);
protected final Collection<Component> getComponents(final Collection<String> componentIds) {
return importContext.getComponentLookup().getComponents(componentIds);
}

/**
 * Looks up the component for the given identifier via {@code getComponent} and hands it back as a {@link Concept}.
 *
 * @param conceptId the identifier to look up
 * @return the looked-up component cast to {@code Concept}; {@code null} when the lookup yields {@code null}
 */
protected Concept getConcept(final String conceptId) {
    final Component lookedUp = getComponent(conceptId);
    return (Concept) lookedUp;
}

/**
 * Null-intolerant variant of {@code getConcept}: returns the concept for the given identifier, or fails with a descriptive message
 * when it cannot be found.
 *
 * @param conceptId the identifier of the concept to look up
 * @param conceptField the name of the field that referenced the concept (used in the error message only)
 * @param componentId the identifier of the component that referenced the concept (used in the error message only)
 * @return the concept, never {@code null}
 * @throws NullPointerException if no concept is found for {@code conceptId}
 */
protected Concept getConceptSafe(final String conceptId, final String conceptField, final String componentId) {
    final Concept concept = getConcept(conceptId);

    if (concept != null) {
        return concept;
    }

    // the display name of the configured import type tells the reader which kind of component held the dangling reference
    final String missingConceptMessage = MessageFormat.format("Concept ''{0}'' for field {1}, {2} ''{3}'' not found.",
            conceptId, conceptField, getImportConfiguration().getType().getDisplayName(), componentId);
    throw new NullPointerException(missingConceptMessage);
}

/**
 * Looks up the component for the given identifier via {@code getComponent} and hands it back as a {@link Description}.
 *
 * @param descriptionId the identifier to look up
 * @return the looked-up component cast to {@code Description}; {@code null} when the lookup yields {@code null}
 */
protected Description getDescription(final String descriptionId) {
    final Component lookedUp = getComponent(descriptionId);
    return (Description) lookedUp;
}

/**
 * Looks up the component for the given identifier via {@code getComponent} and hands it back as a {@link Relationship}.
 *
 * @param relationshipId the identifier to look up
 * @return the looked-up component cast to {@code Relationship}; {@code null} when the lookup yields {@code null}
 */
protected Relationship getRelationship(final String relationshipId) {
    final Component lookedUp = getComponent(relationshipId);
    return (Relationship) lookedUp;
}

/**
 * Looks up the component for the given identifier via {@code getComponent} and hands it back as an {@link Annotatable}.
 *
 * @param componentId the identifier to look up
 * @return the looked-up component cast to {@code Annotatable}; {@code null} when the lookup yields {@code null}
 */
protected Annotatable getAnnotatableComponent(final String componentId) {
    final Component lookedUp = getComponent(componentId);
    return (Annotatable) lookedUp;
}

/**
 * Looks up the component for the given identifier via {@code getComponent} and hands it back as an {@link Inactivatable}.
 *
 * @param componentId the identifier to look up
 * @return the looked-up component cast to {@code Inactivatable}; {@code null} when the lookup yields {@code null}
 */
protected Inactivatable getInactivatableComponent(final String componentId) {
    final Component lookedUp = getComponent(componentId);
    return (Inactivatable) lookedUp;
}



@Override
public void preImport(final SubMonitor subMonitor) {

Expand Down Expand Up @@ -278,7 +246,7 @@ private String getComponentStagingDirectoryName() {
*<p>Could return with {@code null}.*/
protected abstract Date getComponentEffectiveTime(C editedComponent);

protected boolean skipCurrentRow(final AbstractComponentRow rf2Row, final C existingComponent) {
protected final boolean skipCurrentRow(final AbstractComponentRow rf2Row, final C existingComponent) {
return skipCurrentRow(rf2Row, getComponentEffectiveTime(existingComponent));
}

Expand Down Expand Up @@ -504,14 +472,13 @@ private List<ComponentImportUnit> createImportUnits(final Map<String, ComponentI
}

@Override
public void doImport(final SubMonitor subMonitor, final AbstractImportUnit unit) {
public final void doImport(final SubMonitor subMonitor, final AbstractImportUnit unit) {

final ComponentImportUnit concreteUnit = (ComponentImportUnit) unit;
final int recordCount = concreteUnit.getRecordCount();
final String effectiveTimeKey = concreteUnit.getEffectiveTimeKey();

final int workUnits = getImportWorkUnits(recordCount);
int unitsAdded = 0;

final String message = getImportMessage(effectiveTimeKey);
subMonitor.beginTask(message, workUnits);
Expand All @@ -538,6 +505,8 @@ public void doImport(final SubMonitor subMonitor, final AbstractImportUnit unit)

try {

List<T> rows = Lists.newArrayListWithExpectedSize(COMMIT_EVERY_NUM_ELEMENTS);

while (true) {

try {
Expand All @@ -559,39 +528,33 @@ public void doImport(final SubMonitor subMonitor, final AbstractImportUnit unit)
}
}

if (currentRow == null) {
// End of file reached
break;
}

try {
importRow(currentRow);
} catch (final NullPointerException e) {
if (currentRow != null) {
rows.add(currentRow);
subMonitor.worked(1);
// keep loading all items, until we reach the threshold
if (!needsCommitting(rows.size())) {
continue;
}

if (ImportAction.BREAK.equals(handleImportException(e))) {
// process batch loaded rows and commit them
importRows(rows);
// reinit rows array
rows = Lists.newArrayListWithExpectedSize(COMMIT_EVERY_NUM_ELEMENTS);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is a new List instance created here deliberately?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use rows.clear() here.

if (ImportAction.BREAK.equals(commit(subMonitor, effectiveTimeKey))) {
break;
} else {
continue;
}
}

unitsAdded++;
subMonitor.worked(1);

if (!needsCommitting(unitsAdded)) {
continue;
}

if (ImportAction.BREAK.equals(commit(subMonitor, effectiveTimeKey))) {

} else {
// process remaining rows and break
importRows(rows);
rows.clear();
commit(subMonitor, effectiveTimeKey);
break;
}
}

} finally {
Closeables.closeQuietly(sliceBeanReader);
}

commit(subMonitor, effectiveTimeKey);
}

protected int getImportWorkUnits(final int recordCount) {
Expand Down Expand Up @@ -697,15 +660,72 @@ private ImportAction checkCommitException(final CommitException e) {
* By default this method does nothing. Clients may override it.
*/
protected void handleCommitException() {
    // intentionally empty: the default implementation takes no action, subclasses may override
}

/**
* Modifies repository state based on the incoming CSV row bean.
* Modifies repository state based on the incoming CSV rows.
*
* @param rows the rows to import
*/
private final void importRows(List<T> rows) {
final Map<String, T> rowsToImport = rows.stream().filter(getRowFilter()).collect(Collectors.toMap(getRowIdMapper(), row -> row));
// load existing components
final Map<String, C> existingComponents = loadComponents(rowsToImport.keySet()).stream().collect(Collectors.toMap(getComponentIdMapper(), c -> c));
// create or update components
final Collection<C> componentsToAttach = newHashSet();
for (Entry<String, T> rowToImport : rowsToImport.entrySet()) {
Copy link
Member

@apeteri apeteri Jan 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not super important: maybe iterate by only the ID set here, and retrieve both the RF2 row and the component with a {rowsToImport,existingComponents}.get(componentId) call for symmetry?

final String componentId = rowToImport.getKey();
C component = existingComponents.get(componentId);
final T row = rowToImport.getValue();
if (component == null) {
component = getOrCreate(componentId, componentsToAttach);
} else if (skipCurrentRow(row, component)) {
getLogger().warn("Not importing component '{}|{}' with effective time '{}'; it should have been filtered from the input file.",
row.getClass().getSimpleName(),
getRowIdMapper().apply(row),
EffectiveTimes.format(row.getEffectiveTime(), DateFormats.SHORT));
continue;
}
applyRow(component, row, componentsToAttach);
}
attach(componentsToAttach);
}

/**
* Gets or creates a component. This method should return the component from the current transaction if it exists, otherwise it should
* return a new object with the given id.
*
* @param currentRow the row to import
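* @param componentId - the identifier of the component to get or create
* @param componentsToAttach - the collection to register attachable components to
* @return the existing component, or the newly created one if none exists yet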
*/
protected abstract void importRow(T currentRow);
protected final C getOrCreate(String componentId, Collection<C> componentsToAttach) {
    // query for this single identifier only; a null result means no component could be loaded for it
    final Collection<C> loaded = loadComponents(Collections.singleton(componentId));
    final C existing = Iterables.getOnlyElement(loaded, null);
    if (existing != null) {
        return existing;
    }

    // nothing loaded: create the component, register it, and remember it for attaching
    final C created = createComponent(componentId);
    registerNewComponent(created);
    componentsToAttach.add(created);
    return created;
}

/**
 * Creates the component for the given identifier.
 * <p>
 * Called by {@code getOrCreate} when no component could be loaded for the identifier; the returned instance is then passed to
 * {@code registerNewComponent} and added to the collection of components to attach.
 *
 * @param componentId the identifier of the component to create
 * @return the component created for the identifier
 */
protected abstract C createComponent(String componentId);

/**
 * Registers a component that was just created.
 * <p>
 * Called by {@code getOrCreate} immediately after {@code createComponent}, before the component is added to the collection of
 * components to attach. What registration entails is left to the implementation.
 *
 * @param component the component returned by {@code createComponent}
 */
protected abstract void registerNewComponent(C component);

/**
 * Applies the given row to the given component.
 * <p>
 * Called by {@code importRows} once for every row of the batch that is not skipped, with either the existing component loaded for
 * the row's identifier or the one obtained from {@code getOrCreate}.
 *
 * @param component the component belonging to the row
 * @param row the row being imported
 * @param componentsToAttach the collection that {@code importRows} hands to {@code attach} at the end of the batch; presumably
 *            implementations may add further components to it - TODO confirm against the implementors
 */
protected abstract void applyRow(C component, T row, Collection<C> componentsToAttach);

/**
 * Loads the components for the given identifiers.
 * <p>
 * {@code importRows} indexes the result by {@code getComponentIdMapper()} and treats every identifier without a match as not yet
 * loaded; {@code getOrCreate} calls this method with a single identifier and expects at most one element back.
 *
 * @param componentIds the identifiers to load components for
 * @return the loaded components, never {@code null}; because {@code importRows} collects them with {@code Collectors.toMap},
 *         two returned components that map to the same identifier make the import fail with an {@code IllegalStateException}
 */
protected abstract Collection<C> loadComponents(Set<String> componentIds);

/**
 * Attaches the components collected while processing a batch of rows.
 * <p>
 * Called by {@code importRows} once per batch, after every row has been applied. The collection may be empty.
 *
 * @param componentsToAttach the components gathered during the batch
 */
protected abstract void attach(Collection<C> componentsToAttach);

/**
 * Returns the function that extracts the component identifier from a row.
 * <p>
 * {@code importRows} uses it as the key mapper when indexing the rows of a batch, and to print the identifier of skipped rows.
 *
 * @return the row-to-identifier function
 */
protected abstract Function<T, String> getRowIdMapper();

/**
 * Returns the function that extracts the identifier from a loaded component.
 * <p>
 * {@code importRows} uses it to index the result of {@code loadComponents} and then looks components up by the identifiers
 * produced by {@code getRowIdMapper()}, so both mappers have to yield matching identifiers for a row and its component.
 *
 * @return the component-to-identifier function
 */
protected abstract Function<C, String> getComponentIdMapper();

/**
 * Supplies the predicate that {@code importRows} applies to a batch before indexing it; rows rejected by the predicate are not
 * imported. The default accepts every row.
 *
 * @return the row filter
 */
protected Predicate<T> getRowFilter() {
    final Predicate<T> acceptAll = ignored -> true;
    return acceptAll;
}

@Override
public void postImport(final SubMonitor subMonitor) {
Expand Down