Permalink
Browse files

Replace hppc collections with fastutil and implement stable ids for pro parte synonyms, fixes http://dev.gbif.org/issues/browse/POR-3031
  • Loading branch information...
mdoering committed Jan 18, 2017
1 parent 7b6653f commit da4bcd65ac0b95793664fc4ec39638928e8ecefb
Showing with 466 additions and 315 deletions.
  1. +4 −4 checklistbank-cli/pom.xml
  2. +76 −74 checklistbank-cli/src/main/java/org/gbif/checklistbank/kryo/CliKryoFactory.java
  3. +0 −31 checklistbank-cli/src/main/java/org/gbif/checklistbank/kryo/IntArrayListSerializer.java
  4. +3 −3 checklistbank-cli/src/main/java/org/gbif/checklistbank/neo/traverse/ChunkingEvaluator.java
  5. +84 −30 checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/IdGenerator.java
  6. +31 −29 checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/NubBuilder.java
  7. +3 −2 checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/model/NubUsage.java
  8. +6 −6 checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/source/NubSource.java
  9. +3 −3 checklistbank-cli/src/main/java/org/gbif/checklistbank/nub/validation/NeoAssertionEngine.java
  10. +42 −9 checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/IdGeneratorTest.java
  11. +3 −1 checklistbank-cli/src/test/java/org/gbif/checklistbank/nub/NubBuilderIT.java
  12. +1 −0 checklistbank-cli/src/test/resources/nub-sources/dataset111.txt
  13. +2 −0 checklistbank-cli/src/test/resources/trees/111.txt
  14. +4 −4 checklistbank-mybatis-service/pom.xml
  15. +6 −6 ...tis-service/src/main/java/org/gbif/checklistbank/service/mybatis/DatasetImportServiceMyBatis.java
  16. +5 −0 checklistbank-nub/pom.xml
  17. +1 −1 checklistbank-nub/src/main/java/org/gbif/nub/lookup/straight/IdLookup.java
  18. +80 −10 checklistbank-nub/src/main/java/org/gbif/nub/lookup/straight/IdLookupImpl.java
  19. +0 −2 checklistbank-nub/src/main/java/org/gbif/nub/lookup/straight/IdLookupWs.java
  20. +4 −0 checklistbank-nub/src/main/java/org/gbif/nub/lookup/straight/LookupKryoFactory.java
  21. +93 −84 checklistbank-nub/src/main/java/org/gbif/nub/lookup/straight/LookupUsage.java
  22. +9 −9 checklistbank-nub/src/test/java/org/gbif/nub/lookup/straight/IdLookupImplTest.java
  23. +2 −3 docs/MAP-LIBRARIES.md
  24. +4 −4 pom.xml
@@ -278,14 +278,14 @@
<groupId>org.kohsuke.metainf-services</groupId>
<artifactId>metainf-services</artifactId>
</dependency>
<dependency>
<groupId>com.carrotsearch</groupId>
<artifactId>hppc</artifactId>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
</dependency>
<!-- JACKSON 2.4 -->
@@ -59,9 +59,9 @@
import java.util.HashSet;
import java.util.UUID;
import com.carrotsearch.hppc.IntArrayList;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.pool.KryoFactory;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import org.neo4j.kernel.impl.core.NodeProxy;
@@ -72,83 +72,85 @@
*/
public class CliKryoFactory implements KryoFactory {
@Override
public Kryo create() {
Kryo kryo = new Kryo();
kryo.setRegistrationRequired(true);
@Override
public Kryo create() {
Kryo kryo = new Kryo();
kryo.setRegistrationRequired(true);
kryo.register(NameUsage.class);
kryo.register(VerbatimNameUsage.class);
kryo.register(NameUsageMetrics.class);
kryo.register(UsageExtensions.class);
kryo.register(ParsedName.class);
kryo.register(DatasetMetrics.class);
kryo.register(Description.class);
kryo.register(Distribution.class);
kryo.register(Identifier.class);
kryo.register(NameUsageMediaObject.class);
kryo.register(Reference.class);
kryo.register(SpeciesProfile.class);
kryo.register(TypeSpecimen.class);
kryo.register(VernacularName.class);
// cli specifics
kryo.register(NubUsage.class);
kryo.register(UsageFacts.class);
kryo.register(Classification.class);
kryo.register(SrcUsage.class);
kryo.register(NameUsage.class);
kryo.register(VerbatimNameUsage.class);
kryo.register(NameUsageMetrics.class);
kryo.register(UsageExtensions.class);
kryo.register(ParsedName.class);
kryo.register(DatasetMetrics.class);
kryo.register(Description.class);
kryo.register(Distribution.class);
kryo.register(Identifier.class);
kryo.register(NameUsageMediaObject.class);
kryo.register(Reference.class);
kryo.register(SpeciesProfile.class);
kryo.register(TypeSpecimen.class);
kryo.register(VernacularName.class);
// cli specifics
kryo.register(NubUsage.class);
kryo.register(UsageFacts.class);
kryo.register(Classification.class);
kryo.register(SrcUsage.class);
// java & commons
kryo.register(Date.class);
kryo.register(HashMap.class);
kryo.register(HashSet.class);
kryo.register(ArrayList.class);
kryo.register(UUID.class, new UUIDSerializer());
kryo.register(URI.class, new URISerializer());
kryo.register(IntArrayList.class, new IntArrayListSerializer());
kryo.register(int[].class);
ImmutableListSerializer.registerSerializers(kryo);
// fastutil
kryo.register(IntArrayList.class);
// enums
kryo.register(EnumSet.class, new EnumSetSerializer());
kryo.register(NameUsageIssue.class);
kryo.register(NomenclaturalStatus.class);
kryo.register(NomenclaturalStatus[].class);
kryo.register(TaxonomicStatus.class);
kryo.register(Origin.class);
kryo.register(Rank.class);
kryo.register(Extension.class);
kryo.register(Kingdom.class);
kryo.register(NameType.class);
kryo.register(NamePart.class,40);
kryo.register(Language.class);
kryo.register(Country.class);
kryo.register(OccurrenceStatus.class);
kryo.register(LifeStage.class);
kryo.register(ThreatStatus.class);
kryo.register(EstablishmentMeans.class);
kryo.register(CitesAppendix.class);
kryo.register(IdentifierType.class);
kryo.register(MediaType.class);
kryo.register(TypeStatus.class);
kryo.register(TypeDesignationType.class);
kryo.register(Sex.class);
// java & commons
kryo.register(Date.class);
kryo.register(HashMap.class);
kryo.register(HashSet.class);
kryo.register(ArrayList.class);
kryo.register(UUID.class, new UUIDSerializer());
kryo.register(URI.class, new URISerializer());
kryo.register(int[].class);
ImmutableListSerializer.registerSerializers(kryo);
// term enums
kryo.register(AcTerm.class);
kryo.register(DcElement.class);
kryo.register(DcTerm.class);
kryo.register(DwcTerm.class);
kryo.register(EolReferenceTerm.class);
kryo.register(GbifInternalTerm.class);
kryo.register(GbifTerm.class);
kryo.register(IucnTerm.class);
kryo.register(XmpRightsTerm.class);
kryo.register(XmpTerm.class);
kryo.register(UnknownTerm.class, new TermSerializer());
// enums
kryo.register(EnumSet.class, new EnumSetSerializer());
kryo.register(NameUsageIssue.class);
kryo.register(NomenclaturalStatus.class);
kryo.register(NomenclaturalStatus[].class);
kryo.register(TaxonomicStatus.class);
kryo.register(Origin.class);
kryo.register(Rank.class);
kryo.register(Extension.class);
kryo.register(Kingdom.class);
kryo.register(NameType.class);
kryo.register(NamePart.class, 40);
kryo.register(Language.class);
kryo.register(Country.class);
kryo.register(OccurrenceStatus.class);
kryo.register(LifeStage.class);
kryo.register(ThreatStatus.class);
kryo.register(EstablishmentMeans.class);
kryo.register(CitesAppendix.class);
kryo.register(IdentifierType.class);
kryo.register(MediaType.class);
kryo.register(TypeStatus.class);
kryo.register(TypeDesignationType.class);
kryo.register(Sex.class);
// ignore neo node proxies and set them to null upon read:
kryo.register(NodeProxy.class, new NullSerializer());
// term enums
kryo.register(AcTerm.class);
kryo.register(DcElement.class);
kryo.register(DcTerm.class);
kryo.register(DwcTerm.class);
kryo.register(EolReferenceTerm.class);
kryo.register(GbifInternalTerm.class);
kryo.register(GbifTerm.class);
kryo.register(IucnTerm.class);
kryo.register(XmpRightsTerm.class);
kryo.register(XmpTerm.class);
kryo.register(UnknownTerm.class, new TermSerializer());
return kryo;
}
// ignore neo node proxies and set them to null upon read:
kryo.register(NodeProxy.class, new NullSerializer());
return kryo;
}
}

This file was deleted.

Oops, something went wrong.
@@ -3,9 +3,9 @@
import org.gbif.checklistbank.cli.model.UsageFacts;
import org.gbif.checklistbank.neo.UsageDao;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongSet;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.traversal.Evaluation;
@@ -22,7 +22,7 @@
private UsageDao dao;
private int chunkSize;
private int minChunkSize;
private LongSet chunkIds = new LongHashSet();
private LongSet chunkIds = new LongOpenHashSet();
public ChunkingEvaluator(UsageDao dao, int minChunkSize, int chunkSize) {
Preconditions.checkArgument(minChunkSize < chunkSize, "Minimum chunk size needs to be smaller then the chunk size");
Oops, something went wrong.

0 comments on commit da4bcd6

Please sign in to comment.