Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Feature Request: Allow for Indexing #3

Merged
merged 1 commit into from

3 participants

@maxdemarzi

Allowing users to specify csv files for fulltext and exact indexing of nodes and relationships.

@jexp
Owner

Max, could you squash your commits into one? Run git rebase -i ccd110e, then "p"ick the first commit and "s"quash the others into it, so that a single commit remains.

@maxdemarzi

Done.

@jexp
Owner

Thanks,

What do you think about my suggestion of skipping these additional index files and rather using the header for this? Seems less involved?

@maxdemarzi

I think doing it this way we capture the more general case and avoid problems with:

  • wanting a field to be indexed in two indexes
  • indexes that use values not included in the nodes (I could see this for aliases, or i18n)
  • easier to update the importer as you add a file without messing with your nodes.csv
@jexp jexp merged commit 69c442d into jexp:master
@jexp
Owner

I think those are rather the 10% use cases so I'm still not convinced that they are worth the effort :)

I think I'll implement the other solution for the 90% case

@karussell

Two side notes: always close writers/streams (and do it in a finally clause), and specify the encoding for the streams.

@maxdemarzi

I'm a Ruby guy, so I did what I could... >8-]

Sounds like somebody (cough Peter cough) is volunteering a pull-request?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jul 3, 2012
  1. @maxdemarzi

    updating libraries and adding Lucene dependency in order to
    implement node and relationship indexing.

    maxdemarzi authored
This page is out of date. Refresh to see the latest.
View
1  .gitignore
@@ -1,3 +1,4 @@
+.project
.shell_history
.idea
*.ipr
View
8 pom.xml
@@ -14,8 +14,14 @@
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-kernel</artifactId>
- <version>1.6.M02</version>
+ <version>1.8.M05</version>
</dependency>
+ <dependency>
+ <groupId>org.neo4j</groupId>
+ <artifactId>neo4j-lucene-index</artifactId>
+ <version>1.8.M05</version>
+ </dependency>
+
</dependencies>
<build>
<plugins>
View
31 readme.txt
@@ -57,4 +57,33 @@ Importing 7500000 Nodes took 17 seconds
Importing 41246740 Relationships took 170 seconds
212 seconds
ynagzet:batchimport mh$ du -sh target/db/
-3,2G target/db/
+3,2G target/db/
+
+Optionally you can add nodes and relationships to indexes.
+
+Add four arguments per index to the command line:
+
+To create a full text node index called users using nodes_index.csv:
+node_index users fulltext nodes_index.csv
+
+To create an exact relationship index called worked using rels_index.csv:
+rel_index worked exact rels_index.csv
+
+Example command line:
+java -server -Xmx4G -jar ../batch-import/target/batch-import-jar-with-dependencies.jar neo4j/data/graph.db nodes.csv rels.csv node_index users fulltext nodes_index.csv rel_index worked exact rels_index.csv
+
+nodes_index.csv
+
+id name language
+1 Victor Richards West Frisian
+2 Virginia Shaw Korean
+3 Lois Simpson Belarusian
+4 Randy Bishop Hiri Motu
+5 Lori Mendoza Tok Pisin
+
+rels_index.csv
+
+id property1 property2
+0 cwqbnxrv rpyqdwhk
+1 qthnrret tzjmmhta
+2 dtztaqpy pbmcdqyc
View
128 src/main/java/org/neo4j/batchimport/Importer.java
@@ -1,41 +1,59 @@
package org.neo4j.batchimport;
import org.neo4j.graphdb.RelationshipType;
-import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+import org.neo4j.unsafe.batchinsert.BatchInserter;
+import org.neo4j.unsafe.batchinsert.BatchInserters;
+import org.neo4j.unsafe.batchinsert.BatchInserterImpl;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
+import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
+import java.io.FileWriter;
import java.io.IOException;
+import java.util.HashMap;
import java.util.Map;
+import org.neo4j.helpers.collection.MapUtil;
import static org.neo4j.helpers.collection.MapUtil.map;
import static org.neo4j.helpers.collection.MapUtil.stringMap;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.EXACT_CONFIG;
public class Importer {
private static Report report;
- private BatchInserterImpl db;
-
+ private BatchInserter db;
+ private BatchInserterIndexProvider lucene;
+
public Importer(File graphDb) {
- final Map<String, String> config = getConfig();
- db = new BatchInserterImpl(graphDb.getAbsolutePath(), config);
- report = new Report(10 * 1000 * 1000, 100);
- }
-
- private Map<String, String> getConfig() {
- if (new File("batch.properties").exists()) {
- return BatchInserterImpl.loadProperties("batch.properties");
- } else {
- return stringMap(
- "dump_configuration", "true",
- "cache_type", "none",
- "neostore.propertystore.db.index.keys.mapped_memory", "5M",
- "neostore.propertystore.db.index.mapped_memory", "5M",
- "neostore.nodestore.db.mapped_memory", "50M",
- "neostore.relationshipstore.db.mapped_memory", "250M",
- "neostore.propertystore.db.mapped_memory", "200M",
- "neostore.propertystore.db.strings.mapped_memory", "100M");
+ Map<String, String> config = new HashMap<String, String>();
+ try {
+ if (new File("batch.properties").exists()) {
+ System.out.println("Using Existing Configuration File");
+ } else {
+ System.out.println("Writing Configuration File to batch.properties");
+ FileWriter fw = new FileWriter( "batch.properties" );
+ fw.append( "neostore.nodestore.db.mapped_memory=100M\n"
+ + "neostore.relationshipstore.db.mapped_memory=1G\n"
+ + "neostore.propertystore.db.mapped_memory=250M\n"
+ + "neostore.propertystore.db.strings.mapped_memory=100M\n"
+ + "neostore.propertystore.db.arrays.mapped_memory=0M\n"
+ + "neostore.propertystore.db.index.keys.mapped_memory=15M\n"
+ + "neostore.propertystore.db.index.mapped_memory=15M" );
+ fw.close();
+ }
+
+ config = MapUtil.load( new File(
+ "batch.properties" ) );
+
+ } catch (Exception e) {
+ System.out.println(e.getMessage());
}
+
+ db = BatchInserters.inserter(graphDb.getAbsolutePath(), config);
+ lucene = new LuceneBatchInserterIndexProvider(db);
+ report = new Report(10 * 1000 * 1000, 100);
}
public static void main(String[] args) throws IOException {
@@ -45,17 +63,33 @@ public static void main(String[] args) throws IOException {
File graphDb = new File(args[0]);
File nodesFile = new File(args[1]);
File relationshipsFile = new File(args[2]);
+ File indexFile = new File("");
+ String indexName = "";
+ String indexType = "";
+
if (!graphDb.exists()) graphDb.mkdirs();
Importer importBatch = new Importer(graphDb);
try {
if (nodesFile.exists()) importBatch.importNodes(nodesFile);
- if (relationshipsFile.exists()) importBatch.importRelationships(relationshipsFile);
- } finally {
+ if (relationshipsFile.exists()) importBatch.importRelationships(relationshipsFile);
+ for (int i = 3; i < args.length; i = i + 4) {
+ indexName = args[i+1];
+ indexType = args[i+2];
+ indexFile = new File(args[i + 3]);
+ if (args[i].equals("node_index")) {
+ if (indexFile.exists()) importBatch.importNodeIndexes(indexFile, indexName, indexType);
+ } else {
+ if (indexFile.exists()) importBatch.importRelationshipIndexes(indexFile, indexName, indexType);
+ }
+
+ }
+ } finally {
importBatch.finish();
}
}
private void finish() {
+ lucene.shutdown();
db.shutdown();
report.finish();
}
@@ -149,6 +183,54 @@ private void importRelationships(File file) throws IOException {
report.finishImport("Relationships");
}
+ private void importNodeIndexes(File file, String indexName, String indexType) throws IOException {
+ BatchInserterIndex index;
+ if (indexType.equals("fulltext")) {
+ index = lucene.nodeIndex( indexName, stringMap( "type", "fulltext" ) );
+ } else {
+ index = lucene.nodeIndex( indexName, EXACT_CONFIG );
+ }
+
+ BufferedReader bf = new BufferedReader(new FileReader(file));
+
+ final Data data = new Data(bf.readLine(), "\t", 1);
+ Object[] node = new Object[1];
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = map(data.update(line, node));
+ index.add(id(node[0]), properties);
+ report.dots();
+ }
+
+ report.finishImport("Nodes into " + indexName + " Index");
+ }
+
+ private void importRelationshipIndexes(File file, String indexName, String indexType) throws IOException {
+ BatchInserterIndex index;
+ if (indexType.equals("fulltext")) {
+ index = lucene.relationshipIndex( indexName, stringMap( "type", "fulltext" ) );
+ } else {
+ index = lucene.relationshipIndex( indexName, EXACT_CONFIG );
+ }
+
+ BufferedReader bf = new BufferedReader(new FileReader(file));
+
+ final Data data = new Data(bf.readLine(), "\t", 1);
+ Object[] rel = new Object[1];
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = map(data.update(line, rel));
+ index.add(id(rel[0]), properties);
+ report.dots();
+ }
+
+ report.finishImport("Relationships into " + indexName + " Index");
+
+ }
+
+
static class Type implements RelationshipType {
String name;
Something went wrong with that request. Please try again.