Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Feature Request: Allow for Indexing #3

Merged
merged 1 commit into from

3 participants

Max De Marzi Michael Hunger Peter
Max De Marzi

Allowing users to specify csv files for fulltext and exact indexing of nodes and relationships.

Michael Hunger
Owner

Max, could you squash your commits into one? With git rebase -i ccd110e, "p"ick the first one and "s"quash the others into a single commit.

Max De Marzi

Done.

Michael Hunger
Owner

Thanks,

What do you think about my suggestion of skipping these additional index files and using the header for this instead? It seems less involved.

Max De Marzi

I think that by doing it this way we capture the more general case and avoid problems with:

  • wanting a field to be indexed in two indexes
  • indexes that use values not included in the nodes (I could see this for aliases, or i18n)
  • easier to update the importer as you add a file without messing with your nodes.csv
Michael Hunger jexp merged commit 69c442d into from
Michael Hunger
Owner

I think those are rather the 10% use cases, so I'm still not convinced that they are worth the effort. :)

I think I'll implement the other solution for the 90% case

Peter

Two side notes: always close writers/streams (and do it in a finally clause), and specify the encoding for the streams.

Max De Marzi

I'm a Ruby guy, so I did what I could... >8-]

Sounds like somebody (cough Peter cough) is volunteering a pull-request?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jul 3, 2012
  1. Max De Marzi

    updating libraries and adding Lucene dependency in order to
    implement node and relationship indexing.

    maxdemarzi authored
This page is out of date. Refresh to see the latest.
1  .gitignore
View
@@ -1,3 +1,4 @@
+.project
.shell_history
.idea
*.ipr
8 pom.xml
View
@@ -14,8 +14,14 @@
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-kernel</artifactId>
- <version>1.6.M02</version>
+ <version>1.8.M05</version>
</dependency>
+ <dependency>
+ <groupId>org.neo4j</groupId>
+ <artifactId>neo4j-lucene-index</artifactId>
+ <version>1.8.M05</version>
+ </dependency>
+
</dependencies>
<build>
<plugins>
31 readme.txt
View
@@ -57,4 +57,33 @@ Importing 7500000 Nodes took 17 seconds
Importing 41246740 Relationships took 170 seconds
212 seconds
ynagzet:batchimport mh$ du -sh target/db/
-3,2G target/db/
+3,2G target/db/
+
+Optionally you can add nodes and relationships to indexes.
+
+Add four arguments per each index to command line:
+
+To create a full text node index called users using nodes_index.csv:
+node_index users fulltext nodes_index.csv
+
+To create an exact relationship index called worked using rels_index.csv:
+rel_index worked exact rels_index.csv
+
+Example command line:
+java -server -Xmx4G -jar ../batch-import/target/batch-import-jar-with-dependencies.jar neo4j/data/graph.db nodes.csv rels.csv node_index users fulltext nodes_index.csv rel_index worked exact rels_index.csv
+
+nodes_index.csv
+
+id name language
+1 Victor Richards West Frisian
+2 Virginia Shaw Korean
+3 Lois Simpson Belarusian
+4 Randy Bishop Hiri Motu
+5 Lori Mendoza Tok Pisin
+
+rels_index.csv
+
+id property1 property2
+0 cwqbnxrv rpyqdwhk
+1 qthnrret tzjmmhta
+2 dtztaqpy pbmcdqyc
128 src/main/java/org/neo4j/batchimport/Importer.java
View
@@ -1,41 +1,59 @@
package org.neo4j.batchimport;
import org.neo4j.graphdb.RelationshipType;
-import org.neo4j.kernel.impl.batchinsert.BatchInserterImpl;
+import org.neo4j.unsafe.batchinsert.BatchInserter;
+import org.neo4j.unsafe.batchinsert.BatchInserters;
+import org.neo4j.unsafe.batchinsert.BatchInserterImpl;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
+import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
+import java.io.FileWriter;
import java.io.IOException;
+import java.util.HashMap;
import java.util.Map;
+import org.neo4j.helpers.collection.MapUtil;
import static org.neo4j.helpers.collection.MapUtil.map;
import static org.neo4j.helpers.collection.MapUtil.stringMap;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.EXACT_CONFIG;
public class Importer {
private static Report report;
- private BatchInserterImpl db;
-
+ private BatchInserter db;
+ private BatchInserterIndexProvider lucene;
+
public Importer(File graphDb) {
- final Map<String, String> config = getConfig();
- db = new BatchInserterImpl(graphDb.getAbsolutePath(), config);
- report = new Report(10 * 1000 * 1000, 100);
- }
-
- private Map<String, String> getConfig() {
- if (new File("batch.properties").exists()) {
- return BatchInserterImpl.loadProperties("batch.properties");
- } else {
- return stringMap(
- "dump_configuration", "true",
- "cache_type", "none",
- "neostore.propertystore.db.index.keys.mapped_memory", "5M",
- "neostore.propertystore.db.index.mapped_memory", "5M",
- "neostore.nodestore.db.mapped_memory", "50M",
- "neostore.relationshipstore.db.mapped_memory", "250M",
- "neostore.propertystore.db.mapped_memory", "200M",
- "neostore.propertystore.db.strings.mapped_memory", "100M");
+ Map<String, String> config = new HashMap<String, String>();
+ try {
+ if (new File("batch.properties").exists()) {
+ System.out.println("Using Existing Configuration File");
+ } else {
+ System.out.println("Writing Configuration File to batch.properties");
+ FileWriter fw = new FileWriter( "batch.properties" );
+ fw.append( "neostore.nodestore.db.mapped_memory=100M\n"
+ + "neostore.relationshipstore.db.mapped_memory=1G\n"
+ + "neostore.propertystore.db.mapped_memory=250M\n"
+ + "neostore.propertystore.db.strings.mapped_memory=100M\n"
+ + "neostore.propertystore.db.arrays.mapped_memory=0M\n"
+ + "neostore.propertystore.db.index.keys.mapped_memory=15M\n"
+ + "neostore.propertystore.db.index.mapped_memory=15M" );
+ fw.close();
+ }
+
+ config = MapUtil.load( new File(
+ "batch.properties" ) );
+
+ } catch (Exception e) {
+ System.out.println(e.getMessage());
}
+
+ db = BatchInserters.inserter(graphDb.getAbsolutePath(), config);
+ lucene = new LuceneBatchInserterIndexProvider(db);
+ report = new Report(10 * 1000 * 1000, 100);
}
public static void main(String[] args) throws IOException {
@@ -45,17 +63,33 @@ public static void main(String[] args) throws IOException {
File graphDb = new File(args[0]);
File nodesFile = new File(args[1]);
File relationshipsFile = new File(args[2]);
+ File indexFile = new File("");
+ String indexName = "";
+ String indexType = "";
+
if (!graphDb.exists()) graphDb.mkdirs();
Importer importBatch = new Importer(graphDb);
try {
if (nodesFile.exists()) importBatch.importNodes(nodesFile);
- if (relationshipsFile.exists()) importBatch.importRelationships(relationshipsFile);
- } finally {
+ if (relationshipsFile.exists()) importBatch.importRelationships(relationshipsFile);
+ for (int i = 3; i < args.length; i = i + 4) {
+ indexName = args[i+1];
+ indexType = args[i+2];
+ indexFile = new File(args[i + 3]);
+ if (args[i].equals("node_index")) {
+ if (indexFile.exists()) importBatch.importNodeIndexes(indexFile, indexName, indexType);
+ } else {
+ if (indexFile.exists()) importBatch.importRelationshipIndexes(indexFile, indexName, indexType);
+ }
+
+ }
+ } finally {
importBatch.finish();
}
}
private void finish() {
+ lucene.shutdown();
db.shutdown();
report.finish();
}
@@ -149,6 +183,54 @@ private void importRelationships(File file) throws IOException {
report.finishImport("Relationships");
}
+ private void importNodeIndexes(File file, String indexName, String indexType) throws IOException {
+ BatchInserterIndex index;
+ if (indexType.equals("fulltext")) {
+ index = lucene.nodeIndex( indexName, stringMap( "type", "fulltext" ) );
+ } else {
+ index = lucene.nodeIndex( indexName, EXACT_CONFIG );
+ }
+
+ BufferedReader bf = new BufferedReader(new FileReader(file));
+
+ final Data data = new Data(bf.readLine(), "\t", 1);
+ Object[] node = new Object[1];
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = map(data.update(line, node));
+ index.add(id(node[0]), properties);
+ report.dots();
+ }
+
+ report.finishImport("Nodes into " + indexName + " Index");
+ }
+
+ private void importRelationshipIndexes(File file, String indexName, String indexType) throws IOException {
+ BatchInserterIndex index;
+ if (indexType.equals("fulltext")) {
+ index = lucene.relationshipIndex( indexName, stringMap( "type", "fulltext" ) );
+ } else {
+ index = lucene.relationshipIndex( indexName, EXACT_CONFIG );
+ }
+
+ BufferedReader bf = new BufferedReader(new FileReader(file));
+
+ final Data data = new Data(bf.readLine(), "\t", 1);
+ Object[] rel = new Object[1];
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = map(data.update(line, rel));
+ index.add(id(rel[0]), properties);
+ report.dots();
+ }
+
+ report.finishImport("Relationships into " + indexName + " Index");
+
+ }
+
+
static class Type implements RelationshipType {
String name;
Something went wrong with that request. Please try again.