Permalink
Browse files

skips non-existend property values

added type conversion
  • Loading branch information...
1 parent 69c442d commit 4d4462922be60667c4ab24db8fcb46dd5c629f70 @jexp committed Aug 10, 2012
Showing with 174 additions and 45 deletions.
  1. +2 −2 pom.xml
  2. +10 −4 readme.txt
  3. +156 −34 src/main/java/org/neo4j/batchimport/Importer.java
  4. +6 −5 src/test/java/TestDataGenerator.java
View
@@ -14,12 +14,12 @@
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-kernel</artifactId>
- <version>1.8.M05</version>
+ <version>1.8.M07</version>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-lucene-index</artifactId>
- <version>1.8.M05</version>
+ <version>1.8.M07</version>
</dependency>
</dependencies>
View
@@ -4,22 +4,28 @@ Example data for the files is a small social network
Property names in first row.
Property values not listed will not be set on the nodes or properties.
+Optionally property fields can have a type (defaults to String) indicated with name:type where type is one of
+(int, long, float, double, boolean, byte, short, char, string). The string value is then converted to that type.
+Failure to do so will result in abort of the import operation.
The row number corresponds to the node-id (node 0 is the reference node)
nodes.csv
name age works_on
Michael 37 neo4j
+Selina 14
Rana 6
Selma 4
rels.csv
-start end type since
-1 2 FATHER_OF 2004-01-01
-1 3 FATHER_OF 2007-09-01
-2 3 SISTER_OF 2008-05-03
+start end type since counter:int
+1 2 FATHER_OF 1998-07-10 1
+1 3 FATHER_OF 2007-09-15 2
+1 4 FATHER_OF 2008-05-03 3
+3 4 SISTER_OF 2008-05-03 5
+2 3 SISTER_OF 2007-09-15 7
ynagzet:batchimport mh$ rm -rf target/db
@@ -1,9 +1,9 @@
package org.neo4j.batchimport;
import org.neo4j.graphdb.RelationshipType;
+import org.neo4j.kernel.impl.util.FileUtils;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserters;
-import org.neo4j.unsafe.batchinsert.BatchInserterImpl;
import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
@@ -13,13 +13,14 @@
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.*;
+
import org.neo4j.helpers.collection.MapUtil;
import static org.neo4j.helpers.collection.MapUtil.map;
import static org.neo4j.helpers.collection.MapUtil.stringMap;
import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.EXACT_CONFIG;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.FULLTEXT_CONFIG;
public class Importer {
private static Report report;
@@ -34,10 +35,11 @@ public Importer(File graphDb) {
} else {
System.out.println("Writing Configuration File to batch.properties");
FileWriter fw = new FileWriter( "batch.properties" );
- fw.append( "neostore.nodestore.db.mapped_memory=100M\n"
- + "neostore.relationshipstore.db.mapped_memory=1G\n"
- + "neostore.propertystore.db.mapped_memory=250M\n"
- + "neostore.propertystore.db.strings.mapped_memory=100M\n"
+ fw.append( "use_memory_mapped_buffers=true\n"
+ + "neostore.nodestore.db.mapped_memory=100M\n"
+ + "neostore.relationshipstore.db.mapped_memory=1G\n"
+ + "neostore.propertystore.db.mapped_memory=250M\n"
+ + "neostore.propertystore.db.strings.mapped_memory=100M\n"
+ "neostore.propertystore.db.arrays.mapped_memory=0M\n"
+ "neostore.propertystore.db.index.keys.mapped_memory=15M\n"
+ "neostore.propertystore.db.index.mapped_memory=15M" );
@@ -58,16 +60,18 @@ public Importer(File graphDb) {
public static void main(String[] args) throws IOException {
if (args.length < 3) {
- System.err.println("Usage java -jar batchimport.jar data/dir nodes.csv relationships.csv");
+ System.err.println("Usage java -jar batchimport.jar data/dir nodes.csv relationships.csv [node_index node-index-name fulltext|exact nodes_index.csv rel_index rel-index-name fulltext|exact rels_index.csv ....]");
}
File graphDb = new File(args[0]);
File nodesFile = new File(args[1]);
File relationshipsFile = new File(args[2]);
- File indexFile = new File("");
- String indexName = "";
- String indexType = "";
+ File indexFile;
+ String indexName;
+ String indexType;
- if (!graphDb.exists()) graphDb.mkdirs();
+ if (graphDb.exists()) {
+ FileUtils.deleteRecursively(graphDb);
+ }
Importer importBatch = new Importer(graphDb);
try {
if (nodesFile.exists()) importBatch.importNodes(nodesFile);
@@ -95,29 +99,79 @@ private void finish() {
}
static class Data {
- private final Object[] data;
+ private Object[] data;
private final int offset;
private final String delim;
+ private final String[] fields;
+ private final String[] lineData;
+ private final Type types[];
+ private final int lineSize;
+ private int dataSize;
public Data(String header, String delim, int offset) {
this.offset = offset;
this.delim = delim;
- String[] fields = header.split(delim);
- data = new Object[(fields.length - offset) * 2];
- for (int i = 0; i < fields.length - offset; i++) {
+ fields = header.split(delim);
+ lineSize = fields.length;
+ types = parseTypes(fields);
+ lineData = new String[lineSize];
+ createMapData(lineSize, offset);
+ }
+
+ private Object[] createMapData(int lineSize, int offset) {
+ dataSize = lineSize - offset;
+ data = new Object[dataSize*2];
+ for (int i = 0; i < dataSize; i++) {
data[i * 2] = fields[i + offset];
}
+ return data;
+ }
+
+ private Type[] parseTypes(String[] fields) {
+ Type[] types = new Type[lineSize];
+ Arrays.fill(types, Type.STRING);
+ for (int i = 0; i < lineSize; i++) {
+ String field = fields[i];
+ int idx = field.indexOf(':');
+ if (idx!=-1) {
+ fields[i]=field.substring(0,idx);
+ types[i]= Type.fromString(field.substring(idx + 1));
+ }
+ }
+ return types;
+ }
+
+ private int split(String line) {
+ final StringTokenizer st = new StringTokenizer(line, delim,true);
+ int count=0;
+ for (int i = 0; i < lineSize; i++) {
+ String value = st.nextToken();
+ if (value.equals(delim)) {
+ lineData[i] = null;
+ } else {
+ lineData[i] = value.trim().isEmpty() ? null : value;
+ if (i< lineSize -1) st.nextToken();
+ }
+ if (i >= offset && lineData[i]!=null) {
+ data[count++]=fields[i];
+ data[count++]=types[i-offset].convert(lineData[i]);
+ }
+ }
+ return count;
}
- public Object[] update(String line, Object... header) {
- final String[] values = line.split(delim);
+ public Map<String,Object> update(String line, Object... header) {
+ int nonNullCount = split(line);
if (header.length > 0) {
- System.arraycopy(values, 0, header, 0, header.length);
+ System.arraycopy(lineData, 0, header, 0, header.length);
}
- for (int i = 0; i < values.length - offset; i++) {
- data[i * 2 + 1] = values[i + offset];
+
+ if (nonNullCount == dataSize*2) {
+ return map(data);
}
- return data;
+ Object[] newData=new Object[nonNullCount];
+ System.arraycopy(data,0,newData,0,nonNullCount);
+ return map(newData);
}
}
@@ -139,15 +193,15 @@ public void reset() {
}
public void finish() {
- System.out.println((System.currentTimeMillis() - total) / 1000 + " seconds ");
+ System.out.println("\nTotal import time: "+ (System.currentTimeMillis() - total) / 1000 + " seconds ");
}
public void dots() {
if ((++count % dots) != 0) return;
System.out.print(".");
if ((count % batch) != 0) return;
long now = System.currentTimeMillis();
- System.out.println((now - batchTime) + " ms for "+batch);
+ System.out.println(" "+ (now - batchTime) + " ms for "+batch);
batchTime = now;
}
@@ -162,7 +216,7 @@ private void importNodes(File file) throws IOException {
String line;
report.reset();
while ((line = bf.readLine()) != null) {
- db.createNode(map(data.update(line)));
+ db.createNode(data.update(line));
report.dots();
}
report.finishImport("Nodes");
@@ -172,12 +226,12 @@ private void importRelationships(File file) throws IOException {
BufferedReader bf = new BufferedReader(new FileReader(file));
final Data data = new Data(bf.readLine(), "\t", 3);
Object[] rel = new Object[3];
- final Type type = new Type();
+ final RelType relType = new RelType();
String line;
report.reset();
while ((line = bf.readLine()) != null) {
- final Map<String, Object> properties = map(data.update(line, rel));
- db.createRelationship(id(rel[0]), id(rel[1]), type.update(rel[2]), properties);
+ final Map<String, Object> properties = data.update(line, rel);
+ db.createRelationship(id(rel[0]), id(rel[1]), relType.update(rel[2]), properties);
report.dots();
}
report.finishImport("Relationships");
@@ -186,7 +240,7 @@ private void importRelationships(File file) throws IOException {
private void importNodeIndexes(File file, String indexName, String indexType) throws IOException {
BatchInserterIndex index;
if (indexType.equals("fulltext")) {
- index = lucene.nodeIndex( indexName, stringMap( "type", "fulltext" ) );
+ index = lucene.nodeIndex( indexName, FULLTEXT_CONFIG );
} else {
index = lucene.nodeIndex( indexName, EXACT_CONFIG );
}
@@ -198,7 +252,7 @@ private void importNodeIndexes(File file, String indexName, String indexType) th
String line;
report.reset();
while ((line = bf.readLine()) != null) {
- final Map<String, Object> properties = map(data.update(line, node));
+ final Map<String, Object> properties = data.update(line, node);
index.add(id(node[0]), properties);
report.dots();
}
@@ -209,7 +263,7 @@ private void importNodeIndexes(File file, String indexName, String indexType) th
private void importRelationshipIndexes(File file, String indexName, String indexType) throws IOException {
BatchInserterIndex index;
if (indexType.equals("fulltext")) {
- index = lucene.relationshipIndex( indexName, stringMap( "type", "fulltext" ) );
+ index = lucene.relationshipIndex( indexName, FULLTEXT_CONFIG );
} else {
index = lucene.relationshipIndex( indexName, EXACT_CONFIG );
}
@@ -221,7 +275,7 @@ private void importRelationshipIndexes(File file, String indexName, String index
String line;
report.reset();
while ((line = bf.readLine()) != null) {
- final Map<String, Object> properties = map(data.update(line, rel));
+ final Map<String, Object> properties = data.update(line, rel);
index.add(id(rel[0]), properties);
report.dots();
}
@@ -231,10 +285,10 @@ private void importRelationshipIndexes(File file, String indexName, String index
}
- static class Type implements RelationshipType {
+ static class RelType implements RelationshipType {
String name;
- public Type update(Object value) {
+ public RelType update(Object value) {
this.name = value.toString();
return this;
}
@@ -244,6 +298,74 @@ public String name() {
}
}
+ enum Type {
+ BOOLEAN {
+ @Override
+ public Object convert(String value) {
+ return Boolean.valueOf(value);
+ }
+ },
+ INT {
+ @Override
+ public Object convert(String value) {
+ return Integer.valueOf(value);
+ }
+ },
+ LONG {
+ @Override
+ public Object convert(String value) {
+ return Long.valueOf(value);
+ }
+ },
+ DOUBLE {
+ @Override
+ public Object convert(String value) {
+ return Double.valueOf(value);
+ }
+ },
+ FLOAT {
+ @Override
+ public Object convert(String value) {
+ return Float.valueOf(value);
+ }
+ },
+ BYTE {
+ @Override
+ public Object convert(String value) {
+ return Byte.valueOf(value);
+ }
+ },
+ SHORT {
+ @Override
+ public Object convert(String value) {
+ return Short.valueOf(value);
+ }
+ },
+ CHAR {
+ @Override
+ public Object convert(String value) {
+ return value.charAt(0);
+ }
+ },
+ STRING {
+ @Override
+ public Object convert(String value) {
+ return value;
+ }
+ };
+
+ private static Type fromString(String typeString) {
+ if (typeString==null || typeString.isEmpty()) return Type.STRING;
+ try {
+ return valueOf(typeString.toUpperCase());
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Unknown Type "+typeString);
+ }
+ }
+
+ public abstract Object convert(String value);
+ }
+
private long id(Object id) {
return Long.parseLong(id.toString());
}
Oops, something went wrong.

0 comments on commit 4d44629

Please sign in to comment.