Skip to content
Permalink
Browse files
Support skipping lines that start with specified symbols
Implement #25

Change-Id: Id5a664a3d0c09bac7cb3761862ab69bfd8cca5dd
  • Loading branch information
Linary authored and zhoney committed Nov 29, 2018
1 parent 87bd702 commit e1f085f203600c20db122e41e19202e4d6c6b5e8
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 3 deletions.
@@ -77,6 +77,11 @@ public boolean hasNext() {
throw new LoadException("Read next line error", e);
}
}
// Skip the comment line
if (this.nextLine != null && this.isCommentLine(this.nextLine)) {
this.nextLine = null;
return this.hasNext();
}
return this.nextLine != null;
}

@@ -121,6 +126,10 @@ private BufferedReader open(FileSource source) throws IOException {
}
}

/**
 * Tells whether the given line begins with one of the comment symbols
 * exposed by the file source.
 *
 * @param line the line to inspect
 * @return {@code true} if at least one comment symbol is a prefix of
 *         {@code line}, {@code false} otherwise
 */
private boolean isCommentLine(String line) {
    for (String symbol : this.source.commentSymbols()) {
        // Stop at the first matching prefix
        if (line.startsWith(symbol)) {
            return true;
        }
    }
    return false;
}

private static void checkFile(File file) {
if (!file.exists()) {
throw new LoadException("Please ensure the file exist: '%s'", file);
@@ -19,7 +19,10 @@

package com.baidu.hugegraph.loader.source.file;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.baidu.hugegraph.loader.source.InputSource;
import com.baidu.hugegraph.loader.source.SourceType;
@@ -38,9 +41,13 @@ public class FileSource implements InputSource {
@JsonProperty("delimiter")
private String delimiter;
// Declared once, without an initializer: the default is assigned in the
// constructor together with the other defaults
@JsonProperty("charset")
private String charset;
// Prefixes that mark a line as a comment
@JsonProperty("comment_symbols")
private Set<String> commentSymbols;

/**
 * Creates a file source that uses {@code DEFAULT_CHARSET} and has no
 * comment symbols.
 */
public FileSource() {
    this.charset = DEFAULT_CHARSET;
    this.commentSymbols = new HashSet<>();
}

@Override
@@ -57,7 +64,11 @@ public FileFormat format() {
}

/**
 * Returns the header list of this source as a read-only view.
 *
 * @return an unmodifiable view of the header, or {@code null} when no
 *         header is set
 */
public List<String> header() {
    if (this.header == null) {
        return null;
    }
    // Wrap instead of exposing the internal list so callers cannot mutate it
    return Collections.unmodifiableList(this.header);
}

public String delimiter() {
@@ -67,4 +78,9 @@ public String delimiter() {
/**
 * Returns the value of the {@code charset} field of this source.
 *
 * @return the charset name held by this source
 */
public String charset() {
return this.charset;
}

/**
 * Returns the comment symbols of this source as a read-only view.
 *
 * The field is bound to the {@code comment_symbols} JSON property, so it
 * may end up {@code null} despite the constructor default (presumably when
 * the input sets the property to null explicitly; confirm against the
 * mapper configuration). An {@code assert} does not guard against that
 * when assertions are disabled, so the null case is handled explicitly.
 *
 * @return an unmodifiable view of the comment symbols, never {@code null};
 *         empty when no symbols are set
 */
public Set<String> commentSymbols() {
    if (this.commentSymbols == null) {
        return Collections.emptySet();
    }
    return Collections.unmodifiableSet(this.commentSymbols);
}
}
@@ -573,7 +573,6 @@ public void testLoadWithIgnoreNullValueColumns() {
}

List<Vertex> vertices = client.graph().listVertices();

Assert.assertEquals(3, vertices.size());

for (Vertex vertex : vertices) {
@@ -582,6 +581,29 @@ public void testLoadWithIgnoreNullValueColumns() {
}
}

/**
 * Writes data rows mixed with lines starting with "#" and "//" to
 * vertex_person.csv, runs the loader with struct_comment_symbol.json and
 * expects two vertices to be present afterwards.
 */
@Test
public void testLoadWithFileHasCommentLine() {
    String csvFile = path("vertex_person.csv");
    FileUtil.append(csvFile,
                    "# This is a comment",
                    "marko,29,Beijing",
                    "// This is also a comment",
                    "# This is still a comment",
                    "vadas,27,Hongkong");

    String structFile = path("struct_comment_symbol.json");
    String schemaFile = path("schema_joint_pk.groovy");
    String[] loaderArgs = {
        "-f", structFile,
        "-s", schemaFile,
        "-g", "hugegraph",
        "--test-mode", "true"
    };

    try {
        HugeGraphLoader.main(loaderArgs);
    } catch (Exception e) {
        Assert.fail("Should not throw exception, but throw " + e);
    }

    // Only the two data rows should have been loaded
    List<Vertex> loaded = client.graph().listVertices();
    Assert.assertEquals(2, loaded.size());
}

/**
 * Joins {@code PATH_PREFIX} with the given file name and returns the
 * resulting path as a string.
 *
 * @param fileName name of the file below {@code PATH_PREFIX}
 * @return the combined path in string form
 */
private static String path(String fileName) {
return Paths.get(PATH_PREFIX, fileName).toString();
}
@@ -0,0 +1,19 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "file",
"path": "src/test/resources/vertex_person.csv",
"format": "CSV",
"charset": "UTF-8",
"comment_symbols": ["#", "//"]
},
"mapping": {
"name": "name",
"age": "age",
"city": "city"
}
}
]
}

0 comments on commit e1f085f

Please sign in to comment.