Skip to content
Permalink
Browse files
Support customized timezone for date (#107)
Change-Id: I9814239e101e1424fc6d247a9984fcaaedbcca5c
  • Loading branch information
Linary committed Dec 5, 2019
1 parent 62727dd commit ef2e6da674425585db47e83e6d6962fe2fd88dbd
Showing 14 changed files with 270 additions and 5 deletions.
@@ -6,7 +6,7 @@

<groupId>com.baidu.hugegraph</groupId>
<artifactId>hugegraph-loader</artifactId>
<version>0.10.0</version>
<version>0.10.1</version>

<properties>
<release.name>hugegraph-loader</release.name>
@@ -41,6 +41,7 @@ public final class Constants {
public static final String TAB_STR = "\t";
public static final String NULL_STR = "NULL";
public static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
// Default time zone ID used when parsing dates if no time zone is given
public static final String TIME_ZONE = "GMT+8";
public static final String SKIPPED_LINE_REGEX = "(^#|^//).*|";
public static final String FAILURE = "failure";
public static final String FAILURE_CURRENT_DIR = "current";
@@ -22,6 +22,7 @@
import com.baidu.hugegraph.loader.constant.Constants;
import com.baidu.hugegraph.loader.source.AbstractSource;
import com.baidu.hugegraph.loader.source.SourceType;
import com.baidu.hugegraph.loader.util.DateUtil;
import com.baidu.hugegraph.util.E;
import com.fasterxml.jackson.annotation.JsonProperty;

@@ -37,6 +38,8 @@ public class FileSource extends AbstractSource {
private String delimiter;
@JsonProperty("date_format")
private String dateFormat;
@JsonProperty("time_zone")
private String timeZone;
@JsonProperty("skipped_line")
private SkippedLine skippedLine;
@JsonProperty("compression")
@@ -48,6 +51,7 @@ public FileSource() {
this.format = FileFormat.CSV;
this.delimiter = Constants.COMMA_STR;
this.dateFormat = Constants.DATE_FORMAT;
this.timeZone = Constants.TIME_ZONE;
this.skippedLine = new SkippedLine();
this.compression = Compression.NONE;
}
@@ -66,6 +70,8 @@ public void check() throws IllegalArgumentException {
"is %s, but got '%s'", Constants.COMMA_STR,
this.format, this.delimiter);
}
E.checkArgument(DateUtil.checkTimeZone(this.timeZone),
"The time_zone '%s' is invalid", this.timeZone);
if (this.listFormat() != null) {
String elemDelimiter = this.listFormat().elemDelimiter();
E.checkArgument(!elemDelimiter.equals(this.delimiter),
@@ -103,6 +109,10 @@ public String dateFormat() {
return this.dateFormat;
}

/**
 * Returns the time zone ID of this source (JSON property "time_zone").
 * The no-arg constructor initializes it to {@code Constants.TIME_ZONE}.
 *
 * @return the time zone ID string held by this source
 */
public String timeZone() {
return this.timeZone;
}

/**
 * Returns the skipped-line setting of this source (JSON property
 * "skipped_line").
 *
 * @return the {@code SkippedLine} object held by this source
 */
public SkippedLine skippedLine() {
return this.skippedLine;
}
@@ -120,7 +120,8 @@ private static Object parseSingleValue(Object rawValue, DataType dataType,
"Only accept FileSource when convert String value " +
"to Date, but got '%s'", source.getClass().getName());
String df = ((FileSource) source).dateFormat();
return parseDate(value, df);
String timeZone = ((FileSource) source).timeZone();
return parseDate(value, df, timeZone);
} else if (dataType.isUUID()) {
return parseUUID(value);
}
@@ -249,15 +250,15 @@ private static long parseLong(String rawValue) {
}
}

private static Date parseDate(Object value, String df) {
private static Date parseDate(Object value, String df, String timeZone) {
if (value instanceof Date) {
return (Date) value;
}
if (value instanceof Number) {
return new Date(((Number) value).longValue());
} else if (value instanceof String) {
try {
return DateUtil.parse((String) value, df);
return DateUtil.parse((String) value, df, timeZone);
} catch (ParseException e) {
throw new IllegalArgumentException(String.format(
"%s, expect format: %s",
@@ -22,17 +22,26 @@
import java.text.ParseException;
import java.util.Date;
import java.util.Map;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;

import com.baidu.hugegraph.date.SafeDateFormat;
import com.baidu.hugegraph.loader.constant.Constants;

public final class DateUtil {

private static final Map<String, SafeDateFormat> DATE_FORMATS =
private static final Map<String, SafeDateFormat> DATE_FORMATS =
new ConcurrentHashMap<>();

/**
 * Parse a date string with the given pattern, interpreting it in the
 * default time zone {@code Constants.TIME_ZONE}.
 *
 * @param source the text to parse
 * @param df     the date pattern used to look up the formatter
 * @return the parsed date
 * @throws ParseException if the formatter rejects the text
 */
public static Date parse(String source, String df) throws ParseException {
    return parse(source, df, Constants.TIME_ZONE);
}

/**
 * Parse a date string with the given pattern, interpreting it in the
 * given time zone.
 *
 * @param source   the text to parse
 * @param df       the date pattern used to look up the formatter
 * @param timeZone the time zone ID passed to TimeZone.getTimeZone()
 * @return the parsed date
 * @throws ParseException if the formatter rejects the text
 */
public static Date parse(String source, String df, String timeZone)
                         throws ParseException {
    SafeDateFormat dateFormat = getDateFormat(df);
    TimeZone zone = TimeZone.getTimeZone(timeZone);
    /*
     * The formatter is looked up by pattern only, so callers that use the
     * same pattern with different time zones presumably share a single
     * instance. Hold its monitor while setting the zone and parsing, so
     * that another thread can't switch the zone between the two steps.
     * NOTE(review): other users of the shared formatter see the zone set
     * by the most recent parse; confirm whether that is acceptable.
     */
    synchronized (dateFormat) {
        // parse date with specified timezone
        dateFormat.setTimeZone(zone);
        return dateFormat.parse(source);
    }
}

@@ -63,4 +72,18 @@ public static Object toPattern(String df) {
/**
 * Format the current time using the formatter obtained for the pattern.
 *
 * @param df the date pattern used to look up the formatter
 * @return the current time rendered by that formatter
 */
public static String now(String df) {
    SafeDateFormat formatter = getDateFormat(df);
    return formatter.format(new Date());
}

/**
 * Check whether a time zone ID is understood by {@link TimeZone}.
 *
 * @param timeZone the time zone ID to check, may be null
 * @return true if the ID is "GMT" itself or resolves to a zone whose ID
 *         is not "GMT"; false for null or unrecognized IDs
 */
public static boolean checkTimeZone(String timeZone) {
    final String gmtId = "GMT";
    // A missing value is reported as invalid rather than throwing an NPE
    if (timeZone == null) {
        return false;
    }
    if (gmtId.equals(timeZone)) {
        return true;
    }
    /*
     * TimeZone.getTimeZone() falls back to the "GMT" zone when the given
     * ID can't be understood, so getting "GMT" back for any other input
     * means the input was invalid.
     */
    String resolvedId = TimeZone.getTimeZone(timeZone).getID();
    return !gmtId.equals(resolvedId);
}
}
@@ -1230,6 +1230,64 @@ public void testUnMatchedDatePropertyAndFormat() {
});
}

@Test
public void testDefaultTimeZoneGMT8() throws java.text.ParseException {
    ioUtil.write("vertex_person_birth_date.csv",
                 "marko,1992-10-01 12:00:00,Beijing",
                 "vadas,2000-01-01 13:00:00,Hongkong");

    String structFile = structPath("default_timezone_gmt8/struct.json");
    String schemaFile = configPath("default_timezone_gmt8/schema.groovy");
    HugeGraphLoader.main(new String[]{
            "-f", structFile,
            "-s", schemaFile,
            "-g", GRAPH,
            "-h", SERVER,
            "--test-mode", "true"
    });

    List<Vertex> loaded = CLIENT.graph().listVertices();
    Assert.assertEquals(2, loaded.size());

    /*
     * Expected timestamps come from the two-argument DateUtil.parse(),
     * i.e. the same wall-clock strings that were written to the file,
     * parsed without an explicit time zone.
     */
    Vertex marko = CLIENT.graph().getVertex("1:marko");
    long markoBirth = DateUtil.parse("1992-10-01 12:00:00",
                                     Constants.DATE_FORMAT).getTime();
    Assert.assertEquals(markoBirth, marko.property("birth"));

    Vertex vadas = CLIENT.graph().getVertex("1:vadas");
    long vadasBirth = DateUtil.parse("2000-01-01 13:00:00",
                                     Constants.DATE_FORMAT).getTime();
    Assert.assertEquals(vadasBirth, vadas.property("birth"));
}

@Test
public void testCustomizedTimeZoneGMT0() throws java.text.ParseException {
    ioUtil.write("vertex_person_birth_date.csv",
                 "marko,1992-10-01 12:00:00,Beijing",
                 "vadas,2000-01-01 13:00:00,Hongkong");

    String structFile = structPath("customized_timezone_gmt0/struct.json");
    String schemaFile = configPath("customized_timezone_gmt0/schema.groovy");
    HugeGraphLoader.main(new String[]{
            "-f", structFile,
            "-s", schemaFile,
            "-g", GRAPH,
            "-h", SERVER,
            "--test-mode", "true"
    });

    List<Vertex> loaded = CLIENT.graph().listVertices();
    Assert.assertEquals(2, loaded.size());

    /*
     * The expected strings are 8 hours later than the ones written to
     * the file and are parsed by the two-argument DateUtil.parse(),
     * i.e. without passing an explicit time zone.
     */
    Vertex marko = CLIENT.graph().getVertex("1:marko");
    long markoBirth = DateUtil.parse("1992-10-01 20:00:00",
                                     Constants.DATE_FORMAT).getTime();
    Assert.assertEquals(markoBirth, marko.property("birth"));

    Vertex vadas = CLIENT.graph().getVertex("1:vadas");
    long vadasBirth = DateUtil.parse("2000-01-01 21:00:00",
                                     Constants.DATE_FORMAT).getTime();
    Assert.assertEquals(vadasBirth, vadas.property("birth"));
}

@Test
public void testValueMapping() throws java.text.ParseException {
/*
@@ -0,0 +1,75 @@
/*
* Copyright 2017 HugeGraph Authors
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.baidu.hugegraph.loader.test.unit;

import org.junit.Test;

import com.baidu.hugegraph.loader.util.DateUtil;
import com.baidu.hugegraph.testutil.Assert;

/**
 * Unit tests for DateUtil.checkTimeZone().
 */
public class DateUtilTest {

    @Test
    public void testCheckTimeZone() {
        // IDs that must be accepted, checked in the listed order
        String[] accepted = {
            "JST",
            "UTC",
            "GMT",
            "GMT+0",               // GMT+00:00
            "GMT-0",               // GMT-00:00
            "GMT+9:00",            // GMT+09:00
            "GMT+10:30",           // GMT+10:30
            "GMT-0400",            // GMT-04:00
            "GMT+8",               // GMT+08:00
            "GMT-13",              // GMT-13:00
            "GMT+13:59",           // GMT+13:59
            // NOTE: valid time zone IDs (see TimeZone.getAvailableIDs())
            "America/Los_Angeles", // GMT-08:00 (standard time)
            "Japan",               // GMT+09:00
            "Europe/Berlin",       // GMT+01:00 (standard time)
            "Europe/Moscow",       // GMT+03:00
            "Asia/Singapore"       // GMT+08:00
        };
        for (String id : accepted) {
            Assert.assertTrue(DateUtil.checkTimeZone(id));
        }

        // IDs that must be rejected, checked in the listed order
        String[] rejected = {
            "JPY",
            "USD",
            "UTC+8",
            "UTC+09:00",
            "+09:00",
            "-08:00",
            "-1",
            "GMT+10:-30",
            "GMT+24:00",           // hours is 0-23 only
            "GMT+13:60"            // minutes 00-59 only
        };
        for (String id : rejected) {
            Assert.assertFalse(DateUtil.checkTimeZone(id));
        }
    }
}
@@ -25,6 +25,7 @@
@RunWith(Suite.class)
@Suite.SuiteClasses({
LineTest.class,
DateUtilTest.class,
LoadProgressTest.class
})
public class UnitTestSuite {
@@ -0,0 +1,6 @@
// Property keys used by the "person" vertex label
schema.propertyKey("name")
      .asText()
      .ifNotExist()
      .create();
schema.propertyKey("birth")
      .asDate()
      .ifNotExist()
      .create();
schema.propertyKey("city")
      .asText()
      .ifNotExist()
      .create();

// Vertex label "person", with "name" as its primary key
schema.vertexLabel("person")
      .properties("name", "birth", "city")
      .primaryKeys("name")
      .ifNotExist()
      .create();
@@ -0,0 +1,21 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "${source_type}",
"path": "${store_path}/vertex_person_birth_date.csv",
"format": "CSV",
"header": ["name", "birth", "city"],
"charset": "UTF-8",
"date_format": "yyyy-MM-dd HH:mm:ss",
"time_zone": "GMT+0"
},
"field_mapping": {
"name": "name",
"birth": "birth",
"city": "city"
}
}
]
}
@@ -0,0 +1,22 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "${source_type}",
"path": "${store_path}/vertex_person_birth_date.csv",
"core_site_path": "src/test/resources/hdfs_with_core_site_path/core-site.xml",
"format": "CSV",
"header": ["name", "birth", "city"],
"charset": "UTF-8",
"date_format": "yyyy-MM-dd HH:mm:ss",
"time_zone": "GMT+0"
},
"field_mapping": {
"name": "name",
"birth": "birth",
"city": "city"
}
}
]
}
@@ -0,0 +1,6 @@
// Property keys used by the "person" vertex label
schema.propertyKey("name")
      .asText()
      .ifNotExist()
      .create();
schema.propertyKey("birth")
      .asDate()
      .ifNotExist()
      .create();
schema.propertyKey("city")
      .asText()
      .ifNotExist()
      .create();

// Vertex label "person", with "name" as its primary key
schema.vertexLabel("person")
      .properties("name", "birth", "city")
      .primaryKeys("name")
      .ifNotExist()
      .create();
@@ -0,0 +1,20 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "${source_type}",
"path": "${store_path}/vertex_person_birth_date.csv",
"format": "CSV",
"header": ["name", "birth", "city"],
"charset": "UTF-8",
"date_format": "yyyy-MM-dd HH:mm:ss"
},
"field_mapping": {
"name": "name",
"birth": "birth",
"city": "city"
}
}
]
}
@@ -0,0 +1,21 @@
{
"vertices": [
{
"label": "person",
"input": {
"type": "${source_type}",
"path": "${store_path}/vertex_person_birth_date.csv",
"core_site_path": "src/test/resources/hdfs_with_core_site_path/core-site.xml",
"format": "CSV",
"header": ["name", "birth", "city"],
"charset": "UTF-8",
"date_format": "yyyy-MM-dd HH:mm:ss"
},
"field_mapping": {
"name": "name",
"birth": "birth",
"city": "city"
}
}
]
}

0 comments on commit ef2e6da

Please sign in to comment.