From e04cd6f14ad93672f156ce50a1cabf0e59a3dffd Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Thu, 24 Feb 2022 15:49:20 +0800 Subject: [PATCH 1/6] fix the bug of IoTDB-2604 --- cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java index 55e91997b13ec..8cb6a4bbfe158 100644 --- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java +++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java @@ -87,7 +87,7 @@ public class ImportCsv extends AbstractCsvTool { private static String timeColumn = "Time"; private static String deviceColumn = "Device"; - private static final int BATCH_SIZE = 10000; + private static final int BATCH_SIZE = 1000; /** * create the commandline options. @@ -338,7 +338,7 @@ record -> { if (!hasStarted.get()) { hasStarted.set(true); timeFormatter.set(formatterInit(record.get(0))); - } else if ((record.getRecordNumber() - 1) % BATCH_SIZE == 0) { + } else if (deviceIds.size() >= BATCH_SIZE) { writeAndEmptyDataSet(deviceIds, times, typesList, valuesList, measurementsList, 3); } @@ -455,7 +455,7 @@ record -> { writeAndEmptyDataSet( deviceName.get(), times, typesList, valuesList, measurementsList, 3); deviceName.set(record.get(1)); - } else if (record.getRecordNumber() - 1 % BATCH_SIZE == 0 && times.size() != 0) { + } else if (times.size() >= BATCH_SIZE && times.size() != 0) { // insert a batch writeAndEmptyDataSet( deviceName.get(), times, typesList, valuesList, measurementsList, 3); From 0eaeee146f57709c13e94eae3f5f6c68a8e61b58 Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Fri, 25 Feb 2022 13:18:51 +0800 Subject: [PATCH 2/6] modify the default value of batchPointSize. 
--- .../java/org/apache/iotdb/tool/ImportCsv.java | 29 +++++++++++++++++-- .../Write-And-Delete-Data/CSV-Tool.md | 4 +++ .../Write-And-Delete-Data/CSV-Tool.md | 8 +++-- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java index 8cb6a4bbfe158..c089481cffc85 100644 --- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java +++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java @@ -51,6 +51,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -72,6 +73,9 @@ public class ImportCsv extends AbstractCsvTool { private static final String FAILED_FILE_ARGS = "fd"; private static final String FAILED_FILE_NAME = "failed file directory"; + private static final String BATCH_POINT_SIZE_ARGS = "batch"; + private static final String BATCH_POINT_SIZE_NAME = "batch point size"; + private static final String ALIGNED_ARGS = "aligned"; private static final String ALIGNED_NAME = "use the aligned interface"; @@ -87,7 +91,7 @@ public class ImportCsv extends AbstractCsvTool { private static String timeColumn = "Time"; private static String deviceColumn = "Device"; - private static final int BATCH_SIZE = 1000; + private static int batchPointSize = 100_000; /** * create the commandline options. 
@@ -141,6 +145,14 @@ private static Options createOptions() { .build(); options.addOption(opTimeZone); + Option opBatchPointSize = + Option.builder(BATCH_POINT_SIZE_ARGS) + .argName(BATCH_POINT_SIZE_NAME) + .hasArg() + .desc("100000 (optional)") + .build(); + options.addOption(opBatchPointSize); + return options; } @@ -152,6 +164,7 @@ private static Options createOptions() { private static void parseSpecialParams(CommandLine commandLine) { timeZoneID = commandLine.getOptionValue(TIME_ZONE_ARGS); targetPath = commandLine.getOptionValue(FILE_ARGS); + batchPointSize = Integer.parseInt(commandLine.getOptionValue(BATCH_POINT_SIZE_ARGS)); if (commandLine.getOptionValue(FAILED_FILE_ARGS) != null) { failedFileDirectory = commandLine.getOptionValue(FAILED_FILE_ARGS); File file = new File(failedFileDirectory); @@ -330,6 +343,7 @@ private static void writeDataAlignedByTime( AtomicReference timeFormatter = new AtomicReference<>(null); AtomicReference hasStarted = new AtomicReference<>(false); + AtomicInteger pointSize = new AtomicInteger(0); ArrayList> failedRecords = new ArrayList<>(); @@ -338,8 +352,9 @@ record -> { if (!hasStarted.get()) { hasStarted.set(true); timeFormatter.set(formatterInit(record.get(0))); - } else if (deviceIds.size() >= BATCH_SIZE) { + } else if (pointSize.get() >= batchPointSize) { writeAndEmptyDataSet(deviceIds, times, typesList, valuesList, measurementsList, 3); + pointSize.set(0); } boolean isFail = false; @@ -378,6 +393,7 @@ record -> { measurements.add(headerNameMap.get(header).replace(deviceId + '.', "")); types.add(type); values.add(valueTrans); + pointSize.getAndIncrement(); } } } @@ -406,6 +422,7 @@ record -> { }); if (!deviceIds.isEmpty()) { writeAndEmptyDataSet(deviceIds, times, typesList, valuesList, measurementsList, 3); + pointSize.set(0); } if (!failedRecords.isEmpty()) { @@ -442,6 +459,8 @@ private static void writeDataAlignedByDevice( List> valuesList = new ArrayList<>(); List> measurementsList = new ArrayList<>(); + AtomicInteger 
pointSize = new AtomicInteger(0); + ArrayList> failedRecords = new ArrayList<>(); records.forEach( @@ -455,10 +474,12 @@ record -> { writeAndEmptyDataSet( deviceName.get(), times, typesList, valuesList, measurementsList, 3); deviceName.set(record.get(1)); - } else if (times.size() >= BATCH_SIZE && times.size() != 0) { + pointSize.set(0); + } else if (pointSize.get() >= batchPointSize) { // insert a batch writeAndEmptyDataSet( deviceName.get(), times, typesList, valuesList, measurementsList, 3); + pointSize.set(0); } // the data of the record @@ -508,6 +529,7 @@ record -> { values.add(valueTrans); measurements.add(headerNameMap.get(measurement)); types.add(type); + pointSize.getAndIncrement(); } } } @@ -534,6 +556,7 @@ record -> { }); if (times.size() != 0) { writeAndEmptyDataSet(deviceName.get(), times, typesList, valuesList, measurementsList, 3); + pointSize.set(0); } if (!failedRecords.isEmpty()) { writeCsvFile(headerNames, failedRecords, failedFilePath); diff --git a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md index 08bad7d5bdcba..d774ca39eae7c 100644 --- a/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md +++ b/docs/UserGuide/Write-And-Delete-Data/CSV-Tool.md @@ -191,6 +191,10 @@ Description: - whether to use the aligned interface? The option `false` is default. - example: `-aligned true` +* `-batch`: + - specifies the number of points in a batch. If the program throws the exception `org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`, you can lower this parameter as appropriate. + - example: `-batch 100000`, `100000` is the default value. 
+ ### Example ```sh diff --git a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md index a34437cce45ef..8ca621069b184 100644 --- a/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md +++ b/docs/zh/UserGuide/Write-And-Delete-Data/CSV-Tool.md @@ -185,11 +185,15 @@ Time,Device,str(TEXT),int(INT32) * `-fd`: - 指定一个目录来存放保存失败的行的文件,如果你没有指定这个参数,失败的文件将会被保存到源数据的目录中,然后文件名是源文件名加上`.failed`的后缀。 - - example: `-fd ./failed/` + - 例如: `-fd ./failed/` * `-aligned`: - 是否使用`aligned`接口? 默认参数为`false`。 - - example: `-aligned true` + - 例如: `-aligned true` + +* `-batch`: + - 用于指定每一批插入的数据的点数。如果程序报了`org.apache.thrift.transport.TTransportException: Frame size larger than protect max size`这个错的话,就可以适当的调低这个参数。 + - 例如: `-batch 100000`,`100000`是默认值。 ### 运行示例 From cdb6ea4895dabaecfb5b0e0f895791d48b6bb562 Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Fri, 18 Mar 2022 11:11:03 +0800 Subject: [PATCH 3/6] update the WayToGetIoTDB.md --- docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md index d3abfd19424c1..45b8f056021e7 100644 --- a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md +++ b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md @@ -69,6 +69,15 @@ Shell > uzip iotdb-.zip +- tools/ <-- system tools ``` +如果您想要编译项目中的某个模块,您可以在源码文件夹中使用`mvn clean package -pl {module.name} -am -Dmaven.test.skip=true`命令进行编译。如果您需要的是带依赖的 jar 包,您可以在编译命令后面加上`-P get-jar-with-dependencies`参数。比如您想编译带依赖的 jdbc jar 包,您就可以使用以下命令进行编译: + +```shell +> mvn clean package -pl jdbc -am "-Dmaven.test.skip=true" -P get-jar-with-dependencies +``` + +编译完成后就可以在`{module.name}/target`目录中找到需要的包了。 + + ### 通过 Docker 安装 (Dockerfile) Apache IoTDB 的 Docker 镜像已经上传至 [https://hub.docker.com/r/apache/iotdb](https://hub.docker.com/r/apache/iotdb), From 8a7ff6c95f6db3eb45b345613f0493a8f03299cc Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Fri, 18 Mar 2022 
14:28:41 +0800 Subject: [PATCH 4/6] update the WayToGetIoTDB.md --- docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md index 45b8f056021e7..1914c37303a2a 100644 --- a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md +++ b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md @@ -51,8 +51,13 @@ Shell > uzip iotdb-.zip 您可以获取已发布的源码 [https://iotdb.apache.org/Download/](https://iotdb.apache.org/Download/) ,或者从 [https://github.com/apache/iotdb/tree/master](https://github.com/apache/iotdb/tree/master) git 仓库获取 -源码克隆后,进入到源码文件夹目录下,使用以下命令进行编译: +源码克隆后,进入到源码文件夹目录下。如果您想编译已经发布过的版本,可以先用`git checkout -b my_{project.version} v{project.version}`命令新建一个分支。比如您要编译0.12.4这个版本,您可以用如下命令去切换分支: +```shell +> git checkout -b my_0.12.4 v0.12.4 +``` + +切换分支之后就可以使用以下命令进行编译: ``` > mvn clean package -pl server -am -Dmaven.test.skip=true ``` From fa8f7ac0c3a043c92e36fc0cd199a54cf9567895 Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Fri, 18 Mar 2022 14:30:09 +0800 Subject: [PATCH 5/6] update the WayToGetIoTDB.md --- docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md index 1914c37303a2a..f09f27e136d69 100644 --- a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md +++ b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md @@ -51,13 +51,14 @@ Shell > uzip iotdb-.zip 您可以获取已发布的源码 [https://iotdb.apache.org/Download/](https://iotdb.apache.org/Download/) ,或者从 [https://github.com/apache/iotdb/tree/master](https://github.com/apache/iotdb/tree/master) git 仓库获取 -源码克隆后,进入到源码文件夹目录下。如果您想编译已经发布过的版本,可以先用`git checkout -b my_{project.version} v{project.version}`命令新建一个分支。比如您要编译0.12.4这个版本,您可以用如下命令去切换分支: +源码克隆后,进入到源码文件夹目录下。如果您想编译已经发布过的版本,可以先用`git checkout -b my_{project.version} 
v{project.version}`命令新建并切换分支。比如您要编译0.12.4这个版本,您可以用如下命令去切换分支: ```shell > git checkout -b my_0.12.4 v0.12.4 ``` 切换分支之后就可以使用以下命令进行编译: + ``` > mvn clean package -pl server -am -Dmaven.test.skip=true ``` From aa5488db31a665616f412de1be4ad113a80f3391 Mon Sep 17 00:00:00 2001 From: xuanronaldo Date: Fri, 18 Mar 2022 15:27:35 +0800 Subject: [PATCH 6/6] update the WayToGetIoTDB.md --- docs/UserGuide/QuickStart/WayToGetIoTDB.md | 17 ++++++++++++++++- docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md | 4 ++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/UserGuide/QuickStart/WayToGetIoTDB.md b/docs/UserGuide/QuickStart/WayToGetIoTDB.md index 88821484535aa..463fc7cc7353b 100644 --- a/docs/UserGuide/QuickStart/WayToGetIoTDB.md +++ b/docs/UserGuide/QuickStart/WayToGetIoTDB.md @@ -51,7 +51,13 @@ You can download the source code from: git clone https://github.com/apache/iotdb.git ``` -Under the root path of iotdb: +After that, go to the root path of IoTDB. If you want to build a released version, first create and check out a new branch with the command `git checkout -b my_{project.version} v{project.version}`. For example, to build version `0.12.4`, execute this command: + +```shell +> git checkout -b my_0.12.4 v0.12.4 +``` + +Then you can execute this command to build the version that you want: ``` > mvn clean package -DskipTests @@ -69,6 +75,15 @@ If you would like to build the IoTDB server, you can run the following command u After build, the IoTDB server will be at the folder "server/target/iotdb-server-{project.version}". +If you would like to build a module, you can execute the command `mvn clean package -pl {module.name} -am -DskipTests` under the root path of IoTDB. +If you need the jar with dependencies, you can add the parameter `-P get-jar-with-dependencies` after the command. 
E.g., If you need the jar of jdbc with dependencies, you can execute this command: + +```shell +> mvn clean package -pl jdbc -am -DskipTests -P get-jar-with-dependencies +``` + +Then you can find it under the path `{module.name}/target`. + ### Installation by Docker (Dockerfile) Apache IoTDB' Docker image is released on [https://hub.docker.com/r/apache/iotdb](https://hub.docker.com/r/apache/iotdb), diff --git a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md index f09f27e136d69..2b96f62ffeebb 100644 --- a/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md +++ b/docs/zh/UserGuide/QuickStart/WayToGetIoTDB.md @@ -75,10 +75,10 @@ Shell > uzip iotdb-.zip +- tools/ <-- system tools ``` -如果您想要编译项目中的某个模块,您可以在源码文件夹中使用`mvn clean package -pl {module.name} -am -Dmaven.test.skip=true`命令进行编译。如果您需要的是带依赖的 jar 包,您可以在编译命令后面加上`-P get-jar-with-dependencies`参数。比如您想编译带依赖的 jdbc jar 包,您就可以使用以下命令进行编译: +如果您想要编译项目中的某个模块,您可以在源码文件夹中使用`mvn clean package -pl {module.name} -am -DskipTests`命令进行编译。如果您需要的是带依赖的 jar 包,您可以在编译命令后面加上`-P get-jar-with-dependencies`参数。比如您想编译带依赖的 jdbc jar 包,您就可以使用以下命令进行编译: ```shell -> mvn clean package -pl jdbc -am "-Dmaven.test.skip=true" -P get-jar-with-dependencies +> mvn clean package -pl jdbc -am -DskipTests -P get-jar-with-dependencies ``` 编译完成后就可以在`{module.name}/target`目录中找到需要的包了。