From ac94dbadc9dd5797a99bac6d778fdf26fa588972 Mon Sep 17 00:00:00 2001 From: xubo245 Date: Fri, 26 Oct 2018 23:03:30 +0800 Subject: [PATCH] [CARBONDATA-2997] Support read schema from index file and data file in CSDK 1.support readSchemaInIndexFile 2.support readSchemaInDataFile 3.support get field name and data type name 4.support get array child element data type name 5.can read schema when carbonreader has set ak,sk,endpoint 6.TODO: need support read schema from S3 in the future This closes #2807 --- docs/csdk-guide.md | 82 +++++++- store/CSDK/CMakeLists.txt | 2 +- store/CSDK/src/CarbonReader.cpp | 12 +- store/CSDK/src/CarbonSchemaReader.cpp | 68 +++++++ store/CSDK/src/CarbonSchemaReader.h | 58 ++++++ store/CSDK/src/Schema.cpp | 92 +++++++++ store/CSDK/src/Schema.h | 90 +++++++++ store/CSDK/test/main.cpp | 66 +++++++ .../apache/carbondata/sdk/file/Schema.java | 43 +++++ .../carbondata/sdk/file/CarbonReaderTest.java | 1 + .../sdk/file/CarbonSchemaReaderTest.java | 181 ++++++++++++++++++ 11 files changed, 687 insertions(+), 8 deletions(-) create mode 100644 store/CSDK/src/CarbonSchemaReader.cpp create mode 100644 store/CSDK/src/CarbonSchemaReader.h create mode 100644 store/CSDK/src/Schema.cpp create mode 100644 store/CSDK/src/Schema.h create mode 100644 store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java diff --git a/docs/csdk-guide.md b/docs/csdk-guide.md index 71afa4a442b..cd5851ea348 100644 --- a/docs/csdk-guide.md +++ b/docs/csdk-guide.md @@ -39,6 +39,7 @@ and read data from S3 at main.cpp of CSDK module. Finally, users need to release the memory and destroy JVM. ## API List +### CarbonReader ``` /** * create a CarbonReaderBuilder object for building carbonReader, @@ -119,7 +120,7 @@ and write data to S3 at main.cpp of CSDK module. Finally, users need to release the memory and destroy JVM. 
## API List - +### CarbonWriter ``` /** * create a CarbonWriterBuilder object for building carbonWriter, @@ -187,4 +188,83 @@ release the memory and destroy JVM. * close the carbon Writer */ void close(); +``` + +### CarbonSchemaReader + +``` + /** + * constructor with jni env + * + * @param env jni env + */ + CarbonSchemaReader(JNIEnv *env); +``` +``` + /** + * read Schema from Data File + * + * @param path Data File path + * @return carbon schema object + */ + jobject readSchemaInDataFile(char *path); +``` +``` + /** + * read Schema from index File + * + * @param path index File path + * @return carbon schema object + */ + jobject readSchemaInIndexFile(char *path); + +``` +### Schema +``` + /** + * constructor with jni env and carbon schema data + * + * @param env jni env + * @param schema carbon schema data + */ + Schema(JNIEnv *env, jobject schema); + +``` +``` + /** + * get fields length of schema + * + * @return fields length + */ + int getFieldsLength(); + +``` +``` + /** + * get field name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field name + */ + char *getFieldName(int ordinal); + +``` +``` + /** + * get field data type name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field data type name + */ + char *getFieldDataTypeName(int ordinal); + +``` +``` + /** + * get array child element data type name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal array child element data type name + */ + char *getArrayElementTypeName(int ordinal); ``` \ No newline at end of file diff --git a/store/CSDK/CMakeLists.txt b/store/CSDK/CMakeLists.txt index ab1429d4cb4..4da47cfacb3 100644 --- a/store/CSDK/CMakeLists.txt +++ b/store/CSDK/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(JNI REQUIRED) include_directories(${JNI_INCLUDE_DIRS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -set(SOURCE_FILES src/CarbonReader.cpp src/CarbonReader.h test/main.cpp 
src/CarbonRow.h src/CarbonRow.cpp src/CarbonWriter.h src/CarbonWriter.cpp) +set(SOURCE_FILES src/CarbonReader.cpp src/CarbonReader.h test/main.cpp src/CarbonRow.h src/CarbonRow.cpp src/CarbonWriter.h src/CarbonWriter.cpp src/CarbonSchemaReader.h src/CarbonSchemaReader.cpp src/Schema.h src/Schema.cpp) add_executable(CJDK ${SOURCE_FILES}) get_filename_component(JAVA_JVM_LIBRARY_DIR ${JAVA_JVM_LIBRARY} DIRECTORY) diff --git a/store/CSDK/src/CarbonReader.cpp b/store/CSDK/src/CarbonReader.cpp index 8b908ab6848..57fcda687f3 100644 --- a/store/CSDK/src/CarbonReader.cpp +++ b/store/CSDK/src/CarbonReader.cpp @@ -86,9 +86,9 @@ void CarbonReader::projection(int argc, char *argv[]) { } checkBuilder(); jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); - jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "projection", + jmethodID methodID = jniEnv->GetMethodID(carbonReaderBuilderClass, "projection", "([Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); - if (buildID == NULL) { + if (methodID == NULL) { throw std::runtime_error("Can't find the method in java: projection"); } jclass objectArrayClass = jniEnv->FindClass("Ljava/lang/String;"); @@ -103,7 +103,7 @@ void CarbonReader::projection(int argc, char *argv[]) { jvalue args[1]; args[0].l = array; - carbonReaderBuilderObject = jniEnv->CallObjectMethodA(carbonReaderBuilderObject, buildID, args); + carbonReaderBuilderObject = jniEnv->CallObjectMethodA(carbonReaderBuilderObject, methodID, args); } void CarbonReader::withHadoopConf(char *key, char *value) { @@ -129,12 +129,12 @@ void CarbonReader::withHadoopConf(char *key, char *value) { jobject CarbonReader::build() { checkBuilder(); jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); - jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "build", + jmethodID methodID = jniEnv->GetMethodID(carbonReaderBuilderClass, "build", 
"()Lorg/apache/carbondata/sdk/file/CarbonReader;"); - if (buildID == NULL) { + if (methodID == NULL) { throw std::runtime_error("Can't find the method in java: build"); } - carbonReaderObject = jniEnv->CallObjectMethod(carbonReaderBuilderObject, buildID); + carbonReaderObject = jniEnv->CallObjectMethod(carbonReaderBuilderObject, methodID); if (jniEnv->ExceptionCheck()) { throw jniEnv->ExceptionOccurred(); } diff --git a/store/CSDK/src/CarbonSchemaReader.cpp b/store/CSDK/src/CarbonSchemaReader.cpp new file mode 100644 index 00000000000..95eae7da3bd --- /dev/null +++ b/store/CSDK/src/CarbonSchemaReader.cpp @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdexcept> +#include "CarbonSchemaReader.h" + +CarbonSchemaReader::CarbonSchemaReader(JNIEnv *env) { + if (env == NULL) { + throw std::runtime_error("JNIEnv parameter can't be NULL."); + } + this->carbonSchemaReaderClass = env->FindClass("org/apache/carbondata/sdk/file/CarbonSchemaReader"); + if (carbonSchemaReaderClass == NULL) { + throw std::runtime_error("Can't find the class in java: org/apache/carbondata/sdk/file/CarbonSchemaReader"); + } + this->jniEnv = env; +} + +jobject CarbonSchemaReader::readSchemaInDataFile(char *path) { + if (path == NULL) { + throw std::runtime_error("path parameter can't be NULL."); + } + jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchemaInDataFile", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: readSchemaInDataFile"); + } + jstring jPath = jniEnv->NewStringUTF(path); + jvalue args[1]; + args[0].l = jPath; + jobject result = jniEnv->CallStaticObjectMethodA(carbonSchemaReaderClass, methodID, args); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } + return result; +} + +jobject CarbonSchemaReader::readSchemaInIndexFile(char *path) { + if (path == NULL) { + throw std::runtime_error("path parameter can't be NULL."); + } + jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchemaInIndexFile", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: readSchemaInIndexFile"); + } + jstring jPath = jniEnv->NewStringUTF(path); + jvalue args[1]; + args[0].l = jPath; + jobject result = jniEnv->CallStaticObjectMethodA(carbonSchemaReaderClass, methodID, args); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } + return result; +} \ No newline at end of file diff --git a/store/CSDK/src/CarbonSchemaReader.h 
b/store/CSDK/src/CarbonSchemaReader.h new file mode 100644 index 00000000000..1b1299515df --- /dev/null +++ b/store/CSDK/src/CarbonSchemaReader.h @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <jni.h> + +class CarbonSchemaReader { +private: + + /** + * jni env + */ + JNIEnv *jniEnv; + + /** + * carbonSchemaReader Class for get method id and call method + */ + jclass carbonSchemaReaderClass; + +public: + + /** + * constructor with jni env + * + * @param env jni env + */ + CarbonSchemaReader(JNIEnv *env); + + /** + * read Schema from Data File + * + * @param path Data File path + * @return carbon schema object + */ + jobject readSchemaInDataFile(char *path); + + /** + * read Schema from index File + * + * @param path index File path + * @return carbon schema object + */ + jobject readSchemaInIndexFile(char *path); + +}; \ No newline at end of file diff --git a/store/CSDK/src/Schema.cpp b/store/CSDK/src/Schema.cpp new file mode 100644 index 00000000000..0a4fdc51d99 --- /dev/null +++ b/store/CSDK/src/Schema.cpp @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "Schema.h" + +Schema::Schema(JNIEnv *env, jobject schema) { + if (env == NULL) { + throw std::runtime_error("JNIEnv parameter can't be NULL."); + } + if (schema == NULL) { + throw std::runtime_error("schema parameter can't be NULL."); + } + this->schemaClass = env->FindClass("org/apache/carbondata/sdk/file/Schema"); + if (schemaClass == NULL) { + throw std::runtime_error("Can't find the class in java: org/apache/carbondata/sdk/file/Schema"); + } + this->jniEnv = env; + this->schema = schema; +} + +int Schema::getFieldsLength() { + jmethodID methodID = jniEnv->GetMethodID(schemaClass, "getFieldsLength", + "()I"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: getFieldsLength"); + } + return jniEnv->CallIntMethod(schema, methodID); +}; + +void Schema::checkOrdinal(int ordinal) { + if (ordinal < 0) { + throw std::runtime_error("ordinal parameter can't be negative."); + } +} + +char *Schema::getFieldName(int ordinal) { + checkOrdinal(ordinal); + jmethodID methodID = jniEnv->GetMethodID(schemaClass, "getFieldName", + "(I)Ljava/lang/String;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: getFieldName"); + } + jvalue args[1]; + args[0].i = ordinal; + jobject fieldName = 
jniEnv->CallObjectMethodA(schema, methodID, args); + return (char *) jniEnv->GetStringUTFChars((jstring) fieldName, JNI_FALSE); +}; + +char *Schema::getFieldDataTypeName(int ordinal) { + checkOrdinal(ordinal); + jmethodID methodID = jniEnv->GetMethodID(schemaClass, "getFieldDataTypeName", + "(I)Ljava/lang/String;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: getFieldDataTypeName"); + } + jvalue args[1]; + args[0].i = ordinal; + jobject fieldName = jniEnv->CallObjectMethodA(schema, methodID, args); + return (char *) jniEnv->GetStringUTFChars((jstring) fieldName, JNI_FALSE); +}; + +char *Schema::getArrayElementTypeName(int ordinal) { + checkOrdinal(ordinal); + jmethodID methodID = jniEnv->GetMethodID(schemaClass, "getArrayElementTypeName", + "(I)Ljava/lang/String;"); + if (methodID == NULL) { + throw std::runtime_error("Can't find the method in java: getArrayElementTypeName"); + } + jvalue args[1]; + args[0].i = ordinal; + jobject fieldName = jniEnv->CallObjectMethodA(schema, methodID, args); + if (jniEnv->ExceptionCheck()) { + throw jniEnv->ExceptionOccurred(); + } + return (char *) jniEnv->GetStringUTFChars((jstring) fieldName, JNI_FALSE); +}; diff --git a/store/CSDK/src/Schema.h b/store/CSDK/src/Schema.h new file mode 100644 index 00000000000..e9ddf4e9748 --- /dev/null +++ b/store/CSDK/src/Schema.h @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <jni.h> + +#ifndef CJDK_SCHEMA_H +#define CJDK_SCHEMA_H + +class Schema { +private: + + /** + * jni env + */ + JNIEnv *jniEnv; + + /** + * schema class for get method id + */ + jclass schemaClass; + + /** + * carbon schema data + */ + jobject schema; + + /** + * check ordinal, ordinal can't be negative + * + * @param ordinal int value, the data index of carbon schema + */ + void checkOrdinal(int ordinal); + +public: + + /** + * constructor with jni env and carbon schema data + * + * @param env jni env + * @param schema carbon schema data + */ + Schema(JNIEnv *env, jobject schema); + + /** + * get fields length of schema + * + * @return fields length + */ + int getFieldsLength(); + + /** + * get field name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field name + */ + char *getFieldName(int ordinal); + + /** + * get field data type name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field data type name + */ + char *getFieldDataTypeName(int ordinal); + + /** + * get array child element data type name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal array child element data type name + */ + char *getArrayElementTypeName(int ordinal); +}; + +#endif //CJDK_SCHEMA_H \ No newline at end of file diff --git a/store/CSDK/test/main.cpp b/store/CSDK/test/main.cpp index b102622520f..876e2e137f1 100644 --- a/store/CSDK/test/main.cpp +++ b/store/CSDK/test/main.cpp @@ -24,6 +24,8 @@ #include "../src/CarbonReader.h" #include "../src/CarbonRow.h" 
#include "../src/CarbonWriter.h" +#include "../src/CarbonSchemaReader.h" +#include "../src/Schema.h" using namespace std; @@ -118,6 +120,65 @@ void printResult(JNIEnv *env, CarbonReader reader) { reader.close(); } +/** + * test read Schema from Index File + * + * @param env jni env + * @return whether it is success + */ +bool readSchemaInIndexFile(JNIEnv *env, char *indexFilePath) { + printf("\nread Schema from Index File:\n"); + CarbonSchemaReader carbonSchemaReader(env); + jobject schema; + try { + schema = carbonSchemaReader.readSchemaInIndexFile(indexFilePath); + Schema carbonSchema(env, schema); + int length = carbonSchema.getFieldsLength(); + printf("schema length is:%d\n", length); + for (int i = 0; i < length; i++) { + printf("%d\t", i); + printf("%s\t", carbonSchema.getFieldName(i)); + printf("%s\n", carbonSchema.getFieldDataTypeName(i)); + if (strcmp(carbonSchema.getFieldDataTypeName(i), "ARRAY") == 0) { + printf("Array Element Type Name is:%s\n", carbonSchema.getArrayElementTypeName(i)); + } + } + + } catch (jthrowable e) { + env->ExceptionDescribe(); + } + return true; +} + +/** + * test read Schema from Data File + * + * @param env jni env + * @return whether it is success + */ +bool readSchemaInDataFile(JNIEnv *env, char *dataFilePath) { + printf("\nread Schema from Data File:\n"); + CarbonSchemaReader carbonSchemaReader(env); + jobject schema; + try { + schema = carbonSchemaReader.readSchemaInDataFile(dataFilePath); + Schema carbonSchema(env, schema); + int length = carbonSchema.getFieldsLength(); + printf("schema length is:%d\n", length); + for (int i = 0; i < length; i++) { + printf("%d\t", i); + printf("%s\t", carbonSchema.getFieldName(i)); + printf("%s\n", carbonSchema.getFieldDataTypeName(i)); + if (strcmp(carbonSchema.getFieldDataTypeName(i), "ARRAY") == 0) { + printf("Array Element Type Name is:%s\n", carbonSchema.getArrayElementTypeName(i)); + } + } + } catch (jthrowable e) { + env->ExceptionDescribe(); + } + return true; +} + /** * test read 
data from local disk, without projection * @@ -384,10 +445,15 @@ int main(int argc, char *argv[]) { char *S3ReadPath = "s3a://sdk/WriterOutput/carbondata"; if (argc > 3) { + // TODO: need support read schema from S3 in the future testWriteData(env, S3WritePath, 4, argv); readFromS3(env, S3ReadPath, argv); } else { tryCatchException(env); + char *indexFilePath = argv[1]; + char *dataFilePath = argv[2]; + readSchemaInIndexFile(env, indexFilePath); + readSchemaInDataFile(env, dataFilePath); readFromLocalWithoutProjection(env); testWriteData(env, "./data", 1, argv); readFromLocal(env); diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java index 6131d459a32..05d3d9e3640 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java @@ -24,6 +24,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.metadata.datatype.ArrayType; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; import com.google.gson.GsonBuilder; @@ -94,6 +95,48 @@ public Field[] getFields() { return fields; } + /** + * get fields length of schema + * + * @return fields length + */ + public int getFieldsLength() { + return fields.length; + } + + /** + * get field name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field name + */ + public String getFieldName(int ordinal) { + return fields[ordinal].getFieldName(); + } + + /** + * get field data type name by ordinal + * + * @param ordinal the data index of carbon schema + * @return ordinal field data type name + */ + public String getFieldDataTypeName(int ordinal) { + return fields[ordinal].getDataType().getName(); + } + + /** + * get array child element data type name by ordinal + * + * 
@param ordinal the data index of carbon schema + * @return ordinal array child element data type name + */ + public String getArrayElementTypeName(int ordinal) { + if (getFieldDataTypeName(ordinal).equalsIgnoreCase("ARRAY")) { + return ((ArrayType) fields[ordinal].getDataType()).getElementType().getName(); + } + throw new RuntimeException("Only support Array type."); + } + /** * Sort the schema order as original order * diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index b8eb2243ade..74428f05aa3 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -766,6 +766,7 @@ public void testReadSchemaFromDataFile() throws IOException { FileUtils.deleteDirectory(new File(path)); } + @Test public void testWriteAndReadFilesNonTransactional() throws IOException, InterruptedException { String path = "./testWriteFiles"; diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java new file mode 100644 index 00000000000..db41fa08714 --- /dev/null +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.sdk.file; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; +import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.commons.io.FileUtils; +import org.junit.*; + +public class CarbonSchemaReaderTest extends TestCase { + + String path = "./testWriteFiles"; + + @Before + public void setUp() throws IOException, InvalidLoadOptionException { + FileUtils.deleteDirectory(new File(path)); + + Field[] fields = new Field[12]; + fields[0] = new Field("stringField", DataTypes.STRING); + fields[1] = new Field("shortField", DataTypes.SHORT); + fields[2] = new Field("intField", DataTypes.INT); + fields[3] = new Field("longField", DataTypes.LONG); + fields[4] = new Field("doubleField", DataTypes.DOUBLE); + fields[5] = new Field("boolField", DataTypes.BOOLEAN); + fields[6] = new Field("dateField", DataTypes.DATE); + fields[7] = new Field("timeField", DataTypes.TIMESTAMP); + fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2)); + fields[9] = new Field("varcharField", DataTypes.VARCHAR); + fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING)); + fields[11] = new Field("floatField", 
DataTypes.FLOAT); + Map map = new HashMap<>(); + map.put("complex_delimiter_level_1", "#"); + CarbonWriter writer = CarbonWriter.builder() + .outputPath(path) + .withLoadOptions(map) + .withCsvInput(new Schema(fields)) + .writtenBy("CarbonSchemaReaderTest") + .build(); + + for (int i = 0; i < 10; i++) { + String[] row2 = new String[]{ + "robot" + (i % 10), + String.valueOf(i % 10000), + String.valueOf(i), + String.valueOf(Long.MAX_VALUE - i), + String.valueOf((double) i / 2), + String.valueOf(true), + "2019-03-02", + "2019-02-12 03:03:34", + "12.345", + "varchar", + "Hello#World#From#Carbon", + "1.23" + }; + writer.write(row2); + } + writer.close(); + } + + @Test + public void testReadSchemaFromDataFile() { + try { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(".carbondata"); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new RuntimeException("Carbon data file not exists."); + } + String dataFilePath = carbonFiles[0].getAbsolutePath(); + + Schema schema = CarbonSchemaReader + .readSchemaInDataFile(dataFilePath) + .asOriginOrder(); + + assertEquals(schema.getFieldsLength(), 12); + checkSchema(schema); + + } catch (Throwable e) { + e.printStackTrace(); + } + } + + public boolean checkSchema(Schema schema) { + assert (schema.getFieldName(0).equalsIgnoreCase("stringField")); + assert (schema.getFieldName(1).equalsIgnoreCase("shortField")); + assert (schema.getFieldName(2).equalsIgnoreCase("intField")); + assert (schema.getFieldName(3).equalsIgnoreCase("longField")); + assert (schema.getFieldName(4).equalsIgnoreCase("doubleField")); + assert (schema.getFieldName(5).equalsIgnoreCase("boolField")); + assert (schema.getFieldName(6).equalsIgnoreCase("dateField")); + assert (schema.getFieldName(7).equalsIgnoreCase("timeField")); + assert 
(schema.getFieldName(8).equalsIgnoreCase("decimalField")); + assert (schema.getFieldName(9).equalsIgnoreCase("varcharField")); + assert (schema.getFieldName(10).equalsIgnoreCase("arrayField")); + assert (schema.getFieldName(11).equalsIgnoreCase("floatField")); + + assert (schema.getFieldDataTypeName(0).equalsIgnoreCase("string")); + assert (schema.getFieldDataTypeName(1).equalsIgnoreCase("short")); + assert (schema.getFieldDataTypeName(2).equalsIgnoreCase("int")); + assert (schema.getFieldDataTypeName(3).equalsIgnoreCase("long")); + assert (schema.getFieldDataTypeName(4).equalsIgnoreCase("double")); + assert (schema.getFieldDataTypeName(5).equalsIgnoreCase("boolean")); + assert (schema.getFieldDataTypeName(6).equalsIgnoreCase("date")); + assert (schema.getFieldDataTypeName(7).equalsIgnoreCase("timestamp")); + assert (schema.getFieldDataTypeName(8).equalsIgnoreCase("decimal")); + assert (schema.getFieldDataTypeName(9).equalsIgnoreCase("varchar")); + assert (schema.getFieldDataTypeName(10).equalsIgnoreCase("array")); + assert (schema.getArrayElementTypeName(10).equalsIgnoreCase("String")); + assert (schema.getFieldDataTypeName(11).equalsIgnoreCase("float")); + return true; + } + + @Test + public void testReadSchemaFromIndexFile() { + try { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(".carbonindex"); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new RuntimeException("Carbon index file not exists."); + } + String indexFilePath = carbonFiles[0].getAbsolutePath(); + + Schema schema = CarbonSchemaReader + .readSchemaInIndexFile(indexFilePath) + .asOriginOrder(); + + assertEquals(schema.getFieldsLength(), 12); + checkSchema(schema); + + } catch (Throwable e) { + e.printStackTrace(); + } + } + + @After + public void tearDown() throws IOException { + 
FileUtils.deleteDirectory(new File(path)); + } + +}