diff --git a/.gitignore b/.gitignore index 5d66a40e90b..00e49343316 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ .settings .cache target/ +store/CSDK/cmake-build-debug/* .project .classpath metastore_db/ diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index faad0dccedc..46139c946c3 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -506,6 +506,10 @@ public final class CarbonCommonConstants { * File separator */ public static final String FILE_SEPARATOR = "/"; + /** + * ARRAY separator + */ + public static final String ARRAY_SEPARATOR = "\001"; /** * MAX_QUERY_EXECUTION_TIME */ diff --git a/docs/CSDK-guide.md b/docs/CSDK-guide.md new file mode 100644 index 00000000000..c4f4a31d7ea --- /dev/null +++ b/docs/CSDK-guide.md @@ -0,0 +1,197 @@ + + +# CSDK Guide + +CarbonData CSDK provides C++ interface to write and read carbon file. +CSDK use JNI to invoke java SDK in C++ code. + + +# CSDK Reader +This CSDK reader reads CarbonData file and carbonindex file at a given path. +External client can make use of this reader to read CarbonData files in C++ +code and without CarbonSession. + + +In the carbon jars package, there exist a carbondata-sdk.jar, +including SDK reader for CSDK. +## Quick example +``` +// 1. init JVM +JavaVM *jvm; +JNIEnv *initJVM() { + JNIEnv *env; + JavaVMInitArgs vm_args; + int parNum = 3; + int res; + JavaVMOption options[parNum]; + + options[0].optionString = "-Djava.compiler=NONE"; + options[1].optionString = "-Djava.class.path=../../sdk/target/carbondata-sdk.jar"; + options[2].optionString = "-verbose:jni"; + vm_args.version = JNI_VERSION_1_8; + vm_args.nOptions = parNum; + vm_args.options = options; + vm_args.ignoreUnrecognized = JNI_FALSE; + + res = JNI_CreateJavaVM(&jvm, (void **) &env, &vm_args); + if (res < 0) { + fprintf(stderr, "\nCan't create Java VM\n"); + exit(1); + } + + return env; +} + +// 2. create carbon reader and read data +// 2.1 read data from local disk +/** + * test read data from local disk, without projection + * + * @param env jni env + * @return + */ +bool readFromLocalWithoutProjection(JNIEnv *env) { + + CarbonReader carbonReaderClass; + carbonReaderClass.builder(env, "../resources/carbondata", "test"); + carbonReaderClass.build(); + + while (carbonReaderClass.hasNext()) { + jobjectArray row = carbonReaderClass.readNextRow(); + jsize length = env->GetArrayLength(row); + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(row, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("\n"); + } + carbonReaderClass.close(); +} + +// 2.2 read data from S3 + +/** + * read data from S3 + * parameter is ak sk endpoint + * + * @param env jni env + * @param argv argument vector + * @return + */ +bool readFromS3(JNIEnv *env, char *argv[]) { + CarbonReader reader; + + char *args[3]; + // "your access key" + args[0] = argv[1]; + // "your secret key" + args[1] = argv[2]; + // "your endPoint" + args[2] = argv[3]; + + reader.builder(env, "s3a://sdk/WriterOutput", "test"); + reader.withHadoopConf(3, args); + reader.build(); + printf("\nRead data from S3:\n"); + while (reader.hasNext()) { + jobjectArray row = reader.readNextRow(); + jsize length = env->GetArrayLength(row); + + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(row, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("\n"); + } + + reader.close(); +} + +// 3. destory JVM + (jvm)->DestroyJavaVM(); +``` +Find example code at main.cpp of CSDK module + +## API List +``` + /** + * create a CarbonReaderBuilder object for building carbonReader, + * CarbonReaderBuilder object can configure different parameter + * + * @param env JNIEnv + * @param path data store path + * @param tableName table name + * @return CarbonReaderBuilder object + */ + jobject builder(JNIEnv *env, char *path, char *tableName); + + /** + * Configure the projection column names of carbon reader + * + * @param argc argument counter + * @param argv argument vector + * @return CarbonReaderBuilder object + */ + jobject projection(int argc, char *argv[]); + + /** + * build carbon reader with argument vector + * it support multiple parameter + * like: key=value + * for example: fs.s3a.access.key=XXXX, XXXX is user's access key value + * + * @param argc argument counter + * @param argv argument vector + * @return CarbonReaderBuilder object + **/ + jobject withHadoopConf(int argc, char *argv[]); + + /** + * build carbonReader object for reading data + * it support read data from load disk + * + * @return carbonReader object + */ + jobject build(); + + /** + * Whether it has next row data + * + * @return boolean value, if it has next row, return true. if it hasn't next row, return false. + */ + jboolean hasNext(); + + /** + * read next row from data + * + * @return object array of one row + */ + jobjectArray readNextRow(); + + /** + * close the carbon reader + * + * @return boolean value + */ + jboolean close(); + +``` diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml index aa7b7c516b7..8dbab15cef1 100644 --- a/examples/spark2/pom.xml +++ b/examples/spark2/pom.xml @@ -80,7 +80,7 @@ org.apache.httpcomponents httpclient - 4.2 + ${httpclient.version} org.scalatest diff --git a/pom.xml b/pom.xml index 00a5287902b..d911093de35 100644 --- a/pom.xml +++ b/pom.xml @@ -113,6 +113,7 @@ UTF-8 1.1.2.6 2.7.2 + 4.2.5 2.11 2.11.8 compile @@ -483,6 +484,7 @@ hadoop-2.8 2.8.3 + 4.5.2 diff --git a/store/CSDK/CMakeLists.txt b/store/CSDK/CMakeLists.txt new file mode 100644 index 00000000000..c3c57a02ad6 --- /dev/null +++ b/store/CSDK/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required (VERSION 2.8) +project (CJDK) +set(CMAKE_BUILD_TYPE Debug) +SET (CMAKE_INSTALL_RPATH_USE_LINK_PATH true) + +find_package (JNI REQUIRED) + +include_directories (${JNI_INCLUDE_DIRS}) + +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set (SOURCE_FILES CarbonReader.cpp CarbonReader.h main.cpp) + +add_executable (CJDK ${SOURCE_FILES}) +get_filename_component (JAVA_JVM_LIBRARY_DIR ${JAVA_JVM_LIBRARY} DIRECTORY) +message (${JAVA_JVM_LIBRARY_DIR}) +target_link_libraries (CJDK ${JAVA_JVM_LIBRARY} ) + diff --git a/store/CSDK/CarbonReader.cpp b/store/CSDK/CarbonReader.cpp new file mode 100644 index 00000000000..040095723ad --- /dev/null +++ b/store/CSDK/CarbonReader.cpp @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "CarbonReader.h" +#include + +jobject CarbonReader::builder(JNIEnv *env, char *path, char *tableName) { + + jniEnv = env; + jclass carbonReaderClass = env->FindClass("org/apache/carbondata/sdk/file/CarbonReader"); + jmethodID carbonReaderBuilderID = env->GetStaticMethodID(carbonReaderClass, "builder", + "(Ljava/lang/String;Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); + jstring jpath = env->NewStringUTF(path); + jstring jtableName = env->NewStringUTF(tableName); + jvalue args[2]; + args[0].l = jpath; + args[1].l = jtableName; + carbonReaderBuilderObject = env->CallStaticObjectMethodA(carbonReaderClass, carbonReaderBuilderID, args); + return carbonReaderBuilderObject; +} + +jobject CarbonReader::builder(JNIEnv *env, char *path) { + jniEnv = env; + jclass carbonReaderClass = env->FindClass("org/apache/carbondata/sdk/file/CarbonReader"); + jmethodID carbonReaderBuilderID = env->GetStaticMethodID(carbonReaderClass, "builder", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); + jstring jpath = env->NewStringUTF(path); + jvalue args[1]; + args[0].l = jpath; + carbonReaderBuilderObject = env->CallStaticObjectMethodA(carbonReaderClass, carbonReaderBuilderID, args); + return carbonReaderBuilderObject; +} + +jobject CarbonReader::projection(int argc, char *argv[]) { + jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); + jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "projection", + "([Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); + jclass objectArrayClass = jniEnv->FindClass("Ljava/lang/String;"); + jobjectArray array = jniEnv->NewObjectArray(argc, objectArrayClass, NULL); + for (int i = 0; i < argc; ++i) { + jstring value = jniEnv->NewStringUTF(argv[i]); + jniEnv->SetObjectArrayElement(array, i, value); + } + + jvalue args[1]; + args[0].l = array; + carbonReaderBuilderObject = jniEnv->CallObjectMethodA(carbonReaderBuilderObject, buildID, args); + return carbonReaderBuilderObject; +} + +jobject CarbonReader::withHadoopConf(char *key, char *value) { + jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); + jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "withHadoopConf", + "(Ljava/lang/String;Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonReaderBuilder;"); + + jvalue args[2]; + args[0].l = jniEnv->NewStringUTF(key); + args[1].l = jniEnv->NewStringUTF(value); + carbonReaderBuilderObject = jniEnv->CallObjectMethodA(carbonReaderBuilderObject, buildID, args); + return carbonReaderBuilderObject; +} + +jobject CarbonReader::build() { + jclass carbonReaderBuilderClass = jniEnv->GetObjectClass(carbonReaderBuilderObject); + jmethodID buildID = jniEnv->GetMethodID(carbonReaderBuilderClass, "build", + "()Lorg/apache/carbondata/sdk/file/CarbonReader;"); + carbonReaderObject = jniEnv->CallObjectMethod(carbonReaderBuilderObject, buildID); + return carbonReaderObject; +} + +jboolean CarbonReader::hasNext() { + jclass carbonReader = jniEnv->GetObjectClass(carbonReaderObject); + jmethodID hasNextID = jniEnv->GetMethodID(carbonReader, "hasNext", "()Z"); + unsigned char hasNext = jniEnv->CallBooleanMethod(carbonReaderObject, hasNextID); + return hasNext; +} + +jobjectArray CarbonReader::readNextRow() { + jclass carbonReader = jniEnv->GetObjectClass(carbonReaderObject); + jmethodID readNextRow2ID = jniEnv->GetMethodID(carbonReader, "readNextStringRow", "()[Ljava/lang/Object;"); + jobjectArray row = (jobjectArray) jniEnv->CallObjectMethod(carbonReaderObject, readNextRow2ID); + return row; +} + +jboolean CarbonReader::close() { + jclass carbonReader = jniEnv->GetObjectClass(carbonReaderObject); + jmethodID closeID = jniEnv->GetMethodID(carbonReader, "close", "()V"); + jniEnv->CallBooleanMethod(carbonReaderObject, closeID); +} \ No newline at end of file diff --git a/store/CSDK/CarbonReader.h b/store/CSDK/CarbonReader.h new file mode 100644 index 00000000000..861f704be1a --- /dev/null +++ b/store/CSDK/CarbonReader.h @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +class CarbonReader { +public: + /** + * jni env + */ + JNIEnv *jniEnv; + + /** + * carbonReaderBuilder object for building carbonReader + * it can configure some operation + */ + jobject carbonReaderBuilderObject; + + /** + * carbonReader object for reading data + */ + jobject carbonReaderObject; + + /** + * create a CarbonReaderBuilder object for building carbonReader, + * CarbonReaderBuilder object can configure different parameter + * + * @param env JNIEnv + * @param path data store path + * @param tableName table name + * @return CarbonReaderBuilder object + */ + jobject builder(JNIEnv *env, char *path, char *tableName); + + /** + * create a CarbonReaderBuilder object for building carbonReader, + * CarbonReaderBuilder object can configure different parameter + * + * @param env JNIEnv + * @param path data store path + * @return CarbonReaderBuilder object + * */ + jobject builder(JNIEnv *env, char *path); + + /** + * Configure the projection column names of carbon reader + * + * @param argc argument counter + * @param argv argument vector + * @return CarbonReaderBuilder object + */ + jobject projection(int argc, char *argv[]); + + /** + * configure parameter, including ak,sk and endpoint + * + * @param key key word + * @param value value + * @return CarbonReaderBuilder object + */ + jobject withHadoopConf(char *key, char *value); + + /** + * build carbonReader object for reading data + * it support read data from load disk + * + * @return carbonReader object + */ + jobject build(); + + /** + * Whether it has next row data + * + * @return boolean value, if it has next row, return true. if it hasn't next row, return false. + */ + jboolean hasNext(); + + /** + * read next row from data + * + * @return object array of one row + */ + jobjectArray readNextRow(); + + /** + * close the carbon reader + * + * @return boolean value + */ + jboolean close(); +}; diff --git a/store/CSDK/main.cpp b/store/CSDK/main.cpp new file mode 100644 index 00000000000..a8d1a87c793 --- /dev/null +++ b/store/CSDK/main.cpp @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include "CarbonReader.h" + +using namespace std; + +JavaVM *jvm; + +/** + * init jvm + * + * @return + */ +JNIEnv *initJVM() { + JNIEnv *env; + JavaVMInitArgs vm_args; + int parNum = 3; + int res; + JavaVMOption options[parNum]; + + options[0].optionString = "-Djava.compiler=NONE"; + options[1].optionString = "-Djava.class.path=../../sdk/target/carbondata-sdk.jar"; + options[2].optionString = "-verbose:jni"; + vm_args.version = JNI_VERSION_1_8; + vm_args.nOptions = parNum; + vm_args.options = options; + vm_args.ignoreUnrecognized = JNI_FALSE; + + res = JNI_CreateJavaVM(&jvm, (void **) &env, &vm_args); + if (res < 0) { + fprintf(stderr, "\nCan't create Java VM\n"); + exit(1); + } + + return env; +} + +/** + * test read data from local disk, without projection + * + * @param env jni env + * @return + */ +bool readFromLocalWithoutProjection(JNIEnv *env) { + + CarbonReader carbonReaderClass; + carbonReaderClass.builder(env, "../resources/carbondata"); + carbonReaderClass.build(); + + printf("\nRead data from local without projection:\n"); + + while (carbonReaderClass.hasNext()) { + jobjectArray row = carbonReaderClass.readNextRow(); + jsize length = env->GetArrayLength(row); + + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(row, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("\n"); + } + + carbonReaderClass.close(); +} + +/** + * test read data from local disk + * + * @param env jni env + * @return + */ +bool readFromLocal(JNIEnv *env) { + + CarbonReader reader; + reader.builder(env, "../resources/carbondata", "test"); + + char *argv[11]; + argv[0] = "stringField"; + argv[1] = "shortField"; + argv[2] = "intField"; + argv[3] = "longField"; + argv[4] = "doubleField"; + argv[5] = "boolField"; + argv[6] = "dateField"; + argv[7] = "timeField"; + argv[8] = "decimalField"; + argv[9] = "varcharField"; + argv[10] = "arrayField"; + reader.projection(11, argv); + + reader.build(); + + printf("\nRead data from local:\n"); + + while (reader.hasNext()) { + jobjectArray row = reader.readNextRow(); + jsize length = env->GetArrayLength(row); + + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(row, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("\n"); + } + + reader.close(); +} + +/** + * read data from S3 + * parameter is ak sk endpoint + * + * @param env jni env + * @param argv argument vector + * @return + */ +bool readFromS3(JNIEnv *env, char *argv[]) { + CarbonReader reader; + + char *args[3]; + // "your access key" + args[0] = argv[1]; + // "your secret key" + args[1] = argv[2]; + // "your endPoint" + args[2] = argv[3]; + + reader.builder(env, "s3a://sdk/WriterOutput", "test"); + reader.withHadoopConf("fs.s3a.access.key", argv[1]); + reader.withHadoopConf("fs.s3a.secret.key", argv[2]); + reader.withHadoopConf("fs.s3a.endpoint", argv[3]); + reader.build(); + printf("\nRead data from S3:\n"); + while (reader.hasNext()) { + jobjectArray row = reader.readNextRow(); + jsize length = env->GetArrayLength(row); + + int j = 0; + for (j = 0; j < length; j++) { + jobject element = env->GetObjectArrayElement(row, j); + char *str = (char *) env->GetStringUTFChars((jstring) element, JNI_FALSE); + printf("%s\t", str); + } + printf("\n"); + } + + reader.close(); +} + +/** + * This a example for C++ interface to read carbon file + * If you want to test read data fromS3, please input the parameter: ak sk endpoint + * + * @param argc argument counter + * @param argv argument vector + * @return + */ +int main(int argc, char *argv[]) { + // init jvm + JNIEnv *env; + env = initJVM(); + + if (argc > 3) { + readFromS3(env, argv); + } else { + readFromLocalWithoutProjection(env); + readFromLocal(env); + } + cout << "destory jvm\n\n"; + (jvm)->DestroyJavaVM(); + + cout << "\nfinish destory jvm"; + fprintf(stdout, "Java VM destory.\n"); + return 0; +} + diff --git a/store/sdk/pom.xml b/store/sdk/pom.xml index ea720a23aa7..24241130052 100644 --- a/store/sdk/pom.xml +++ b/store/sdk/pom.xml @@ -39,6 +39,11 @@ hadoop-aws ${hadoop.version} + + org.apache.httpcomponents + httpclient + ${httpclient.version} + diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java index be809e65845..8f2745700ab 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReader.java @@ -24,6 +24,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.annotations.InterfaceStability; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.util.CarbonTaskInfo; import org.apache.carbondata.core.util.ThreadLocalTaskInfo; @@ -90,6 +91,33 @@ public T readNextRow() throws IOException, InterruptedException { return currentReader.getCurrentValue(); } + /** + * Read and return next string row object + */ + public Object[] readNextStringRow() throws IOException, InterruptedException { + validateReader(); + T t = currentReader.getCurrentValue(); + Object[] objects = (Object[]) t; + String[] strings = new String[objects.length]; + for (int i = 0; i < objects.length; i++) { + if (objects[i] instanceof Object[]) { + Object[] arrayString = (Object[]) objects[i]; + StringBuffer stringBuffer = new StringBuffer(); + stringBuffer.append(String.valueOf(arrayString[0])); + if (arrayString.length > 1) { + for (int j = 1; j < arrayString.length; j++) { + stringBuffer.append(CarbonCommonConstants.ARRAY_SEPARATOR) + .append(String.valueOf(arrayString[j])); + } + } + strings[i] = stringBuffer.toString(); + } else { + strings[i] = String.valueOf(objects[i]); + } + } + return strings; + } + /** * Return a new {@link CarbonReaderBuilder} instance * diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java index af3480fe71a..151d57cf7bc 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java @@ -101,6 +101,22 @@ public CarbonReaderBuilder withHadoopConf(Configuration conf) { return this; } + /** + * configure hadoop configuration with key value + * + * @param key key word + * @param value value + * @return this object + */ + public CarbonReaderBuilder withHadoopConf(String key, String value) { + if (this.hadoopConf == null) { + this.hadoopConf = new Configuration(); + + } + this.hadoopConf.set(key, value); + return this; + } + /** * Build CarbonReader *