From e853036f4da432f11d9f19981768d4329661f37e Mon Sep 17 00:00:00 2001 From: xubo245 Date: Tue, 9 Oct 2018 15:16:09 +0800 Subject: [PATCH 1/2] [CARBONDATA-2996] CarbonSchemaReader support read schema from folder path 1.Deprecated readSchemaInIndexFile and readSchemaInDataFile, unify them to readSchema in SDK 2.delete readSchemaInIndexFile and readSchemaInDataFile, unify them to readSchema in CSDK 3.readSchema support read schema from folder path,carbonindex file, and carbondata file. and user can decide whether check all files schema --- docs/csdk-guide.md | 23 ++- docs/sdk-guide.md | 25 +++ .../examples/sdk/CarbonReaderExample.java | 2 +- store/CSDK/src/CarbonSchemaReader.cpp | 21 ++- store/CSDK/src/CarbonSchemaReader.h | 21 ++- store/CSDK/test/main.cpp | 46 +---- .../sdk/file/CarbonSchemaReader.java | 178 +++++++++++++++--- .../org/apache/carbondata/sdk/file/Field.java | 21 +++ .../apache/carbondata/sdk/file/Schema.java | 20 ++ .../carbondata/sdk/file/CarbonReaderTest.java | 14 +- .../sdk/file/CarbonSchemaReaderTest.java | 76 +++++++- 11 files changed, 342 insertions(+), 105 deletions(-) diff --git a/docs/csdk-guide.md b/docs/csdk-guide.md index b83b06d3fa3..95b9cf87735 100644 --- a/docs/csdk-guide.md +++ b/docs/csdk-guide.md @@ -219,23 +219,28 @@ release the memory and destroy JVM. ``` ``` /** - * read Schema from Data File + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema * - * @param path Data File path - * @return carbon schema object + * @param path file/folder path + * @return schema */ - jobject readSchemaInDataFile(char *path); + jobject readSchema(char *path); ``` ``` /** - * read Schema from index File + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema * - * @param path index File path - * @return carbon schema object + * @param path carbon data path + * @param validateSchema whether check all files schema + * @return schema */ - jobject readSchemaInIndexFile(char *path); - + jobject readSchema(char *path, bool validateSchema); ``` + ###Schema ``` /** diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index 8988dc33a77..6f5c58dabfa 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -727,6 +727,31 @@ Find example code at [CarbonReaderExample](https://github.com/apache/carbondata/ */ public static Schema readSchemaInIndexFile(String indexFilePath); ``` +``` + /** + * read schema from path, + * path can be folder path,carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path); +``` +``` + /** + * read schema from path, + * path can be folder path,carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema); +``` ``` /** diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java index a2f2eeecd70..c7300709a23 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java @@ -96,7 +96,7 @@ public boolean accept(File dir, String name) { throw new RuntimeException("Carbon index file not exists."); } Schema schema = CarbonSchemaReader - .readSchemaInIndexFile(dataFiles[0].getAbsolutePath()) + .readSchema(dataFiles[0].getAbsolutePath()) .asOriginOrder(); // Transform the schema String[] strings = new String[schema.getFields().length]; diff --git a/store/CSDK/src/CarbonSchemaReader.cpp b/store/CSDK/src/CarbonSchemaReader.cpp index 95eae7da3bd..c895f4bbd94 100644 --- a/store/CSDK/src/CarbonSchemaReader.cpp +++ b/store/CSDK/src/CarbonSchemaReader.cpp @@ -29,14 +29,14 @@ CarbonSchemaReader::CarbonSchemaReader(JNIEnv *env) { this->jniEnv = env; } -jobject CarbonSchemaReader::readSchemaInDataFile(char *path) { +jobject CarbonSchemaReader::readSchema(char *path) { if (path == NULL) { throw std::runtime_error("path parameter can't be NULL."); } - jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchemaInDataFile", - "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); + jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchema", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); if (methodID == NULL) { - throw std::runtime_error("Can't find the method in java: readSchemaInDataFile"); + throw std::runtime_error("Can't find the method in java: readSchema"); } jstring jPath = jniEnv->NewStringUTF(path); jvalue args[1]; @@ -48,21 +48,22 @@ jobject CarbonSchemaReader::readSchemaInDataFile(char *path) { return result; } -jobject CarbonSchemaReader::readSchemaInIndexFile(char *path) { +jobject CarbonSchemaReader::readSchema(char *path, bool validateSchema) { if (path == NULL) { throw std::runtime_error("path parameter can't be NULL."); } - jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchemaInIndexFile", - "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); + jmethodID methodID = jniEnv->GetStaticMethodID(carbonSchemaReaderClass, "readSchema", + "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/Schema;"); if (methodID == NULL) { - throw std::runtime_error("Can't find the method in java: readSchemaInDataFile"); + throw std::runtime_error("Can't find the method in java: readSchema"); } jstring jPath = jniEnv->NewStringUTF(path); - jvalue args[1]; + jvalue args[2]; args[0].l = jPath; + args[1].z = validateSchema; jobject result = jniEnv->CallStaticObjectMethodA(carbonSchemaReaderClass, methodID, args); if (jniEnv->ExceptionCheck()) { throw jniEnv->ExceptionOccurred(); } return result; -} \ No newline at end of file +} diff --git a/store/CSDK/src/CarbonSchemaReader.h b/store/CSDK/src/CarbonSchemaReader.h index 1b1299515df..5c1b324a2b6 100644 --- a/store/CSDK/src/CarbonSchemaReader.h +++ b/store/CSDK/src/CarbonSchemaReader.h @@ -40,19 +40,24 @@ class CarbonSchemaReader { CarbonSchemaReader(JNIEnv *env); /** - * read Schema from Data File + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema * - * @param path Data File path - * @return carbon schema object + * @param path file/folder path + * @return schema */ - jobject readSchemaInDataFile(char *path); + jobject readSchema(char *path); /** - * read Schema from index File + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema * - * @param path index File path - * @return carbon schema object + * @param path carbon data path + * @param validateSchema whether check all files schema + * @return schema */ - jobject readSchemaInIndexFile(char *path); + jobject readSchema(char *path, bool validateSchema); }; \ No newline at end of file diff --git a/store/CSDK/test/main.cpp b/store/CSDK/test/main.cpp index 5d875e5fc30..44a7c69a23e 100644 --- a/store/CSDK/test/main.cpp +++ b/store/CSDK/test/main.cpp @@ -126,17 +126,21 @@ void printResult(JNIEnv *env, CarbonReader reader) { } /** - * test read Schema from Index File + * test read Schema from path * * @param env jni env * @return whether it is success */ -bool readSchemaInIndexFile(JNIEnv *env, char *indexFilePath) { +bool readSchema(JNIEnv *env, char *Path, bool validateSchema) { printf("\nread Schema from Index File:\n"); CarbonSchemaReader carbonSchemaReader(env); jobject schema; try { - schema = carbonSchemaReader.readSchemaInIndexFile(indexFilePath); + if (validateSchema) { + schema = carbonSchemaReader.readSchema(Path, validateSchema); + } else { + schema = carbonSchemaReader.readSchema(Path); + } Schema carbonSchema(env, schema); int length = carbonSchema.getFieldsLength(); printf("schema length is:%d\n", length); @@ -148,42 +152,12 @@ bool readSchemaInIndexFile(JNIEnv *env, char *indexFilePath) { printf("Array Element Type Name is:%s\n", carbonSchema.getArrayElementTypeName(i)); } } - } catch (jthrowable e) { env->ExceptionDescribe(); } return true; } -/** - * test read Schema from Data File - * - * @param env jni env - * @return whether it is success - */ -bool readSchemaInDataFile(JNIEnv *env, char *dataFilePath) { - printf("\nread Schema from Data File:\n"); - CarbonSchemaReader carbonSchemaReader(env); - jobject schema; - try { - schema = carbonSchemaReader.readSchemaInDataFile(dataFilePath); - } catch (jthrowable e) { - env->ExceptionDescribe(); - } - Schema carbonSchema(env, schema); - int length = carbonSchema.getFieldsLength(); - printf("schema length is:%d\n", length); - for (int i = 0; i < length; i++) { - printf("%d\t", i); - printf("%s\t", carbonSchema.getFieldName(i)); - printf("%s\n", carbonSchema.getFieldDataTypeName(i)); - if (strcmp(carbonSchema.getFieldDataTypeName(i), "ARRAY") == 0) { - printf("Array Element Type Name is:%s\n", carbonSchema.getArrayElementTypeName(i)); - } - } - return true; -} - /** * test read data from local disk, without projection * @@ -647,12 +621,8 @@ int main(int argc, char *argv[]) { testReadNextBatchRow(env, S3Path, 100000, 100000, argv, 4, true); } else { tryCatchException(env); - char *indexFilePath = argv[1]; - char *dataFilePath = argv[2]; testCarbonProperties(env); testWriteData(env, "./data", 1, argv); - readSchemaInIndexFile(env, indexFilePath); - readSchemaInDataFile(env, dataFilePath); testWriteData(env, "./data", 1, argv); readFromLocalWithoutProjection(env, smallFilePath); readFromLocalWithProjection(env, smallFilePath); @@ -662,6 +632,8 @@ int main(int argc, char *argv[]) { testReadNextRow(env, path, printNum, argv, 0, false); testReadNextBatchRow(env, path, batch, printNum, argv, 0, true); testReadNextBatchRow(env, path, batch, printNum, argv, 0, false); + readSchema(env, path, false); + readSchema(env, path, true); } (jvm)->DestroyJavaVM(); diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java index fe8e16891e6..fbdd2318371 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java @@ -26,6 +26,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.FileReader; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.converter.SchemaConverter; import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl; @@ -34,10 +35,12 @@ import org.apache.carbondata.core.reader.CarbonHeaderReader; import org.apache.carbondata.core.reader.CarbonIndexFileReader; import org.apache.carbondata.core.util.CarbonUtil; -import org.apache.carbondata.core.util.path.CarbonTablePath; import org.apache.carbondata.format.FileFooter3; +import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException; import static org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchemaToWrapperColumnSchema; +import static org.apache.carbondata.core.util.path.CarbonTablePath.CARBON_DATA_EXT; +import static org.apache.carbondata.core.util.path.CarbonTablePath.INDEX_FILE_EXT; /** * Schema reader for carbon files, including carbondata file, carbonindex file, and schema file @@ -51,6 +54,7 @@ public class CarbonSchemaReader { * @return schema object * @throws IOException */ + @Deprecated public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOException { org.apache.carbondata.format.TableInfo tableInfo = CarbonUtil.readSchemaFile(schemaFilePath); SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl(); @@ -61,14 +65,122 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce return new Schema(schemaList); } + /** + * get carbondata/carbonindex file in path + * + * @param path carbon file path + * @return CarbonFile array + */ + private static CarbonFile[] getCarbonFile(String path, final String extension) + throws IOException { + String dataFilePath = path; + if (!(dataFilePath.contains(extension))) { + CarbonFile[] carbonFiles = FileFactory + .getCarbonFile(path) + .listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + if (file == null) { + return false; + } + return file.getName().endsWith(extension); + } + }); + if (carbonFiles == null || carbonFiles.length < 1) { + throw new IOException("Carbon file not exists."); + } + return carbonFiles; + } + return null; + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and will not check all files schema + * + * @param path file/folder path + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path) throws IOException { + return readSchema(path, false); + } + + /** + * read schema from path, + * path can be folder path, carbonindex file path, and carbondata file path + * and user can decide whether check all files schema + * + * @param path file/folder path + * @param validateSchema whether check all files schema + * @return schema + * @throws IOException + */ + public static Schema readSchema(String path, boolean validateSchema) throws IOException { + if (path.endsWith(INDEX_FILE_EXT)) { + return readSchemaFromIndexFile(path); + } else if (path.endsWith(CARBON_DATA_EXT)) { + return readSchemaFromDataFile(path); + } else if (validateSchema) { + CarbonFile[] carbonIndexFiles = getCarbonFile(path, INDEX_FILE_EXT); + Schema schema; + if (carbonIndexFiles != null && carbonIndexFiles.length != 0) { + schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); + for (int i = 1; i < carbonIndexFiles.length; i++) { + Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); + if (schema != schema2) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + CarbonFile[] carbonDataFiles = getCarbonFile(path, CARBON_DATA_EXT); + for (int i = 0; i < carbonDataFiles.length; i++) { + Schema schema2 = readSchemaFromDataFile(carbonDataFiles[i].getAbsolutePath()); + if (!schema.equals(schema2)) { + throw new CarbonDataLoadingException("Schema is different between different files."); + } + } + return schema; + } else { + throw new CarbonDataLoadingException("No carbonindex file in this path."); + } + } else { + String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); + if (indexFilePath != null) { + return readSchemaFromIndexFile(indexFilePath); + } else { + String dataFilePath = getCarbonFile(path, CARBON_DATA_EXT)[0].getAbsolutePath(); + if (dataFilePath == null) { + throw new CarbonDataLoadingException("No carbonindex and carbondata file in the path"); + } else { + return readSchemaFromDataFile(dataFilePath); + } + } + } + } + /** * Read carbondata file and return the schema + * This interface will be removed, + * please use readSchema instead of this interface * - * @param dataFilePath complete path including carbondata file name + * @param dataFilePath carbondata file store path * @return Schema object * @throws IOException */ + @Deprecated public static Schema readSchemaInDataFile(String dataFilePath) throws IOException { + return readSchema(dataFilePath, false); + } + + /** + * Read schema from carbondata file + * + * @param dataFilePath carbondata file path + * @return carbon data schema + * @throws IOException + */ + public static Schema readSchemaFromDataFile(String dataFilePath) throws IOException { CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath); List columnSchemaList = new ArrayList(); List schemaList = reader.readSchema(); @@ -82,40 +194,30 @@ public static Schema readSchemaInDataFile(String dataFilePath) throws IOExceptio } /** - * This method return the version details in formatted string by reading from carbondata file - * @param dataFilePath - * @return + * Read carbonindex file and return the schema + * This interface will be removed, + * please use readSchema instead of this interface + * + * @param indexFilePath complete path including index file name + * @return schema object * @throws IOException */ - public static String getVersionDetails(String dataFilePath) throws IOException { - long fileSize = - FileFactory.getCarbonFile(dataFilePath, FileFactory.getFileType(dataFilePath)).getSize(); - FileReader fileReader = FileFactory.getFileHolder(FileFactory.getFileType(dataFilePath)); - ByteBuffer buffer = - fileReader.readByteBuffer(FileFactory.getUpdatedFilePath(dataFilePath), fileSize - 8, 8); - fileReader.finish(); - CarbonFooterReaderV3 footerReader = new CarbonFooterReaderV3(dataFilePath, buffer.getLong()); - FileFooter3 footer = footerReader.readFooterVersion3(); - if (null != footer.getExtra_info()) { - return footer.getExtra_info().get(CarbonCommonConstants.CARBON_WRITTEN_BY_FOOTER_INFO) - + " in version: " + footer.getExtra_info() - .get(CarbonCommonConstants.CARBON_WRITTEN_VERSION); - } else { - return "Version Details are not found in carbondata file"; - } + @Deprecated + public static Schema readSchemaInIndexFile(String indexFilePath) throws IOException { + return readSchema(indexFilePath, false); } /** - * Read carbonindex file and return the schema + * Read schema from carbonindex file * - * @param indexFilePath complete path including index file name - * @return schema object + * @param indexFilePath carbonindex file path + * @return carbon data Schema * @throws IOException */ - public static Schema readSchemaInIndexFile(String indexFilePath) throws IOException { + private static Schema readSchemaFromIndexFile(String indexFilePath) throws IOException { CarbonFile indexFile = FileFactory.getCarbonFile(indexFilePath, FileFactory.getFileType(indexFilePath)); - if (!indexFile.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)) { + if (!indexFile.getName().endsWith(INDEX_FILE_EXT)) { throw new IOException("Not an index file name"); } // read schema from the first index file @@ -144,4 +246,28 @@ public static Schema readSchemaInIndexFile(String indexFilePath) throws IOExcept } } + /** + * This method return the version details in formatted string by reading from carbondata file + * + * @param dataFilePath + * @return + * @throws IOException + */ + public static String getVersionDetails(String dataFilePath) throws IOException { + long fileSize = + FileFactory.getCarbonFile(dataFilePath, FileFactory.getFileType(dataFilePath)).getSize(); + FileReader fileReader = FileFactory.getFileHolder(FileFactory.getFileType(dataFilePath)); + ByteBuffer buffer = + fileReader.readByteBuffer(FileFactory.getUpdatedFilePath(dataFilePath), fileSize - 8, 8); + fileReader.finish(); + CarbonFooterReaderV3 footerReader = new CarbonFooterReaderV3(dataFilePath, buffer.getLong()); + FileFooter3 footer = footerReader.readFooterVersion3(); + if (null != footer.getExtra_info()) { + return footer.getExtra_info().get(CarbonCommonConstants.CARBON_WRITTEN_BY_FOOTER_INFO) + + " in version: " + footer.getExtra_info() + .get(CarbonCommonConstants.CARBON_WRITTEN_VERSION); + } else { + return "Version Details are not found in carbondata file"; + } + } } diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java index 065ff80de7e..fb9a6d860a8 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Field.java @@ -291,4 +291,25 @@ private StructField prepareSubFields(String fieldName, DataType dataType) { return new StructField(fieldName, dataType); } } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Field) { + Field field = (Field) obj; + if ((!this.getDataType().equals(field.getDataType())) + || (!this.getFieldName().equals(field.getFieldName())) + || (!(this.getSchemaOrdinal() == (field.getSchemaOrdinal()))) + ) { + return false; + } + } else { + return false; + } + return true; + } } diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java index 05d3d9e3640..fb237252262 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/Schema.java @@ -151,4 +151,24 @@ public int compare(Field o1, Field o2) { }); return this; } + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Schema) { + Schema schema = (Schema) obj; + for (int i = 0; i < this.fields.length; i++) { + if (!(schema.fields)[i].equals((this.fields)[i])) { + return false; + } + } + } else { + return false; + } + return true; + } } diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index d79a1ad8134..c176f06d4ac 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -777,7 +777,7 @@ public void testReadSchemaFromDataFile() throws IOException { }); Assert.assertTrue(dataFiles != null); Assert.assertTrue(dataFiles.length > 0); - Schema schema = CarbonSchemaReader.readSchemaInDataFile(dataFiles[0].getAbsolutePath()); + Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()); Assert.assertTrue(schema.getFields().length == 2); Assert.assertEquals("name", (schema.getFields())[0].getFieldName()); Assert.assertEquals("age", (schema.getFields())[1].getFieldName()); @@ -1021,7 +1021,7 @@ public void testReadSchemaInDataFileAndSort() throws IOException, InterruptedExc } }); - Schema schema = CarbonSchemaReader.readSchemaInDataFile(dataFiles2[0].getAbsolutePath()); + Schema schema = CarbonSchemaReader.readSchema(dataFiles2[0].getAbsolutePath()); // sort the schema Arrays.sort(schema.getFields(), new Comparator() { @@ -1140,7 +1140,7 @@ public void testReadUserSchema() throws IOException, InterruptedException { } }); - Schema schema = CarbonSchemaReader.readSchemaInIndexFile(dataFiles2[0].getAbsolutePath()).asOriginOrder(); + Schema schema = CarbonSchemaReader.readSchema(dataFiles2[0].getAbsolutePath()).asOriginOrder(); // Transform the schema String[] strings = new String[schema.getFields().length]; for (int i = 0; i < schema.getFields().length; i++) { @@ -1355,7 +1355,7 @@ public boolean accept(File dir, String name) { } }); - Schema schema = CarbonSchemaReader.readSchemaInIndexFile(dataFiles2[0].getAbsolutePath()).asOriginOrder(); + Schema schema = CarbonSchemaReader.readSchema(dataFiles2[0].getAbsolutePath()).asOriginOrder(); for (int i = 0; i < schema.getFields().length; i++) { System.out.println((schema.getFields())[i].getFieldName() + "\t" + schema.getFields()[i].getSchemaOrdinal()); @@ -1518,10 +1518,10 @@ public boolean accept(File dir, String name) { } }); if (dataFiles == null || dataFiles.length < 1) { - throw new RuntimeException("Carbon index file not exists."); + throw new RuntimeException("Carbon data file not exists."); } Schema schema = CarbonSchemaReader - .readSchemaInDataFile(dataFiles[0].getAbsolutePath()) + .readSchema(dataFiles[0].getAbsolutePath()) .asOriginOrder(); // Transform the schema String[] strings = new String[schema.getFields().length]; @@ -1613,7 +1613,7 @@ public boolean accept(File dir, String name) { throw new RuntimeException("Carbon index file not exists."); } Schema schema = CarbonSchemaReader - .readSchemaInIndexFile(dataFiles[0].getAbsolutePath()) + .readSchema(dataFiles[0].getAbsolutePath()) .asOriginOrder(); // Transform the schema int count = 0; diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java index db41fa08714..0031a45e75a 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java @@ -22,17 +22,20 @@ import java.util.HashMap; import java.util.Map; -import junit.framework.TestCase; import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.commons.io.FileUtils; -import org.junit.*; -public class CarbonSchemaReaderTest extends TestCase { +import junit.framework.TestCase; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +public class CarbonSchemaReaderTest extends TestCase { String path = "./testWriteFiles"; @Before @@ -101,18 +104,32 @@ public boolean accept(CarbonFile file) { String dataFilePath = carbonFiles[0].getAbsolutePath(); Schema schema = CarbonSchemaReader - .readSchemaInDataFile(dataFilePath) + .readSchema(dataFilePath) .asOriginOrder(); assertEquals(schema.getFieldsLength(), 12); checkSchema(schema); + } catch (Throwable e) { + Assert.fail(); + e.printStackTrace(); + } + } + @Test + public void testReadSchemaWithoutSchemaFilesSchema() { + try { + Schema schema = CarbonSchemaReader + .readSchema(path) + .asOriginOrder(); + checkSchema(schema); } catch (Throwable e) { + Assert.fail(); e.printStackTrace(); } } public boolean checkSchema(Schema schema) { + assertEquals(schema.getFields().length, 12); assert (schema.getFieldName(0).equalsIgnoreCase("stringField")); assert (schema.getFieldName(1).equalsIgnoreCase("shortField")); assert (schema.getFieldName(2).equalsIgnoreCase("intField")); @@ -159,16 +176,62 @@ public boolean accept(CarbonFile file) { if (carbonFiles == null || carbonFiles.length < 1) { throw new RuntimeException("Carbon index file not exists."); } - String dataFilePath = carbonFiles[0].getAbsolutePath(); + String indexFilePath = carbonFiles[0].getAbsolutePath(); Schema schema = CarbonSchemaReader - .readSchemaInDataFile(dataFilePath) + .readSchema(indexFilePath) .asOriginOrder(); assertEquals(schema.getFieldsLength(), 12); checkSchema(schema); + } catch (Throwable e) { + Assert.fail(); + e.printStackTrace(); + } + } + + @Test + public void testReadSchemaAndCheckFilesSchema() { + try { + Schema schema = CarbonSchemaReader + .readSchema(path, false) + .asOriginOrder(); + checkSchema(schema); + } catch (Throwable e) { + Assert.fail(); + e.printStackTrace(); + } + } + @Test + public void testReadSchemaWithDifferentSchema() { + try { + int num = 10; + Field[] fields = new Field[2]; + fields[0] = new Field("name", DataTypes.STRING); + fields[1] = new Field("age", DataTypes.INT); + CarbonWriter writer = CarbonWriter + .builder() + .outputPath(path) + .withCsvInput(new Schema(fields)) + .writtenBy("testReadSchemaWithDifferentSchema") + .build(); + + for (int i = 0; i < num; i++) { + writer.write(new String[]{"robot" + (i % 10), String.valueOf(i)}); + } + writer.close(); + try { + CarbonSchemaReader + .readSchema(path, true) + .asOriginOrder(); + Assert.fail(); + } catch (Exception e) { + Assert.assertTrue(e.getMessage() + .equalsIgnoreCase("Schema is different between different files.")); + } } catch (Throwable e) { + Assert.fail(); e.printStackTrace(); } } @@ -177,5 +240,4 @@ public boolean accept(CarbonFile file) { public void tearDown() throws IOException { FileUtils.deleteDirectory(new File(path)); } - } From 85996c691cc75399cd177fa76d5f8107c35fb657 Mon Sep 17 00:00:00 2001 From: xubo245 Date: Tue, 6 Nov 2018 20:45:49 +0800 Subject: [PATCH 2/2] optimize --- .../sdk/file/CarbonSchemaReader.java | 19 ++++++------------- .../sdk/file/CarbonSchemaReaderTest.java | 12 ++++++------ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java index fbdd2318371..aadd13a83f5 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java @@ -74,7 +74,7 @@ public static Schema readSchemaInSchemaFile(String schemaFilePath) throws IOExce private static CarbonFile[] getCarbonFile(String path, final String extension) throws IOException { String dataFilePath = path; - if (!(dataFilePath.contains(extension))) { + if (!(dataFilePath.endsWith(extension))) { CarbonFile[] carbonFiles = FileFactory .getCarbonFile(path) .listFiles(new CarbonFileFilter() { @@ -90,8 +90,10 @@ public boolean accept(CarbonFile file) { throw new IOException("Carbon file not exists."); } return carbonFiles; + } else { + throw new CarbonDataLoadingException("Please ensure path " + + path + " end with " + extension); } - return null; } /** @@ -129,7 +131,7 @@ public static Schema readSchema(String path, boolean validateSchema) throws IOEx schema = readSchemaFromIndexFile(carbonIndexFiles[0].getAbsolutePath()); for (int i = 1; i < carbonIndexFiles.length; i++) { Schema schema2 = readSchemaFromIndexFile(carbonIndexFiles[i].getAbsolutePath()); - if (schema != schema2) { + if (!schema.equals(schema2)) { throw new CarbonDataLoadingException("Schema is different between different files."); } } @@ -146,16 +148,7 @@ public static Schema readSchema(String path, boolean validateSchema) throws IOEx } } else { String indexFilePath = getCarbonFile(path, INDEX_FILE_EXT)[0].getAbsolutePath(); - if (indexFilePath != null) { - return readSchemaFromIndexFile(indexFilePath); - } else { - String dataFilePath = getCarbonFile(path, CARBON_DATA_EXT)[0].getAbsolutePath(); - if (dataFilePath == null) { - throw new CarbonDataLoadingException("No carbonindex and carbondata file in the path"); - } else { - return readSchemaFromDataFile(dataFilePath); - } - } + return readSchemaFromIndexFile(indexFilePath); } } diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java index 0031a45e75a..1e70ada17e6 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonSchemaReaderTest.java @@ -110,8 +110,8 @@ public boolean accept(CarbonFile file) { assertEquals(schema.getFieldsLength(), 12); checkSchema(schema); } catch (Throwable e) { - Assert.fail(); e.printStackTrace(); + Assert.fail(); } } @@ -123,8 +123,8 @@ public void testReadSchemaWithoutSchemaFilesSchema() { .asOriginOrder(); checkSchema(schema); } catch (Throwable e) { - Assert.fail(); e.printStackTrace(); + Assert.fail(); } } @@ -185,8 +185,8 @@ public boolean accept(CarbonFile file) { assertEquals(schema.getFieldsLength(), 12); checkSchema(schema); } catch (Throwable e) { - Assert.fail(); e.printStackTrace(); + Assert.fail(); } } @@ -194,12 +194,12 @@ public boolean accept(CarbonFile file) { public void testReadSchemaAndCheckFilesSchema() { try { Schema schema = CarbonSchemaReader - .readSchema(path, false) + .readSchema(path, true) .asOriginOrder(); checkSchema(schema); } catch (Throwable e) { - Assert.fail(); e.printStackTrace(); + Assert.fail(); } } @@ -231,8 +231,8 @@ public void testReadSchemaWithDifferentSchema() { .equalsIgnoreCase("Schema is different between different files.")); } } catch (Throwable e) { - Assert.fail(); e.printStackTrace(); + Assert.fail(); } }