diff --git a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapeInputFormat.java b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapeInputFormat.java index 51c340b249..b688e928fc 100644 --- a/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapeInputFormat.java +++ b/core/src/main/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapeInputFormat.java @@ -38,6 +38,19 @@ public class ShapeInputFormat extends CombineFileInputFormat { + /** + * suffix of attribute file + */ + private final static String DBF_SUFFIX = "dbf"; + /** + * suffix of shape record file + */ + private final static String SHP_SUFFIX = "shp"; + /** + * suffix of index file + */ + private final static String SHX_SUFFIX = "shx"; + public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException { @@ -86,10 +99,13 @@ public List getSplits(JobContext job) for (Path filePath : filePathSizePair.keySet()) { String filename = FilenameUtils.removeExtension(filePath.getName()).toLowerCase(); + String suffix = FilenameUtils.getExtension(filePath.getName()).toLowerCase(); + fileSplitPathParts.add(filePath); fileSplitSizeParts.add(filePathSizePair.get(filePath)); - if (prevfilename != "" && !prevfilename.equals(filename)) { + if (prevfilename != "" && !prevfilename.equals(filename) + && (suffix.equals(SHX_SUFFIX) || suffix.equals(DBF_SUFFIX) || suffix.equals(SHP_SUFFIX))) { // compare file name and if it is different then all same filename is into CombileFileSplit splits.add(new CombineFileSplit(fileSplitPathParts.toArray(new Path[0]), Longs.toArray(fileSplitSizeParts))); fileSplitPathParts.clear(); diff --git a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java index 41de865994..6663e689aa 100644 --- a/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java +++ b/core/src/test/java/org/apache/sedona/core/formatMapper/shapefileParser/shapes/ShapefileReaderTest.java @@ -124,7 +124,7 @@ public void testShapefileEndWithUndefinedType() } /** - * Test correctness of parsing shapefile + * Test correctness of parsing shapefile. There are additional unnecessary files in directory * * @throws IOException */ diff --git a/core/src/test/resources/shapefiles/polygon/map.xml b/core/src/test/resources/shapefiles/polygon/map.xml new file mode 100644 index 0000000000..d2604b2b6a --- /dev/null +++ b/core/src/test/resources/shapefiles/polygon/map.xml @@ -0,0 +1,5 @@ + + testReadToGeometryRDD + map + additional file + \ No newline at end of file diff --git a/core/src/test/resources/shapefiles/polygon/map1.xml b/core/src/test/resources/shapefiles/polygon/map1.xml new file mode 100644 index 0000000000..dacd217187 --- /dev/null +++ b/core/src/test/resources/shapefiles/polygon/map1.xml @@ -0,0 +1,5 @@ + + testReadToGeometryRDD + map1 + additional file + \ No newline at end of file