From 472216e5f3142cb9aa0f101b3af4329855aee57a Mon Sep 17 00:00:00 2001
From: liuxian
Date: Tue, 23 Oct 2018 14:43:51 +0800
Subject: [PATCH] Avoid redundant FileSplit creation in ParquetFileFormat and
 OrcFileFormat

PartitionedFile already carries the split offsets, so wrapping them in a
Hadoop FileSplit only to read the same values back out is unnecessary;
use file.start and file.length directly.
---
 .../datasources/parquet/ParquetFileFormat.scala   | 13 +++++--------
 .../apache/spark/sql/hive/orc/OrcFileFormat.scala |  3 +--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index ea4f1592a7c2e..f04502d113acb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -352,17 +352,14 @@ class ParquetFileFormat
     (file: PartitionedFile) => {
       assert(file.partitionValues.numFields == partitionSchema.size)
 
-      val fileSplit =
-        new FileSplit(new Path(new URI(file.filePath)), file.start, file.length, Array.empty)
-      val filePath = fileSplit.getPath
-
+      val filePath = new Path(new URI(file.filePath))
       val split =
         new org.apache.parquet.hadoop.ParquetInputSplit(
           filePath,
-          fileSplit.getStart,
-          fileSplit.getStart + fileSplit.getLength,
-          fileSplit.getLength,
-          fileSplit.getLocations,
+          file.start,
+          file.start + file.length,
+          file.length,
+          Array.empty,
           null)
 
       val sharedConf = broadcastedHadoopConf.value.value
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index de8085f07db19..89e6ea8604974 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -154,13 +154,12 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
         val job = Job.getInstance(conf)
         FileInputFormat.setInputPaths(job, file.filePath)
 
-        val fileSplit = new FileSplit(filePath, file.start, file.length, Array.empty)
         // Custom OrcRecordReader is used to get
         // ObjectInspector during recordReader creation itself and can
         // avoid NameNode call in unwrapOrcStructs per file.
         // Specifically would be helpful for partitioned datasets.
         val orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf))
-        new SparkOrcNewRecordReader(orcReader, conf, fileSplit.getStart, fileSplit.getLength)
+        new SparkOrcNewRecordReader(orcReader, conf, file.start, file.length)
       }
 
       val recordsIterator = new RecordReaderIterator[OrcStruct](orcRecordReader)
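
Note (reviewer sketch): the change is a pure simplification. PartitionedFile
already carries the byte range a task should read, so building a Hadoop
FileSplit just to call getStart/getLength/getLocations on it was an extra
per-file object allocation and round-trip. Below is a minimal, self-contained
Scala sketch of the equivalence, using a stand-in case class for Spark's
internal PartitionedFile (only the fields the diff touches are modeled; the
path is made up, and this is not the actual Spark code path):

    // Stand-in for Spark's PartitionedFile; the real class has more fields.
    case class PartitionedFile(filePath: String, start: Long, length: Long)

    object SplitSketch {
      def main(args: Array[String]): Unit = {
        val file = PartitionedFile("hdfs:///warehouse/t/part-00000.parquet", 0L, 4096L)

        // Before: new FileSplit(path, file.start, file.length, Array.empty),
        // then reading fileSplit.getStart / getLength / getLocations back out.
        // After: the same values, taken directly from PartitionedFile.
        val start  = file.start                // was fileSplit.getStart
        val end    = file.start + file.length  // was fileSplit.getStart + fileSplit.getLength
        val length = file.length               // was fileSplit.getLength
        val hosts  = Array.empty[String]       // was fileSplit.getLocations (constructed empty anyway)

        println(s"read [$start, $end), $length bytes, ${hosts.length} preferred hosts")
      }
    }

These values, plus the file path and a null row-group offsets argument, are
exactly what the patch now passes to org.apache.parquet.hadoop.ParquetInputSplit;
OrcFileFormat likewise passes file.start and file.length straight to
SparkOrcNewRecordReader.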