Skip to content

Commit

Permalink
Add support for Hadoop paths (#5)
Browse files Browse the repository at this point in the history
Add support for Hadoop paths

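Trying to load an Excel file from a Hadoop path (such as hdfs://localhost/users/shoffing/test_excel.xlsx) wasn't working, because the file was opened with a local java.io.FileInputStream. This change opens the file through the Hadoop FileSystem API instead, which adds support for loading Excel files from Hadoop paths such as HDFS.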
  • Loading branch information
shoffing authored and nightscape committed Mar 23, 2017
1 parent d742180 commit c7cc437
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions src/main/scala/com/crealytics/spark/excel/ExcelRelation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Try

Expand All @@ -25,7 +26,9 @@ case class ExcelRelation(
)
(@transient val sqlContext: SQLContext)
extends BaseRelation with TableScan with PrunedScan {
val workbook = WorkbookFactory.create(new FileInputStream(location))
val path = new Path(location)
val inputStream = FileSystem.get(path.toUri, sqlContext.sparkContext.hadoopConfiguration).open(path)
val workbook = WorkbookFactory.create(inputStream)
val sheet = findSheet(workbook, sheetName)
val headers = sheet.getRow(0).cellIterator().asScala.to[Vector]
override val schema: StructType = inferSchema
Expand All @@ -34,7 +37,7 @@ extends BaseRelation with TableScan with PrunedScan {
private def findSheet(workBook: Workbook, sheetName: Option[String]): Sheet = {
sheetName.map { sn =>
Option(workBook.getSheet(sn)).getOrElse(
throw new IllegalArgumentException(s"Unknow sheet $sn")
throw new IllegalArgumentException(s"Unknown sheet $sn")
)
}.getOrElse(workBook.sheetIterator.next)
}
Expand Down

0 comments on commit c7cc437

Please sign in to comment.