Skip to content

Commit

Permalink
[SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type
Browse files Browse the repository at this point in the history

## What changes were proposed in this pull request?
Add the column name to the exception message that is raised when an input column has an unsupported data type.

## How was this patch tested?
+ [x] All tests pass.

Author: Yan Facai (颜发才) <facai.yan@gmail.com>

Closes #18523 from facaiy/ENH/vectorassembler_add_col.
  • Loading branch information
facaiy authored and yanboliang committed Jul 7, 2017
1 parent 7fcbb9b commit 56536e9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,15 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
override def transformSchema(schema: StructType): StructType = {
val inputColNames = $(inputCols)
val outputColName = $(outputCol)
val inputDataTypes = inputColNames.map(name => schema(name).dataType)
inputDataTypes.foreach {
case _: NumericType | BooleanType =>
case t if t.isInstanceOf[VectorUDT] =>
case other =>
throw new IllegalArgumentException(s"Data type $other is not supported.")
val incorrectColumns = inputColNames.flatMap { name =>
schema(name).dataType match {
case _: NumericType | BooleanType => None
case t if t.isInstanceOf[VectorUDT] => None
case other => Some(s"Data type $other of column $name is not supported.")
}
}
if (incorrectColumns.nonEmpty) {
throw new IllegalArgumentException(incorrectColumns.mkString("\n"))
}
if (schema.fieldNames.contains(outputColName)) {
throw new IllegalArgumentException(s"Output column $outputColName already exists.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ class VectorAssemblerSuite
val thrown = intercept[IllegalArgumentException] {
assembler.transform(df)
}
assert(thrown.getMessage contains "Data type StringType is not supported")
assert(thrown.getMessage contains
"Data type StringType of column a is not supported.\n" +
"Data type StringType of column b is not supported.\n" +
"Data type StringType of column c is not supported.")
}

test("ML attributes") {
Expand Down

0 comments on commit 56536e9

Please sign in to comment.