Skip to content

Commit

Permalink
remove support for arbitrary nested arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
cloud-fan committed Sep 10, 2014
1 parent ee8a724 commit e1a8898
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,74 +71,32 @@ case class GetItem(child: Expression, ordinal: Expression) extends Expression {
}

/**
* Returns the value of fields in the `child`.
* The type of `child` can be struct, or array of struct,
* or array of array of struct, or array of array ... of struct.
* Returns the value of fields in the Struct `child`.
*/
case class GetField(child: Expression, fieldName: String) extends UnaryExpression {
type EvaluatedType = Any

// Result type of this expression: `buildDataType` applied to the selected field's type.
// `structType` is referenced first purely for its evaluation effect, so that struct
// resolution has run before `buildDataType` is used.
lazy val dataType = {
structType
buildDataType(field.dataType)
}

// The expression's type is the type of the selected struct field.
def dataType = field.dataType

// Nullable when the child expression is nullable or, failing that, when the selected
// field is. The child is checked first, so `field` is only consulted when needed.
override def nullable = {
  val childMayBeNull = child.nullable
  childMayBeNull || field.nullable
}

// Foldability is inherited unchanged from the child expression.
override def foldable = child.foldable

// Function that rebuilds the result type around the selected field's type. It starts as
// the identity function; getStructType composes an ArrayType wrapper onto it for every
// array level it unwraps.
private var _buildDataType = identity[DataType] _
// Read accessor for `_buildDataType`. `structType` is referenced first so that struct
// resolution (which updates the var) has run before the value is captured.
private lazy val buildDataType = {
structType
_buildDataType
}

// Counter that starts at 0 and is incremented by getStructType once per array level
// it unwraps.
private var _nestedArrayCount = 0
// Read accessor for `_nestedArrayCount`. `structType` is referenced first so that struct
// resolution (which updates the counter) has run before the value is captured.
private lazy val nestedArrayCount = {
structType
_nestedArrayCount
}

private def getStructType(t: DataType): StructType = t match {
case ArrayType(elementType, containsNull) =>
_buildDataType = {(t: DataType) => ArrayType(t, containsNull)} andThen _buildDataType
_nestedArrayCount += 1
getStructType(elementType)
protected def structType = child.dataType match {
case s: StructType => s
case otherType => sys.error(s"GetField is not valid on fields of type $otherType")
}

/**
 * Resolves the StructType in which `fieldName` is looked up.
 *
 * When `child` is itself a GetField, the type builder and array count it accumulated are
 * carried over to this instance and resolution continues from the type of the child's
 * selected field. For any other child, resolution starts from `child.dataType`.
 */
protected lazy val structType: StructType = {
  child match {
    case n: GetField =>
      // Read the child's state through its lazy accessors rather than the raw vars:
      // the accessors force the child's own struct resolution first, so the values
      // copied here are the fully populated ones instead of the initial identity / 0.
      // This must happen before getStructType, which updates both vars while unwrapping.
      this._buildDataType = n.buildDataType
      this._nestedArrayCount = n.nestedArrayCount
      getStructType(n.field.dataType)
    case _ => getStructType(child.dataType)
  }
}

// The struct field selected by `fieldName`. Fails with an error when the resolved
// struct type has no field of that name.
lazy val field = {
  val matching = structType.fields.find(candidate => candidate.name == fieldName)
  matching match {
    case Some(found) => found
    case None => sys.error(s"No such field $fieldName in ${child.dataType}")
  }
}

// Position of `field` within the struct's field list.
lazy val ordinal = {
  val structFields = structType.fields
  structFields.indexOf(field)
}

override lazy val resolved = childrenResolved
override lazy val resolved = childrenResolved && child.dataType.isInstanceOf[StructType]

override def eval(input: Row): Any = {
val baseValue = child.eval(input)
evaluateValue(baseValue, nestedArrayCount)
}

private def evaluateValue(v: Any, count: Int): Any = {
if (v == null) {
null
} else if (count > 0) {
v.asInstanceOf[Seq[_]].map(r => evaluateValue(r, count - 1))
} else {
v.asInstanceOf[Row](ordinal)
}
val baseValue = child.eval(input).asInstanceOf[Row]
if (baseValue == null) null else baseValue(ordinal)
}

// Renders as the child expression followed by a dot and the field name.
override def toString = Seq(child, fieldName).mkString(".")
Expand Down
28 changes: 0 additions & 28 deletions sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -594,33 +594,5 @@ class JsonSuite extends QueryTest {
sql("select complexArrayOfStruct[0].field1[1].inner2[0], complexArrayOfStruct[1].field2[0][1] from jsonTable"),
("str2", 6) :: Nil
)

checkAnswer(
sql("select arrayOfStruct.field1, arrayOfStruct.field2 from jsonTable"),
(Seq(true, false, null), Seq("str1", null, null)) :: Nil
)

checkAnswer(
sql("select complexNestedArray.field, complexNestedArray.field.innerField from jsonTable"),
(
Seq(
Seq(
Seq("str1", null),
Seq("str2", null)
),
Seq(
Seq("str3", null),
Seq(null, "str4")
),
null
),

Seq(
Seq("str1", "str2"),
Seq("str3", null),
null
)
) :: Nil
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,31 +106,6 @@ object TestJsonData {
"inner1": "str4"
}],
"field2": [[5, 6], [7, 8]]
}],
"complexNestedArray": [
{
"field": [
{
"innerField": "str1"
},
{
"innerField": "str2"
}
]
},
{
"field": [
{
"innerField": "str3"
},
{
"otherInner": "str4"
}
]
},
{
"otherField": "str5"
}
]
}]
}""" :: Nil)
}

0 comments on commit e1a8898

Please sign in to comment.