Skip to content

Commit

Permalink
Support high-dimensional tensor in derivations
Browse files Browse the repository at this point in the history
  • Loading branch information
jaymo001 committed May 17, 2023
1 parent 53780f8 commit 9738ae6
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ private[offline] object FDSConversionUtils {
// 1D sparse tensor
case targetType: StructType if targetType.fields.size == 2 =>
convertRawValueTo1DFDSSparseTensorRow(rawFeatureValue, targetType)
// 1D dense tensor
case targetType: ArrayType if !targetType.elementType.isInstanceOf[ArrayType] =>
// dense tensor
case targetType: ArrayType =>
convertRawValueTo1DFDSDenseTensorRow(rawFeatureValue, targetType)
case otherType =>
throw new FeathrException(ErrorLabel.FEATHR_ERROR, s"Converting ${rawFeatureValue} to FDS Tensor type " +
Expand Down Expand Up @@ -279,6 +279,8 @@ private[offline] object FDSConversionUtils {
case _: FloatType =>
// If it's FloatType, then we know it's autoTz rules.
convertRawValueTo1DFDSDenseTensorRowAutoTz(rawFeatureValue)
case _: ArrayType =>
rawFeatureValue.asInstanceOf[Array[_]]
case _ =>
convertRawValueTo1DFDSDenseTensorRowTz(rawFeatureValue)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,94 @@ class AnchoredFeaturesIntegTest extends FeathrIntegTest {
setFeathrJobParam(ADD_DEFAULT_COL_FOR_MISSING_DATA, "false")
}

/*
* Test features with fdsExtract.
*/
@Test
def testFeaturesWithFdsExtract: Unit = {
val df = runLocalFeatureJoinForTest(
joinConfigAsString =
"""
| features: {
| key: a_id
| featureList: ["featureWithNullDerived"]
| }
""".stripMargin,
featureDefAsString =
"""
| anchors: {
| anchor1: {
| source: "anchorAndDerivations/nullValueSource.avro.json"
| key.sqlExpr: mId
| features: {
| featureWithNull {
| def.sqlExpr: "FDSExtract(denseValue)"
| type:{
| type: TENSOR
| tensorCategory: DENSE
| shape: [2,5]
| dimensionType: [INT, INT]
| valType: STRING
| }
| }
| }
| }
|}
|derivations: {
|featureWithNullDerived:{
| key: ["id"]
| inputs:
| {
| fv: {key: ["id"], feature: featureWithNull}
| }
| definition.sqlExpr: "coalesce(fv, ARRAY(ARRAY(\"aa\", \"bb\", \"cc\", \"dd\", \"ee\"), ARRAY(\"UNK\", \"UNK\", \"UNK\", \"UNK\", \"UNK\")))"
| type:
| {
| type: TENSOR
| tensorCategory: DENSE
| shape: [2,5]
| dimensionType: [INT, INT]
| valType: STRING
| }
|}
|}
""".stripMargin,
observationDataPath = "anchorAndDerivations/testMVELLoopExpFeature-observations.csv")

val selectedColumns = Seq("a_id", "featureWithNullDerived")
val filteredDf = df.data.select(selectedColumns.head, selectedColumns.tail: _*)

val expectedDf = ss.createDataFrame(
ss.sparkContext.parallelize(
Seq(
Row(
// a_id
"1",
// featureWithNull
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a"))),
),
Row(
// a_id
"2",
// f3eatureWithNull
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("UNK", "UNK", "UNK", "UNK", "UNK")))
),
Row(
// a_id
"3",
// featureWithNull
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a")),
)))),
StructType(
List(
StructField("a_id", StringType, true),
StructField("featureWithNull", ArrayType(ArrayType(StringType, true), true), true)
)))

def cmpFunc(row: Row): String = row.get(0).toString

FeathrTestUtils.assertDataFrameApproximatelyEquals(filteredDf, expectedDf, cmpFunc)
}

/*
* Test features with null values.
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
version=1.0.4-rc5
version=1.0.4-rc6
SONATYPE_AUTOMATIC_RELEASE=true
POM_ARTIFACT_ID=feathr_2.12

0 comments on commit 9738ae6

Please sign in to comment.