diff --git a/feathr-impl/src/main/scala/com/linkedin/feathr/offline/transformation/FDSConversionUtils.scala b/feathr-impl/src/main/scala/com/linkedin/feathr/offline/transformation/FDSConversionUtils.scala index 4d66c6aa2..5e6f3652c 100644 --- a/feathr-impl/src/main/scala/com/linkedin/feathr/offline/transformation/FDSConversionUtils.scala +++ b/feathr-impl/src/main/scala/com/linkedin/feathr/offline/transformation/FDSConversionUtils.scala @@ -49,8 +49,8 @@ private[offline] object FDSConversionUtils { // 1D sparse tensor case targetType: StructType if targetType.fields.size == 2 => convertRawValueTo1DFDSSparseTensorRow(rawFeatureValue, targetType) - // 1D dense tensor - case targetType: ArrayType if !targetType.elementType.isInstanceOf[ArrayType] => + // dense tensor + case targetType: ArrayType => convertRawValueTo1DFDSDenseTensorRow(rawFeatureValue, targetType) case otherType => throw new FeathrException(ErrorLabel.FEATHR_ERROR, s"Converting ${rawFeatureValue} to FDS Tensor type " + @@ -279,6 +279,8 @@ private[offline] object FDSConversionUtils { case _: FloatType => // If it's FloatType, then we know it's autoTz rules. convertRawValueTo1DFDSDenseTensorRowAutoTz(rawFeatureValue) + case _: ArrayType => + rawFeatureValue.asInstanceOf[Array[_]] case _ => convertRawValueTo1DFDSDenseTensorRowTz(rawFeatureValue) } diff --git a/feathr-impl/src/test/scala/com/linkedin/feathr/offline/AnchoredFeaturesIntegTest.scala b/feathr-impl/src/test/scala/com/linkedin/feathr/offline/AnchoredFeaturesIntegTest.scala index 3d35101a1..43a3ffc41 100644 --- a/feathr-impl/src/test/scala/com/linkedin/feathr/offline/AnchoredFeaturesIntegTest.scala +++ b/feathr-impl/src/test/scala/com/linkedin/feathr/offline/AnchoredFeaturesIntegTest.scala @@ -566,6 +566,94 @@ class AnchoredFeaturesIntegTest extends FeathrIntegTest { setFeathrJobParam(ADD_DEFAULT_COL_FOR_MISSING_DATA, "false") } + /* + * Test features with fdsExtract. + */ + @Test + def testFeaturesWithFdsExtract: Unit = { + val df = runLocalFeatureJoinForTest( + joinConfigAsString = + """ + | features: { + | key: a_id + | featureList: ["featureWithNullDerived"] + | } + """.stripMargin, + featureDefAsString = + """ + | anchors: { + | anchor1: { + | source: "anchorAndDerivations/nullValueSource.avro.json" + | key.sqlExpr: mId + | features: { + | featureWithNull { + | def.sqlExpr: "FDSExtract(denseValue)" + | type:{ + | type: TENSOR + | tensorCategory: DENSE + | shape: [2,5] + | dimensionType: [INT, INT] + | valType: STRING + | } + | } + | } + | } + |} + |derivations: { + |featureWithNullDerived:{ + | key: ["id"] + | inputs: + | { + | fv: {key: ["id"], feature: featureWithNull} + | } + | definition.sqlExpr: "coalesce(fv, ARRAY(ARRAY(\"aa\", \"bb\", \"cc\", \"dd\", \"ee\"), ARRAY(\"UNK\", \"UNK\", \"UNK\", \"UNK\", \"UNK\")))" + | type: + | { + | type: TENSOR + | tensorCategory: DENSE + | shape: [2,5] + | dimensionType: [INT, INT] + | valType: STRING + | } + |} + |} + """.stripMargin, + observationDataPath = "anchorAndDerivations/testMVELLoopExpFeature-observations.csv") + + val selectedColumns = Seq("a_id", "featureWithNullDerived") + val filteredDf = df.data.select(selectedColumns.head, selectedColumns.tail: _*) + + val expectedDf = ss.createDataFrame( + ss.sparkContext.parallelize( + Seq( + Row( + // a_id + "1", + // featureWithNull + mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a"))), + ), + Row( + // a_id + "2", + // f3eatureWithNull + mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("UNK", "UNK", "UNK", "UNK", "UNK"))) + ), + Row( + // a_id + "3", + // featureWithNull + mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a")), + )))), + StructType( + List( + StructField("a_id", StringType, true), + StructField("featureWithNull", ArrayType(ArrayType(StringType, true), true), true) + ))) + + def cmpFunc(row: Row): String = row.get(0).toString + + FeathrTestUtils.assertDataFrameApproximatelyEquals(filteredDf, expectedDf, cmpFunc) + } /* * Test features with null values. diff --git a/gradle.properties b/gradle.properties index 570596788..8c0b90e81 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,3 +1,3 @@ -version=1.0.4-rc5 +version=1.0.4-rc6 SONATYPE_AUTOMATIC_RELEASE=true POM_ARTIFACT_ID=feathr_2.12