Skip to content

Commit

Permalink
Support sql expression in FDSExtract (#1112)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaymo001 committed Mar 9, 2023
1 parent e42bb19 commit cb29dd9
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ private[offline] object FeatureTransformation {
def parseMultiDimTensorExpr(featureDef: String): String = {
// Return the text enclosed by the outermost parentheses of the feature definition:
// start one character past the first '(' and stop just before the LAST ')'.
// Using the last ')' (rather than the first) keeps nested calls intact, e.g.
// "FDSExtract(coalesce(a, b))" yields "coalesce(a, b)" instead of "coalesce(a, b".
// Throws StringIndexOutOfBoundsException if featureDef contains no ')' at all.
featureDef.substring(featureDef.indexOf("(") + 1, featureDef.lastIndexOf(")"))
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,81 @@
"name": "value",
"type": ["null", "int"],
"default": null
}]
},
{
"name" : "denseValue",
"type" : [ {
"type" : "array",
"items" : [ {
"type" : "array",
"items" : [ "string", "null" ]
}, "null" ]
}, "null" ]
} ]
},
"data": [{
"mId": 1,
"value": { "int": 1 }
"value": { "int": 1 },
"denseValue" : {
"array" : [ {
"array" : [ {
"string" : "aa"
}, {
"string" : "bb"
}, {
"string" : "cc"
}, {
"string" : "dd"
}, {
"string" : "ee"
} ]
}, {
"array" : [ {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
} ]
} ]
}
}, {
"mId": 2,
"value": null
"value": null,
"denseValue" : null
}, {
"mId": 3,
"value": { "int": 3 }
}]
}
"value": { "int": 3 },
"denseValue" : {
"array" : [ {
"array" : [ {
"string" : "aa"
}, {
"string" : "bb"
}, {
"string" : "cc"
}, {
"string" : "dd"
}, {
"string" : "ee"
} ]
}, {
"array" : [ {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
}, {
"string" : "a"
} ]
}]
}
}]
}
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,17 @@ class AnchoredFeaturesIntegTest extends FeathrIntegTest {
| source: "anchorAndDerivations/nullValueSource.avro.json"
| key.sqlExpr: mId
| features: {
| featureWithNull.def.sqlExpr: FDSExtract(value)
| }
| featureWithNull {
| def.sqlExpr: "FDSExtract(coalesce(denseValue, ARRAY(ARRAY(\"aa\", \"bb\", \"cc\", \"dd\", \"ee\"), ARRAY(\"UNK\", \"UNK\", \"UNK\", \"UNK\", \"UNK\")) ))"
| type:{
| type: TENSOR
| tensorCategory: DENSE
| shape: [2,5]
| dimensionType: [INT, INT]
| valType: STRING
| }
| }
| }
| }
|}
""".stripMargin,
Expand All @@ -423,21 +432,25 @@ class AnchoredFeaturesIntegTest extends FeathrIntegTest {
// a_id
"1",
// featureWithNull
1),
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a"))),
),
Row(
// a_id
"2",
// featureWithNull
null),
// featureWithNull
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("UNK", "UNK", "UNK", "UNK", "UNK")))
),
Row(
// a_id
"3",
// featureWithNull
3))),
mutable.WrappedArray.make(Array(Array("aa", "bb", "cc", "dd", "ee"), Array("a", "a", "a", "a", "a")),
)))),
StructType(
List(
StructField("a_id", StringType, true),
StructField("featureWithNull", IntegerType, true))))
StructField("featureWithNull", ArrayType(ArrayType(StringType, true), true), true)
)))

def cmpFunc(row: Row): String = row.get(0).toString

Expand Down

0 comments on commit cb29dd9

Please sign in to comment.