From 8b644cb00afed612be0480636ab27a2692b92fde Mon Sep 17 00:00:00 2001
From: James Baiera
Date: Thu, 10 Jun 2021 17:13:07 -0400
Subject: [PATCH] Fix failing strict wildcard pushdown tests (#1683)

* Handle changes to the wildcard query in strict mode

* fix more test
---
 .../integration/AbstractScalaEsSparkSQL.scala | 22 +++++++++++++------
 .../integration/AbstractScalaEsSparkSQL.scala | 22 +++++++++++++------
 .../integration/AbstractScalaEsSparkSQL.scala | 22 +++++++++++++------
 3 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index a4d12518d..9b05a3db3 100644
--- a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -998,15 +998,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1014,15 +1018,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1036,7 +1044,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))
diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index fc6ace3df..9be766b9c 100644
--- a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1055,15 +1055,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1071,15 +1075,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))
diff --git a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index 54f7771ba..86acac5cb 100644
--- a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1055,15 +1055,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1071,15 +1075,19 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))