From 6746227f4355a8bcbff02624dc04d763235ea766 Mon Sep 17 00:00:00 2001
From: James Baiera
Date: Tue, 8 Jun 2021 15:55:01 -0400
Subject: [PATCH 1/2] Handle changes to the wildcard query in strict mode

---
 .../integration/AbstractScalaEsSparkSQL.scala | 18 +++++++++++++-----
 .../integration/AbstractScalaEsSparkSQL.scala | 18 +++++++++++++-----
 .../integration/AbstractScalaEsSparkSQL.scala | 18 +++++++++++++-----
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index 44b4f5fe6..c9de54492 100644
--- a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1005,8 +1005,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1021,8 +1025,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1036,7 +1044,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))
diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index d16499d98..e85bb3435 100644
--- a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1062,8 +1062,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1078,8 +1082,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))
diff --git a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index 54f7771ba..917d0b93b 100644
--- a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1062,8 +1062,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("feb", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1078,8 +1082,12 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     }
 
     filter.show
-    assertEquals(1, filter.count())
-    assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    if (strictPushDown) {
+      assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
+    } else {
+      assertEquals(1, filter.count())
+      assertEquals("jan", filter.select("tag").take(1)(0)(0))
+    }
   }
 
   @Test
@@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
   @Test
   def testDataSourcePushDown12And() {
     val df = esDataSource("pd_and")
-    var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
+    var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
 
     assertEquals(1, filter.count())
     assertEquals("jan", filter.select("tag").take(1)(0)(0))

From 14af9c6451c05032b3b6ef821321356571b2eff8 Mon Sep 17 00:00:00 2001
From: James Baiera
Date: Tue, 8 Jun 2021 17:01:20 -0400
Subject: [PATCH 2/2] fix more test

---
 .../spark/integration/AbstractScalaEsSparkSQL.scala | 4 ++--
 .../spark/integration/AbstractScalaEsSparkSQL.scala | 4 ++--
 .../spark/integration/AbstractScalaEsSparkSQL.scala | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index c9de54492..feb395ffb 100644
--- a/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-13/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -998,7 +998,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
@@ -1018,7 +1018,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
diff --git a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index e85bb3435..489321981 100644
--- a/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-20/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1055,7 +1055,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
@@ -1075,7 +1075,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
diff --git a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
index 917d0b93b..86acac5cb 100644
--- a/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
+++ b/spark/sql-30/src/itest/scala/org/elasticsearch/spark/integration/AbstractScalaEsSparkSQL.scala
@@ -1055,7 +1055,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_starts_with")
     var filter = df.filter(df("airport").startsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return
@@ -1075,7 +1075,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
     val df = esDataSource("pd_ends_with")
     var filter = df.filter(df("airport").endsWith("O"))
 
-    if (!keepHandledFilters) {
+    if (!keepHandledFilters && !strictPushDown) {
       // term query pick field with multi values
       assertEquals(2, filter.count())
       return