Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -998,31 +998,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
val df = esDataSource("pd_starts_with")
var filter = df.filter(df("airport").startsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
}
}

@Test
def testDataSourcePushDown10EndsWith() {
val df = esDataSource("pd_ends_with")
var filter = df.filter(df("airport").endsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
}
}

@Test
Expand All @@ -1036,7 +1044,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
@Test
def testDataSourcePushDown12And() {
val df = esDataSource("pd_and")
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you help me understand why df("airport").endsWith("O") changed to df("tag").equalTo("jan") here and on sql-20/AbstractScalaEsSparkSQL.scala:1104?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The endsWith predicate translates into a wildcard query. Usually we would lowercase the O character so that the query string becomes *o. Wildcard queries are term-level queries and do not have analyzers applied to them. When run in "strict" mode, the query keeps the character uppercased (*O), which does not match the term in Lucene. Lowercasing the O in the test itself would instead lead to Spark filtering the data out of the results (since "o" != "O").

I swapped out the endsWith predicate in the And test methods because I wanted the query to bring back positive matches in all test cases, instead of expecting empty results only when strict mode is enabled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More concisely: the bug deals with strict mode being enabled, and this test shouldn't be affected by that setting. Removing the feature that does depend on the setting from the test (hopefully) makes it simpler to reason about.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok, thanks for the explanation. LGTM!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!


assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1055,31 +1055,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
val df = esDataSource("pd_starts_with")
var filter = df.filter(df("airport").startsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
}
}

@Test
def testDataSourcePushDown10EndsWith() {
val df = esDataSource("pd_ends_with")
var filter = df.filter(df("airport").endsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
}
}

@Test
Expand All @@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
@Test
def testDataSourcePushDown12And() {
val df = esDataSource("pd_and")
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))

assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1055,31 +1055,39 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
val df = esDataSource("pd_starts_with")
var filter = df.filter(df("airport").startsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("feb", filter.select("tag").take(1)(0)(0))
}
}

@Test
def testDataSourcePushDown10EndsWith() {
val df = esDataSource("pd_ends_with")
var filter = df.filter(df("airport").endsWith("O"))

if (!keepHandledFilters) {
if (!keepHandledFilters && !strictPushDown) {
// term query pick field with multi values
assertEquals(2, filter.count())
return
}

filter.show
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
if (strictPushDown) {
assertEquals(0, filter.count()) // Strict means specific terms matching, and the terms are lowercased
} else {
assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
}
}

@Test
Expand All @@ -1093,7 +1101,7 @@ class AbstractScalaEsScalaSparkSQL(prefix: String, readMetadata: jl.Boolean, pus
@Test
def testDataSourcePushDown12And() {
val df = esDataSource("pd_and")
var filter = df.filter(df("reason").isNotNull.and(df("airport").endsWith("O")))
var filter = df.filter(df("reason").isNotNull.and(df("tag").equalTo("jan")))

assertEquals(1, filter.count())
assertEquals("jan", filter.select("tag").take(1)(0)(0))
Expand Down