Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paimon-python/pypaimon/common/predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_by_simple_stats(self, stat: SimpleStats, row_count: int) -> bool:
null_count = stat.null_counts[self.index]

if self.method == 'isNull':
return null_count is not None and null_count > 0
return null_count is None or null_count > 0
if self.method == 'isNotNull':
return null_count is None or row_count is None or null_count < row_count

Expand Down
31 changes: 29 additions & 2 deletions paimon-python/pypaimon/tests/predicates_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
import pyarrow as pa

from pypaimon import CatalogFactory, Schema
from pypaimon.table.row.generic_row import GenericRowDeserializer
from pypaimon.common.predicate import Predicate
from pypaimon.manifest.schema.simple_stats import SimpleStats
from pypaimon.table.row.generic_row import GenericRow, GenericRowDeserializer


def _check_filtered_result(read_builder, expected_df):
Expand Down Expand Up @@ -373,8 +375,33 @@ def test_or_predicates(self):
_check_filtered_result(table.new_read_builder().with_filter(predicate),
self.df.loc[[0, 3, 4]])

def test_is_null(self):
    """Check stats-based evaluation of an ``isNull`` predicate.

    Three null-count situations are exercised against a row count of 10:
    a missing count (``None``) must evaluate to True, a count of zero
    must evaluate to False, and a positive count must evaluate to True.
    """

    def _stats_for(null_count):
        # Only the null count differs between cases; min/max stay empty.
        return SimpleStats(
            min_values=GenericRow([], []),
            max_values=GenericRow([], []),
            null_counts=[null_count],
        )

    is_null = Predicate(method="isNull", index=0, field="c", literals=None)

    # null_count missing -> file has to be kept
    missing = _stats_for(None)
    self.assertTrue(
        is_null.test_by_simple_stats(missing, 10),
        "isNull must keep file when null_count is missing",
    )

    # null_count == 0 -> can prune
    none_null = _stats_for(0)
    self.assertFalse(is_null.test_by_simple_stats(none_null, 10))

    # null_count > 0 -> keep
    some_null = _stats_for(3)
    self.assertTrue(is_null.test_by_simple_stats(some_null, 10))

def test_filter_with_null_and_or(self):
from pypaimon.common.predicate import Predicate
from pypaimon.table.row.offset_row import OffsetRow

p_gt = Predicate(method='greaterThan', index=1, field='score', literals=[10])
Expand Down