Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing Bigtable row filters for sampling and labeling. #1320

Merged
merged 1 commit into from
Dec 22, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions gcloud/bigtable/row.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,31 @@ def to_pb(self):
return data_pb2.RowFilter(row_key_regex_filter=self.regex)


class RowSampleFilter(RowFilter):
    """Row filter that matches all cells from a row with probability p.

    :type sample: float
    :param sample: The probability of matching a cell; expected to lie in
                   the closed interval ``[0, 1]``. The value is stored
                   as given and is not validated by this class.
    """

    def __init__(self, sample):
        self.sample = sample

    def __eq__(self, other):
        # Only another instance of this filter class can compare equal;
        # anything else is unequal regardless of its attributes.
        if isinstance(other, self.__class__):
            return other.sample == self.sample
        return False

    def to_pb(self):
        """Build the protobuf representation of this filter.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A row filter message created with ``row_sample_filter``
                  set to the stored ``sample`` value.
        """
        sample_pb = data_pb2.RowFilter(row_sample_filter=self.sample)
        return sample_pb


class FamilyNameRegexFilter(_RegexFilter):
"""Row filter for a family name regular expression.

Expand Down Expand Up @@ -522,3 +547,39 @@ def to_pb(self):
:returns: The converted current object.
"""
return data_pb2.RowFilter(strip_value_transformer=self.flag)


class ApplyLabelFilter(RowFilter):
    """Filter to apply labels to cells.

    Intended to be used as an intermediate filter on a pre-existing
    filtered result set. This way, if two sets are combined, the label can
    tell where the cell(s) originated. This allows the client to determine
    which results were produced from which part of the filter.

    .. note::

        Due to a technical limitation, it is not currently possible to
        apply multiple labels to a cell.

    :type label: str
    :param label: Label to apply to cells in the output row. Values must
                  be at most 15 characters long, and match the pattern
                  ``[a-z0-9\\-]+``. The value is stored as given and is
                  not validated by this class.
    """

    def __init__(self, label):
        self.label = label

    def __eq__(self, other):
        # Only another instance of this filter class can compare equal;
        # anything else is unequal regardless of its attributes.
        if isinstance(other, self.__class__):
            return other.label == self.label
        return False

    def to_pb(self):
        """Build the protobuf representation of this filter.

        :rtype: :class:`.data_pb2.RowFilter`
        :returns: A row filter message created with
                  ``apply_label_transformer`` set to the stored ``label``.
        """
        label_pb = data_pb2.RowFilter(apply_label_transformer=self.label)
        return label_pb
72 changes: 72 additions & 0 deletions gcloud/bigtable/test_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,42 @@ def test_to_pb(self):
self.assertEqual(pb_val, expected_pb)


class TestRowSampleFilter(unittest2.TestCase):
    # Unit tests for ``gcloud.bigtable.row.RowSampleFilter``.

    def _getTargetClass(self):
        # Imported lazily so an import failure shows up as a test error
        # rather than breaking collection of the whole module.
        from gcloud.bigtable.row import RowSampleFilter
        return RowSampleFilter

    def _makeOne(self, *args, **kwargs):
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor(self):
        # The constructor stores the very object it was given.
        marker = object()
        sample_filter = self._makeOne(marker)
        self.assertTrue(sample_filter.sample is marker)

    def test___eq__type_differ(self):
        # A filter never equals an object of an unrelated type.
        sample_filter = self._makeOne(object())
        unrelated = object()
        self.assertNotEqual(sample_filter, unrelated)

    def test___eq__same_value(self):
        # Two filters built from the same sample compare equal.
        marker = object()
        first = self._makeOne(marker)
        second = self._makeOne(marker)
        self.assertEqual(first, second)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2

        probability = 0.25
        sample_filter = self._makeOne(probability)
        actual_pb = sample_filter.to_pb()
        wanted_pb = data_pb2.RowFilter(row_sample_filter=probability)
        self.assertEqual(actual_pb, wanted_pb)


class TestFamilyNameRegexFilter(unittest2.TestCase):

def _getTargetClass(self):
Expand Down Expand Up @@ -591,3 +627,39 @@ def test_to_pb(self):
pb_val = row_filter.to_pb()
expected_pb = data_pb2.RowFilter(strip_value_transformer=flag)
self.assertEqual(pb_val, expected_pb)


class TestApplyLabelFilter(unittest2.TestCase):
    # Unit tests for ``gcloud.bigtable.row.ApplyLabelFilter``.

    def _getTargetClass(self):
        # Imported lazily so an import failure shows up as a test error
        # rather than breaking collection of the whole module.
        from gcloud.bigtable.row import ApplyLabelFilter
        return ApplyLabelFilter

    def _makeOne(self, *args, **kwargs):
        klass = self._getTargetClass()
        return klass(*args, **kwargs)

    def test_constructor(self):
        # The constructor stores the very object it was given.
        marker = object()
        label_filter = self._makeOne(marker)
        self.assertTrue(label_filter.label is marker)

    def test___eq__type_differ(self):
        # A filter never equals an object of an unrelated type.
        label_filter = self._makeOne(object())
        unrelated = object()
        self.assertNotEqual(label_filter, unrelated)

    def test___eq__same_value(self):
        # Two filters built from the same label compare equal.
        marker = object()
        first = self._makeOne(marker)
        second = self._makeOne(marker)
        self.assertEqual(first, second)

    def test_to_pb(self):
        from gcloud.bigtable._generated import bigtable_data_pb2 as data_pb2

        label_text = u'label'
        label_filter = self._makeOne(label_text)
        actual_pb = label_filter.to_pb()
        wanted_pb = data_pb2.RowFilter(apply_label_transformer=label_text)
        self.assertEqual(actual_pb, wanted_pb)