Skip to content

Commit

Permalink
Pulling in individualized RowFilters from upstream.
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Dec 23, 2015
1 parent 764df9f commit d6164dc
Show file tree
Hide file tree
Showing 6 changed files with 1,113 additions and 800 deletions.
31 changes: 16 additions & 15 deletions gcloud_bigtable/happybase/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@
from gcloud_bigtable.happybase.batch import Batch
from gcloud_bigtable.happybase.batch import _WAL_SENTINEL
from gcloud_bigtable.happybase.batch import _get_column_pairs
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import CellsColumnLimitFilter
from gcloud_bigtable.row import ColumnQualifierRegexFilter
from gcloud_bigtable.row import FamilyNameRegexFilter
from gcloud_bigtable.row import RowFilterChain
from gcloud_bigtable.row import RowFilterUnion
from gcloud_bigtable.row import RowKeyRegexFilter
from gcloud_bigtable.row import TimestampRange
from gcloud_bigtable.row import TimestampRangeFilter
from gcloud_bigtable.table import Table as _LowLevelTable


Expand Down Expand Up @@ -204,7 +208,7 @@ def _filter_chain_helper(column=None, versions=None, timestamp=None,
:type filters: list
:param filters: (Optional) List of existing filters to be extended.
:rtype: :class:`.RowFilterChain`, :class:`.RowFilter`
:rtype: :class:`.RowFilter`
:returns: The chained filter created, or just a single filter if only
one was needed.
:raises: :class:`ValueError <exceptions.ValueError>` if there are no
Expand All @@ -215,14 +219,14 @@ def _filter_chain_helper(column=None, versions=None, timestamp=None,

if column is not None:
column_family_id, column_qualifier = column.split(':')
fam_filter = RowFilter(family_name_regex_filter=column_family_id)
qual_filter = RowFilter(column_qualifier_regex_filter=column_qualifier)
fam_filter = FamilyNameRegexFilter(column_family_id)
qual_filter = ColumnQualifierRegexFilter(column_qualifier)
filters.extend([fam_filter, qual_filter])
if versions is not None:
filters.append(RowFilter(cells_per_column_limit_filter=versions))
filters.append(CellsColumnLimitFilter(versions))
time_range = _convert_to_time_range(timestamp=timestamp)
if time_range is not None:
filters.append(RowFilter(timestamp_range_filter=time_range))
filters.append(TimestampRangeFilter(time_range))

num_filters = len(filters)
if num_filters == 0:
Expand All @@ -243,22 +247,20 @@ def _columns_filter_helper(columns):
* an entire column family: ``fam`` or ``fam:``
* a single column: ``fam:col``
:rtype: :class:`.RowFilterUnion`, :class:`.RowFilter`
:rtype: :class:`.RowFilter`
:returns: The union filter created containing all of the matched columns.
:raises: :class:`ValueError <exceptions.ValueError>` if there are no
filters to union.
"""
filters = []
for column_family_id, column_qualifier in _get_column_pairs(columns):
fam_filter = FamilyNameRegexFilter(column_family_id)
if column_qualifier is not None:
fam_filter = RowFilter(family_name_regex_filter=column_family_id)
qual_filter = RowFilter(
column_qualifier_regex_filter=column_qualifier)
qual_filter = ColumnQualifierRegexFilter(column_qualifier)
combined_filter = RowFilterChain(
filters=[fam_filter, qual_filter])
filters.append(combined_filter)
else:
fam_filter = RowFilter(family_name_regex_filter=column_family_id)
filters.append(fam_filter)

num_filters = len(filters)
Expand All @@ -276,14 +278,14 @@ def _row_keys_filter_helper(row_keys):
:type row_keys: list
:param row_keys: Iterable containing row keys (as strings).
:rtype: :class:`.RowFilterUnion`, :class:`.RowFilter`
:rtype: :class:`.RowFilter`
:returns: The union filter created containing all of the row keys.
:raises: :class:`ValueError <exceptions.ValueError>` if there are no
filters to union.
"""
filters = []
for row_key in row_keys:
filters.append(RowFilter(row_key_regex_filter=row_key))
filters.append(RowKeyRegexFilter(row_key))

num_filters = len(filters)
if num_filters == 0:
Expand Down Expand Up @@ -618,8 +620,7 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None,
* an entire column family: ``fam`` or ``fam:``
* a single column: ``fam:col``
:type filter: :class:`RowFilter`, :class:`RowFilterChain`,
:class:`RowFilterUnion` or :class:`ConditionalRowFilter`
:type filter: :class:`.RowFilter`
:param filter: (Optional) An additional filter (beyond column and
row range filters supported here). HappyBase / HBase
users will have used this as an HBase filter string. See
Expand Down
69 changes: 35 additions & 34 deletions gcloud_bigtable/happybase/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,31 +204,32 @@ def test_no_filters(self):
self._callFUT()

def test_single_filter(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import CellsColumnLimitFilter

versions = 1337
result = self._callFUT(versions=versions)
self.assertTrue(isinstance(result, RowFilter))
self.assertTrue(isinstance(result, CellsColumnLimitFilter))
# Relies on the fact that RowFilter instances can
# only have one value set.
self.assertEqual(result.cells_per_column_limit_filter, versions)
self.assertEqual(result.num_cells, versions)

def test_existing_filters(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import CellsColumnLimitFilter

filters = []
versions = 1337
result = self._callFUT(versions=versions, filters=filters)
# Make sure filters has grown.
self.assertEqual(filters, [result])

self.assertTrue(isinstance(result, RowFilter))
self.assertTrue(isinstance(result, CellsColumnLimitFilter))
# Relies on the fact that RowFilter instances can
# only have one value set.
self.assertEqual(result.cells_per_column_limit_filter, versions)
self.assertEqual(result.num_cells, versions)

def _column_helper(self, num_filters, versions=None, timestamp=None):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import ColumnQualifierRegexFilter
from gcloud_bigtable.row import FamilyNameRegexFilter
from gcloud_bigtable.row import RowFilterChain

col_fam = 'cf1'
Expand All @@ -240,46 +241,45 @@ def _column_helper(self, num_filters, versions=None, timestamp=None):
self.assertEqual(len(result.filters), num_filters)
fam_filter = result.filters[0]
qual_filter = result.filters[1]
self.assertTrue(isinstance(fam_filter, RowFilter))
self.assertTrue(isinstance(qual_filter, RowFilter))
self.assertTrue(isinstance(fam_filter, FamilyNameRegexFilter))
self.assertTrue(isinstance(qual_filter, ColumnQualifierRegexFilter))

# Relies on the fact that RowFilter instances can
# only have one value set.
self.assertEqual(fam_filter.family_name_regex_filter, col_fam)
self.assertEqual(qual_filter.column_qualifier_regex_filter, qual)
self.assertEqual(fam_filter.regex, col_fam)
self.assertEqual(qual_filter.regex, qual)

return result

def test_column_only(self):
self._column_helper(num_filters=2)

def test_with_versions(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import CellsColumnLimitFilter

versions = 11
result = self._column_helper(num_filters=3, versions=versions)

version_filter = result.filters[2]
self.assertTrue(isinstance(version_filter, RowFilter))
self.assertTrue(isinstance(version_filter, CellsColumnLimitFilter))
# Relies on the fact that RowFilter instances can
# only have one value set.
self.assertEqual(version_filter.cells_per_column_limit_filter,
versions)
self.assertEqual(version_filter.num_cells, versions)

def test_with_timestamp(self):
from gcloud_bigtable._non_upstream_helpers import (
_microseconds_to_timestamp)
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import TimestampRange
from gcloud_bigtable.row import TimestampRangeFilter

timestamp = 1441928298571
result = self._column_helper(num_filters=3, timestamp=timestamp)

range_filter = result.filters[2]
self.assertTrue(isinstance(range_filter, RowFilter))
self.assertTrue(isinstance(range_filter, TimestampRangeFilter))
# Relies on the fact that RowFilter instances can
# only have one value set.
time_range = range_filter.timestamp_range_filter
time_range = range_filter.range_
self.assertTrue(isinstance(time_range, TimestampRange))
self.assertEqual(time_range.start, None)
ts_dt = _microseconds_to_timestamp(1000 * timestamp)
Expand All @@ -304,16 +304,17 @@ def test_no_columns(self):
self._callFUT(columns)

def test_single_column(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import FamilyNameRegexFilter

col_fam = 'cf1'
columns = [col_fam]
result = self._callFUT(columns)
expected_result = RowFilter(family_name_regex_filter=col_fam)
expected_result = FamilyNameRegexFilter(col_fam)
self.assertEqual(result, expected_result)

def test_column_and_column_familieis(self):
from gcloud_bigtable.row import RowFilter
def test_column_and_column_families(self):
from gcloud_bigtable.row import ColumnQualifierRegexFilter
from gcloud_bigtable.row import FamilyNameRegexFilter
from gcloud_bigtable.row import RowFilterChain
from gcloud_bigtable.row import RowFilterUnion

Expand All @@ -328,15 +329,15 @@ def test_column_and_column_familieis(self):
filter1 = result.filters[0]
filter2 = result.filters[1]

self.assertTrue(isinstance(filter1, RowFilter))
self.assertEqual(filter1.family_name_regex_filter, col_fam1)
self.assertTrue(isinstance(filter1, FamilyNameRegexFilter))
self.assertEqual(filter1.regex, col_fam1)

self.assertTrue(isinstance(filter2, RowFilterChain))
filter2a, filter2b = filter2.filters
self.assertTrue(isinstance(filter2a, RowFilter))
self.assertEqual(filter2a.family_name_regex_filter, col_fam2)
self.assertTrue(isinstance(filter2b, RowFilter))
self.assertEqual(filter2b.column_qualifier_regex_filter, col_qual2)
self.assertTrue(isinstance(filter2a, FamilyNameRegexFilter))
self.assertEqual(filter2a.regex, col_fam2)
self.assertTrue(isinstance(filter2b, ColumnQualifierRegexFilter))
self.assertEqual(filter2b.regex, col_qual2)


class Test__row_keys_filter_helper(unittest2.TestCase):
Expand All @@ -351,27 +352,27 @@ def test_no_rows(self):
self._callFUT(row_keys)

def test_single_row(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import RowKeyRegexFilter

row_key = b'row-key'
row_keys = [row_key]
result = self._callFUT(row_keys)
expected_result = RowFilter(row_key_regex_filter=row_key)
expected_result = RowKeyRegexFilter(row_key)
self.assertEqual(result, expected_result)

def test_many_rows(self):
from gcloud_bigtable.row import RowFilter
from gcloud_bigtable.row import RowFilterUnion
from gcloud_bigtable.row import RowKeyRegexFilter

row_key1 = b'row-key1'
row_key2 = b'row-key2'
row_key3 = b'row-key3'
row_keys = [row_key1, row_key2, row_key3]
result = self._callFUT(row_keys)

filter1 = RowFilter(row_key_regex_filter=row_key1)
filter2 = RowFilter(row_key_regex_filter=row_key2)
filter3 = RowFilter(row_key_regex_filter=row_key3)
filter1 = RowKeyRegexFilter(row_key1)
filter2 = RowKeyRegexFilter(row_key2)
filter3 = RowKeyRegexFilter(row_key3)
expected_result = RowFilterUnion(filters=[filter1, filter2, filter3])
self.assertEqual(result, expected_result)

Expand Down

0 comments on commit d6164dc

Please sign in to comment.