Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add get_cell() and get_cells() methods to PartialRowData #4564

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
109 changes: 109 additions & 0 deletions bigtable/google/cloud/bigtable/row_data.py
Expand Up @@ -22,6 +22,16 @@
from google.cloud._helpers import _to_bytes


# Error-message templates, filled in with ``str.format`` before being
# raised by ``_get_cells_no_copy`` (the two ``KeyError`` messages) and by
# ``get_cell`` (the ``IndexError`` message).

# Single placeholder: the column family ID that was looked up.
_MISSING_COLUMN_FAMILY = (
'Column family {} is not among the cells stored in this row.')
# Placeholders, in order: the column, then the column family ID.
_MISSING_COLUMN = (
'Column {} is not among the cells stored in this row in the '
'column family {}.')
# Placeholders, in order: the requested index (rendered with ``repr``), the
# column, the column family ID, and the number of cells actually stored.
_MISSING_INDEX = (
'Index {!r} is not valid for the cells stored in this row for column {} '
'in the column family {}. There are {} such cells.')


class Cell(object):
"""Representation of a Google Cloud Bigtable Cell.

Expand Down Expand Up @@ -171,6 +181,105 @@ def row_key(self):
"""
return self._row_key

def _get_cells_no_copy(self, column_family_id, column):
    """Look up the cells stored for one column of this row.

    The list held in ``self._cells`` is returned as-is (no copy is made),
    so callers that expose the result publicly should copy it first.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.

    Returns:
        List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the
        specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    # First level of the mapping: column family ID -> columns.
    try:
        columns_in_family = self._cells[column_family_id]
    except KeyError:
        message = _MISSING_COLUMN_FAMILY.format(column_family_id)
        raise KeyError(message)

    # Second level of the mapping: column -> list of cells.
    try:
        return columns_in_family[column]
    except KeyError:
        message = _MISSING_COLUMN.format(column, column_family_id)
        raise KeyError(message)

def get_cell(self, column_family_id, column, index=0):
    """Return a copy of one cell from a column of this row.

    .. note::

        The returned cell is a deep copy, so mutating it does not
        affect the state held by this instance.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cell
            is located.
        index (Optional[int]): The offset within the series of values.
            Defaults to ``0``, i.e. the first cell.

    Returns:
        ~google.cloud.bigtable.row_data.Cell: A copy of the cell stored at
        ``index`` in the specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
        IndexError: If ``index`` cannot be found within the cells stored
            in this row for the given ``column_family_id``, ``column``
            pair. An ``index`` of an unsupported type is reported the
            same way.
    """
    series = self._get_cells_no_copy(column_family_id, column)

    try:
        found = series[index]
    except (TypeError, IndexError):
        # Both a bad position and a bad index type surface as IndexError.
        raise IndexError(
            _MISSING_INDEX.format(
                index, column, column_family_id, len(series)))

    return copy.deepcopy(found)

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.


def get_cells(self, column_family_id, column):
    """Return a copy of every cell stored in one column of this row.

    .. note::

        The returned list and the cells in it are deep copies, so
        mutating them does not affect the state held by this instance.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.

    Returns:
        List[~google.cloud.bigtable.row_data.Cell]: Copies of the cells
        stored in the specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    return copy.deepcopy(
        self._get_cells_no_copy(column_family_id, column))

This comment was marked as spam.



class InvalidReadRowsResponse(RuntimeError):
    """Exception raised due to invalid response data from back-end."""
Expand Down
149 changes: 142 additions & 7 deletions bigtable/tests/unit/test_row_data.py
Expand Up @@ -13,6 +13,10 @@
# limitations under the License.


import datetime
import json
import operator
import os
import unittest

import mock
Expand All @@ -30,7 +34,6 @@ def _make_one(self, *args, **kwargs):
return self._get_target_class()(*args, **kwargs)

def _from_pb_test_helper(self, labels=None):
import datetime
from google.cloud._helpers import _EPOCH
from google.cloud.bigtable._generated import (
data_pb2 as data_v2_pb2)
Expand Down Expand Up @@ -174,6 +177,138 @@ def test_to_dict(self):
}
self.assertEqual(result, expected_result)

def test_get_cell_defaults(self):
    """``get_cell`` without an index returns a copy of the only cell."""
    family = u'name1'
    column = b'col1'
    stored = _make_cell(b'')

    row = self._make_one(None)
    row._cells = {family: {column: [stored]}}

    fetched = row.get_cell(family, column)
    # Equal in value, but a distinct object from the one stored.
    self.assertIsNot(fetched, stored)
    self.assertEqual(fetched, stored)

def test_get_cell_explicit_index(self):
    """``get_cell`` with ``index=1`` returns a copy of the second cell."""
    family = u'name1'
    column = b'col1'
    first = _make_cell(b'1')
    second = _make_cell(b'2')

    row = self._make_one(None)
    row._cells = {family: {column: [first, second]}}

    fetched = row.get_cell(family, column, index=1)
    # Equal in value, but a distinct object from the one stored.
    self.assertIsNot(fetched, second)
    self.assertEqual(fetched, second)

def test_get_cell_bad_family(self):
    """``get_cell`` raises ``KeyError`` for an unknown column family."""
    from google.cloud.bigtable import row_data

    family = u'name1'
    row = self._make_one(None)
    # Precondition: the new row holds no cells at all.
    self.assertEqual(row._cells, {})

    with self.assertRaises(KeyError) as caught:
        row.get_cell(family, None)

    message = row_data._MISSING_COLUMN_FAMILY.format(family)
    self.assertEqual(caught.exception.args, (message,))

def test_get_cell_bad_column(self):
    """``get_cell`` raises ``KeyError`` for an unknown column."""
    from google.cloud.bigtable import row_data

    family = u'name1'
    column = b'col1'

    row = self._make_one(None)
    # The family exists but has no columns.
    row._cells = {family: {}}

    with self.assertRaises(KeyError) as caught:
        row.get_cell(family, column)

    message = row_data._MISSING_COLUMN.format(column, family)
    self.assertEqual(caught.exception.args, (message,))

def test_get_cell_bad_index(self):
    """``get_cell`` raises ``IndexError`` for unusable index values."""
    from google.cloud.bigtable import row_data

    family = u'name1'
    column = b'col1'

    row = self._make_one(None)
    # The column exists but stores zero cells.
    row._cells = {family: {column: []}}

    # One out-of-range position and one value of the wrong type.
    for bad_index in (5, 'not-int'):
        with self.assertRaises(IndexError) as caught:
            row.get_cell(family, column, index=bad_index)

        message = row_data._MISSING_INDEX.format(
            bad_index, column, family, 0)
        self.assertEqual(caught.exception.args, (message,))

def test_get_cells(self):
    """``get_cells`` returns a deep copy of the stored cell list."""
    family = u'name1'
    column = b'col1'
    stored = _make_cell(b'hi-mom')

    row = self._make_one(None)
    series = [stored]
    row._cells = {family: {column: series}}

    fetched = row.get_cells(family, column)
    # The list itself is a copy ...
    self.assertIsNot(fetched, series)
    self.assertEqual(fetched, series)
    # ... and so is the cell inside it.
    self.assertIsNot(fetched[0], stored)
    self.assertEqual(fetched[0], stored)

def test_get_cells_bad_family(self):
    """``get_cells`` raises ``KeyError`` for an unknown column family."""
    from google.cloud.bigtable import row_data

    family = u'name1'
    row = self._make_one(None)
    # Precondition: the new row holds no cells at all.
    self.assertEqual(row._cells, {})

    with self.assertRaises(KeyError) as caught:
        row.get_cells(family, None)

    message = row_data._MISSING_COLUMN_FAMILY.format(family)
    self.assertEqual(caught.exception.args, (message,))

def test_get_cells_bad_column(self):
    """``get_cells`` raises ``KeyError`` for an unknown column."""
    from google.cloud.bigtable import row_data

    family = u'name1'
    column = b'col1'

    row = self._make_one(None)
    # The family exists but has no columns.
    row._cells = {family: {}}

    with self.assertRaises(KeyError) as caught:
        row.get_cells(family, column)

    message = row_data._MISSING_COLUMN.format(column, family)
    self.assertEqual(caught.exception.args, (message,))

def test_cells_property(self):
partial_row_data = self._make_one(None)
cells = {1: 2}
Expand Down Expand Up @@ -433,8 +568,6 @@ def _make_one(self, *args, **kwargs):
return self._get_target_class()(*args, **kwargs)

def _load_json_test(self, test_name):
import os

if self.__class__._json_tests is None:
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'read-rows-acceptance-test.json')
Expand Down Expand Up @@ -500,8 +633,6 @@ def test_invalid_commit_with_chunk(self):
# JSON Error cases: incomplete final row

def _sort_flattend_cells(self, flattened):
import operator

key_func = operator.itemgetter('rk', 'fm', 'qual')
return sorted(flattened, key=key_func)

Expand Down Expand Up @@ -717,8 +848,6 @@ def _parse_readrows_acceptance_tests(filename):
test/resources/com/google/cloud/bigtable/grpc/scanner/v2/
read-rows-acceptance-test.json
"""
import json

with open(filename) as json_file:
test_json = json.load(json_file)

Expand All @@ -727,3 +856,9 @@ def _parse_readrows_acceptance_tests(filename):
chunks = _generate_cell_chunks(test['chunks'])
results = test['results']
yield name, chunks, results


def _make_cell(value):
    """Build a ``row_data.Cell`` holding ``value``, stamped with "now".

    The timestamp is whatever ``datetime.datetime.utcnow()`` returns at
    call time.
    """
    from google.cloud.bigtable import row_data

    cell_class = row_data.Cell
    timestamp = datetime.datetime.utcnow()
    return cell_class(value, timestamp)