Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigTable: provide better access to cell values #4908

Merged
merged 10 commits into from
Mar 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
106 changes: 106 additions & 0 deletions bigtable/google/cloud/bigtable/row_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@
from google.cloud._helpers import _datetime_from_microseconds
from google.cloud._helpers import _to_bytes

_MISSING_COLUMN_FAMILY = (
'Column family {} is not among the cells stored in this row.')
_MISSING_COLUMN = (
'Column {} is not among the cells stored in this row in the '
'column family {}.')
_MISSING_INDEX = (
'Index {!r} is not valid for the cells stored in this row for column {} '
'in the column family {}. There are {} such cells.')


class Cell(object):
"""Representation of a Google Cloud Bigtable Cell.
Expand Down Expand Up @@ -175,6 +184,103 @@ def row_key(self):
"""
return self._row_key

def find_cells(self, column_family_id, column):
    """Look up the list of cells stored under a column family and column.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.

    Returns:
        List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the
        specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    # First level of the mapping: column family ID -> columns.
    try:
        columns_in_family = self._cells[column_family_id]
    except KeyError:
        message = _MISSING_COLUMN_FAMILY.format(column_family_id)
        raise KeyError(message)

    # Second level of the mapping: column -> list of cells.
    try:
        return columns_in_family[column]
    except KeyError:
        message = _MISSING_COLUMN.format(column, column_family_id)
        raise KeyError(message)

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.


def cell_value(self, column_family_id, column, index=0):
    """Return the value of one cell stored under a column.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cell
            is located.
        index (Optional[int]): The offset within the series of values. If
            not specified, will return the first cell.

    Returns:
        The ``value`` attribute of the cell found at ``index`` in the
        specified column.

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
        IndexError: If ``index`` cannot be found within the cells stored
            in this row for the given ``column_family_id``, ``column``
            pair.
    """
    candidates = self.find_cells(column_family_id, column)

    try:
        selected = candidates[index]
    except (TypeError, IndexError):
        # An index of the wrong type (e.g. ``None``) is reported the same
        # way as an out-of-range index: as an ``IndexError``.
        raise IndexError(_MISSING_INDEX.format(
            index, column, column_family_id, len(candidates)))
    else:
        return selected.value

def cell_values(self, column_family_id, column, max_count=None):
    """Iterate over the values and timestamps of the cells in a column.

    This is a generator function, so the cell lookup (and any ``KeyError``
    it raises) happens when iteration starts, not when this method is
    called.

    Args:
        column_family_id (str): The ID of the column family. Must be of the
            form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
        column (bytes): The column within the column family where the cells
            are located.
        max_count (int): The maximum number of cells to use. If ``None``,
            every cell in the column is used.

    Returns:
        A generator which provides: cell.value, cell.timestamp_micros
        for each cell in the list of cells

    Raises:
        KeyError: If ``column_family_id`` is not among the cells stored
            in this row.
        KeyError: If ``column`` is not among the cells stored in this row
            for the given ``column_family_id``.
    """
    found = self.find_cells(column_family_id, column)
    limit = len(found) if max_count is None else max_count

    for position, cell in enumerate(found):
        # Stop as soon as ``limit`` cells have been produced.
        if position == limit:
            return

        yield cell.value, cell.timestamp_micros


class InvalidReadRowsResponse(RuntimeError):
    """Exception raised due to invalid response data from back-end."""
Expand Down
99 changes: 97 additions & 2 deletions bigtable/tests/unit/test_row_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@


import unittest
import timeit
import csv


class TestCell(unittest.TestCase):
Expand Down Expand Up @@ -171,6 +169,97 @@ def test_to_dict(self):
}
self.assertEqual(result, expected_result)

def test_cell_value(self):
    """``cell_value`` with no index returns the first cell's value."""
    family = u'name1'
    column = b'col1'
    stored_cell = _make_cell(b'value-bytes')

    row = self._make_one(None)
    row._cells = {family: {column: [stored_cell]}}

    self.assertEqual(row.cell_value(family, column), stored_cell.value)

def test_cell_value_invalid_index(self):
    """``cell_value`` raises ``IndexError`` for any unusable index.

    Covers both ways an index can be invalid: a value of the wrong type
    (``None``) and an integer beyond the end of the stored cells.
    """
    family_name = u'name1'
    qualifier = b'col1'
    cell = _make_cell(b'')

    partial_row_data = self._make_one(None)
    partial_row_data._cells = {
        family_name: {
            qualifier: [cell],
        },
    }

    # Wrong type: the underlying TypeError is reported as IndexError.
    with self.assertRaises(IndexError):
        partial_row_data.cell_value(family_name, qualifier, index=None)

    # Out of range: only one cell is stored, so index 1 does not exist.
    with self.assertRaises(IndexError):
        partial_row_data.cell_value(family_name, qualifier, index=1)

def test_cell_value_invalid_column_family_key(self):
    """``cell_value`` raises ``KeyError`` for an unknown column family."""
    row = self._make_one(None)

    # No cells were assigned, so the family lookup itself must fail.
    with self.assertRaises(KeyError):
        row.cell_value(u'name1', b'col1')

def test_cell_value_invalid_column_key(self):
    """``cell_value`` raises ``KeyError`` for an unknown column."""
    family = u'name1'
    column = b'col1'

    row = self._make_one(None)
    # The family exists but holds no columns at all.
    row._cells = {family: {}}

    with self.assertRaises(KeyError):
        row.cell_value(family, column)

def test_cell_values(self):
    """``cell_values`` yields the stored cell's value first."""
    family = u'name1'
    column = b'col1'
    stored_cell = _make_cell(b'value-bytes')

    row = self._make_one(None)
    row._cells = {family: {column: [stored_cell]}}

    # Each yielded item is a (value, timestamp_micros) pair; keep the value.
    collected = [
        value for value, _ in row.cell_values(family, column)
    ]

    self.assertEqual(collected[0], stored_cell.value)

def test_cell_values_with_max_count(self):
    """``cell_values`` stops after ``max_count`` cells."""
    family = u'name1'
    column = b'col1'
    first_cell = _make_cell(b'value-bytes-1')
    second_cell = _make_cell(b'value-bytes-2')

    row = self._make_one(None)
    row._cells = {family: {column: [first_cell, second_cell]}}

    # Each yielded item is a (value, timestamp_micros) pair; keep the value.
    collected = [
        value
        for value, _ in row.cell_values(family, column, max_count=1)
    ]

    self.assertEqual(1, len(collected))
    self.assertEqual(collected[0], first_cell.value)

def test_cells_property(self):
partial_row_data = self._make_one(None)
cells = {1: 2}
Expand Down Expand Up @@ -732,3 +821,9 @@ def _ReadRowsResponseCellChunkPB(*args, **kw):
message.family_name.value = family_name
message.qualifier.value = qualifier
return message


def _make_cell(value):
    """Build a ``Cell`` for ``value`` using ``TestCell.timestamp_micros``."""
    # Imported inside the function, as in the original helper.
    from google.cloud.bigtable.row_data import Cell

    return Cell(value, TestCell.timestamp_micros)