Skip to content

Commit

Permalink
Adding scan()/put()/delete()/batch() methods to HappyBase table.
Browse files Browse the repository at this point in the history
They are just interfaces for now.
  • Loading branch information
dhermes committed Sep 4, 2015
1 parent 44adf05 commit d65fd59
Show file tree
Hide file tree
Showing 3 changed files with 244 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gcloud_bigtable/happybase/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def delete(self, row, columns=None, wal=None):
"""Delete data from a row in the table owned by this batch.
:type row: str
:param row: The row key where the mutation will be "put".
:param row: The row key where the delete will occur.
:type columns: list
:param columns: (Optional) Iterable containing column names (as
Expand Down
185 changes: 185 additions & 0 deletions gcloud_bigtable/happybase/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,188 @@ def cells(self, row, column, versions=None, timestamp=None,
temporarily until the method is implemented.
"""
raise NotImplementedError('Temporarily not implemented.')

def scan(self, row_start=None, row_stop=None, row_prefix=None,
         columns=None, filter=None, timestamp=None,
         include_timestamp=False, batch_size=1000, scan_batching=None,
         limit=None, sorted_columns=False):
    """Create a scanner for data in this table.

    The return value is a generator that yields the matching rows.

    If ``row_prefix`` is given, only rows whose keys match that prefix
    are yielded; in that case neither ``row_start`` nor ``row_stop``
    may be used.

    .. note::

        Both ``row_start`` and ``row_stop`` can be :data:`None` to
        denote the first and last key of the table respectively. When
        both are omitted a full table scan is performed, which usually
        causes severe performance problems.

    :type row_start: str
    :param row_start: (Optional) Row key at which the scanner starts
                      (inclusive). Defaults to the first key in the
                      table. If ``row_start`` itself is absent from the
                      table, scanning begins at the next key that
                      **is** present.

    :type row_stop: str
    :param row_stop: (Optional) Row key at which the scanner stops
                     (exclusive). Defaults to reading through the last
                     key. ``row_stop`` need not exist in the table.

    :type row_prefix: str
    :param row_prefix: (Optional) Prefix to match row keys.

    :type columns: list
    :param columns: (Optional) Iterable containing column names (as
                    strings). Each column name can be either

                    * an entire column family: ``fam`` or ``fam:``
                    * a single column: ``fam:col``

    :type filter: str
    :param filter: (Optional) An HBase filter string. See
                   http://hbase.apache.org/0.94/book/thrift.html
                   for more details.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If given, only cells written at or before
                      this timestamp are returned.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should
                              be included with the output.

    :type batch_size: int
    :param batch_size: Number of results to retrieve per batch. Defaults
                       to 1000. Should be kept large unless individual
                       row results are very large.

    :type scan_batching: bool
    :param scan_batching: Unused parameter. Provided for compatibility
                          with HappyBase, but irrelevant for Cloud
                          Bigtable since it does not have concepts of
                          batching or caching for scans.

    :type limit: int
    :param limit: (Optional) Maximum number of rows to return.

    :type sorted_columns: bool
    :param sorted_columns: Flag to indicate if the returned columns need
                           to be sorted.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             temporarily until the method is implemented.
    """
    raise NotImplementedError('Temporarily not implemented.')

def put(self, row, data, timestamp=None, wal=True):
    """Insert data into a row in this table.

    .. note::

        This method will send a request with a single "put" mutation.
        In many situations, :meth:`batch` is a more appropriate
        method to manipulate data since it helps combine many mutations
        into a single request.

    :type row: str
    :param row: The row key where the mutation will be "put".

    :type data: dict
    :param data: Dictionary containing the data to be inserted. The keys
                 are columns names (of the form ``fam:col``) and the values
                 are strings (bytes) to be stored in those columns.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch) that the mutation will be applied at.

    :type wal: bool
    :param wal: Unused parameter (to be passed to a created batch).
                Provided for compatibility with HappyBase, but irrelevant
                for Cloud Bigtable since it does not have a Write Ahead
                Log.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             temporarily until the method is implemented.
    """
    # NOTE: Stub only -- the actual mutation will be implemented later.
    raise NotImplementedError('Temporarily not implemented.')

def delete(self, row, columns=None, timestamp=None, wal=True):
    """Delete data from a row in this table.

    This method deletes the entire ``row`` if ``columns`` is not
    specified.

    .. note::

        This method will send a request with a single delete mutation.
        In many situations, :meth:`batch` is a more appropriate
        method to manipulate data since it helps combine many mutations
        into a single request.

    :type row: str
    :param row: The row key where the delete will occur.

    :type columns: list
    :param columns: (Optional) Iterable containing column names (as
                    strings). Each column name can be either

                    * an entire column family: ``fam`` or ``fam:``
                    * a single column: ``fam:col``

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch) that the mutation will be applied at.

    :type wal: bool
    :param wal: Unused parameter (to be passed to a created batch).
                Provided for compatibility with HappyBase, but irrelevant
                for Cloud Bigtable since it does not have a Write Ahead
                Log.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             temporarily until the method is implemented.
    """
    # NOTE: Stub only -- the actual mutation will be implemented later.
    raise NotImplementedError('Temporarily not implemented.')

def batch(self, timestamp=None, batch_size=None, transaction=False,
          wal=True):
    """Create a new batch operation for this table.

    This method returns a new :class:`.Batch` instance that can be used
    for mass data manipulation.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch) that all mutations will be applied at.

    :type batch_size: int
    :param batch_size: (Optional) The maximum number of mutations to allow
                       to accumulate before committing them.

    :type transaction: bool
    :param transaction: Flag indicating if the mutations should be sent
                        transactionally or not. If ``transaction=True`` and
                        an error occurs while a :class:`Batch` is active,
                        then none of the accumulated mutations will be
                        committed. If ``batch_size`` is set, the mutation
                        can't be transactional.

    :type wal: bool
    :param wal: Unused parameter (to be passed to the created batch).
                Provided for compatibility with HappyBase, but irrelevant
                for Cloud Bigtable since it does not have a Write Ahead
                Log.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             temporarily until the method is implemented.
    """
    # NOTE: Stub only -- Batch construction will be implemented later.
    raise NotImplementedError('Temporarily not implemented.')
58 changes: 58 additions & 0 deletions gcloud_bigtable/happybase/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,61 @@ def test_cells(self):
table.cells(row_key, column, versions=versions,
timestamp=timestamp,
include_timestamp=include_timestamp)

def test_scan(self):
    # scan() is currently a stub and must raise NotImplementedError,
    # even when every supported keyword argument is supplied.
    table = self._makeOne('table-name', object())

    scan_kwargs = {
        'row_start': 'row-start',
        'row_stop': 'row-stop',
        'row_prefix': 'row-prefix',
        'columns': ['fam:col1', 'fam:col2'],
        'filter': 'KeyOnlyFilter ()',
        'timestamp': None,
        'include_timestamp': True,
        'batch_size': 1337,
        'scan_batching': None,
        'limit': 123,
        'sorted_columns': True,
    }
    with self.assertRaises(NotImplementedError):
        table.scan(**scan_kwargs)

def test_put(self):
    # put() is currently a stub and must raise NotImplementedError.
    table = self._makeOne('table-name', object())

    with self.assertRaises(NotImplementedError):
        table.put('row-key', {'fam:col': 'foo'}, timestamp=None)

def test_delete(self):
    # delete() is currently a stub and must raise NotImplementedError.
    table = self._makeOne('table-name', object())

    with self.assertRaises(NotImplementedError):
        table.delete('row-key', columns=['fam:col1', 'fam:col2'],
                     timestamp=None)

def test_batch(self):
    # batch() is currently a stub and must raise NotImplementedError.
    table = self._makeOne('table-name', object())

    # transaction must be False when batch_size is non-null.
    with self.assertRaises(NotImplementedError):
        table.batch(timestamp=object(), batch_size=42, transaction=False)

0 comments on commit d65fd59

Please sign in to comment.