Skip to content

Commit

Permalink
Implementing HappyBase Table.families().
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Sep 7, 2015
1 parent 81cd285 commit 572190c
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 45 deletions.
98 changes: 68 additions & 30 deletions gcloud_bigtable/happybase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@

"""Google Cloud Bigtable HappyBase package.
Intended to emulate the HappyBase library using Google Cloud Bigtable
as the backing store.
This package is intended to emulate the HappyBase library using
Google Cloud Bigtable as the backing store.
Differences in Public API
-------------------------
Some concepts from HBase/Thrift do not map directly to the Cloud
Bigtable API. As a result, the following instance methods and functions
Expand All @@ -34,9 +37,7 @@
* :meth:`Table.counter_set() \
<gcloud_bigtable.happybase.table.Table.counter_set>` - method can't
be atomic, so we disable it
This also means that calling :meth:`.Connection.delete_table` with
``disable=True`` can't be supported.
* The ``__version__`` value for the HappyBase package is :data:`None`
In addition, many of the constants from :mod:`.connection` are specific
to HBase and are defined as :data:`None` in our module:
Expand All @@ -53,33 +54,70 @@
Two of these, ``DEFAULT_HOST`` and ``DEFAULT_PORT``, are even imported in
the main ``happybase`` package.
The :class:`.Connection` constructor **disables** the use of several arguments
and will raise a :class:`ValueError <exceptions.ValueError>` if any of them are
passed in as keyword arguments. The arguments are:
- ``host``
- ``port``
- ``compat``
- ``transport``
- ``protocol``
In order to make :class:`.Connection` compatible with Cloud Bigtable, we
add a ``client`` keyword argument to allow users to pass in their own
clients (which they can construct beforehand).
Any uses of the ``wal`` (Write Ahead Log) argument will result in a
:class:`ValueError <exceptions.ValueError>` as well. This includes
uses in:
* :class:`.Batch` constructor
* :meth:`.Batch.put`
* :meth:`.Batch.delete`
* :meth:`Table.put() <gcloud_bigtable.happybase.table.Table.put>`
* :meth:`Table.delete() <gcloud_bigtable.happybase.table.Table.delete>`
* :meth:`Table.batch() <gcloud_bigtable.happybase.table.Table.batch>` factory
Finally, we do not provide the ``util`` module. Though it is public in the
HappyBase library, it provides no core functionality.
API Behavior Changes
--------------------
* Since there is no concept of an enabled / disabled table, calling
:meth:`.Connection.delete_table` with ``disable=True`` can't be supported.
Using that argument will result in a
:class:`ValueError <exceptions.ValueError>`.
* The :class:`.Connection` constructor **disables** the use of several
arguments and will throw a :class:`ValueError <exceptions.ValueError>` if
any of them are passed in as keyword arguments. The arguments are:
* ``host``
* ``port``
* ``compat``
* ``transport``
* ``protocol``
* In order to make :class:`.Connection` compatible with Cloud Bigtable, we
add a ``cluster`` keyword argument to allow users to pass in their own
:class:`.Cluster` (which they can construct beforehand).
For example:
.. code:: python
from gcloud_bigtable.client import Client
client = Client(project_id=PROJECT_ID, admin=True)
cluster = client.cluster(zone, cluster_id)
cluster.reload()
from gcloud_bigtable.happybase import Connection
connection = Connection(cluster=cluster)
* Any uses of the ``wal`` (Write Ahead Log) argument will result in a
:class:`ValueError <exceptions.ValueError>` as well. This includes
uses in:
* :class:`.Batch` constructor
* :meth:`.Batch.put`
* :meth:`.Batch.delete`
* :meth:`Table.put() <gcloud_bigtable.happybase.table.Table.put>`
* :meth:`Table.delete() <gcloud_bigtable.happybase.table.Table.delete>`
* :meth:`Table.batch() <gcloud_bigtable.happybase.table.Table.batch>` factory
* When calling :meth:`.Connection.create_table`, the majority of HBase column
  family options cannot be used. Among them:
* ``max_versions``
* ``compression``
* ``in_memory``
* ``bloom_filter_type``
* ``bloom_filter_vector_size``
* ``bloom_filter_nb_hashes``
* ``block_cache_enabled``
* ``time_to_live``
Only ``max_versions`` and ``time_to_live`` are available in Cloud Bigtable
(as ``max_num_versions`` and ``max_age``).
In addition to using a dictionary for specifying column family options,
we also accept instances of :class:`.GarbageCollectionRule`,
:class:`.GarbageCollectionRuleUnion` or
:class:`.GarbageCollectionRuleIntersection`.
"""

from gcloud_bigtable.happybase.batch import Batch
Expand Down
3 changes: 3 additions & 0 deletions gcloud_bigtable/happybase/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=None,
if cluster is None:
self._cluster = _get_cluster(timeout=timeout)
else:
if timeout is not None:
raise ValueError('Timeout cannot be used when an existing '
'cluster is passed')
self._cluster = cluster.copy()

if autoconnect:
Expand Down
64 changes: 61 additions & 3 deletions gcloud_bigtable/happybase/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
"""Google Cloud Bigtable HappyBase table module."""


from gcloud_bigtable.column_family import GarbageCollectionRule
from gcloud_bigtable.column_family import GarbageCollectionRuleIntersection
from gcloud_bigtable.table import Table as _LowLevelTable


def make_row(cell_map, include_timestamp):
"""Make a row dict for a Thrift cell mapping.
Expand Down Expand Up @@ -64,6 +69,55 @@ def make_ordered_row(sorted_columns, include_timestamp):
sorted_columns, include_timestamp)


def _gc_rule_to_dict(gc_rule):
    """Converts a garbage collection rule to a dictionary if possible.

    This is in place to support dictionary values as was done
    in HappyBase, which has somewhat different garbage collection rule
    settings for column families.

    Only does this if the garbage collection rule is:

    * Simple :class:`.GarbageCollectionRule` with ``max_age``
    * Simple :class:`.GarbageCollectionRule` with ``max_num_versions``
    * Composite :class:`.GarbageCollectionRuleIntersection` with
      two rules, one each for ``max_age`` and ``max_num_versions``

    Otherwise, just returns the input without change.

    :type gc_rule: :class:`.GarbageCollectionRule`,
                   :class:`.GarbageCollectionRuleIntersection`, or
                   :class:`.GarbageCollectionRuleUnion`
    :param gc_rule: A garbage collection rule to convert to a dictionary
                    (if possible).

    :rtype: dict,
            :class:`.GarbageCollectionRuleIntersection`, or
            :class:`.GarbageCollectionRuleUnion`
    :returns: The converted garbage collection rule.
    """
    result = gc_rule
    if isinstance(gc_rule, GarbageCollectionRule):
        result = {}
        # Copy whichever settings are present; a rule with both set (or
        # neither) still converts cleanly here, and the intersection
        # branch below guards against the multi-key case.
        if gc_rule.max_num_versions is not None:
            result['max_versions'] = gc_rule.max_num_versions
        if gc_rule.max_age is not None:
            result['time_to_live'] = gc_rule.max_age.total_seconds()
    elif isinstance(gc_rule, GarbageCollectionRuleIntersection):
        if len(gc_rule.rules) == 2:
            rule1, rule2 = gc_rule.rules
            if (isinstance(rule1, GarbageCollectionRule) and
                    isinstance(rule2, GarbageCollectionRule)):
                rule1 = _gc_rule_to_dict(rule1)
                rule2 = _gc_rule_to_dict(rule2)
                # Only merge when each sub-rule produced exactly one
                # setting. The original unconditional single-key unpack
                # (``key1, = rule1.keys()``) raised ValueError when a
                # sub-rule had zero or two settings set; in those cases
                # we now leave the intersection unconverted.
                if len(rule1) == 1 and len(rule2) == 1:
                    key1, = rule1.keys()
                    key2, = rule2.keys()
                    if key1 != key2:
                        result = {key1: rule1[key1], key2: rule2[key2]}
    return result


class Table(object):
"""Representation of Cloud Bigtable table.
Expand All @@ -86,10 +140,14 @@ def __repr__(self):
def families(self):
"""Retrieve the column families for this table.
:raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
temporarily until the method is implemented.
:rtype: dict
:returns: Mapping from column family name to garbage collection rule
for a column family.
"""
raise NotImplementedError('Temporarily not implemented.')
table = _LowLevelTable(self.name, self.connection._cluster)
column_family_map = table.list_column_families()
return {col_fam: _gc_rule_to_dict(col_fam_obj.gc_rule)
for col_fam, col_fam_obj in column_family_map.items()}

def regions(self):
"""Retrieve the regions for this table.
Expand Down
11 changes: 7 additions & 4 deletions gcloud_bigtable/happybase/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,13 @@ def test_constructor_missing_cluster(self):
mock_get_cluster.check_called(self, [()], [{'timeout': timeout}])

def test_constructor_explicit(self):
timeout = object()
table_prefix = 'table-prefix'
table_prefix_separator = 'sep'
cluster_copy = _Cluster()
cluster = _Cluster(copies=[cluster_copy])

connection = self._makeOne(
autoconnect=False, timeout=timeout,
autoconnect=False,
table_prefix=table_prefix,
table_prefix_separator=table_prefix_separator,
cluster=cluster)
Expand Down Expand Up @@ -232,6 +231,10 @@ def test_constructor_with_protocol(self):
with self.assertRaises(ValueError):
self._makeOne(protocol=object())

def test_constructor_with_timeout_and_cluster(self):
with self.assertRaises(ValueError):
self._makeOne(cluster=object(), timeout=object())

def test_open(self):
cluster = _Cluster() # Avoid implicit environ check.
connection = self._makeOne(autoconnect=False, cluster=cluster)
Expand Down Expand Up @@ -395,7 +398,7 @@ def test_create_table(self):

# Just one table would have been created.
table_instance, = table_instances
self.assertEqual(table_instance.args, ('table-name', cluster))
self.assertEqual(table_instance.args, (name, cluster))
self.assertEqual(table_instance.kwargs, {})
self.assertEqual(table_instance.create_calls, 1)

Expand Down Expand Up @@ -431,7 +434,7 @@ def test_delete_table(self):

# Just one table would have been created.
table_instance, = instances
self.assertEqual(table_instance.args, ('table-name', cluster))
self.assertEqual(table_instance.args, (name, cluster))
self.assertEqual(table_instance.kwargs, {})
self.assertEqual(table_instance.delete_calls, 1)

Expand Down
8 changes: 3 additions & 5 deletions gcloud_bigtable/happybase/test_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,12 @@ def test_constructor_defaults(self):
self.assertTrue(connection._cluster is cluster_copy)

def test_constructor_passes_kwargs(self):
timeout = 1000
table_prefix = 'foo'
table_prefix_separator = '<>'
cluster = _Cluster() # Avoid implicit environ check.

size = 1
pool = self._makeOne(size, timeout=timeout, table_prefix=table_prefix,
pool = self._makeOne(size, table_prefix=table_prefix,
table_prefix_separator=table_prefix_separator,
cluster=cluster)

Expand Down Expand Up @@ -110,18 +109,17 @@ def test_constructor_infers_cluster(self):
all_copies = [cluster_copy] * size
cluster = _Cluster(copies=all_copies)

timeout = object()
mock_get_cluster = _MockCalled(cluster)
with _Monkey(MUT, _get_cluster=mock_get_cluster):
pool = self._makeOne(size, timeout=timeout)
pool = self._makeOne(size)

for connection in pool._queue.queue:
self.assertTrue(isinstance(connection, Connection))
# We know that the Connection() constructor will
# call cluster.copy().
self.assertTrue(connection._cluster is cluster_copy)

mock_get_cluster.check_called(self, [()], [{'timeout': timeout}])
mock_get_cluster.check_called(self, [()], [{'timeout': None}])

def test_constructor_non_integer_size(self):
size = None
Expand Down

0 comments on commit 572190c

Please sign in to comment.