Skip to content

Commit

Permalink
Implementing HappyBase Connection.create_table().
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Sep 5, 2015
1 parent 7249de5 commit c783444
Show file tree
Hide file tree
Showing 2 changed files with 254 additions and 21 deletions.
106 changes: 102 additions & 4 deletions gcloud_bigtable/happybase/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
"""Google Cloud Bigtable HappyBase connection module."""


import datetime
import six

from gcloud_bigtable.client import Client
from gcloud_bigtable.column_family import GarbageCollectionRule
from gcloud_bigtable.column_family import GarbageCollectionRuleIntersection
from gcloud_bigtable.happybase.table import Table
from gcloud_bigtable.table import Table as _LowLevelTable

Expand Down Expand Up @@ -72,6 +75,56 @@ def _get_cluster(timeout=None):
return clusters[0]


def _parse_family_option(option):
    """Parses a column family option into a garbage collection rule.

    .. note::

        If ``option`` is not a dictionary, the type is not checked.

    :type option: :class:`dict`,
                  :class:`.GarbageCollectionRule`,
                  :class:`.GarbageCollectionRuleUnion`,
                  :class:`.GarbageCollectionRuleIntersection`
    :param option: A column family option passed as a dictionary value in
                   :meth:`Connection.create_table`. Within a dictionary, a
                   ``max_versions`` or ``time_to_live`` value of
                   :data:`None` is treated the same as the key being
                   absent.

    :rtype: :class:`.GarbageCollectionRule`,
            :class:`.GarbageCollectionRuleUnion`,
            :class:`.GarbageCollectionRuleIntersection`
    :returns: A garbage collection rule parsed from the input. A
              non-dictionary ``option`` is returned unchanged. A dictionary
              that sets neither option yields :data:`None`; one option
              yields a single rule; both options yield an intersection of
              the age rule followed by the versions rule.

    :raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
             dictionary but keys other than ``max_versions`` and
             ``time_to_live`` are used.
    """
    if not isinstance(option, dict):
        return option

    if not set(option.keys()) <= set(['max_versions', 'time_to_live']):
        raise ValueError('Cloud Bigtable only supports max_versions and '
                         'time_to_live column family settings',
                         'Received', option.keys())

    max_num_versions = option.get('max_versions')
    time_to_live = option.get('time_to_live')

    # Decide based on which values are actually set rather than on how many
    # keys are present, so an explicit ``None`` never produces an empty rule
    # (or a ``TypeError`` from ``timedelta(seconds=None)``).
    rules = []
    if time_to_live is not None:
        max_age = datetime.timedelta(seconds=time_to_live)
        rules.append(GarbageCollectionRule(max_age=max_age))
    if max_num_versions is not None:
        rules.append(
            GarbageCollectionRule(max_num_versions=max_num_versions))

    if not rules:
        return None
    if len(rules) == 1:
        return rules[0]
    # Age rule first, then versions rule: callers comparing against an
    # expected intersection rely on this ordering.
    return GarbageCollectionRuleIntersection(rules=rules)


class Connection(object):
"""Connection to Cloud Bigtable backend.
Expand Down Expand Up @@ -267,16 +320,61 @@ def tables(self):
def create_table(self, name, families):
    """Create a table.

    .. warning::

        The only column family options from HappyBase that are able to be
        used with Cloud Bigtable are ``max_versions`` and ``time_to_live``.

    .. note::

        This method is **not** atomic. The Cloud Bigtable API separates
        the creation of a table from the creation of column families. Thus
        this method needs to send 1 request for the table creation and 1
        request for each column family. If any of these fails, the method
        will fail, but the progress made towards completion cannot be
        rolled back.

    Values in ``families`` represent column family options. In HappyBase,
    these are dictionaries, corresponding to the ``ColumnDescriptor``
    structure in the Thrift API. The accepted keys are:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Dictionary with column family names as keys and column
                     family options as the values. A single trailing colon
                     on a family name is stripped. The options can be among

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`
                     * :class:`.GarbageCollectionRuleUnion`
                     * :class:`.GarbageCollectionRuleIntersection`

    :raises: :class:`ValueError <exceptions.ValueError>` if a dictionary
             option uses keys other than ``max_versions`` and
             ``time_to_live`` (raised while parsing, before any API
             request is made).
    """
    # Parse all keys before making any API requests, so that an invalid
    # option fails fast without leaving a half-created table behind.
    gc_rule_dict = {}
    for column_family_name, option in families.items():
        # A trailing colon on the family name is accepted and dropped.
        if column_family_name.endswith(':'):
            column_family_name = column_family_name[:-1]
        gc_rule_dict[column_family_name] = _parse_family_option(option)

    # Create table instance and then make API calls.
    name = self._table_name(name)
    low_level_table = _LowLevelTable(name, self._cluster)
    low_level_table.create()

    # One request per column family; not rolled back on failure.
    for column_family_name, gc_rule in gc_rule_dict.items():
        column_family = low_level_table.column_family(
            column_family_name, gc_rule=gc_rule)
        column_family.create()

def delete_table(self, name, disable=False):
"""Delete the specified table.
Expand Down
169 changes: 152 additions & 17 deletions gcloud_bigtable/happybase/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,74 @@ def test_with_failed_zones(self):
failed_zones=[failed_zone])


class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for ``connection._parse_family_option``."""

    def _callFUT(self, option):
        """Import the function under test lazily and invoke it."""
        from gcloud_bigtable.happybase import connection as MUT
        return MUT._parse_family_option(option)

    def test_dictionary_no_keys(self):
        # An empty options dictionary produces no rule at all.
        self.assertEqual(self._callFUT({}), None)

    def test_dictionary_bad_key(self):
        # Any key besides the two supported ones is rejected.
        with self.assertRaises(ValueError):
            self._callFUT({'badkey': None})

    def test_dictionary_versions_key(self):
        from gcloud_bigtable.column_family import GarbageCollectionRule

        num_versions = 42
        expected = GarbageCollectionRule(max_num_versions=num_versions)

        actual = self._callFUT({'max_versions': num_versions})
        self.assertEqual(actual, expected)

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud_bigtable.column_family import GarbageCollectionRule

        # One day, expressed in seconds on the way in and as a timedelta
        # on the way out.
        one_day_seconds = 24 * 60 * 60
        expected = GarbageCollectionRule(
            max_age=datetime.timedelta(days=1))

        actual = self._callFUT({'time_to_live': one_day_seconds})
        self.assertEqual(actual, expected)

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud_bigtable.column_family import GarbageCollectionRule
        from gcloud_bigtable.column_family import (
            GarbageCollectionRuleIntersection)

        num_versions = 42
        one_day_seconds = 24 * 60 * 60
        actual = self._callFUT({
            'max_versions': num_versions,
            'time_to_live': one_day_seconds,
        })

        # NOTE: The rule order (age first, then versions) must match the
        #       order used by the function under test.
        age_rule = GarbageCollectionRule(
            max_age=datetime.timedelta(days=1))
        versions_rule = GarbageCollectionRule(
            max_num_versions=num_versions)
        expected = GarbageCollectionRuleIntersection(
            rules=[age_rule, versions_rule])
        self.assertEqual(actual, expected)

    def test_non_dictionary(self):
        # Anything that is not a dict is passed through untouched.
        opaque = object()
        self.assertFalse(isinstance(opaque, dict))
        self.assertEqual(self._callFUT(opaque), opaque)


class TestConnection(unittest2.TestCase):

def _getTargetClass(self):
Expand Down Expand Up @@ -297,40 +365,72 @@ def test_tables_with_prefix(self):
self.assertEqual(result, [unprefixed_table_name1])

def test_create_table(self):
    """``create_table`` creates one table and one family per key."""
    import operator
    from gcloud_bigtable._testing import _MockCalled
    from gcloud_bigtable._testing import _Monkey
    from gcloud_bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)
    # Stand-in for the option parser; the assertions below expect every
    # column family to receive ``mock_gc_rule`` as its rule.
    mock_gc_rule = object()
    mock_parse_family_option = _MockCalled(mock_gc_rule)

    name = 'table-name'
    col_fam1 = 'cf1'
    col_fam_option1 = object()
    col_fam2 = 'cf2'
    col_fam_option2 = object()
    families = {
        col_fam1: col_fam_option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': col_fam_option2,
    }
    table_instances = []
    col_fam_instances = []
    with _Monkey(MUT, _LowLevelTable=_MockLowLevelTable,
                 _parse_family_option=mock_parse_family_option):
        # Point the mocks' registries at fresh lists owned by this test.
        _MockLowLevelTable._instances = table_instances
        _MockLowLevelColumnFamily._instances = col_fam_instances
        connection.create_table(name, families)

    # Just one table would have been created.
    table_instance, = table_instances
    self.assertEqual(table_instance.args, ('table-name', cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # Check if our mock was called twice, but we don't know the order.
    mock_called = mock_parse_family_option.called_args
    self.assertEqual(len(mock_called), 2)
    # Build a real list: on Python 3 ``map()`` returns an iterator, which
    # never compares equal to ``[1, 1]``.
    self.assertEqual([len(args) for args in mock_called], [1, 1])
    self.assertEqual(set(mock_called[0] + mock_called[1]),
                     set([col_fam_option1, col_fam_option2]))

    # We expect two column family instances created, but don't know the
    # order due to non-deterministic dict.items().
    col_fam_instances.sort(key=operator.attrgetter('column_family_id'))
    self.assertEqual(col_fam_instances[0].column_family_id, col_fam1)
    self.assertEqual(col_fam_instances[0].gc_rule, mock_gc_rule)
    self.assertEqual(col_fam_instances[0].create_calls, 1)
    self.assertEqual(col_fam_instances[1].column_family_id, col_fam2)
    self.assertEqual(col_fam_instances[1].gc_rule, mock_gc_rule)
    self.assertEqual(col_fam_instances[1].create_calls, 1)

def test_delete_table(self):
from gcloud_bigtable._testing import _Monkey
from gcloud_bigtable.happybase import connection as MUT

cluster = _Cluster() # Avoid implicit environ check.
connection = self._makeOne(autoconnect=False, cluster=cluster)

class MockLowLevelTable(object):

_instances = []

def __init__(self, *args, **kwargs):
self._instances.append(self)
self.args = args
self.kwargs = kwargs
self.delete_calls = 0

def delete(self):
self.delete_calls += 1

name = 'table-name'
with _Monkey(MUT, _LowLevelTable=MockLowLevelTable):
instances = []
with _Monkey(MUT, _LowLevelTable=_MockLowLevelTable):
_MockLowLevelTable._instances = instances
connection.delete_table(name)

# Just one table would have been created.
table_instance, = MockLowLevelTable._instances
table_instance, = instances
self.assertEqual(table_instance.args, ('table-name', cluster))
self.assertEqual(table_instance.kwargs, {})
self.assertEqual(table_instance.delete_calls, 1)
Expand Down Expand Up @@ -416,3 +516,38 @@ def copy(self):

def list_tables(self):
return self.list_tables_result


class _MockLowLevelTable(object):
    """Test double for the low-level table.

    Records its constructor arguments and counts ``create()`` /
    ``delete()`` calls so tests can assert on them afterwards.
    """

    # Registry of constructed instances. Tests rebind this attribute to a
    # fresh list before exercising the code under test.
    _instances = []

    def __init__(self, *args, **kwargs):
        # Looked up through ``self`` so a list rebound on the class is the
        # one that gets populated.
        self._instances.append(self)
        self.args, self.kwargs = args, kwargs
        self.delete_calls = self.create_calls = 0

    def delete(self):
        """Record one ``delete()`` call."""
        self.delete_calls = self.delete_calls + 1

    def create(self):
        """Record one ``create()`` call."""
        self.create_calls = self.create_calls + 1

    def column_family(self, column_family_id, gc_rule=None):
        """Return a new mock column family with the given ID and rule."""
        family = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
        return family


class _MockLowLevelColumnFamily(object):
    """Test double for a low-level column family.

    Stores the ID and garbage collection rule it was built with and counts
    ``create()`` calls.
    """

    # Registry of constructed instances. Tests rebind this attribute to a
    # fresh list before exercising the code under test.
    _instances = []

    def __init__(self, column_family_id, gc_rule=None):
        # Looked up through ``self`` so a list rebound on the class is the
        # one that gets populated.
        self._instances.append(self)
        self.column_family_id, self.gc_rule = column_family_id, gc_rule
        self.create_calls = 0

    def create(self):
        """Record one ``create()`` call."""
        self.create_calls = self.create_calls + 1

0 comments on commit c783444

Please sign in to comment.