From ae1c5a418df84b88c5806a9c28395fb0711c48ad Mon Sep 17 00:00:00 2001 From: zblanco Date: Fri, 1 Jul 2016 15:56:28 -0700 Subject: [PATCH] adding MapDatum --- scripts/generator.py | 46 +++++++++++++++++++++++++++++++++++++-- tests/res/bad_dist.json | 7 ++++++ tests/res/map_gen-01.json | 18 +++++++++++++++ tests/res/map_gen-02.json | 18 +++++++++++++++ tests/test_generator.py | 40 +++++++++++++++++++++++++++++++++- 5 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 tests/res/map_gen-01.json create mode 100644 tests/res/map_gen-02.json diff --git a/scripts/generator.py b/scripts/generator.py index cc75339..57247cc 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -44,28 +44,42 @@ def check_schema(self, schema): datum = IntDatum(field) elif 'decimal' == field_type: datum = DecimalDatum(field) + elif 'map' == field_type: + datum = MapDatum(field) else: raise RuntimeError('Field type was not found. Please change the field type or implement a new datum') datum.check() # Check to make sure the field has necessary attributes -# print("FIELD LENGTH: " + str(len(self.data_fields))) self.data_fields.append(datum) def generate(self): data = {} + maps = [] for datum in self.data_fields: + if datum.type == 'map': + maps.append(datum) + continue # put off mappers until end + val = datum.generate(self.rand) data[datum.field_name] = val + + for mapper in maps: + val = mapper.generate(self.rand, data) + data[mapper.field_name] = val + return data class AbstractDatum(object): def __init__(self, field): - self.field_name = field['fieldName'] self.field = field self.check_for_key('fieldName') + self.check_for_key('type') + self.field_name = field['fieldName'] + self.type = field['type'] + def check_for_key(self, key_name): if not key_name in self.field: @@ -183,8 +197,36 @@ def __init__(self, field): def generate(self, rand): return int(round(NumberDatum.generate(self, rand))) + +class MapDatum(AbstractDatum): + def __init__(self, field): + AbstractDatum.__init__(self, field) + self.field = field + self.check() + def check(self): + self.check_for_key('map') + self.check_for_key('mapFromField') + if not type(self.field['map']) == dict: + raise ValueError('Expected map key to be a dict object') + if not (type(self.field['mapFromField']) == str or type(self.field['mapFromField']) == unicode): + raise ValueError('Expected mapFromField key to be a dict object') + + self.maps = self.field['map'] + self.map_from = str(self.field['mapFromField']) + + def generate(self, rand, data): + + if not self.map_from in data: + raise ValueError('Could not get key: ' + self.map_from + ' in data') + + key = data[self.map_from] # Get data from the map_from field + + try: + return self.maps[key] # Get the mapped value from the given key + except KeyError as e: + return '' diff --git a/tests/res/bad_dist.json b/tests/res/bad_dist.json index 500d5df..5b5820e 100644 --- a/tests/res/bad_dist.json +++ b/tests/res/bad_dist.json @@ -1,4 +1,11 @@ [ + { + "fieldName": "field1", + "type": "int", + "distribution": "gauss", + "mu": 50, + "sigma": 10 + }, { "fieldName": "field1", "type": "int", diff --git a/tests/res/map_gen-01.json b/tests/res/map_gen-01.json new file mode 100644 index 0000000..12baf29 --- /dev/null +++ b/tests/res/map_gen-01.json @@ -0,0 +1,18 @@ +[ + { + "fieldName": "field1", + "type": "string", + "values": ["a", "b", "c", "d", "y"] + }, + { + "fieldName": "field2", + "type": "map", + "mapFromField": "field1", + "map": { + "a": "vowel", + "b": "consonant", + "c": "consonant", + "d": "consonant" + } + } +] \ No newline at end of file diff --git a/tests/res/map_gen-02.json b/tests/res/map_gen-02.json new file mode 100644 index 0000000..7988c51 --- /dev/null +++ b/tests/res/map_gen-02.json @@ -0,0 +1,18 @@ +[ + { + "fieldName": "field1", + "type": "string", + "values": ["a", "b", "c", "d", "y"] + }, + { + "fieldName": "field2", + "type": "map", + "mapFromField": "field3", + "map": { + "a": "vowel", + "b": "consonant", + "c": "consonant", + "d": "consonant" + } + } +] \ No newline at end of file diff --git a/tests/test_generator.py b/tests/test_generator.py index 8f2b341..4ba08cb 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -3,6 +3,7 @@ from mock import Mock from scripts.generator import DataGenerator from scripts.generator import AbstractDatum +from scripts.generator import MapDatum class TestDataGenerator(unittest.TestCase): @@ -136,7 +137,44 @@ def test_bad_distribution(self, mock1): assert 'Distribution can only be one of: uniform, exponential, gaussian, or gamma' in str(e) - + @mock.patch('scripts.config.get_conf_dir', return_value='res/') + def test_map_gen_good(self, mock1): + try: + gen = DataGenerator('map_gen-01.json') + try: + for i in range(50): + data = gen.generate() + if data['field1'] == 'y': + assert data['field2'] == '' + elif data['field1'] == 'a': + assert data['field2'] == 'vowel' + else: + assert data['field2'] == 'consonant' + except ValueError as e: + print e + pass + except ValueError as e: + print e + pass + + @mock.patch('scripts.config.get_conf_dir', return_value='res/') + def test_map_datum_bad(self, mock1): + missing_fields= {'fieldName': '1111'} + missing_fields['type'] = 'map' + missing_fields['map'] = '' + missing_fields['mapFromField'] = 1 + try: + md = MapDatum(missing_fields) + except ValueError as e: + print str(e) + assert 'Expected map key to be a dict object' in str(e) + finally: + missing_fields['map'] = {} + try: + md = MapDatum(missing_fields) + except ValueError as e: + print str(e) + assert 'Expected mapFromField key to be a dict object' in str(e)