Skip to content
This repository has been archived by the owner on Jan 14, 2020. It is now read-only.

Commit

Permalink
first run of generator with string datum implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
ZacBlanco committed Jul 1, 2016
1 parent 442e409 commit 8501dd5
Show file tree
Hide file tree
Showing 8 changed files with 402 additions and 0 deletions.
38 changes: 38 additions & 0 deletions conf/generator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[
{
"fieldName": "string_data",
"type": "string",
"values": {
"a": 0.1,
"b": 0.2
},
"valuesA": ["a", "b", "c", "d"]
},
{
"fieldName": "int_data",
"type": "int",
"distribution": "uniform"
},
{
"fieldName": "float_data",
"type": "float",
"valuesA": ["a", "b", "c", "d"],
"distribution": "gaussian"
},
{
"fieldName": "boolean_data",
"type": "boolean",
"values": {
"a": 0.1,
"b": 0.2
}
},
{
"fieldName": "map_data",
"type": "map",
"values": {
"a": "c",
"b": "d"
}
}
]
133 changes: 133 additions & 0 deletions scripts/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import json, config, random
from abc import abstractmethod


class DataGenerator(object):
    """Produce rows of random values described by a JSON schema file.

    The schema file must contain a JSON list of field objects. Every field
    needs a ``fieldName`` and a ``type``; ``string`` is the only type this
    class knows how to build a datum for.
    """

    def __init__(self, schema, seed=''):
        """Load and validate ``schema`` and optionally seed the RNG.

        schema -- schema file name; it is appended to the string returned by
                  ``config.get_conf_dir()`` (presumably that string ends with
                  a path separator -- confirm against ``config``).
        seed   -- passed to ``random.seed`` unless it equals the default
                  ``''``, in which case the module-level generator is left
                  untouched.
        """
        self.data_fields = []
        self.schema = schema
        if not seed == '':
            random.seed(seed)
        self.check_schema(schema)

    def check_schema(self, schema):
        """Validate the schema file and register one datum per field.

        Returns nothing; an invalid schema is reported by raising.

        Raises:
            TypeError    -- the JSON root is not a list.
            KeyError     -- a field lacks ``fieldName`` or ``type``.
            RuntimeError -- a field has a type with no datum implementation.
        Errors raised while opening or parsing the file, and by the datum's
        own constructor/``check``, propagate unchanged.
        """
        path = config.get_conf_dir() + schema

        with open(path) as data_file:
            conf = json.load(data_file)

        if not isinstance(conf, list):
            raise TypeError('Root of JSON Schema is not a list')

        # Collect into a local list first so a schema that fails half-way
        # through does not leave partially registered fields behind.
        datums = []
        for field in conf:
            if 'fieldName' not in field:
                raise KeyError('Could not find \'fieldName\' in field of schema: ' + schema)

            if 'type' not in field:
                raise KeyError('Could not find \'type\' in field of schema: ' + schema)

            field_type = field['type']
            if 'string' == field_type:
                datum = StringDatum(field)
            else:
                raise RuntimeError('Field type was not found. Please change the field type or implement the datum correctly')

            datum.check()  # Make sure the field has the attributes its type needs
            datums.append(datum)

        self.data_fields.extend(datums)

    def generate(self):
        """Return a list holding one generated value per registered field.

        Values appear in the order the fields were registered. The ``random``
        module itself is handed to each datum as its source of randomness.
        """
        return [field.generate(random) for field in self.data_fields]


class AbstractDatum(object):
    """Base class for one schema field that can generate random values.

    Subclasses are expected to override ``check`` and ``generate``.
    NOTE: ``@abstractmethod`` is only enforced on classes whose metaclass is
    ``ABCMeta``; this class derives from plain ``object``, so it can still be
    instantiated and the overrides are enforced only by the
    ``NotImplementedError`` raised in the method bodies.
    """

    def __init__(self, field):
        """Store the schema ``field`` mapping and its name.

        Raises KeyError with a descriptive message when ``fieldName`` is
        missing from ``field``.
        """
        self.field = field
        # Placeholder so check_for_key can build its message even when the
        # name itself is the key that is missing.
        self.field_name = '<unnamed field>'
        self.check_for_key('fieldName')
        self.field_name = field['fieldName']

    def check_for_key(self, key_name):
        """Return True if ``key_name`` is in the field, else raise KeyError."""
        if key_name not in self.field:
            # str() keeps the message buildable when the name is not a string
            # (e.g. a number in the JSON schema).
            raise KeyError('Missing key: ' + key_name + ' in ' + str(self.field_name))
        return True

    # A method to determine whether or not the schema object has the necessary fields.
    @abstractmethod
    def check(self):
        """Validate that the field carries everything this datum type needs."""
        raise NotImplementedError('AbstractDatum: This method should have been implemented by a subclass')

    @abstractmethod
    def generate(self, rand):
        """Return one generated value, drawing randomness from ``rand``."""
        raise NotImplementedError('AbstractDatum: This method should have been implemented by a subclass')

class StringDatum(AbstractDatum):
    """Datum that picks a string from the field's ``values``.

    ``values`` may be a list (every entry equally likely) or a dict mapping
    each string to a weight. For a dict, ``self.values`` holds a list of
    ``{'key': ..., 'prob': ...}`` entries where ``prob`` is the running
    (cumulative) sum of the weights in iteration order.
    """

    # Class-level default kept for compatibility; every instance replaces it
    # with its own list in __init__.
    values = []

    def __init__(self, field):
        """Store the field and precompute cumulative weights for dict values.

        Raises KeyError when ``fieldName`` or ``values`` is missing.
        """
        AbstractDatum.__init__(self, field)
        # Fail with a descriptive KeyError instead of a bare one below.
        self.check_for_key('values')
        self.values = []  # cumulative table, ascending for non-negative weights
        choices = self.field['values']
        if isinstance(choices, dict):
            csum = 0
            for key in choices:
                csum += choices[key]
                self.values.append({'key': key, 'prob': csum})

    def check(self):
        """Validate the field's ``type`` and the container type of ``values``.

        Raises KeyError when ``type`` is missing and AssertionError when the
        type is not ``string`` or ``values`` is neither a list nor a dict.
        AssertionError is raised explicitly (rather than via ``assert``) so
        the validation still runs under ``python -O``.
        """
        self.check_for_key('type')
        if self.field['type'] != 'string':
            raise AssertionError('StringDatum requires field type \'string\'')
        if not isinstance(self.field['values'], (list, dict)):
            raise AssertionError('StringDatum \'values\' must be a list or a dict')

    def generate(self, rand):
        """Return one string drawn from the field's values using ``rand``.

        ``rand`` must provide ``randint`` and ``random`` (the ``random``
        module or a ``random.Random`` instance). List values are drawn
        uniformly; dict values are drawn in proportion to their weights.
        Returns None only when a dict of values is empty or ``values`` is
        neither a list nor a dict.
        """
        choices = self.field['values']
        if isinstance(choices, list):
            # randint (not rand.choice) keeps seeded sequences unchanged.
            return choices[rand.randint(0, len(choices) - 1)]
        if isinstance(choices, dict):
            if not self.values:
                return None
            # The last cumulative entry is the total weight. Scaling the draw
            # by it treats the weights as relative, so weights that do not
            # sum to exactly 1 can no longer fall off the end of the table.
            total = self.values[-1]['prob']
            threshold = rand.random() * total
            for entry in self.values:
                if threshold < entry['prob']:
                    return entry['key']
            # Reached only through float round-off or a zero total weight.
            return self.values[-1]['key']
        return None




























31 changes: 31 additions & 0 deletions tests/res/char_gen_bad-01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[
{
"type": "string",
"values": ["a", "b", "c", "d"]
},
{
"fieldName": "field2",
"type": "string",
"values": ["e", "f", "g", "h"]
},
{
"fieldName": "field3",
"type": "string",
"values": {
"i": 0.5,
"j": 0.2,
"k": 0.2,
"l": 0.1
}
},
{
"fieldName": "field4",
"type": "string",
"values": {
"m": 0.2,
"n": 0.7,
"o": 0.05,
"p": 0.05
}
}
]
31 changes: 31 additions & 0 deletions tests/res/char_gen_bad-02.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[
{
"fieldName": "field2",
"type": "string",
"values": ["a", "b", "c", "d"]
},
{
"fieldName": "field2",
"values": ["e", "f", "g", "h"]
},
{
"fieldName": "field3",
"type": "string",
"values": {
"i": 0.5,
"j": 0.2,
"k": 0.2,
"l": 0.1
}
},
{
"fieldName": "field4",
"type": "string",
"values": {
"m": 0.2,
"n": 0.7,
"o": 0.05,
"p": 0.05
}
}
]
34 changes: 34 additions & 0 deletions tests/res/char_gen_bad-03.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"schema" : [
{
"fieldName": "field2",
"type": "string",
"values": ["a", "b", "c", "d"]
},
{
"fieldName": "field2",
"type": "string",
"values": ["e", "f", "g", "h"]
},
{
"fieldName": "field3",
"type": "string",
"values": {
"i": 0.5,
"j": 0.2,
"k": 0.2,
"l": 0.1
}
},
{
"fieldName": "field4",
"type": "string",
"values": {
"m": 0.2,
"n": 0.7,
"o": 0.05,
"p": 0.05
}
}
]
}
7 changes: 7 additions & 0 deletions tests/res/char_gen_bad-04.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
{
"fieldName": "field2",
"type": "asdaf",
"values": ["a", "b", "c", "d"]
}
]
32 changes: 32 additions & 0 deletions tests/res/char_gen_good.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[
{
"fieldName": "field1",
"type": "string",
"values": ["a", "b", "c", "d"]
},
{
"fieldName": "field2",
"type": "string",
"values": ["e", "f", "g", "h"]
},
{
"fieldName": "field3",
"type": "string",
"values": {
"i": 0.5,
"j": 0.2,
"k": 0.2,
"l": 0.1
}
},
{
"fieldName": "field4",
"type": "string",
"values": {
"m": 0.2,
"n": 0.7,
"o": 0.05,
"p": 0.05
}
}
]
Loading

0 comments on commit 8501dd5

Please sign in to comment.