Skip to content

Commit

Permalink
Merge pull request #142 from dimagi/cz/data-types
Browse files Browse the repository at this point in the history
Add explicit data type support
  • Loading branch information
czue committed Jun 3, 2020
2 parents 9995d75 + 98d5faf commit d377788
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 97 deletions.
3 changes: 2 additions & 1 deletion commcare_export/cli.py
Expand Up @@ -15,6 +15,7 @@
from commcare_export import excel_query
from commcare_export import writers
from commcare_export.checkpoint import CheckpointManagerProvider
from commcare_export.misc import default_to_json
from commcare_export.utils import get_checkpoint_manager
from commcare_export.commcare_hq_client import CommCareHqClient, LATEST_KNOWN_VERSION
from commcare_export.commcare_minilinq import CommCareHqEnv
Expand Down Expand Up @@ -328,7 +329,7 @@ def main_with_args(args):
exit_status = evaluate_query(env, query)

if args.output_format == 'json':
print(json.dumps(list(writer.tables.values()), indent=4, default=RepeatableIterator.to_jvalue))
print(json.dumps(list(writer.tables.values()), indent=4, default=default_to_json))

return exit_status

Expand Down
24 changes: 24 additions & 0 deletions commcare_export/data_types.py
@@ -0,0 +1,24 @@
import sqlalchemy

# Names of the explicit data types a column can be given.
DATA_TYPE_TEXT = 'text'
DATA_TYPE_BOOLEAN = 'boolean'
DATA_TYPE_DATE = 'date'
DATA_TYPE_DATETIME = 'datetime'
DATA_TYPE_INTEGER = 'integer'

# SQLAlchemy column type instance for each data type that has one.
# NOTE(review): DATA_TYPE_TEXT has no entry here, so get_sqlalchemy_type()
# treats 'text' as unknown -- confirm that callers handle text separately.
DATA_TYPES_TO_SQLALCHEMY_TYPES = {
    DATA_TYPE_BOOLEAN: sqlalchemy.Boolean(),
    DATA_TYPE_DATETIME: sqlalchemy.DateTime(),
    DATA_TYPE_DATE: sqlalchemy.Date(),
    DATA_TYPE_INTEGER: sqlalchemy.Integer(),
}

class UnknownDataType(Exception):
    """Raised when a data type name has no SQLAlchemy type mapped to it."""


def get_sqlalchemy_type(data_type):
    """Return the SQLAlchemy type registered for ``data_type``.

    :param data_type: a data type name, looked up as a key of
        DATA_TYPES_TO_SQLALCHEMY_TYPES.
    :returns: the SQLAlchemy type instance stored in the mapping (the same
        shared instance on every call).
    :raises UnknownDataType: if ``data_type`` is not a key of the mapping;
        the offending name is passed as the exception argument. This includes
        'text', which has no entry in the mapping.
    """
    if data_type in DATA_TYPES_TO_SQLALCHEMY_TYPES:
        return DATA_TYPES_TO_SQLALCHEMY_TYPES[data_type]
    raise UnknownDataType(data_type)
2 changes: 1 addition & 1 deletion commcare_export/env.py
Expand Up @@ -451,7 +451,7 @@ def lookup(self, key): raise NotFound()

def emit_table(self, table_spec):
self.emitted = True
table_spec['rows'] = self._unwrap_row_vals(table_spec['rows'])
table_spec.rows = self._unwrap_row_vals(table_spec.rows)
self.writer.write_table(table_spec)

def has_emitted_tables(self):
Expand Down
19 changes: 14 additions & 5 deletions commcare_export/excel_query.py
Expand Up @@ -306,13 +306,18 @@ def parse_sheet(worksheet, mappings=None, column_enforcer=None):
else:
output_table_name = worksheet.title
output_headings = get_column_by_name(worksheet, 'field')
output_types = get_column_by_name(worksheet, 'data type') or []
output_fields = compile_fields(worksheet, mappings=mappings)

if not output_fields:
headings = []
data_types = []
source = source_expr
body = None
else:
# note: if we want to add data types to the columns added by the column_enforcer
# this will have to conditionally move into the if/else below
data_types = [Literal(data_type.value) for data_type in output_types]
if column_enforcer is not None:
(headings, body) = require_column_in_sheet(worksheet.title,
data_source,
Expand All @@ -333,12 +338,14 @@ def parse_sheet(worksheet, mappings=None, column_enforcer=None):
source,
body,
root_doc_expr,
data_types,
)


class SheetParts(namedtuple('SheetParts', 'name headings source body root_expr')):
def __new__(cls, name, headings, source, body, root_expr=None):
return super(SheetParts, cls).__new__(cls, name, headings, source, body, root_expr)
class SheetParts(namedtuple('SheetParts', 'name headings source body root_expr data_types')):
def __new__(cls, name, headings, source, body, root_expr=None, data_types=None):
data_types = data_types or []
return super(SheetParts, cls).__new__(cls, name, headings, source, body, root_expr, data_types)

@property
def columns(self):
Expand Down Expand Up @@ -435,7 +442,8 @@ def get_multi_emit_query(source, sheets, missing_value):
source=root_expr,
body=sheet.body
),
missing_value=missing_value
missing_value=missing_value,
data_types=sheet.data_types,
)
)

Expand Down Expand Up @@ -464,7 +472,8 @@ def _get_source(source, root_expr):
source=_get_source(sheet.source, sheet.root_expr),
body=sheet.body
),
missing_value=missing_value
missing_value=missing_value,
data_types=sheet.data_types,
)
return Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal([sheet.name])), emit)

Expand Down
29 changes: 19 additions & 10 deletions commcare_export/minilinq.py
Expand Up @@ -8,6 +8,8 @@

from commcare_export.repeatable_iterator import RepeatableIterator

from commcare_export.specs import TableSpec

logger = logging.getLogger(__name__)

class MiniLinq(object):
Expand Down Expand Up @@ -400,12 +402,13 @@ class Emit(MiniLinq):
are actually lists - it is just crashy instead.
"""

def __init__(self, table, headings, source, missing_value=None):
def __init__(self, table, headings, source, missing_value=None, data_types=None):
"(str, [str], [MiniLinq]) -> MiniLinq"
self.table = table
self.headings = headings
self.source = source
self.missing_value = missing_value
self.data_types = data_types or []

@unwrap('cell')
def coerce_cell_blithely(self, cell):
Expand All @@ -428,31 +431,37 @@ def coerce_row(self, row):

def eval(self, env):
rows = self.source.eval(env)
env.emit_table({'name': self.table,
'headings': [heading.eval(env) for heading in self.headings],
'rows': map(self.coerce_row, rows)})
env.emit_table(TableSpec(
name=self.table,
headings=[heading.eval(env) for heading in self.headings],
rows=list(map(self.coerce_row, rows)),
))

@classmethod
def from_jvalue(cls, jvalue):
    """Build an Emit node from its jvalue form (the inverse of to_jvalue).

    :param jvalue: a dict of the shape ``{'Emit': {...}}`` with required
        keys 'table', 'source' and 'headings', and optional keys
        'missing_value' and 'data_types'.
    :returns: a new instance of ``cls``.
    """
    fields = jvalue['Emit']

    return cls(
        table=fields['table'],
        source=MiniLinq.from_jvalue(fields['source']),
        headings=[MiniLinq.from_jvalue(heading) for heading in fields['headings']],
        missing_value=fields.get('missing_value'),
        # data_types is serialized element-by-element just like headings, so
        # it has to be parsed the same way for a round trip to compare equal.
        # A missing or null entry (e.g. older serialized queries) means "no
        # explicit types".
        data_types=[
            MiniLinq.from_jvalue(data_type)
            for data_type in fields.get('data_types') or []
        ],
    )

def to_jvalue(self):
    """Serialize this Emit node to its jvalue form.

    :returns: ``{'Emit': {...}}`` holding the table name, the serialized
        headings, the serialized source, the missing value and the
        serialized data types.
    """
    return {'Emit': {'table': self.table,
                     'headings': [heading.to_jvalue() for heading in self.headings],
                     'source': self.source.to_jvalue(),
                     'missing_value': self.missing_value,
                     # serialize the data types themselves, not the headings
                     'data_types': [data_type.to_jvalue() for data_type in self.data_types]}}

def __eq__(self, other):
    """Two Emit nodes are equal when every stored field matches.

    Anything that is not an Emit compares unequal.
    """
    if not isinstance(other, Emit):
        return False
    return (
        self.table == other.table
        and self.headings == other.headings
        and self.source == other.source
        and self.missing_value == other.missing_value
        and self.data_types == other.data_types
    )

def __repr__(self):
Expand Down
7 changes: 7 additions & 0 deletions commcare_export/misc.py
Expand Up @@ -47,3 +47,10 @@ def unwrap_val(val):
val = val.value

return val


def default_to_json(obj):
    """Fallback serializer, suitable as the ``default=`` hook of ``json.dumps``.

    Objects exposing a ``toJSON`` method are serialized by calling it;
    everything else is handed to ``RepeatableIterator.to_jvalue``.
    """
    if not hasattr(obj, 'toJSON'):
        return RepeatableIterator.to_jvalue(obj)
    return obj.toJSON()
26 changes: 26 additions & 0 deletions commcare_export/specs.py
@@ -0,0 +1,26 @@


class TableSpec(object):
    """Description of one output table: its name, headings, rows and types.

    Attributes:
        name: the table name.
        headings: the column headings.
        rows: the row data.
        data_types: the explicit column data types; ``[]`` when none were
            given (presumably parallel to ``headings`` -- confirm with the
            writers that consume it).
    """

    def __init__(self, name, headings, rows, data_types=None):
        self.name = name
        self.headings = headings
        self.rows = rows
        # Normalize "not given" (None or any other falsy value) to a fresh
        # empty list.
        self.data_types = data_types or []

    def __eq__(self, other):
        """Specs are equal when all four fields match; other types never are."""
        return (
            isinstance(other, TableSpec)
            and other.name == self.name
            and other.headings == self.headings
            and other.rows == self.rows
            and other.data_types == self.data_types
        )

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep the two consistent.
        return not self.__eq__(other)

    def __repr__(self):
        return 'TableSpec(name={!r}, headings={!r}, rows={!r}, data_types={!r})'.format(
            self.name, self.headings, self.rows, self.data_types
        )

    def toJSON(self):
        """Return the spec as a plain dict with one key per field."""
        return {
            'name': self.name,
            'headings': self.headings,
            'rows': self.rows,
            'data_types': self.data_types,
        }

0 comments on commit d377788

Please sign in to comment.