Skip to content

Commit

Permalink
Merge pull request #48 from dimagi/missing-value
Browse files Browse the repository at this point in the history
allow specifying missing value replacement
  • Loading branch information
biyeun committed Nov 28, 2016
2 parents c49d349 + 981f0d9 commit a9659e2
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 20 deletions.
3 changes: 2 additions & 1 deletion commcare_export/cli.py
Expand Up @@ -53,6 +53,7 @@ def main(argv):
parser.add_argument('--output-format', default='json', choices=['json', 'csv', 'xls', 'xlsx', 'sql', 'markdown'], help='Output format')
parser.add_argument('--output', metavar='PATH', default='reports.zip', help='Path to output; defaults to `reports.zip`.')
parser.add_argument('--strict-types', default=False, action='store_true', help="When saving to a SQL database don't allow changing column types once they are created.")
parser.add_argument('--missing-value', default='', help="Value to use when a field is missing from the form / case.")

args = parser.parse_args(argv)

Expand Down Expand Up @@ -102,7 +103,7 @@ def main_with_args(args):
if os.path.splitext(args.query)[1] in ['.xls', '.xlsx']:
import openpyxl
workbook = openpyxl.load_workbook(args.query)
query = excel_query.compile_workbook(workbook)
query = excel_query.compile_workbook(workbook, args.missing_value)
else:
with io.open(args.query, encoding='utf-8') as fh:
query = MiniLinq.from_jvalue(json.loads(fh.read()))
Expand Down
15 changes: 9 additions & 6 deletions commcare_export/excel_query.py
Expand Up @@ -177,7 +177,7 @@ def compile_source(worksheet):
return FlatMap(source=api_query,
body=Reference(str(data_source_jsonpath)))

def compile_sheet(worksheet, mappings=None):
def compile_sheet(worksheet, mappings=None, missing_value=None):
mappings = mappings or {}
source_expr = compile_source(worksheet)

Expand All @@ -192,11 +192,14 @@ def compile_sheet(worksheet, mappings=None):
headings = [Literal(output_heading.value) for output_heading in output_headings]
source = Map(source=source_expr, body=List(output_fields))

return Emit(table = output_table_name,
headings = headings,
source = source)
return Emit(
table=output_table_name,
headings=headings,
source=source,
missing_value=missing_value
)

def compile_workbook(workbook):
def compile_workbook(workbook, missing_value=None):
"""
Returns a MiniLinq corresponding to the Excel configuration, which
consists of the following sheets:
Expand All @@ -216,7 +219,7 @@ def compile_workbook(workbook):
emit_sheets = [sheet_name for sheet_name in workbook.get_sheet_names() if sheet_name != 'Mappings']

for sheet in emit_sheets:
queries.append(compile_sheet(workbook.get_sheet_by_name(sheet), mappings))
queries.append(compile_sheet(workbook.get_sheet_by_name(sheet), mappings, missing_value))

return List(queries) # Moderate hack

Expand Down
22 changes: 17 additions & 5 deletions commcare_export/minilinq.py
Expand Up @@ -375,15 +375,18 @@ class Emit(MiniLinq):
are actually lists - it is just crashy instead.
"""

def __init__(self, table, headings, source):
def __init__(self, table, headings, source, missing_value=None):
"(str, [str], [MiniLinq]) -> MiniLinq"
self.table = table
self.headings = headings
self.source = source
self.missing_value = missing_value or ''

@unwrap('cell')
def coerce_cell_blithely(self, cell):
if isinstance(cell, list):
if not cell: # jsonpath returns an empty list when the path is not present
return self.missing_value
return ','.join([self.coerce_cell(item) for item in cell])
else:
return cell
Expand Down Expand Up @@ -411,18 +414,27 @@ def from_jvalue(cls, jvalue):

return cls(table = fields['table'],
source = MiniLinq.from_jvalue(fields['source']),
headings = [MiniLinq.from_jvalue(heading) for heading in fields['headings']])
headings = [MiniLinq.from_jvalue(heading) for heading in fields['headings']],
missing_value=fields.get('missing_value'))

def to_jvalue(self):
return {'Emit': {'table': self.table,
'headings': [heading.to_jvalue() for heading in self.headings],
'source': self.source.to_jvalue()}}
'source': self.source.to_jvalue(),
'missing_value': self.missing_value}}

def __eq__(self, other):
return isinstance(other, Emit) and self.table == other.table and self.headings == other.headings and self.source == other.source
return (
isinstance(other, Emit) and self.table == other.table
and self.headings == other.headings
and self.source == other.source
and self.missing_value == other.missing_value
)

def __repr__(self):
return '%s(table=%r, headings=%r, source=%r)' % (self.__class__.__name__, self.table, self.headings, self.source)
return '%s(table=%r, headings=%r, source=%r, missing_value=%r)' % (
self.__class__.__name__, self.table, self.headings, self.source, self.missing_value
)

### Register everything with the root parser ###

Expand Down
9 changes: 5 additions & 4 deletions tests/test_excel_query.py
Expand Up @@ -136,8 +136,8 @@ def test_compile_workbook(self):
test_cases = [
('004_TwoDataSources.xlsx',
List([
Emit(table='Forms', headings=[], source=Apply(Reference("api_data"), Literal("form"))),
Emit(table='Cases', headings=[], source=Apply(Reference("api_data"), Literal("case")))
Emit(table='Forms', headings=[], source=Apply(Reference("api_data"), Literal("form")), missing_value='---'),
Emit(table='Cases', headings=[], source=Apply(Reference("api_data"), Literal("case")), missing_value='---')
])),
('007_Mappings.xlsx',
List([Emit(table='Forms',
Expand All @@ -147,14 +147,15 @@ def test_compile_workbook(self):
source=Map(source=Apply(Reference("api_data"), Literal("form")),
body=List([
compile_mapped_field(field_mappings, Reference("type"))
])))])),
])),
missing_value='---')])),

]

for filename, minilinq in test_cases:
print('Compiling workbook %s' % filename) # This output will be captured by pytest and printed in case of failure; helpful for isolating which test case failed
abs_path = os.path.join(os.path.dirname(__file__), filename)
compiled = compile_workbook(openpyxl.load_workbook(abs_path))
compiled = compile_workbook(openpyxl.load_workbook(abs_path), missing_value='---')
# Print will be suppressed by pytest unless it fails
if not (compiled == minilinq):
print('In %s:' % filename)
Expand Down
9 changes: 5 additions & 4 deletions tests/test_minilinq.py
Expand Up @@ -113,14 +113,15 @@ def test_flatmap(self):
pass

def test_emit(self):
env = BuiltInEnv() | JsonPathEnv({'foo': {'baz': 3, 'bar': True}})
env = BuiltInEnv() | JsonPathEnv({'foo': {'baz': 3, 'bar': True, 'boo': None}})
Emit(table='Foo',
headings=[Literal('foo')],
source=List([
List([ Reference('foo.baz'), Reference('foo.bar') ])
])).eval(env)
List([ Reference('foo.baz'), Reference('foo.bar'), Reference('foo.foo'), Reference('foo.boo') ])
]),
missing_value='---').eval(env)

assert list(list(env.emitted_tables())[0]['rows']) == [[3, True]]
assert list(list(env.emitted_tables())[0]['rows']) == [[3, True, '---', None]]

def test_from_jvalue(self):
assert MiniLinq.from_jvalue({"Ref": "form.log_subreport"}) == Reference("form.log_subreport")
Expand Down

0 comments on commit a9659e2

Please sign in to comment.