From 56035ef983e15b6b9405d297e1c94a85ea940005 Mon Sep 17 00:00:00 2001 From: Delena Malan Date: Mon, 24 Jun 2019 09:04:04 +0200 Subject: [PATCH] Fix KeyError "field" for reordered columns and don't return non-matching-header when it's actually a missing-header (#298) * Don't throw non-matching-header when it's actually missing-header * Fix field error * Update spec --- goodtables/checks/extra_header.py | 11 +++++++-- goodtables/checks/missing_header.py | 9 ++++++- goodtables/checks/non_matching_header.py | 24 ++++++++++++++++--- tests/checks/test_non_matching_header.py | 15 +++++++----- tests/scenarios/missing_headers.yml | 20 ++++++++++++++++ ...s_with_missing_header_and_extra_header.yml | 17 +++++++++++++ 6 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 tests/scenarios/missing_headers.yml create mode 100644 tests/scenarios/reordered_headers_with_missing_header_and_extra_header.yml diff --git a/goodtables/checks/extra_header.py b/goodtables/checks/extra_header.py index 384c8a8..9cabe56 100644 --- a/goodtables/checks/extra_header.py +++ b/goodtables/checks/extra_header.py @@ -25,7 +25,7 @@ def check_headers(self, cells, sample): for cell in copy(cells): # Skip if cell has field - if 'field' in cell: + if 'field' in cell and cell['field'] is not None: continue # Infer field @@ -42,7 +42,14 @@ def check_headers(self, cells, sample): # Add error/remove column else: - error = Error('extra-header', cell) + message_substitutions = { + 'header': '"{}"'.format(cell.get('header')), + } + error = Error( + 'extra-header', + cell, + message_substitutions=message_substitutions + ) errors.append(error) cells.remove(cell) diff --git a/goodtables/checks/missing_header.py b/goodtables/checks/missing_header.py index 9301c05..a831e20 100644 --- a/goodtables/checks/missing_header.py +++ b/goodtables/checks/missing_header.py @@ -22,7 +22,14 @@ def missing_header(cells, sample): continue # Add error - error = Error('missing-header', cell) + message_substitutions = { + 'field_name': '"{}"'.format(cell['field'].name), + } + error = Error( + 'missing-header', + cell, + message_substitutions=message_substitutions + ) errors.append(error) # Remove cell diff --git a/goodtables/checks/non_matching_header.py b/goodtables/checks/non_matching_header.py index d495c06..516cb13 100644 --- a/goodtables/checks/non_matching_header.py +++ b/goodtables/checks/non_matching_header.py @@ -8,6 +8,7 @@ from copy import copy from ..registry import check from ..error import Error +from ..cells import create_cell # Module API @@ -33,6 +34,7 @@ def check_headers(self, cells, sample=None): def _check_with_ordering(cells): # Update fields order to maximally match headers order headers = [cell.get('header') for cell in cells] + new_headers = [] for index, header in enumerate(headers): if header is None: continue @@ -49,6 +51,21 @@ def _check_with_ordering(cells): # Given field matches some header also swap if _slugify(field_name) == _slugify(cell.get('header')): _swap_fields(cells[index], cell) + field_name = _get_field_name(cells[index]) + + field_name = _get_field_name(cells[index]) + if _slugify(header) != _slugify(field_name): + # Cell header and field_name are still different + # which means there is no matching field for the header. + # Add a new cell with an empty header so that the missing-header + # error will be thrown later. + new_cell = create_cell(header=None, field=cell.get('field')) + new_headers.append(new_cell) + # Change the current cell's field to None + cell['field'] = None + + # Add the new headers + cells.extend(new_headers) # Run check with no field ordering return _check_without_ordering(cells) @@ -60,10 +77,11 @@ def _check_without_ordering(cells): for cell in copy(cells): if cell.get('field') is not None: header = cell.get('header') - if header != cell['field'].name: + if header != cell['field'].name and header is not None: # Add error message_substitutions = { 'field_name': '"{}"'.format(cell['field'].name), + 'header': '"{}"'.format(cell.get('header')), } error = Error( 'non-matching-header', @@ -92,6 +110,6 @@ def _slugify(string): def _swap_fields(cell1, cell2): - field1 = cell1['field'] - cell1['field'] = cell2['field'] + field1 = cell1.get('field', None) + cell1['field'] = cell2.get('field', None) cell2['field'] = field1 diff --git a/tests/checks/test_non_matching_header.py b/tests/checks/test_non_matching_header.py index 85d9ed8..d726b63 100644 --- a/tests/checks/test_non_matching_header.py +++ b/tests/checks/test_non_matching_header.py @@ -47,7 +47,9 @@ def test_check_non_matching_header_order_fields(log): non_matching_header = NonMatchingHeader(order_fields=True) errors = non_matching_header.check_headers(cells) assert log(errors) == [] - assert len(cells) == 3 + # A new header cell will be added by the non-matching-header check because + # there is no field for the "name3" header + assert len(cells) == 4 def test_check_non_matching_header_order_fields_problem(log): @@ -61,16 +63,17 @@ def test_check_non_matching_header_order_fields_problem(log): assert log(errors) == [ (None, 2, 'non-matching-header'), ] - assert len(cells) == 2 + # New header cells will be added by the non-matching-header check because + # there are no fields for the "name2" and "name3" headers + assert len(cells) == 4 def test_check_non_matching_header_with_empty_header_name(log): + # When the header is None, it means there's a missing header. cells = [ goodtables.cells.create_cell(None, field=Field({'name': 'name3'}), column_number=1), ] non_matching_header = NonMatchingHeader(order_fields=True) errors = non_matching_header.check_headers(cells) - assert log(errors) == [ - (None, 1, 'non-matching-header'), - ] - assert len(cells) == 0 + assert log(errors) == [] + assert len(cells) == 1 diff --git a/tests/scenarios/missing_headers.yml b/tests/scenarios/missing_headers.yml new file mode 100644 index 0000000..907c527 --- /dev/null +++ b/tests/scenarios/missing_headers.yml @@ -0,0 +1,20 @@ +primary_key_field: + source: + - [id, last_name, first_name, language] + - [1, Alex, John, English] + - [2, Peters, John, Afrikaans] + - [3, Smith, Paul, Zulu] + schema: + fields: + - {name: id, type: number} + - {name: first_name} + - {name: last_name} + - {name: age} + - {name: country} + primaryKey: id + checks: + - schema + - structure + order_fields: true + report: + - [1, null, 5, 'missing-header'] diff --git a/tests/scenarios/reordered_headers_with_missing_header_and_extra_header.yml b/tests/scenarios/reordered_headers_with_missing_header_and_extra_header.yml new file mode 100644 index 0000000..e19dd65 --- /dev/null +++ b/tests/scenarios/reordered_headers_with_missing_header_and_extra_header.yml @@ -0,0 +1,17 @@ +primary_key_field: + source: + - [LastName, FirstName, Address] + - [Test, Tester, 23 Avenue] + schema: + fields: + - {name: id} + - {name: FirstName} + - {name: LastName} + primaryKey: id + checks: + - schema + - structure + order_fields: true + report: + - [1, null, null, 'missing-header'] + - [1, null, 3, 'extra-header']