Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
Fix KeyError "field" for reordered columns and don't return non-match…
Browse files Browse the repository at this point in the history
…ing-header when it's actually a missing-header (#298)

* Don't throw non-matching-header when it's actually missing-header

* Fix field error

* Update spec
  • Loading branch information
delenamalan authored and roll committed Jun 24, 2019
1 parent 00537be commit 56035ef
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 12 deletions.
11 changes: 9 additions & 2 deletions goodtables/checks/extra_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def check_headers(self, cells, sample):
for cell in copy(cells):

# Skip if cell has field
if 'field' in cell:
if 'field' in cell and cell['field'] is not None:
continue

# Infer field
Expand All @@ -42,7 +42,14 @@ def check_headers(self, cells, sample):

# Add error/remove column
else:
error = Error('extra-header', cell)
message_substitutions = {
'header': '"{}"'.format(cell.get('header')),
}
error = Error(
'extra-header',
cell,
message_substitutions=message_substitutions
)
errors.append(error)
cells.remove(cell)

Expand Down
9 changes: 8 additions & 1 deletion goodtables/checks/missing_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@ def missing_header(cells, sample):
continue

# Add error
error = Error('missing-header', cell)
message_substitutions = {
'field_name': '"{}"'.format(cell['field'].name),
}
error = Error(
'missing-header',
cell,
message_substitutions=message_substitutions
)
errors.append(error)

# Remove cell
Expand Down
24 changes: 21 additions & 3 deletions goodtables/checks/non_matching_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from copy import copy
from ..registry import check
from ..error import Error
from ..cells import create_cell


# Module API
Expand All @@ -33,6 +34,7 @@ def check_headers(self, cells, sample=None):
def _check_with_ordering(cells):
# Update fields order to maximally match headers order
headers = [cell.get('header') for cell in cells]
new_headers = []
for index, header in enumerate(headers):
if header is None:
continue
Expand All @@ -49,6 +51,21 @@ def _check_with_ordering(cells):
# Given field matches some header also swap
if _slugify(field_name) == _slugify(cell.get('header')):
_swap_fields(cells[index], cell)
field_name = _get_field_name(cells[index])

field_name = _get_field_name(cells[index])
if _slugify(header) != _slugify(field_name):
# Cell header and field_name are still different
# which means there is no matching field for the header.
# Add a new cell with an empty header so that the missing-header
# error will be thrown later.
new_cell = create_cell(header=None, field=cell.get('field'))
new_headers.append(new_cell)
# Change the current cell's field to None
cell['field'] = None

# Add the new headers
cells.extend(new_headers)

# Run check with no field ordering
return _check_without_ordering(cells)
Expand All @@ -60,10 +77,11 @@ def _check_without_ordering(cells):
for cell in copy(cells):
if cell.get('field') is not None:
header = cell.get('header')
if header != cell['field'].name:
if header != cell['field'].name and header is not None:
# Add error
message_substitutions = {
'field_name': '"{}"'.format(cell['field'].name),
'header': '"{}"'.format(cell.get('header')),
}
error = Error(
'non-matching-header',
Expand Down Expand Up @@ -92,6 +110,6 @@ def _slugify(string):


def _swap_fields(cell1, cell2):
field1 = cell1['field']
cell1['field'] = cell2['field']
field1 = cell1.get('field', None)
cell1['field'] = cell2.get('field', None)
cell2['field'] = field1
15 changes: 9 additions & 6 deletions tests/checks/test_non_matching_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def test_check_non_matching_header_order_fields(log):
non_matching_header = NonMatchingHeader(order_fields=True)
errors = non_matching_header.check_headers(cells)
assert log(errors) == []
assert len(cells) == 3
# A new header cell will be added by the non-matching-header check because
# there is no field for the "name3" header
assert len(cells) == 4


def test_check_non_matching_header_order_fields_problem(log):
Expand All @@ -61,16 +63,17 @@ def test_check_non_matching_header_order_fields_problem(log):
assert log(errors) == [
(None, 2, 'non-matching-header'),
]
assert len(cells) == 2
# New header cells will be added by the non-matching-header check because
# there are no fields for the "name2" and "name3" headers
assert len(cells) == 4


def test_check_non_matching_header_with_empty_header_name(log):
# When the header is None, it means there's a missing header.
cells = [
goodtables.cells.create_cell(None, field=Field({'name': 'name3'}), column_number=1),
]
non_matching_header = NonMatchingHeader(order_fields=True)
errors = non_matching_header.check_headers(cells)
assert log(errors) == [
(None, 1, 'non-matching-header'),
]
assert len(cells) == 0
assert log(errors) == []
assert len(cells) == 1
20 changes: 20 additions & 0 deletions tests/scenarios/missing_headers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
primary_key_field:
source:
- [id, last_name, first_name, language]
- [1, Alex, John, English]
- [2, Peters, John, Afrikaans]
- [3, Smith, Paul, Zulu]
schema:
fields:
- {name: id, type: number}
- {name: first_name}
- {name: last_name}
- {name: age}
- {name: country}
primaryKey: id
checks:
- schema
- structure
order_fields: true
report:
- [1, null, 5, 'missing-header']
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
primary_key_field:
source:
- [LastName, FirstName, Address]
- [Test, Tester, 23 Avenue]
schema:
fields:
- {name: id}
- {name: FirstName}
- {name: LastName}
primaryKey: id
checks:
- schema
- structure
order_fields: true
report:
- [1, null, null, 'missing-header']
- [1, null, 3, 'extra-header']

0 comments on commit 56035ef

Please sign in to comment.