Skip to content

Commit

Permalink
Add expected headers argument
Browse files Browse the repository at this point in the history
Add a new argument to `get_all_records` to provide the list of expected
headers.

The given expected headers must:
- be unique
- be part of the complete headers list
- not contain extra headers

This will provide a way for users to use this method and still
have *some* duplicated headers that are not relevant to pull.

This will ensure the columns that matter have unique headers.

Closes #1007
  • Loading branch information
lavigne958 committed Apr 6, 2022
1 parent 5181e11 commit 11b432a
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 3 deletions.
26 changes: 23 additions & 3 deletions gspread/worksheet.py
Expand Up @@ -376,6 +376,7 @@ def get_all_records(
allow_underscores_in_numeric_literals=False,
numericise_ignore=None,
value_render_option=None,
expected_headers=None,
):
"""Returns a list of dictionaries, all of them having the contents of
the spreadsheet with the head row as keys and each of these
Expand All @@ -399,6 +400,7 @@ def get_all_records(
:param str value_render_option: (optional) Determines how values should
be rendered in the the output. See `ValueRenderOption`_ in
the Sheets API.
:param list expected_headers: (optional) List of expected headers, they must be unique.
.. _ValueRenderOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption
"""
Expand All @@ -412,9 +414,27 @@ def get_all_records(

keys = data[idx]

# Check keys are uniques
if len(keys) != len(set(keys)):
raise GSpreadException("headers must be uniques")
# Validate the headers the caller relies on.
#
# When no explicit list is given, every header of the header row is
# expected, so the uniqueness check below applies to the whole row.
if expected_headers is None:
    expected_headers = keys

# The expected headers must:
# - be unique
# - all be present in the header row read from the sheet
expected = set(expected_headers)
headers = set(keys)

# Duplicates collapse inside a set, so a size mismatch means that at
# least one expected header was listed more than once.
if len(expected) != len(expected_headers):
    raise GSpreadException("the given 'expected_headers' are not uniques")

# Report the expected headers that are missing from the sheet. The set
# difference holds exactly those; the intersection would list the
# headers that WERE found, which is the opposite of what the message says.
# NOTE(review): this does not check whether an expected header occurs
# more than once in `keys` - confirm whether that should be rejected too.
unknown = expected - headers
if unknown:
    raise GSpreadException(
        "the given 'expected_headers' contains unknown headers: {}".format(
            unknown
        )
    )

if numericise_ignore == ["all"]:
values = data[idx + 1 :]
Expand Down
28 changes: 28 additions & 0 deletions tests/worksheet_test.py
Expand Up @@ -629,6 +629,34 @@ def test_get_all_records_duplicate_keys(self):
with pytest.raises(GSpreadException):
self.sheet.get_all_records()

@pytest.mark.vcr()
def test_get_all_records_expected_headers(self):
    """Read records while naming only some headers as expected.

    The header row repeats "C3", and only "A1" and "B2" are passed as
    ``expected_headers``. Each returned record is compared with the
    dictionary built from the header row and the matching data row;
    because that dictionary is built with ``dict(zip(...))``, the
    duplicated "C3" key holds the value of the last column.
    """
    self.sheet.resize(4, 4)

    # One header row (with a duplicated trailing "C3") followed by
    # three data rows, written to the sheet cell by cell.
    header = ["A1", "B2", "C3", "C3"]
    rows = [
        header,
        [1, "b2", 1.45, ""],
        ["", "", "", ""],
        ["A4", 0.4, "", 4],
    ]
    cells = self.sheet.range("A1:D4")
    flat_values = itertools.chain.from_iterable(rows)
    for cell, value in zip(cells, flat_values):
        cell.value = value
    self.sheet.update_cells(cells)

    read_records = self.sheet.get_all_records(
        expected_headers=["A1", "B2"],
    )

    # Build every expected record first, then compare them in row order.
    expected_records = [dict(zip(header, row)) for row in rows[1:]]
    for position, expected_record in enumerate(expected_records):
        self.assertDictEqual(expected_record, read_records[position])

@pytest.mark.vcr()
def test_get_all_records_numericise_unformatted(self):
self.sheet.resize(2, 4)
Expand Down

2 comments on commit 11b432a

@bnbchu
Copy link

@bnbchu bnbchu commented on 11b432a Apr 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hi, the error message "gspread.exceptions.GSpreadException: the given 'expected_headers' contains unknown headers: {}" is showing the headers that are found instead of the unknown headers.

@lavigne958
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you're right, I will fix it in the next release. Thank you for your comment.

Please sign in to comment.