Add expected headers argument

Add a new argument to `get_all_records` to provide the list of expected headers. The given expected headers must: - be unique - be part of the complete headers list - not contain extra headers. This gives users a way to use this method even when the sheet still has *some* duplicated headers that are not relevant to pull, while ensuring that the columns that matter have unique headers. Closes #1007
burnash · Apr 6, 2022 · 11b432a · 11b432a · bnbchu · Apr 13, 2022
1 parent 5181e11
commit 11b432a
Show file tree

Hide file tree

Showing 2 changed files with 51 additions and 3 deletions.
diff --git a/gspread/worksheet.py b/gspread/worksheet.py
@@ -376,6 +376,7 @@ def get_all_records(
         allow_underscores_in_numeric_literals=False,
         numericise_ignore=None,
         value_render_option=None,
+        expected_headers=None,
     ):
         """Returns a list of dictionaries, all of them having the contents of
         the spreadsheet with the head row as keys and each of these
@@ -399,6 +400,7 @@ def get_all_records(
         :param str value_render_option: (optional) Determines how values should
             be rendered in the the output. See `ValueRenderOption`_ in
             the Sheets API.
+        :param list expected_headers: (optional) List of expected headers, they must be unique.
 
         .. _ValueRenderOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption
         """
@@ -412,9 +414,27 @@ def get_all_records(
 
         keys = data[idx]
 
-        # Check keys are uniques
-        if len(keys) != len(set(keys)):
-            raise GSpreadException("headers must be uniques")
+        # if no given expected headers, expect all of them
+        if expected_headers is None:
+            expected_headers = keys
+
+        # keys must:
+        # - be uniques
+        # - be part of the complete header list
+        # - not contain extra headers
+        expected = set(expected_headers)
+        headers = set(keys)
+
+        # make sure they are uniques
+        if len(expected) != len(expected_headers):
+            raise GSpreadException("the given 'expected_headers' are not uniques")
+
+        if not expected & headers == expected:
+            raise GSpreadException(
+                "the given 'expected_headers' contains unknown headers: {}".format(
+                    expected & headers
+                )
+            )
 
         if numericise_ignore == ["all"]:
             values = data[idx + 1 :]

diff --git a/tests/worksheet_test.py b/tests/worksheet_test.py
@@ -629,6 +629,34 @@ def test_get_all_records_duplicate_keys(self):
         with pytest.raises(GSpreadException):
             self.sheet.get_all_records()
 
+    @pytest.mark.vcr()
+    def test_get_all_records_expected_headers(self):
+        self.sheet.resize(4, 4)
+
+        # put in new values, made from three lists
+        rows = [
+            ["A1", "B2", "C3", "C3"],
+            [1, "b2", 1.45, ""],
+            ["", "", "", ""],
+            ["A4", 0.4, "", 4],
+        ]
+        cell_list = self.sheet.range("A1:D4")
+        for cell, value in zip(cell_list, itertools.chain(*rows)):
+            cell.value = value
+        self.sheet.update_cells(cell_list)
+
+        expected_headers = ["A1", "B2"]
+        read_records = self.sheet.get_all_records(
+            expected_headers=expected_headers,
+        )
+
+        expected_values_1 = dict(zip(rows[0], rows[1]))
+        expected_values_2 = dict(zip(rows[0], rows[2]))
+        expected_values_3 = dict(zip(rows[0], rows[3]))
+        self.assertDictEqual(expected_values_1, read_records[0])
+        self.assertDictEqual(expected_values_2, read_records[1])
+        self.assertDictEqual(expected_values_3, read_records[2])
+
     @pytest.mark.vcr()
     def test_get_all_records_numericise_unformatted(self):
         self.sheet.resize(2, 4)