Sort schema columns alphabetically (#4595)

* Adds logic to sort column names returned by the query runner. If `sorted` raises an Exception it returns the column names unaltered from the query runner. * Moves table name sorting from model code into schema handler. * Moves token sorting into the model code. * Replaces single-quotes with double-quotes for consistency. * Applies black formatting to changes. * Moves schema sort into separate method. Adds test. * Fixes output schema variable name. Without this the sorted cache is never returned! ____ ____ ____ _____ / __ \/ __ \/ __ \/ ___/ / /_/ / /_/ / /_/ (__ ) \____/\____/ .___/____/ /_/ * Adds test case guaranteeing that the model actually _uses_ the schema sorter. Related to a31f901
getredash · Feb 9, 2020 · cee1a07 · cee1a07
1 parent 42b1ead
commit cee1a07
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 6 deletions.
diff --git a/redash/models/__init__.py b/redash/models/__init__.py
@@ -187,15 +187,27 @@ def get_schema(self, refresh=False):
 
         if cache is None:
             query_runner = self.query_runner
-            schema = sorted(
-                query_runner.get_schema(get_stats=refresh), key=lambda t: t["name"]
-            )
+            schema = query_runner.get_schema(get_stats=refresh)
 
-            redis_connection.set(self._schema_key, json_dumps(schema))
+            try:
+                out_schema = self._sort_schema(schema)
+            except Exception:
+                logging.exception(
+                    "Error sorting schema columns for data_source {}".format(self.id)
+                )
+                out_schema = schema
+            finally:
+                redis_connection.set(self._schema_key, json_dumps(out_schema))
         else:
-            schema = json_loads(cache)
+            out_schema = json_loads(cache)
+
+        return out_schema
 
-        return schema
+    def _sort_schema(self, schema):
+        return [
+            {"name": i["name"], "columns": sorted(i["columns"])}
+            for i in sorted(schema, key=lambda x: x["name"])
+        ]
 
     @property
     def _schema_key(self):

diff --git a/tests/models/test_data_sources.py b/tests/models/test_data_sources.py
@@ -47,6 +47,53 @@ def test_get_schema_skips_cache_with_refresh_true(self):
             self.assertEqual(new_return_value, schema)
             self.assertEqual(patched_get_schema.call_count, 2)
 
+    def test_schema_sorter(self):
+        input_data = [
+            {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
+            {
+                "name": "all_terain_vehicle",
+                "columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
+            },
+        ]
+
+        expected_output = [
+            {
+                "name": "all_terain_vehicle",
+                "columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
+            },
+            {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
+        ]
+
+        real_output = self.factory.data_source._sort_schema(input_data)
+
+        self.assertEqual(real_output, expected_output)
+
+    def test_model_uses_schema_sorter(self):
+        orig_schema = [
+            {"name": "zoo", "columns": ["is_zebra", "is_snake", "is_cow"]},
+            {
+                "name": "all_terain_vehicle",
+                "columns": ["has_wheels", "has_engine", "has_all_wheel_drive"],
+            },
+        ]
+
+        sorted_schema = [
+            {
+                "name": "all_terain_vehicle",
+                "columns": ["has_all_wheel_drive", "has_engine", "has_wheels"],
+            },
+            {"name": "zoo", "columns": ["is_cow", "is_snake", "is_zebra"]},
+        ]
+
+        with mock.patch(
+            "redash.query_runner.pg.PostgreSQL.get_schema"
+        ) as patched_get_schema:
+            patched_get_schema.return_value = orig_schema
+
+            out_schema = self.factory.data_source.get_schema()
+
+            self.assertEqual(out_schema, sorted_schema)
+
 
 class TestDataSourceCreate(BaseTestCase):
     def test_adds_data_source_to_default_group(self):