apache · craig-rueda · Jan 28, 2020 · Jan 23, 2020 · nytai · Jan 24, 2020
diff --git a/superset/result_set.py b/superset/result_set.py
@@ -18,6 +18,7 @@
 """ Superset wrapper around pyarrow.Table.
 """
 import datetime
+import json
 import logging
 import re
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type
@@ -27,6 +28,7 @@
 import pyarrow as pa
 
 from superset import db_engine_specs
+from superset.utils import core as utils
 
 
 def dedup(l: List[str], suffix: str = "__", case_sensitive: bool = True) -> List[str]:
@@ -86,7 +88,18 @@ def __init__(
         # related: https://issues.apache.org/jira/browse/ARROW-5248
         if pa_data:
             for i, column in enumerate(column_names):
-                if pa.types.is_temporal(pa_data[i].type):
+                # TODO: revisit nested column serialization once Arrow 1.0 is released with:
+                # https://github.com/apache/arrow/pull/6199
+                # Related issue: #8978
+                if pa.types.is_nested(pa_data[i].type):
+                    stringify_func = lambda item: json.dumps(
+                        item, default=utils.json_iso_dttm_ser
+                    )
+                    vfunc = np.vectorize(stringify_func)
+                    strigified_arr = vfunc(array[:, i])
+                    pa_data[i] = pa.array(strigified_arr)
+
+                elif pa.types.is_temporal(pa_data[i].type):
                     sample = self.first_nonempty(array[:, i])
                     if sample and isinstance(sample, datetime.datetime):
                         try:

diff --git a/tests/result_set_tests.py b/tests/result_set_tests.py
@@ -124,6 +124,46 @@ def test_nullable_bool(self):
             ],
         )
 
+    def test_nested_types(self):  # nested values are expected back as JSON strings
+        data = [  # two rows: int, list of dicts, list of ints, dict
+            (
+                4,
+                [{"table_name": "unicode_test", "database_id": 1}],
+                [1, 2, 3],
+                {"chart_name": "scatter"},
+            ),
+            (
+                3,
+                [{"table_name": "birth_names", "database_id": 1}],
+                [4, 5, 6],
+                {"chart_name": "plot"},
+            ),
+        ]
+        cursor_descr = [("id",), ("dict_arr",), ("num_arr",), ("map_col",)]
+        results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+        self.assertEqual(results.columns[0]["type"], "INT")  # scalar column
+        self.assertEqual(results.columns[1]["type"], "STRING")  # list of dicts
+        self.assertEqual(results.columns[2]["type"], "STRING")  # list of ints
+        self.assertEqual(results.columns[3]["type"], "STRING")  # dict
+        df = results.to_pandas_df()
+        self.assertEqual(
+            df_to_records(df),
+            [  # expected records: each nested cell is its JSON text
+                {
+                    "id": 4,
+                    "dict_arr": '[{"table_name": "unicode_test", "database_id": 1}]',
+                    "num_arr": "[1, 2, 3]",
+                    "map_col": '{"chart_name": "scatter"}',
+                },
+                {
+                    "id": 3,
+                    "dict_arr": '[{"table_name": "birth_names", "database_id": 1}]',
+                    "num_arr": "[4, 5, 6]",
+                    "map_col": '{"chart_name": "plot"}',
+                },
+            ],
+        )
+
     def test_empty_datetime(self):
         data = [(None,)]
         cursor_descr = [("ds", "timestamp", None, None, None, None, True)]