Skip to content

Commit

Permalink
[BUGFIX]: Check datatype of results before converting to DataFrame (#4108)
Browse files Browse the repository at this point in the history

* conditional check on datatype of results before converting to df

fix type checking

fix conditional checks

remove trailing whitespace and fix df_data fallback def

actually remove trailing whitespace

generalized type check to check all columns for dict

refactor dict col check

* move df conversion to helper and add unit test

add missing newlines

another missing newline

fix quotes

more quote fixes
  • Loading branch information
marcusianlevine authored and mistercrunch committed Jan 24, 2018
1 parent 2c72a7a commit 4bc5fe5
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
27 changes: 22 additions & 5 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import uuid

from celery.exceptions import SoftTimeLimitExceeded
import numpy as np
import pandas as pd
import sqlalchemy
from sqlalchemy.orm import sessionmaker
Expand Down Expand Up @@ -85,6 +86,26 @@ def get_session(nullpool):
return session


def convert_results_to_df(cursor_description, data):
    """Convert raw query results to a ``SupersetDataFrame``.

    :param cursor_description: iterable of column descriptors; only the
        first element of each entry (the column name) is read. When it is
        ``None`` or empty, an empty list of column names is used.
    :param data: sequence of result rows. When it is empty or ``None``,
        the resulting frame is built from an empty list.
    :returns: ``dataframe.SupersetDataFrame`` wrapping a ``pd.DataFrame``
        built from ``data`` with the (dedup'ed) column names.
    """
    # Column names are passed through ``dedup`` (defined elsewhere in this
    # module) before being handed to pandas.
    column_names = dedup(
        [col[0] for col in cursor_description] if cursor_description else [])

    if data:
        # Only the first row is inspected to decide whether the result set
        # carries nested dict values.
        has_dict_col = any(isinstance(cell, dict) for cell in data[0])
        if has_dict_col:
            # Rows holding dicts are given to pandas as a plain list of rows.
            df_data = list(data)
        else:
            # dtype=object keeps every cell as its original Python value.
            # Without it numpy picks one common dtype for the whole table,
            # e.g. a row like ['a', 4] is silently turned into ['a', '4'].
            df_data = np.array(data, dtype=object)
    else:
        df_data = []

    return dataframe.SupersetDataFrame(
        pd.DataFrame(df_data, columns=column_names))


@celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
def get_sql_results(
ctask, query_id, return_results=True, store_results=False,
Expand Down Expand Up @@ -224,11 +245,7 @@ def handle_error(msg):
},
default=utils.json_iso_dttm_ser)

column_names = (
[col[0] for col in cursor_description] if cursor_description else [])
column_names = dedup(column_names)
cdf = dataframe.SupersetDataFrame(
pd.DataFrame(list(data), columns=column_names))
cdf = convert_results_to_df(cursor_description, data)

query.rows = cdf.size
query.progress = 100
Expand Down
17 changes: 17 additions & 0 deletions tests/sqllab_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from superset import appbuilder, db, sm, utils
from superset.models.sql_lab import Query
from superset.sql_lab import convert_results_to_df
from .base_tests import SupersetTestCase


Expand Down Expand Up @@ -200,6 +201,22 @@ def test_alias_duplicate(self):
user_name='admin',
raise_on_error=True)

def test_df_conversion_no_dict(self):
    """Rows without dict values convert with the expected row/column counts."""
    cols = [['string_col'], ['int_col']]
    data = [['a', 4]]
    cdf = convert_results_to_df(cols, data)

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on unittest.TestCase in recent Python versions.
    self.assertEqual(len(data), cdf.size)
    self.assertEqual(len(cols), len(cdf.columns))


def test_df_conversion_dict(self):
    """Rows holding a dict value convert with the expected row/column counts."""
    cols = [['string_col'], ['dict_col'], ['int_col']]
    data = [['a', {'c1': 1, 'c2': 2, 'c3': 3}, 4]]
    cdf = convert_results_to_df(cols, data)

    # assertEqual replaces the deprecated assertEquals alias, which no
    # longer exists on unittest.TestCase in recent Python versions.
    self.assertEqual(len(data), cdf.size)
    self.assertEqual(len(cols), len(cdf.columns))



if __name__ == '__main__':
unittest.main()

0 comments on commit 4bc5fe5

Please sign in to comment.