Skip to content

Commit

Permalink
[sqllab] bugfix visualizing a query with a semi-colon (#1869)
Browse files Browse the repository at this point in the history
* [sqllab] bugfix visualizing a query with a semi-colon

* Fixing tests
  • Loading branch information
mistercrunch committed Jan 6, 2017
1 parent c3edc6e commit 9a62d94
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 51 deletions.
32 changes: 5 additions & 27 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from sqlalchemy.orm import sessionmaker

from superset import (
app, db, models, utils, dataframe, results_backend, sql_parse)
app, db, models, utils, dataframe, results_backend)
from superset.sql_parse import SupersetQuery
from superset.db_engine_specs import LimitMethod
from superset.jinja_context import get_template_processor
QueryStatus = models.QueryStatus
Expand Down Expand Up @@ -40,28 +41,6 @@ def dedup(l, suffix='__'):
return new_l


def create_table_as(sql, table_name, override=False):
"""Reformats the query into the create table as query.
Works only for the single select SQL statements, in all other cases
the sql query is not modified.
:param superset_query: string, sql query that will be executed
:param table_name: string, will contain the results of the query execution
:param override, boolean, table table_name will be dropped if true
:return: string, create table as query
"""
# TODO(bkyryliuk): enforce that all the columns have names. Presto requires it
# for the CTA operation.
# TODO(bkyryliuk): drop table if allowed, check the namespace and
# the permissions.
# TODO raise if multi-statement
exec_sql = ''
if override:
exec_sql = 'DROP TABLE IF EXISTS {table_name};\n'
exec_sql += "CREATE TABLE {table_name} AS \n{sql}"
return exec_sql.format(**locals())


@celery_app.task(bind=True)
def get_sql_results(self, query_id, return_results=True, store_results=False):
"""Executes the sql query returns the results."""
Expand All @@ -76,7 +55,6 @@ def get_sql_results(self, query_id, return_results=True, store_results=False):
session.commit() # HACK
query = session.query(models.Query).filter_by(id=query_id).one()
database = query.database
executed_sql = query.sql.strip().strip(';')
db_engine_spec = database.db_engine_spec

def handle_error(msg):
Expand All @@ -91,7 +69,8 @@ def handle_error(msg):
handle_error("Results backend isn't configured.")

# Limit enforced only for retrieving the data, not for the CTA queries.
superset_query = sql_parse.SupersetQuery(executed_sql)
superset_query = SupersetQuery(query.sql)
executed_sql = superset_query.stripped()
if not superset_query.is_select() and not database.allow_dml:
handle_error(
"Only `SELECT` statements are allowed against this database")
Expand All @@ -105,8 +84,7 @@ def handle_error(msg):
query.tmp_table_name = 'tmp_{}_table_{}'.format(
query.user_id,
start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
executed_sql = create_table_as(
executed_sql, query.tmp_table_name)
executed_sql = superset_query.as_create_table(query.tmp_table_name)
query.select_as_cta_used = True
elif (
query.limit and superset_query.is_select() and
Expand Down
33 changes: 32 additions & 1 deletion superset/sql_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def __init__(self, sql_statement):
self._table_names = set()
self._alias_names = set()
# TODO: multistatement support
for statement in sqlparse.parse(self.sql):
self._parsed = sqlparse.parse(self.sql)
for statement in self._parsed:
self.__extract_from_token(statement)
self._table_names = self._table_names - self._alias_names

Expand All @@ -26,6 +27,13 @@ def tables(self):
def is_select(self):
return self.sql.upper().startswith('SELECT')

def stripped(self):
sql = self.sql
if sql:
while sql[-1] in (' ', ';', '\n', '\t'):
sql = sql[:-1]
return sql

@staticmethod
def __precedes_table_name(token_value):
for keyword in PRECEDES_TABLE_NAME:
Expand Down Expand Up @@ -67,6 +75,29 @@ def __process_identifier(self, identifier):
self._alias_names.add(identifier.tokens[0].value)
self.__extract_from_token(identifier)

def as_create_table(self, table_name, overwrite=False):
"""Reformats the query into the create table as query.
Works only for the single select SQL statements, in all other cases
the sql query is not modified.
:param superset_query: string, sql query that will be executed
:param table_name: string, will contain the results of the
query execution
:param overwrite, boolean, table table_name will be dropped if true
:return: string, create table as query
"""
# TODO(bkyryliuk): enforce that all the columns have names.
# Presto requires it for the CTA operation.
# TODO(bkyryliuk): drop table if allowed, check the namespace and
# the permissions.
# TODO raise if multi-statement
exec_sql = ''
sql = self.stripped()
if overwrite:
exec_sql = 'DROP TABLE IF EXISTS {table_name};\n'
exec_sql += "CREATE TABLE {table_name} AS \n{sql}"
return exec_sql.format(**locals())

def __extract_from_token(self, token):
if not hasattr(token, 'tokens'):
return
Expand Down
4 changes: 3 additions & 1 deletion superset/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
)
from superset.source_registry import SourceRegistry
from superset.models import DatasourceAccessRequest as DAR
from superset.sql_parse import SupersetQuery

config = app.config
log_this = models.Log.log_this
Expand Down Expand Up @@ -2229,7 +2230,8 @@ def sqllab_viz(self):
if not table:
table = models.SqlaTable(table_name=table_name)
table.database_id = data.get('dbId')
table.sql = data.get('sql')
q = SupersetQuery(data.get('sql'))
table.sql = q.stripped()
db.session.add(table)
cols = []
dims = []
Expand Down
40 changes: 18 additions & 22 deletions tests/celery_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

import pandas as pd

from superset import app, appbuilder, cli, db, models, sql_lab, dataframe
from superset import app, appbuilder, cli, db, models, dataframe
from superset.security import sync_role_definitions
from superset.sql_parse import SupersetQuery

from .base_tests import SupersetTestCase

Expand All @@ -35,38 +36,33 @@ class UtilityFunctionTests(SupersetTestCase):

# TODO(bkyryliuk): support more cases in CTA function.
def test_create_table_as(self):
select_query = "SELECT * FROM outer_space;"
updated_select_query = sql_lab.create_table_as(
select_query, "tmp")
q = SupersetQuery("SELECT * FROM outer_space;")

self.assertEqual(
"CREATE TABLE tmp AS \nSELECT * FROM outer_space;",
updated_select_query)
"CREATE TABLE tmp AS \nSELECT * FROM outer_space",
q.as_create_table("tmp"))

updated_select_query_with_drop = sql_lab.create_table_as(
select_query, "tmp", override=True)
self.assertEqual(
"DROP TABLE IF EXISTS tmp;\n"
"CREATE TABLE tmp AS \nSELECT * FROM outer_space;",
updated_select_query_with_drop)
"CREATE TABLE tmp AS \nSELECT * FROM outer_space",
q.as_create_table("tmp", overwrite=True))

select_query_no_semicolon = "SELECT * FROM outer_space"
updated_select_query_no_semicolon = sql_lab.create_table_as(
select_query_no_semicolon, "tmp")
# now without a semicolon
q = SupersetQuery("SELECT * FROM outer_space")
self.assertEqual(
"CREATE TABLE tmp AS \nSELECT * FROM outer_space",
updated_select_query_no_semicolon)
q.as_create_table("tmp"))

# now a multi-line query
multi_line_query = (
"SELECT * FROM planets WHERE\n"
"Luke_Father = 'Darth Vader';")
updated_multi_line_query = sql_lab.create_table_as(
multi_line_query, "tmp")
expected_updated_multi_line_query = (
"CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n"
"Luke_Father = 'Darth Vader';")
"Luke_Father = 'Darth Vader'")
q = SupersetQuery(multi_line_query)
self.assertEqual(
expected_updated_multi_line_query,
updated_multi_line_query)
"CREATE TABLE tmp AS \nSELECT * FROM planets WHERE\n"
"Luke_Father = 'Darth Vader'",
q.as_create_table("tmp")
)


class CeleryTestCase(SupersetTestCase):
Expand Down

0 comments on commit 9a62d94

Please sign in to comment.