Skip to content

Commit

Permalink
feat: add Postgres SQL validator (#11538)
Browse files Browse the repository at this point in the history
* Add Postgres SQL validator

* Strip line number from message

* Add unit tests

* Run tests only with postgres backend

* Add dep

* Add dep to bashlib
  • Loading branch information
betodealmeida committed Dec 5, 2020
1 parent 2c32342 commit 66cd565
Show file tree
Hide file tree
Showing 13 changed files with 132 additions and 40 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/bashlib.sh
Expand Up @@ -88,6 +88,8 @@ build-instrumented-assets() {
}

setup-postgres() {
say "::group::Install dependency for unit tests"
sudo apt-get update && sudo apt-get install --yes libecpg-dev
say "::group::Initialize database"
psql "postgresql://superset:superset@127.0.0.1:15432/superset" <<-EOF
DROP SCHEMA IF EXISTS sqllab_test_db CASCADE;
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Expand Up @@ -28,6 +28,7 @@ RUN mkdir /app \
default-libmysqlclient-dev \
libpq-dev \
libsasl2-dev \
libecpg-dev \
&& rm -rf /var/lib/apt/lists/*

# First, we just wanna install requirements, which will allow us to utilize the cache
Expand Down
37 changes: 19 additions & 18 deletions requirements/base.txt
Expand Up @@ -6,7 +6,7 @@
# pip-compile-multi
#
-e file:. # via -r requirements/base.in
aiohttp==3.6.2 # via slackclient
aiohttp==3.7.2 # via slackclient
alembic==1.4.3 # via flask-migrate
amqp==2.6.1 # via kombu
apispec[yaml]==3.3.2 # via flask-appbuilder
Expand All @@ -22,9 +22,9 @@ celery==4.4.7 # via apache-superset
cffi==1.14.3 # via cryptography
chardet==3.0.4 # via aiohttp
click==7.1.2 # via apache-superset, flask, flask-appbuilder
colorama==0.4.3 # via apache-superset, flask-appbuilder
colorama==0.4.4 # via apache-superset, flask-appbuilder
contextlib2==0.6.0.post1 # via apache-superset
croniter==0.3.34 # via apache-superset
croniter==0.3.36 # via apache-superset
cryptography==3.2.1 # via apache-superset
decorator==4.4.2 # via retry
defusedxml==0.6.0 # via python3-openid
Expand All @@ -33,7 +33,7 @@ email-validator==1.1.1 # via flask-appbuilder
flask-appbuilder==3.1.1 # via apache-superset
flask-babel==1.0.0 # via flask-appbuilder
flask-caching==1.9.0 # via apache-superset
flask-compress==1.5.0 # via apache-superset
flask-compress==1.8.0 # via apache-superset
flask-jwt-extended==3.24.1 # via flask-appbuilder
flask-login==0.4.1 # via flask-appbuilder
flask-migrate==2.5.3 # via apache-superset
Expand All @@ -45,28 +45,29 @@ flask==1.1.2 # via apache-superset, flask-appbuilder, flask-babel,
geographiclib==1.50 # via geopy
geopy==2.0.0 # via apache-superset
gunicorn==20.0.4 # via apache-superset
humanize==2.6.0 # via apache-superset
humanize==3.1.0 # via apache-superset
idna==2.10 # via email-validator, yarl
importlib-metadata==1.7.0 # via -r requirements/base.in, jsonschema, kombu, markdown
importlib-metadata==1.7.0 # via -r requirements/base.in
isodate==0.6.0 # via apache-superset
itsdangerous==1.1.0 # via flask, flask-wtf
jinja2==2.11.2 # via flask, flask-babel
jsonschema==3.2.0 # via flask-appbuilder
kombu==4.6.11 # via celery
mako==1.1.3 # via alembic
markdown==3.2.2 # via apache-superset
markdown==3.3.3 # via apache-superset
markupsafe==1.1.1 # via jinja2, mako, wtforms
marshmallow-enum==1.5.1 # via flask-appbuilder
marshmallow-sqlalchemy==0.23.1 # via flask-appbuilder
marshmallow==3.8.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy
marshmallow==3.9.0 # via flask-appbuilder, marshmallow-enum, marshmallow-sqlalchemy
msgpack==1.0.0 # via apache-superset
multidict==4.7.6 # via aiohttp, yarl
multidict==5.0.0 # via aiohttp, yarl
natsort==7.0.1 # via croniter
numpy==1.19.2 # via pandas, pyarrow
numpy==1.19.4 # via pandas, pyarrow
packaging==20.4 # via bleach
pandas==1.1.2 # via apache-superset
pandas==1.1.4 # via apache-superset
parsedatetime==2.6 # via apache-superset
pathlib2==2.3.5 # via apache-superset
pgsanity==0.2.9 # via apache-superset
polyline==1.4.0 # via apache-superset
prison==0.1.3 # via flask-appbuilder
py==1.9.0 # via retry
Expand All @@ -76,29 +77,29 @@ pyjwt==1.7.1 # via flask-appbuilder, flask-jwt-extended
pyparsing==2.4.7 # via packaging
pyrsistent==0.16.1 # via -r requirements/base.in, jsonschema
python-dateutil==2.8.1 # via alembic, apache-superset, croniter, flask-appbuilder, pandas
python-dotenv==0.14.0 # via apache-superset
python-dotenv==0.15.0 # via apache-superset
python-editor==1.0.4 # via alembic
python-geohash==0.8.5 # via apache-superset
python3-openid==3.2.0 # via flask-openid
pytz==2020.1 # via babel, celery, flask-babel, pandas
pytz==2020.4 # via babel, celery, flask-babel, pandas
pyyaml==5.3.1 # via apache-superset, apispec
retry==0.9.2 # via apache-superset
selenium==3.141.0 # via apache-superset
simplejson==3.17.2 # via apache-superset
six==1.15.0 # via bleach, cryptography, flask-jwt-extended, flask-talisman, isodate, jsonschema, packaging, pathlib2, polyline, prison, pyrsistent, python-dateutil, sqlalchemy-utils, wtforms-json
slackclient==2.5.0 # via apache-superset
sqlalchemy-utils==0.36.8 # via apache-superset, flask-appbuilder
sqlalchemy==1.3.19 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
sqlalchemy==1.3.20 # via alembic, apache-superset, flask-sqlalchemy, marshmallow-sqlalchemy, sqlalchemy-utils
sqlparse==0.3.0 # via apache-superset
typing-extensions==3.7.4.3 # via yarl
urllib3==1.25.10 # via selenium
typing-extensions==3.7.4.3 # via aiohttp
urllib3==1.25.11 # via selenium
vine==1.3.0 # via amqp, celery
webencodings==0.5.1 # via bleach
werkzeug==1.0.1 # via flask, flask-jwt-extended
wtforms-json==0.3.3 # via apache-superset
wtforms==2.3.3 # via flask-wtf, wtforms-json
yarl==1.5.1 # via aiohttp
zipp==3.2.0 # via importlib-metadata
yarl==1.6.2 # via aiohttp
zipp==3.4.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools
10 changes: 5 additions & 5 deletions requirements/development.txt
Expand Up @@ -7,14 +7,14 @@
#
-r base.txt
-e file:. # via -r requirements/base.in
boto3==1.15.3 # via tabulator
botocore==1.18.3 # via boto3, s3transfer
boto3==1.16.10 # via tabulator
botocore==1.19.10 # via boto3, s3transfer
cached-property==1.5.2 # via tableschema
certifi==2020.6.20 # via requests
et-xmlfile==1.0.1 # via openpyxl
flask-cors==3.0.9 # via -r requirements/development.in
future==0.18.2 # via pyhive
ijson==3.1.1 # via tabulator
ijson==3.1.2.post0 # via tabulator
jdcal==1.4.1 # via openpyxl
jmespath==0.10.0 # via boto3, botocore
jsonlines==1.2.0 # via tabulator
Expand All @@ -29,8 +29,8 @@ requests==2.24.0 # via pydruid, tableschema, tabulator
rfc3986==1.4.0 # via tableschema
s3transfer==0.3.3 # via boto3
sasl==0.2.1 # via pyhive, thrift-sasl
tableschema==1.19.4 # via -r requirements/development.in
tabulator==1.52.3 # via tableschema
tableschema==1.20.0 # via -r requirements/development.in
tabulator==1.52.5 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl
unicodecsv==0.14.1 # via tableschema, tabulator
Expand Down
2 changes: 1 addition & 1 deletion requirements/docker.txt
Expand Up @@ -12,7 +12,7 @@ greenlet==0.4.17 # via gevent
psycopg2-binary==2.8.6 # via -r requirements/docker.in
redis==3.5.3 # via -r requirements/docker.in
zope.event==4.5.0 # via gevent
zope.interface==5.1.0 # via gevent
zope.interface==5.1.2 # via gevent

# The following packages are considered to be unsafe in a requirements file:
# setuptools
12 changes: 5 additions & 7 deletions requirements/integration.txt
Expand Up @@ -10,23 +10,21 @@ cfgv==3.2.0 # via pre-commit
click==7.1.2 # via pip-compile-multi, pip-tools
distlib==0.3.1 # via virtualenv
filelock==3.0.12 # via tox, virtualenv
identify==1.5.4 # via pre-commit
importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv
identify==1.5.9 # via pre-commit
nodeenv==1.5.0 # via pre-commit
packaging==20.4 # via tox
pip-compile-multi==2.1.0 # via -r requirements/integration.in
pip-tools==5.3.1 # via pip-compile-multi
pluggy==0.13.1 # via tox
pre-commit==2.7.1 # via -r requirements/integration.in
pre-commit==2.8.2 # via -r requirements/integration.in
py==1.9.0 # via tox
pyparsing==2.4.7 # via packaging
pyyaml==5.3.1 # via pre-commit
six==1.15.0 # via packaging, pip-tools, tox, virtualenv
toml==0.10.1 # via pre-commit, tox
toml==0.10.2 # via pre-commit, tox
toposort==1.5 # via pip-compile-multi
tox==3.20.0 # via -r requirements/integration.in
virtualenv==20.0.31 # via pre-commit, tox
zipp==3.2.0 # via importlib-metadata
tox==3.20.1 # via -r requirements/integration.in
virtualenv==20.1.0 # via pre-commit, tox

# The following packages are considered to be unsafe in a requirements file:
# pip
8 changes: 4 additions & 4 deletions requirements/testing.txt
Expand Up @@ -15,7 +15,7 @@ coverage==5.3 # via pytest-cov
docker==4.3.1 # via -r requirements/testing.in
flask-testing==0.8.0 # via -r requirements/testing.in
freezegun==1.0.0 # via -r requirements/testing.in
iniconfig==1.0.1 # via pytest
iniconfig==1.1.1 # via pytest
ipdb==0.13.4 # via -r requirements/testing.in
ipython-genutils==0.2.0 # via traitlets
ipython==7.16.1 # via -r requirements/testing.in, ipdb
Expand All @@ -30,14 +30,14 @@ pexpect==4.8.0 # via ipython
pickleshare==0.7.5 # via ipython
prompt-toolkit==3.0.8 # via ipython
ptyprocess==0.6.0 # via pexpect
pygments==2.7.1 # via ipython
pygments==2.7.2 # via ipython
pyhive[hive,presto]==0.6.3 # via -r requirements/development.in, -r requirements/testing.in
pylint==2.6.0 # via -r requirements/testing.in
pytest-cov==2.10.1 # via -r requirements/testing.in
pytest==6.1.1 # via -r requirements/testing.in, pytest-cov
pytest==6.1.2 # via -r requirements/testing.in, pytest-cov
redis==3.5.3 # via -r requirements/testing.in
statsd==3.3.0 # via -r requirements/testing.in
traitlets==5.0.4 # via ipython
traitlets==5.0.5 # via ipython
wcwidth==0.2.5 # via prompt-toolkit
websocket-client==0.57.0 # via docker
wrapt==1.12.1 # via astroid
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,polyline,prison,pyarrow,pyhive,pytest,pytz,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -89,6 +89,7 @@ def get_git_sha():
"pandas>=1.1.2, <1.2",
"parsedatetime",
"pathlib2",
"pgsanity",
"polyline",
"python-dateutil",
"python-dotenv",
Expand Down
5 changes: 4 additions & 1 deletion superset/config.py
Expand Up @@ -890,7 +890,10 @@ class CeleryConfig: # pylint: disable=too-few-public-methods
DEFAULT_RELATIVE_END_TIME = "today"

# Configure which SQL validator to use for each engine
SQL_VALIDATORS_BY_ENGINE = {"presto": "PrestoDBSQLValidator"}
SQL_VALIDATORS_BY_ENGINE = {
"presto": "PrestoDBSQLValidator",
"postgresql": "PostgreSQLValidator",
}

# Do you want Talisman enabled?
TALISMAN_ENABLED = False
Expand Down
7 changes: 5 additions & 2 deletions superset/sql_validators/__init__.py
Expand Up @@ -16,9 +16,12 @@
# under the License.
from typing import Optional, Type

from . import base, presto_db
from . import base, postgres, presto_db
from .base import SQLValidationAnnotation


def get_validator_by_name(name: str) -> Optional[Type[base.BaseSQLValidator]]:
return {"PrestoDBSQLValidator": presto_db.PrestoDBSQLValidator}.get(name)
return {
"PrestoDBSQLValidator": presto_db.PrestoDBSQLValidator,
"PostgreSQLValidator": postgres.PostgreSQLValidator,
}.get(name)
54 changes: 54 additions & 0 deletions superset/sql_validators/postgres.py
@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import re
from typing import List, Optional

from pgsanity.pgsanity import check_string

from superset.models.core import Database
from superset.sql_validators.base import BaseSQLValidator, SQLValidationAnnotation


class PostgreSQLValidator(BaseSQLValidator): # pylint: disable=too-few-public-methods
"""Validate SQL queries using the pgsanity module"""

name = "PostgreSQLValidator"

@classmethod
def validate(
cls, sql: str, schema: Optional[str], database: Database
) -> List[SQLValidationAnnotation]:
annotations: List[SQLValidationAnnotation] = []
valid, error = check_string(sql, add_semicolon=True)
if valid:
return annotations

match = re.match(r"^line (\d+): (.*)", error)
line_number = int(match.group(1)) if match else None
message = match.group(2) if match else error

annotations.append(
SQLValidationAnnotation(
message=message,
line_number=line_number,
start_column=None,
end_column=None,
)
)

return annotations
31 changes: 30 additions & 1 deletion tests/sql_validator_tests.py
Expand Up @@ -15,17 +15,18 @@
# specific language governing permissions and limitations
# under the License.
# isort:skip_file
# pylint: disable=invalid-name, no-self-use
"""Unit tests for Sql Lab"""
import unittest
from unittest.mock import MagicMock, patch

import pytest
from pyhive.exc import DatabaseError

import tests.test_app
from superset import app
from superset.sql_validators import SQLValidationAnnotation
from superset.sql_validators.base import BaseSQLValidator
from superset.sql_validators.postgres import PostgreSQLValidator
from superset.sql_validators.presto_db import (
PrestoDBSQLValidator,
PrestoSQLValidationError,
Expand Down Expand Up @@ -211,5 +212,33 @@ def test_validate_sql_endpoint(self):
self.assertIn("no SQL validator is configured", resp["error"])


class TestPostgreSQLValidator(SupersetTestCase):
def test_valid_syntax(self):
if get_example_database().backend != "postgresql":
return

mock_database = MagicMock()
annotations = PostgreSQLValidator.validate(
sql='SELECT 1, "col" FROM "table"', schema="", database=mock_database
)
assert annotations == []

def test_invalid_syntax(self):
if get_example_database().backend != "postgresql":
return

mock_database = MagicMock()
annotations = PostgreSQLValidator.validate(
sql='SELECT 1, "col"\nFROOM "table"', schema="", database=mock_database
)

assert len(annotations) == 1
annotation = annotations[0]
assert annotation.line_number == 2
assert annotation.start_column is None
assert annotation.end_column is None
assert annotation.message == 'ERROR: syntax error at or near """'


if __name__ == "__main__":
unittest.main()

0 comments on commit 66cd565

Please sign in to comment.