From 101e5b670bbe0a0f5aff5e8683ddee4444cba13e Mon Sep 17 00:00:00 2001 From: Bogdan Date: Mon, 10 Aug 2020 11:22:38 -0700 Subject: [PATCH] chore: clean up csv tests (#10556) * Clean up csv tests * Update tests/base_tests.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> * Update tests/base_tests.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> * import optional * Fix mypy error Co-authored-by: bogdan kyryliuk Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- superset/tasks/slack_util.py | 2 +- superset/utils/core.py | 4 + tests/base_tests.py | 41 +++-- tests/core_tests.py | 216 -------------------------- tests/csv_upload_tests.py | 286 +++++++++++++++++++++++++++++++++++ 5 files changed, 319 insertions(+), 230 deletions(-) create mode 100644 tests/csv_upload_tests.py diff --git a/superset/tasks/slack_util.py b/superset/tasks/slack_util.py index ef647ebd2a5a..09d5ca04b2cb 100644 --- a/superset/tasks/slack_util.py +++ b/superset/tasks/slack_util.py @@ -26,7 +26,7 @@ from superset import app # Globals -config = app.config # type: ignore +config = app.config logger = logging.getLogger("tasks.slack_util") diff --git a/superset/utils/core.py b/superset/utils/core.py index 5f11aab6c073..14ec121de4c2 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1029,6 +1029,10 @@ def get_main_database() -> "Database": return get_or_create_db("main", db_uri) +def backend() -> str: + return get_example_database().backend + + def is_adhoc_metric(metric: Metric) -> bool: return bool( isinstance(metric, dict) diff --git a/tests/base_tests.py b/tests/base_tests.py index e0a20a4a7720..8f708a5df6dd 100644 --- a/tests/base_tests.py +++ b/tests/base_tests.py @@ -18,7 +18,7 @@ """Unit tests for Superset""" import imp import json -from typing import Any, Dict, Union, List +from typing import Any, Dict, Union, List, Optional from unittest.mock import Mock, patch import pandas as pd @@ -44,6 
+44,31 @@ FAKE_DB_NAME = "fake_db_100" +def login(client: Any, username: str = "admin", password: str = "general"): + resp = get_resp(client, "/login/", data=dict(username=username, password=password)) + assert "User confirmation needed" not in resp + + +def get_resp( + client: Any, + url: str, + data: Any = None, + follow_redirects: bool = True, + raise_on_error: bool = True, + json_: Optional[str] = None, +): + """Shortcut to get the parsed results while following redirects""" + if data: + resp = client.post(url, data=data, follow_redirects=follow_redirects) + elif json_: + resp = client.post(url, json=json_, follow_redirects=follow_redirects) + else: + resp = client.get(url, follow_redirects=follow_redirects) + if raise_on_error and resp.status_code > 400: + raise Exception("http request failed with code {}".format(resp.status_code)) + return resp.data.decode("utf-8") + + class SupersetTestCase(TestCase): default_schema_backend_map = { @@ -145,8 +170,7 @@ def get_or_create(self, cls, criteria, session, **kwargs): return obj def login(self, username="admin", password="general"): - resp = self.get_resp("/login/", data=dict(username=username, password=password)) - self.assertNotIn("User confirmation needed", resp) + return login(self.client, username, password) def get_slice( self, slice_name: str, session: Session, expunge_from_session: bool = True @@ -189,16 +213,7 @@ def get_datasource_mock() -> BaseDatasource: def get_resp( self, url, data=None, follow_redirects=True, raise_on_error=True, json_=None ): - """Shortcut to get the parsed results while following redirects""" - if data: - resp = self.client.post(url, data=data, follow_redirects=follow_redirects) - elif json_: - resp = self.client.post(url, json=json_, follow_redirects=follow_redirects) - else: - resp = self.client.get(url, follow_redirects=follow_redirects) - if raise_on_error and resp.status_code > 400: - raise Exception("http request failed with code {}".format(resp.status_code)) - return 
resp.data.decode("utf-8") + return get_resp(self.client, url, data, follow_redirects, raise_on_error, json_) def get_json_resp( self, url, data=None, follow_redirects=True, raise_on_error=True, json_=None diff --git a/tests/core_tests.py b/tests/core_tests.py index ade009502e79..d179ac3df43b 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -845,222 +845,6 @@ def enable_csv_upload(self, database: models.Database) -> None: form_get = self.get_resp("/csvtodatabaseview/form") self.assertIn("CSV to Database configuration", form_get) - def upload_csv( - self, filename: str, table_name: str, extra: Optional[Dict[str, str]] = None - ): - form_data = { - "csv_file": open(filename, "rb"), - "sep": ",", - "name": table_name, - "con": utils.get_example_database().id, - "if_exists": "fail", - "index_label": "test_label", - "mangle_dupe_cols": False, - } - if extra: - form_data.update(extra) - return self.get_resp("/csvtodatabaseview/form", data=form_data) - - def upload_excel( - self, filename: str, table_name: str, extra: Optional[Dict[str, str]] = None - ): - form_data = { - "excel_file": open(filename, "rb"), - "name": table_name, - "con": utils.get_example_database().id, - "sheet_name": "Sheet1", - "if_exists": "fail", - "index_label": "test_label", - "mangle_dupe_cols": False, - } - if extra: - form_data.update(extra) - return self.get_resp("/exceltodatabaseview/form", data=form_data) - - @mock.patch( - "superset.models.core.config", - {**app.config, "ALLOWED_USER_CSV_SCHEMA_FUNC": lambda d, u: ["admin_database"]}, - ) - def test_import_csv_enforced_schema(self): - if utils.get_example_database().backend == "sqlite": - # sqlite doesn't support schema / database creation - return - self.login(username="admin") - table_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) - full_table_name = f"admin_database.{table_name}" - filename = "testCSV.csv" - self.create_sample_csvfile(filename, ["a,b", "john,1", "paul,2"]) - try: - 
self.enable_csv_upload(utils.get_example_database()) - - # no schema specified, fail upload - resp = self.upload_csv(filename, table_name) - self.assertIn( - 'Database "examples" schema "None" is not allowed for csv uploads', resp - ) - - # user specified schema matches the expected schema, append - success_msg = f'CSV file "{filename}" uploaded to table "{full_table_name}"' - resp = self.upload_csv( - filename, - table_name, - extra={"schema": "admin_database", "if_exists": "append"}, - ) - self.assertIn(success_msg, resp) - - resp = self.upload_csv( - filename, - table_name, - extra={"schema": "admin_database", "if_exists": "replace"}, - ) - self.assertIn(success_msg, resp) - - # user specified schema doesn't match, fail - resp = self.upload_csv(filename, table_name, extra={"schema": "gold"}) - self.assertIn( - 'Database "examples" schema "gold" is not allowed for csv uploads', - resp, - ) - finally: - os.remove(filename) - - def test_import_csv_explore_database(self): - if utils.get_example_database().backend == "sqlite": - # sqlite doesn't support schema / database creation - return - explore_db_id = utils.get_example_database().id - - upload_db = utils.get_or_create_db( - "csv_explore_db", app.config["SQLALCHEMY_EXAMPLES_URI"] - ) - upload_db_id = upload_db.id - extra = upload_db.get_extra() - extra["explore_database_id"] = explore_db_id - upload_db.extra = json.dumps(extra) - db.session.commit() - - self.login(username="admin") - self.enable_csv_upload(DatasetDAO.get_database_by_id(upload_db_id)) - table_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) - - f = "testCSV.csv" - self.create_sample_csvfile(f, ["a,b", "john,1", "paul,2"]) - # initial upload with fail mode - resp = self.upload_csv(f, table_name) - self.assertIn(f'CSV file "{f}" uploaded to table "{table_name}"', resp) - table = self.get_table_by_name(table_name) - self.assertEqual(table.database_id, explore_db_id) - - # cleanup - db.session.delete(table) - 
db.session.delete(DatasetDAO.get_database_by_id(upload_db_id)) - db.session.commit() - os.remove(f) - - def test_import_csv(self): - self.login(username="admin") - examples_db = utils.get_example_database() - table_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) - - f1 = "testCSV.csv" - self.create_sample_csvfile(f1, ["a,b", "john,1", "paul,2"]) - f2 = "testCSV2.csv" - self.create_sample_csvfile(f2, ["b,c,d", "john,1,x", "paul,2,"]) - self.enable_csv_upload(examples_db) - - try: - success_msg_f1 = f'CSV file "{f1}" uploaded to table "{table_name}"' - - # initial upload with fail mode - resp = self.upload_csv(f1, table_name) - self.assertIn(success_msg_f1, resp) - - # upload again with fail mode; should fail - fail_msg = f'Unable to upload CSV file "{f1}" to table "{table_name}"' - resp = self.upload_csv(f1, table_name) - self.assertIn(fail_msg, resp) - - # upload again with append mode - resp = self.upload_csv(f1, table_name, extra={"if_exists": "append"}) - self.assertIn(success_msg_f1, resp) - - # upload again with replace mode - resp = self.upload_csv(f1, table_name, extra={"if_exists": "replace"}) - self.assertIn(success_msg_f1, resp) - - # try to append to table from file with different schema - resp = self.upload_csv(f2, table_name, extra={"if_exists": "append"}) - fail_msg_f2 = f'Unable to upload CSV file "{f2}" to table "{table_name}"' - self.assertIn(fail_msg_f2, resp) - - # replace table from file with different schema - resp = self.upload_csv(f2, table_name, extra={"if_exists": "replace"}) - success_msg_f2 = f'CSV file "{f2}" uploaded to table "{table_name}"' - self.assertIn(success_msg_f2, resp) - - table = self.get_table_by_name(table_name) - # make sure the new column name is reflected in the table metadata - self.assertIn("d", table.column_names) - - # null values are set - self.upload_csv( - f2, - table_name, - extra={"null_values": '["", "john"]', "if_exists": "replace"}, - ) - # make sure that john and empty string are 
replaced with None - engine = examples_db.get_sqla_engine() - data = engine.execute(f"SELECT * from {table_name}").fetchall() - assert data == [(None, 1, "x"), ("paul", 2, None)] - - # default null values - self.upload_csv(f2, table_name, extra={"if_exists": "replace"}) - # make sure that john and empty string are replaced with None - data = engine.execute(f"SELECT * from {table_name}").fetchall() - assert data == [("john", 1, "x"), ("paul", 2, None)] - - finally: - os.remove(f1) - os.remove(f2) - - def test_import_excel(self): - self.login(username="admin") - table_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) - f1 = "testExcel.xlsx" - self.create_sample_excelfile(f1, {"a": ["john", "paul"], "b": [1, 2]}) - self.enable_csv_upload(utils.get_example_database()) - - try: - success_msg_f1 = f'Excel file "{f1}" uploaded to table "{table_name}"' - - # initial upload with fail mode - resp = self.upload_excel(f1, table_name) - self.assertIn(success_msg_f1, resp) - - # upload again with fail mode; should fail - fail_msg = f'Unable to upload Excel file "{f1}" to table "{table_name}"' - resp = self.upload_excel(f1, table_name) - self.assertIn(fail_msg, resp) - - # upload again with append mode - resp = self.upload_excel(f1, table_name, extra={"if_exists": "append"}) - self.assertIn(success_msg_f1, resp) - - # upload again with replace mode - resp = self.upload_excel(f1, table_name, extra={"if_exists": "replace"}) - self.assertIn(success_msg_f1, resp) - - # make sure that john and empty string are replaced with None - data = ( - utils.get_example_database() - .get_sqla_engine() - .execute(f"SELECT * from {table_name}") - .fetchall() - ) - assert data == [(0, "john", 1), (1, "paul", 2)] - finally: - os.remove(f1) - def test_dataframe_timezone(self): tz = pytz.FixedOffset(60) data = [ diff --git a/tests/csv_upload_tests.py b/tests/csv_upload_tests.py new file mode 100644 index 000000000000..51b6474e6bf1 --- /dev/null +++ b/tests/csv_upload_tests.py @@ 
-0,0 +1,286 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# isort:skip_file +"""Unit tests for Superset CSV upload""" +import json +import logging +import os +from typing import Dict, Optional + +import random +import string +from unittest import mock + +import pandas as pd +import pytest + +from tests.test_app import app # isort:skip +from superset import db +from superset.models.core import Database +from superset.utils import core as utils +from tests.base_tests import get_resp, login, SupersetTestCase + +logger = logging.getLogger(__name__) + + +test_client = app.test_client() + +CSV_UPLOAD_DATABASE = "csv_explore_db" +CSV_FILENAME1 = "testCSV1.csv" +CSV_FILENAME2 = "testCSV2.csv" +EXCEL_FILENAME = "testExcel.xlsx" + +EXCEL_UPLOAD_TABLE = "excel_upload" +CSV_UPLOAD_TABLE = "csv_upload" +CSV_UPLOAD_TABLE_W_SCHEMA = "csv_upload_w_schema" +CSV_UPLOAD_TABLE_W_EXPLORE = "csv_upload_w_explore" + + +@pytest.fixture(scope="module") +def setup_csv_upload(): + with app.app_context(): + login(test_client, username="admin") + + upload_db = utils.get_or_create_db( + CSV_UPLOAD_DATABASE, app.config["SQLALCHEMY_EXAMPLES_URI"] + ) + extra = upload_db.get_extra() + extra["explore_database_id"] = utils.get_example_database().id + 
upload_db.extra = json.dumps(extra) + upload_db.allow_csv_upload = True + db.session.commit() + + yield + + upload_db = get_upload_db() + engine = upload_db.get_sqla_engine() + engine.execute(f"DROP TABLE IF EXISTS {EXCEL_UPLOAD_TABLE}") + engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE}") + engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE_W_SCHEMA}") + engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE_W_EXPLORE}") + db.session.delete(upload_db) + db.session.commit() + + +@pytest.fixture(scope="module") +def create_csv_files(): + with open(CSV_FILENAME1, "w+") as test_file: + for line in ["a,b", "john,1", "paul,2"]: + test_file.write(f"{line}\n") + + with open(CSV_FILENAME2, "w+") as test_file: + for line in ["b,c,d", "john,1,x", "paul,2,"]: + test_file.write(f"{line}\n") + yield + os.remove(CSV_FILENAME1) + os.remove(CSV_FILENAME2) + + +@pytest.fixture() +def create_excel_files(): + pd.DataFrame({"a": ["john", "paul"], "b": [1, 2]}).to_excel(EXCEL_FILENAME) + yield + os.remove(EXCEL_FILENAME) + + +def get_upload_db(): + return db.session.query(Database).filter_by(database_name=CSV_UPLOAD_DATABASE).one() + + +def upload_csv(filename: str, table_name: str, extra: Optional[Dict[str, str]] = None): + csv_upload_db_id = get_upload_db().id + form_data = { + "csv_file": open(filename, "rb"), + "sep": ",", + "name": table_name, + "con": csv_upload_db_id, + "if_exists": "fail", + "index_label": "test_label", + "mangle_dupe_cols": False, + } + if extra: + form_data.update(extra) + return get_resp(test_client, "/csvtodatabaseview/form", data=form_data) + + +def upload_excel( + filename: str, table_name: str, extra: Optional[Dict[str, str]] = None +): + form_data = { + "excel_file": open(filename, "rb"), + "name": table_name, + "con": get_upload_db().id, + "sheet_name": "Sheet1", + "if_exists": "fail", + "index_label": "test_label", + "mangle_dupe_cols": False, + } + if extra: + form_data.update(extra) + return get_resp(test_client, 
"/exceltodatabaseview/form", data=form_data) + + +@mock.patch( + "superset.models.core.config", + {**app.config, "ALLOWED_USER_CSV_SCHEMA_FUNC": lambda d, u: ["admin_database"]}, +) +def test_import_csv_enforced_schema(setup_csv_upload, create_csv_files): + if utils.backend() == "sqlite": + pytest.skip("Sqlite doesn't support schema / database creation") + + full_table_name = f"admin_database.{CSV_UPLOAD_TABLE_W_SCHEMA}" + + # no schema specified, fail upload + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA) + assert ( + f'Database "{CSV_UPLOAD_DATABASE}" schema "None" is not allowed for csv uploads' + in resp + ) + + # user specified schema matches the expected schema, append + success_msg = f'CSV file "{CSV_FILENAME1}" uploaded to table "{full_table_name}"' + resp = upload_csv( + CSV_FILENAME1, + CSV_UPLOAD_TABLE_W_SCHEMA, + extra={"schema": "admin_database", "if_exists": "append"}, + ) + assert success_msg in resp + resp = upload_csv( + CSV_FILENAME1, + CSV_UPLOAD_TABLE_W_SCHEMA, + extra={"schema": "admin_database", "if_exists": "replace"}, + ) + assert success_msg in resp + + # user specified schema doesn't match, fail + resp = upload_csv( + CSV_FILENAME1, CSV_UPLOAD_TABLE_W_SCHEMA, extra={"schema": "gold"} + ) + assert ( + f'Database "{CSV_UPLOAD_DATABASE}" schema "gold" is not allowed for csv uploads' + in resp + ) + + +def test_import_csv_explore_database(setup_csv_upload, create_csv_files): + if utils.backend() == "sqlite": + pytest.skip("Sqlite doesn't support schema / database creation") + + # initial upload with fail mode + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE_W_EXPLORE) + assert ( + f'CSV file "{CSV_FILENAME1}" uploaded to table "{CSV_UPLOAD_TABLE_W_EXPLORE}"' + in resp + ) + table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE_W_EXPLORE) + assert table.database_id == utils.get_example_database().id + + +def test_import_csv(setup_csv_upload, create_csv_files): + success_msg_f1 = ( + f'CSV file "{CSV_FILENAME1}" uploaded 
to table "{CSV_UPLOAD_TABLE}"' + ) + + # initial upload with fail mode + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE) + assert success_msg_f1 in resp + + # upload again with fail mode; should fail + fail_msg = ( + f'Unable to upload CSV file "{CSV_FILENAME1}" to table "{CSV_UPLOAD_TABLE}"' + ) + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE) + assert fail_msg in resp + + # upload again with append mode + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}) + assert success_msg_f1 in resp + + # upload again with replace mode + resp = upload_csv(CSV_FILENAME1, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}) + assert success_msg_f1 in resp + + # try to append to table from file with different schema + resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "append"}) + fail_msg_f2 = ( + f'Unable to upload CSV file "{CSV_FILENAME2}" to table "{CSV_UPLOAD_TABLE}"' + ) + assert fail_msg_f2 in resp + + # replace table from file with different schema + resp = upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}) + success_msg_f2 = ( + f'CSV file "{CSV_FILENAME2}" uploaded to table "{CSV_UPLOAD_TABLE}"' + ) + assert success_msg_f2 in resp + + table = SupersetTestCase.get_table_by_name(CSV_UPLOAD_TABLE) + # make sure the new column name is reflected in the table metadata + assert "d" in table.column_names + + # null values are set + upload_csv( + CSV_FILENAME2, + CSV_UPLOAD_TABLE, + extra={"null_values": '["", "john"]', "if_exists": "replace"}, + ) + # make sure that john and empty string are replaced with None + engine = get_upload_db().get_sqla_engine() + data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall() + assert data == [(None, 1, "x"), ("paul", 2, None)] + + # default null values + upload_csv(CSV_FILENAME2, CSV_UPLOAD_TABLE, extra={"if_exists": "replace"}) + # make sure that only the empty string is replaced with None; john is kept + data = engine.execute(f"SELECT * from
{CSV_UPLOAD_TABLE}").fetchall() + assert data == [("john", 1, "x"), ("paul", 2, None)] + + +def test_import_excel(setup_csv_upload, create_excel_files): + success_msg = ( + f'Excel file "{EXCEL_FILENAME}" uploaded to table "{EXCEL_UPLOAD_TABLE}"' + ) + + # initial upload with fail mode + resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE) + assert success_msg in resp + + # upload again with fail mode; should fail + fail_msg = f'Unable to upload Excel file "{EXCEL_FILENAME}" to table "{EXCEL_UPLOAD_TABLE}"' + resp = upload_excel(EXCEL_FILENAME, EXCEL_UPLOAD_TABLE) + assert fail_msg in resp + + # upload again with append mode + resp = upload_excel( + EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "append"} + ) + assert success_msg in resp + + # upload again with replace mode + resp = upload_excel( + EXCEL_FILENAME, EXCEL_UPLOAD_TABLE, extra={"if_exists": "replace"} + ) + assert success_msg in resp + + # make sure the table contains exactly the rows from the uploaded Excel file + data = ( + get_upload_db() + .get_sqla_engine() + .execute(f"SELECT * from {EXCEL_UPLOAD_TABLE}") + .fetchall() + ) + assert data == [(0, "john", 1), (1, "paul", 2)]