Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MAINTENANCE] Small refactor for tests that allows DB setup to be done from all tests #4012

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/conftest.py
Expand Up @@ -54,7 +54,7 @@
get_sqlite_connection_url,
)
from great_expectations.util import is_library_loadable
from tests.test_utils import create_files_in_directory
from tests.test_utils import create_files_in_directory, load_data_into_test_database

yaml = YAML()
###
Expand Down
Expand Up @@ -9,9 +9,9 @@
CONNECTION_STRING = f"mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -9,9 +9,9 @@
CONNECTION_STRING = f"mssql+pyodbc://sa:ReallyStrongPwd1234%^&*@{db_hostname}:1433/test_ci?driver=ODBC Driver 17 for SQL Server&charset=utf8&autocommit=true"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -6,9 +6,9 @@
CONNECTION_STRING = "mysql+pymysql://root@localhost/test_ci"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -6,9 +6,9 @@
CONNECTION_STRING = "mysql+pymysql://root@localhost/test_ci"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -2,13 +2,12 @@

import great_expectations as ge
from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
from tests.test_utils import load_data_into_test_database

CONNECTION_STRING = "postgresql+psycopg2://postgres:@localhost/test_ci"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -6,9 +6,9 @@
CONNECTION_STRING = "postgresql+psycopg2://postgres:@localhost/test_ci"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down
Expand Up @@ -15,9 +15,9 @@
CONNECTION_STRING = f"postgresql+psycopg2://{redshift_username}:{redshift_password}@{redshift_host}:{redshift_port}/{redshift_database}?sslmode={redshift_sslmode}"

# This utility is not for general use. It is only to support testing.
from util import load_data_into_database
from tests.test_utils import load_data_into_test_database

load_data_into_database(
load_data_into_test_database(
table_name="taxi_data",
csv_path="./data/yellow_tripdata_sample_2019-01.csv",
connection_string=CONNECTION_STRING,
Expand Down

This file was deleted.

35 changes: 18 additions & 17 deletions tests/integration/test_script_runner.py
Expand Up @@ -104,25 +104,21 @@ class BackendDependencies(enum.Enum):
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_choose_which_dataconnector_to_use.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/dataconnector_docs",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

util_script is not needed for data_connector tests

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🙇🏻

},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_configure_an_inferredassetdataconnector.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/dataconnector_docs",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_configure_a_configuredassetdataconnector.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/dataconnector_docs",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_configure_a_runtimedataconnector.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/dataconnector_docs",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
},
{
"name": "rule_base_profiler_multi_batch_example",
Expand Down Expand Up @@ -193,66 +189,70 @@ class BackendDependencies(enum.Enum):
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/postgres_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.POSTGRESQL,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/postgres_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.POSTGRESQL,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/sqlite_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/sqlite/",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.SQLALCHEMY,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/sqlite_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/sqlite/",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.SQLALCHEMY,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_introspect_and_partition_your_data/sql_database/yaml_example_gradual.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/sqlite/",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.SQLALCHEMY,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/how_to_introspect_and_partition_your_data/sql_database/yaml_example_complete.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/sqlite/",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.SQLALCHEMY,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/mssql_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.MSSQL,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/mssql_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.MSSQL,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/mysql_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.MYSQL,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/mysql_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.MYSQL,
},
]
Expand All @@ -264,15 +264,15 @@ class BackendDependencies(enum.Enum):
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/snowflake_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"extra_backend_dependencies": BackendDependencies.SNOWFLAKE,
"util_script": "tests/test_utils.py",
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/snowflake_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"extra_backend_dependencies": BackendDependencies.SNOWFLAKE,
"util_script": "tests/test_utils.py",
},
]

Expand Down Expand Up @@ -343,14 +343,14 @@ class BackendDependencies(enum.Enum):
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/bigquery_yaml_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.BIGQUERY,
},
{
"user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/bigquery_python_example.py",
"data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
"data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
"util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
"util_script": "tests/test_utils.py",
"extra_backend_dependencies": BackendDependencies.BIGQUERY,
},
{
Expand Down Expand Up @@ -448,15 +448,15 @@ class BackendDependencies(enum.Enum):
# "user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/redshift_python_example.py",
# "data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
# "data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
# "util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
# "extra_backend_dependencies": BackendDependencies.REDSHIFT,
# "util_script": "tests/test_utils.py",
# },
# {
# "user_flow_script": "tests/integration/docusaurus/connecting_to_your_data/database/redshift_yaml_example.py",
# "data_context_dir": "tests/integration/fixtures/no_datasources/great_expectations",
# "data_dir": "tests/test_sets/taxi_yellow_tripdata_samples/first_3_files",
# "util_script": "tests/integration/docusaurus/connecting_to_your_data/database/util.py",
# "extra_backend_dependencies": BackendDependencies.REDSHIFT,
# "util_script": "tests/test_utils.py",
# },
]

Expand Down Expand Up @@ -613,7 +613,8 @@ def _execute_integration_test(test_configuration, tmp_path):
base_dir,
test_configuration.get("util_script"),
)
util_script_path = os.path.join(tmp_path, "util.py")
os.makedirs(os.path.join(tmp_path, "tests/"))
util_script_path = os.path.join(tmp_path, "tests/test_utils.py")
shutil.copyfile(script_source, util_script_path)

# Check initial state
Expand Down
55 changes: 55 additions & 0 deletions tests/test_utils.py
Expand Up @@ -7,6 +7,21 @@
import pandas as pd
import pytest

logger = logging.getLogger(__name__)

try:
import sqlalchemy as sa
from sqlalchemy.exc import SQLAlchemyError

except ImportError:
logger.debug(
"Unable to load SqlAlchemy context; install optional sqlalchemy dependency for support"
)
sa = None
reflection = None
Table = None
Select = None

from great_expectations.data_context.store import CheckpointStore, StoreBackend
from great_expectations.data_context.store.util import (
build_checkpoint_store_using_store_backend,
Expand Down Expand Up @@ -380,3 +395,43 @@ def delete_config_from_filesystem(
store_backend=store_backend_obj,
configuration_key=configuration_key,
)


def load_data_into_test_database(
    table_name: str,
    csv_path: str,
    connection_string: str,
    load_full_dataset: bool = False,
) -> None:
    """
    Load test data from a CSV file into a database reachable through SqlAlchemy.

    This includes local Dockerized DBs like postgres, but also cloud-dbs like
    BigQuery and Redshift. Any existing table named ``table_name`` is dropped
    and then recreated from the CSV contents.

    Args:
        table_name: name of the table to (re)create.
        csv_path: path of the CSV file to read with pandas.
        connection_string: SqlAlchemy URL used to create the engine.
        load_full_dataset: when False (the default) only the first 10 rows of
            the CSV are loaded; when True every row is loaded.

    Returns:
        None. If SqlAlchemy could not be imported, a debug message is logged
        and nothing is loaded.

    Raises:
        SQLAlchemyError: re-raised, after logging, when connecting, dropping
            the table, or writing the data fails.
    """
    import pandas as pd

    if not sa:
        logger.debug(
            "Attempting to load data in to tests SqlAlchemy database, but unable to load SqlAlchemy context; "
            "install optional sqlalchemy dependency for support."
        )
        return

    engine = sa.create_engine(connection_string)
    try:
        # The context manager closes the connection on every exit path. Unlike
        # an unconditional ``connection.close()`` in ``finally``, it cannot
        # fail with an unbound name when ``engine.connect()`` itself raises.
        with engine.connect() as connection:
            print(f"Dropping table {table_name}")
            # ``text()`` is the supported way to execute a literal SQL string.
            connection.execute(sa.text(f"DROP TABLE IF EXISTS {table_name}"))
            df = pd.read_csv(csv_path)
            if not load_full_dataset:
                # Improving test performance by only loading the first 10 rows of our test data into the db
                df = df.head(10)
            print(f"Creating table {table_name} from {csv_path}")
            df.to_sql(name=table_name, con=engine, index=False)
    except SQLAlchemyError:
        logger.error(
            "Docs integration tests encountered an error while loading test-data into test-database."
        )
        raise
    finally:
        # Always release pooled connections, including after a failed connect.
        engine.dispose()