# Setting up Docker container with test data


#### Spin up postgres Docker container from `great-expectations` repo

* `great_expectations/assets/docker/postgresql` contains a `docker-compose.yml` that can be used to spin up a test postgres container

* Run `docker-compose up` in this directory to spin up the Docker container.


In [3]:
# This helper function will be used to load the Titanic.csv dataset into the Docker container
# that was spun up in the previous step


def load_data_into_test_database(
    table_name: str,
    csv_path: str,
    connection_string: str,
    load_full_dataset: bool = False,
) -> None:
    """Replace a table in the test database with the contents of a CSV file.

    Any existing table called ``table_name`` is dropped, then the CSV at
    ``csv_path`` is read with pandas and written to a fresh table of the
    same name.

    Args:
        table_name: Name of the table to drop and recreate. It is
            interpolated directly into the ``DROP TABLE`` statement, so it
            must come from a trusted source.
        csv_path: Path to the CSV file to load.
        connection_string: SQLAlchemy URL of the target database.
        load_full_dataset: When False (the default) only the first 10 rows
            of the CSV are loaded; when True every row is loaded.

    Raises:
        sqlalchemy.exc.SQLAlchemyError: Re-raised after being logged if the
            database rejects the drop or the load.
    """
    import logging

    import pandas as pd
    import sqlalchemy as sa
    from sqlalchemy.exc import SQLAlchemyError

    logger = logging.getLogger(__name__)

    engine = sa.create_engine(connection_string)
    try:
        # engine.begin() commits the DROP on success and always releases the
        # connection, so a failed connect can no longer leave an unbound
        # name for the cleanup code to trip over.
        with engine.begin() as connection:
            print(f"Dropping table {table_name}")
            # Raw strings are not accepted by Connection.execute in
            # SQLAlchemy 2.0; wrap the statement in text().
            connection.execute(sa.text(f"DROP TABLE IF EXISTS {table_name}"))
        df = pd.read_csv(csv_path)
        if not load_full_dataset:
            # Improving test performance by only loading the first 10 rows of our test data into the db
            df = df.head(10)
        print(f"Creating table {table_name} from {csv_path}")
        df.to_sql(name=table_name, con=engine, index=False)
    except SQLAlchemyError:
        logger.error(
            "Docs integration tests encountered an error while loading test-data into test-database."
        )
        raise
    finally:
        engine.dispose()

In [4]:
# Connection string to the postgres Docker container defined in
# great_expectations/assets/docker/postgresql
# (user `postgres`, empty password, host `localhost`, database `test_ci`).
CONNECTION_STRING = "postgresql+psycopg2://postgres:@localhost/test_ci"

In [5]:
# Load every row of ../data/Titanic.csv into a table named `titanic`
# in the database addressed by CONNECTION_STRING.
load_data_into_test_database(
    table_name="titanic",
    csv_path="../data/Titanic.csv",
    connection_string=CONNECTION_STRING,
    load_full_dataset=True,
)

Dropping table titanic
Creating table titanic from ../data/Titanic.csv
