From f864da98e4a2c4e5d3675aad9335ad005001c24b Mon Sep 17 00:00:00 2001 From: Iain <25081046+Iain-S@users.noreply.github.com> Date: Thu, 5 Jan 2023 16:25:22 +0000 Subject: [PATCH 1/5] Add function test case --- tests/examples/dst.dump | 26 ++++++++++ tests/examples/src.dump | 108 +++++++++++++++++++++++++++++++++++++++ tests/test_functional.py | 61 ++++++++++++++++++++++ tests/tmp/.gitignore | 1 + 4 files changed, 196 insertions(+) create mode 100644 tests/examples/dst.dump create mode 100644 tests/examples/src.dump create mode 100644 tests/test_functional.py create mode 100644 tests/tmp/.gitignore diff --git a/tests/examples/dst.dump b/tests/examples/dst.dump new file mode 100644 index 00000000..8e6e6abb --- /dev/null +++ b/tests/examples/dst.dump @@ -0,0 +1,26 @@ +-- +-- PostgreSQL database dump +-- + +-- Dumped from database version 14.2 (Debian 14.2-1.pgdg110+1) +-- Dumped by pg_dump version 14.6 (Homebrew) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: dst; Type: DATABASE; Schema: -; Owner: postgres +-- + +CREATE DATABASE dst WITH TEMPLATE = template0 ENCODING = 'UTF8' LOCALE = 'en_US.utf8'; + + +ALTER DATABASE dst OWNER TO postgres; diff --git a/tests/examples/src.dump b/tests/examples/src.dump new file mode 100644 index 00000000..704a8db1 --- /dev/null +++ b/tests/examples/src.dump @@ -0,0 +1,108 @@ +-- +-- PostgreSQL database dump +-- + +-- Dumped from database version 14.2 (Debian 14.2-1.pgdg110+1) +-- Dumped by pg_dump version 14.6 (Homebrew) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT 
pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: src; Type: DATABASE; Schema: -; Owner: postgres +-- + +CREATE DATABASE src WITH TEMPLATE = template0 ENCODING = 'UTF8' LOCALE = 'en_US.utf8'; + + +ALTER DATABASE src OWNER TO postgres; + +\connect src + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: hospital_visit; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.hospital_visit ( + hospital_visit_id bigint NOT NULL, + person_id integer NOT NULL, + visit_start date NOT NULL, + visit_duration_seconds real NOT NULL, + visit_image bytea NOT NULL +); + + +ALTER TABLE public.hospital_visit OWNER TO postgres; + +-- +-- Name: person; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.person ( + person_id integer NOT NULL, + name text NOT NULL, + research_opt_out boolean NOT NULL, + stored_from timestamp with time zone NOT NULL +); + + +ALTER TABLE public.person OWNER TO postgres; + +-- +-- Data for Name: hospital_visit; Type: TABLE DATA; Schema: public; Owner: postgres +-- + +COPY public.hospital_visit (hospital_visit_id, person_id, visit_start, visit_duration_seconds, visit_image) FROM stdin; +\. + + +-- +-- Data for Name: person; Type: TABLE DATA; Schema: public; Owner: postgres +-- + +COPY public.person (person_id, name, research_opt_out, stored_from) FROM stdin; +\. 
+ + +-- +-- Name: hospital_visit hospital_visit_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.hospital_visit + ADD CONSTRAINT hospital_visit_pkey PRIMARY KEY (hospital_visit_id); + + +-- +-- Name: person person_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.person + ADD CONSTRAINT person_pkey PRIMARY KEY (person_id); + + +-- +-- PostgreSQL database dump complete +-- diff --git a/tests/test_functional.py b/tests/test_functional.py new file mode 100644 index 00000000..cfe57b5f --- /dev/null +++ b/tests/test_functional.py @@ -0,0 +1,61 @@ +"""Tests for the main module.""" +import os +from pathlib import Path +from subprocess import run +from unittest import TestCase, skipUnless + + +@skipUnless( + os.environ.get("FUNCTIONAL_TESTS") == "1", "Set 'FUNCTIONAL_TESTS=1' to enable." +) +class FunctionalTests(TestCase): + """End-to-end tests.""" + + def setUp(self) -> None: + pass + + @staticmethod + def test_workflow() -> None: + """Test the recommended CLI workflow runs without errors.""" + + # Export example databases + # pg_dump -d src -h localhost -U postgres -C > tests/examples/src.dump + # pg_dump -d dst -h localhost -U postgres -C > tests/examples/dst.dump + + # Restore databases + # psql --host localhost --username postgres --file="tests/examples/src.dump" + # psql --host localhost --username postgres --file="tests/examples/dst.dump" + + env = os.environ.copy() + env = { + **env, + "src_host_name": "localhost", + "src_user_name": "postgres", + "src_password": "password", + "src_db_name": "src", + "src_schema": "", + "dst_host_name": "localhost", + "dst_user_name": "postgres", + "dst_password": "password", + "dst_db_name": "dst", + } + + orm_file_path = str(Path("tests/tmp/orm.py")) + with open(orm_file_path, "wb") as file: + run(["sqlsynthgen", "make-tables"], stdout=file, env=env, check=True) + + ssg_file_path = str(Path("tests/tmp/ssg.py")) + with open(ssg_file_path, "wb") as file: + 
run( + ["sqlsynthgen", "make-generators", orm_file_path], + stdout=file, + env=env, + check=True, + ) + + run(["sqlsynthgen", "create-tables", orm_file_path], env=env, check=True) + run( + ["sqlsynthgen", "create-data", orm_file_path, ssg_file_path], + env=env, + check=True, + ) diff --git a/tests/tmp/.gitignore b/tests/tmp/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ b/tests/tmp/.gitignore @@ -0,0 +1 @@ +* From 2278c48ddf56d0c5e2a19ea8469a73ed08f47736 Mon Sep 17 00:00:00 2001 From: Iain <25081046+Iain-S@users.noreply.github.com> Date: Thu, 5 Jan 2023 17:32:50 +0000 Subject: [PATCH 2/5] Run functional test in GitHub action --- .github/workflows/tests.yml | 13 ++++++++++++- tests/examples/dst.dump | 1 + tests/examples/src.dump | 1 + tests/test_functional.py | 19 ++++++++++++++++++- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 421eb704..80f8734a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,6 +12,13 @@ env: jobs: the_job: runs-on: ubuntu-latest + services: + postgres: + image: postgres:15 + env: + POSTGRES_PASSWORD: password + ports: + - 5432:5432 steps: - name: Checkout Code uses: actions/checkout@v3 @@ -40,7 +47,11 @@ jobs: if: steps.poetry-cache.outputs.cache-hit != 'true' run: | python -m poetry install + - name: Create src database + shell: bash + run: | + PGPASSWORD=password psql --host=localhost --username=postgres --file=tests/examples/src.dump - name: Run Unit Tests shell: bash run: | - poetry run python -m unittest discover --verbose tests + FUNCTIONAL_TESTS=1 poetry run python -m unittest discover --verbose tests diff --git a/tests/examples/dst.dump b/tests/examples/dst.dump index 8e6e6abb..98df5804 100644 --- a/tests/examples/dst.dump +++ b/tests/examples/dst.dump @@ -16,6 +16,7 @@ SET xmloption = content; SET client_min_messages = warning; SET row_security = off; +DROP DATABASE dst; -- -- Name: dst; Type: 
DATABASE; Schema: -; Owner: postgres -- diff --git a/tests/examples/src.dump b/tests/examples/src.dump index 704a8db1..4e718f79 100644 --- a/tests/examples/src.dump +++ b/tests/examples/src.dump @@ -16,6 +16,7 @@ SET xmloption = content; SET client_min_messages = warning; SET row_security = off; +DROP DATABASE src; -- -- Name: src; Type: DATABASE; Schema: -; Owner: postgres -- diff --git a/tests/test_functional.py b/tests/test_functional.py index cfe57b5f..9412bd9f 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -12,7 +12,24 @@ class FunctionalTests(TestCase): """End-to-end tests.""" def setUp(self) -> None: - pass + """Pre-test setup.""" + + env = os.environ.copy() + env = {**env, "PGPASSWORD": "password"} + # Clear and re-create the destination database + completed_process = run( + [ + "psql", + "--host=localhost", + "--username=postgres", + "--file=" + str(Path("tests/examples/dst.dump")), + ], + capture_output=True, + env=env, + check=True, + ) + # psql doesn't always return != 0 if it fails + assert completed_process.stderr == b"" @staticmethod def test_workflow() -> None: From a8e733a56278f3d43146b8344d9a2214798f0951 Mon Sep 17 00:00:00 2001 From: Iain <25081046+Iain-S@users.noreply.github.com> Date: Thu, 5 Jan 2023 17:38:29 +0000 Subject: [PATCH 3/5] Run functional test in GitHub action --- tests/examples/dst.dump | 2 +- tests/examples/src.dump | 2 +- tests/test_functional.py | 15 ++++++--------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/examples/dst.dump b/tests/examples/dst.dump index 98df5804..909e2afe 100644 --- a/tests/examples/dst.dump +++ b/tests/examples/dst.dump @@ -16,7 +16,7 @@ SET xmloption = content; SET client_min_messages = warning; SET row_security = off; -DROP DATABASE dst; +DROP DATABASE IF EXISTS dst; -- -- Name: dst; Type: DATABASE; Schema: -; Owner: postgres -- diff --git a/tests/examples/src.dump b/tests/examples/src.dump index 4e718f79..f9a5258b 100644 --- a/tests/examples/src.dump +++ 
b/tests/examples/src.dump @@ -16,7 +16,7 @@ SET xmloption = content; SET client_min_messages = warning; SET row_security = off; -DROP DATABASE src; +DROP DATABASE IF EXISTS src; -- -- Name: src; Type: DATABASE; Schema: -; Owner: postgres -- diff --git a/tests/test_functional.py b/tests/test_functional.py index 9412bd9f..3425875e 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -14,8 +14,12 @@ class FunctionalTests(TestCase): def setUp(self) -> None: """Pre-test setup.""" + # If you need to update src.dump or dst.dump, use + # pg_dump -d src|dst -h localhost -U postgres -C -c > tests/examples/src|dst.dump + env = os.environ.copy() env = {**env, "PGPASSWORD": "password"} + # Clear and re-create the destination database completed_process = run( [ @@ -28,21 +32,14 @@ def setUp(self) -> None: env=env, check=True, ) + # psql doesn't always return != 0 if it fails - assert completed_process.stderr == b"" + assert completed_process.stderr == b"", completed_process.stderr @staticmethod def test_workflow() -> None: """Test the recommended CLI workflow runs without errors.""" - # Export example databases - # pg_dump -d src -h localhost -U postgres -C > tests/examples/src.dump - # pg_dump -d dst -h localhost -U postgres -C > tests/examples/dst.dump - - # Restore databases - # psql --host localhost --username postgres --file="tests/examples/src.dump" - # psql --host localhost --username postgres --file="tests/examples/dst.dump" - env = os.environ.copy() env = { **env, From 2d97027346d9645ba7e26bfe8b341ef6dbf4f9b3 Mon Sep 17 00:00:00 2001 From: Iain <25081046+Iain-S@users.noreply.github.com> Date: Fri, 6 Jan 2023 10:58:08 +0000 Subject: [PATCH 4/5] Clear previous functional test outputs in setUp --- tests/test_functional.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/test_functional.py b/tests/test_functional.py index 3425875e..11e1db0b 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ 
-11,8 +11,13 @@ class FunctionalTests(TestCase): """End-to-end tests.""" + orm_file_path = Path("tests/tmp/orm.py") + ssg_file_path = Path("tests/tmp/ssg.py") + def setUp(self) -> None: """Pre-test setup.""" + self.orm_file_path.unlink(missing_ok=True) + self.ssg_file_path.unlink(missing_ok=True) # If you need to update src.dump or dst.dump, use # pg_dump -d src|dst -h localhost -U postgres -C -c > tests/examples/src|dst.dump @@ -36,8 +41,7 @@ def setUp(self) -> None: # psql doesn't always return != 0 if it fails assert completed_process.stderr == b"", completed_process.stderr - @staticmethod - def test_workflow() -> None: + def test_workflow(self) -> None: """Test the recommended CLI workflow runs without errors.""" env = os.environ.copy() @@ -54,22 +58,20 @@ def test_workflow() -> None: "dst_db_name": "dst", } - orm_file_path = str(Path("tests/tmp/orm.py")) - with open(orm_file_path, "wb") as file: + with open(self.orm_file_path, "wb") as file: run(["sqlsynthgen", "make-tables"], stdout=file, env=env, check=True) - ssg_file_path = str(Path("tests/tmp/ssg.py")) - with open(ssg_file_path, "wb") as file: + with open(self.ssg_file_path, "wb") as file: run( - ["sqlsynthgen", "make-generators", orm_file_path], + ["sqlsynthgen", "make-generators", self.orm_file_path], stdout=file, env=env, check=True, ) - run(["sqlsynthgen", "create-tables", orm_file_path], env=env, check=True) + run(["sqlsynthgen", "create-tables", self.orm_file_path], env=env, check=True) run( - ["sqlsynthgen", "create-data", orm_file_path, ssg_file_path], + ["sqlsynthgen", "create-data", self.orm_file_path, self.ssg_file_path], env=env, check=True, ) From 4453158c1cb048250881c6f367d3fd15a8ce21eb Mon Sep 17 00:00:00 2001 From: Iain <25081046+Iain-S@users.noreply.github.com> Date: Wed, 11 Jan 2023 11:16:39 +0000 Subject: [PATCH 5/5] Add setup and testing instructions --- README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/README.md b/README.md index 
a9d14f1a..c7c5fc89 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,32 @@ # sqlsynthgen Synthetic data for SQL databases + +## Development + +### Setup + +#### Poetry + +1. Install [Poetry](https://python-poetry.org/docs/#installation). +1. Create a Poetry environment and install dependencies with `poetry install`. +1. Activate a Poetry shell with `poetry shell`. + We will assume that this shell is active for the remainder of the instructions. + +#### Pre-Commit + +1. Install [Pre-commit](https://pre-commit.com/#install). +1. Install Pre-commit hooks with `pre-commit install --install-hooks`. + The hooks will run whenever you perform a Git commit. + +### Testing + +You can execute most unit tests by running `python -m unittest discover --verbose tests/` from the root directory. +However, the functional tests require: + +1. A local PostgreSQL server to be running with: + 1. A `postgres` user whose password is `password`. + 1. Databases named `src` and `dst`. + 1. See [tests.yml](.github/workflows/tests.yml) and [test_functional.py](tests/test_functional.py) for more. +1. The `FUNCTIONAL_TESTS` environment variable to be set to `1`. + For example, you could run `FUNCTIONAL_TESTS=1 python -m unittest discover --verbose tests/`.