Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ env:
jobs:
the_job:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:15
env:
POSTGRES_PASSWORD: password
ports:
- 5432:5432
steps:
- name: Checkout Code
uses: actions/checkout@v3
Expand Down Expand Up @@ -40,7 +47,11 @@ jobs:
if: steps.poetry-cache.outputs.cache-hit != 'true'
run: |
python -m poetry install
- name: Create src database
shell: bash
run: |
PGPASSWORD=password psql --host=localhost --username=postgres --file=tests/examples/src.dump
- name: Run Unit Tests
shell: bash
run: |
poetry run python -m unittest discover --verbose tests
FUNCTIONAL_TESTS=1 poetry run python -m unittest discover --verbose tests
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
# sqlsynthgen

Synthetic data for SQL databases

## Development

### Setup

### Poetry

1. Install [Poetry](https://python-poetry.org/docs/#installation).
1. Create a Poetry environment and install dependencies with `poetry install`.
1. Activate a Poetry shell with `poetry shell`.
We will assume that this shell is active for the remainder of the instructions.

### Pre-Commit

1. Install [Pre-commit](https://pre-commit.com/#install).
1. Install Pre-commit hooks with `pre-commit install --install-hooks`.
The hooks will run whenever you perform a Git commit.

### Testing

You can execute most unit tests by running `python -m unittest discover --verbose tests/` from the root directory.
However, the functional tests require:

1. A local PostgreSQL server to be running with:
1. A `postgres` user whose password is `password`.
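1. Databases named `src` and `dst`. You can create `src` by loading its dump, e.g. `PGPASSWORD=password psql --host=localhost --username=postgres --file=tests/examples/src.dump`; the functional tests drop and re-create `dst` from `tests/examples/dst.dump` before each test.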
1. See [tests.yml](.github/workflows/tests.yml) and [test_functional.py](tests/test_functional.py) for more.
1. The `FUNCTIONAL_TESTS` environment variable to be set to `1`.
For example, you could run `FUNCTIONAL_TESTS=1 python -m unittest discover --verbose tests/`.
27 changes: 27 additions & 0 deletions tests/examples/dst.dump
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
--
-- PostgreSQL database dump
--

-- Dumped from database version 14.2 (Debian 14.2-1.pgdg110+1)
-- Dumped by pg_dump version 14.6 (Homebrew)

SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

DROP DATABASE IF EXISTS dst;
--
-- Name: dst; Type: DATABASE; Schema: -; Owner: postgres
--

CREATE DATABASE dst WITH TEMPLATE = template0 ENCODING = 'UTF8' LOCALE = 'en_US.utf8';


ALTER DATABASE dst OWNER TO postgres;
109 changes: 109 additions & 0 deletions tests/examples/src.dump
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
--
-- PostgreSQL database dump
--

-- Dumped from database version 14.2 (Debian 14.2-1.pgdg110+1)
-- Dumped by pg_dump version 14.6 (Homebrew)

SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

DROP DATABASE IF EXISTS src;
--
-- Name: src; Type: DATABASE; Schema: -; Owner: postgres
--

CREATE DATABASE src WITH TEMPLATE = template0 ENCODING = 'UTF8' LOCALE = 'en_US.utf8';


ALTER DATABASE src OWNER TO postgres;

\connect src

SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

SET default_tablespace = '';

SET default_table_access_method = heap;

--
-- Name: hospital_visit; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.hospital_visit (
hospital_visit_id bigint NOT NULL,
person_id integer NOT NULL,
visit_start date NOT NULL,
visit_duration_seconds real NOT NULL,
visit_image bytea NOT NULL
);


ALTER TABLE public.hospital_visit OWNER TO postgres;

--
-- Name: person; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.person (
person_id integer NOT NULL,
name text NOT NULL,
research_opt_out boolean NOT NULL,
stored_from timestamp with time zone NOT NULL
);


ALTER TABLE public.person OWNER TO postgres;

--
-- Data for Name: hospital_visit; Type: TABLE DATA; Schema: public; Owner: postgres
--

COPY public.hospital_visit (hospital_visit_id, person_id, visit_start, visit_duration_seconds, visit_image) FROM stdin;
\.


--
-- Data for Name: person; Type: TABLE DATA; Schema: public; Owner: postgres
--

COPY public.person (person_id, name, research_opt_out, stored_from) FROM stdin;
\.


--
-- Name: hospital_visit hospital_visit_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres
--

ALTER TABLE ONLY public.hospital_visit
ADD CONSTRAINT hospital_visit_pkey PRIMARY KEY (hospital_visit_id);


--
-- Name: person person_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres
--

ALTER TABLE ONLY public.person
ADD CONSTRAINT person_pkey PRIMARY KEY (person_id);


--
-- PostgreSQL database dump complete
--
77 changes: 77 additions & 0 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""End-to-end functional tests for the sqlsynthgen command-line workflow."""
import os
from pathlib import Path
from subprocess import run
from unittest import TestCase, skipUnless


@skipUnless(
    os.environ.get("FUNCTIONAL_TESTS") == "1", "Set 'FUNCTIONAL_TESTS=1' to enable."
)
class FunctionalTests(TestCase):
    """End-to-end tests that drive the ``sqlsynthgen`` CLI against PostgreSQL.

    The whole class is skipped unless the ``FUNCTIONAL_TESTS`` environment
    variable equals ``"1"``.  The tests shell out to ``psql`` and
    ``sqlsynthgen``, connecting to ``localhost`` as user ``postgres`` with
    password ``password``.  Paths are relative, so the tests are expected to
    be started from the repository root.
    """

    # Generated files are written here; they are removed at the start of each test.
    orm_file_path = Path("tests/tmp/orm.py")
    ssg_file_path = Path("tests/tmp/ssg.py")

    def setUp(self) -> None:
        """Remove stale generated files and re-create the destination database."""
        self.orm_file_path.unlink(missing_ok=True)
        self.ssg_file_path.unlink(missing_ok=True)

        # If you need to update src.dump or dst.dump, regenerate it with pg_dump:
        #   pg_dump -d src -h localhost -U postgres -C -c > tests/examples/src.dump
        #   pg_dump -d dst -h localhost -U postgres -C -c > tests/examples/dst.dump

        # psql reads the password from PGPASSWORD, so no prompt is needed.
        env = {**os.environ, "PGPASSWORD": "password"}

        # Clear and re-create the destination database
        completed_process = run(
            [
                "psql",
                "--host=localhost",
                "--username=postgres",
                "--file=" + str(Path("tests/examples/dst.dump")),
            ],
            capture_output=True,
            env=env,
            check=True,
        )

        # psql doesn't always return != 0 if it fails, so also require an empty
        # stderr.  Use a TestCase assertion rather than a bare ``assert`` so the
        # check is not stripped when Python runs with -O.
        self.assertEqual(b"", completed_process.stderr)

    def test_workflow(self) -> None:
        """Test the recommended CLI workflow runs without errors.

        Runs ``make-tables``, ``make-generators``, ``create-tables`` and
        ``create-data`` in sequence; ``check=True`` makes any non-zero exit
        status raise ``CalledProcessError`` and fail the test.
        """
        # Connection settings are handed to sqlsynthgen through the environment.
        # NOTE(review): presumably these names map onto sqlsynthgen's settings
        # fields -- confirm against the settings module.
        env = {
            **os.environ,
            "src_host_name": "localhost",
            "src_user_name": "postgres",
            "src_password": "password",
            "src_db_name": "src",
            "src_schema": "",
            "dst_host_name": "localhost",
            "dst_user_name": "postgres",
            "dst_password": "password",
            "dst_db_name": "dst",
        }

        # make-tables writes the ORM module to stdout; capture it in a file.
        with open(self.orm_file_path, "wb") as file:
            run(["sqlsynthgen", "make-tables"], stdout=file, env=env, check=True)

        # make-generators reads the ORM file and writes the generators module
        # to stdout.
        with open(self.ssg_file_path, "wb") as file:
            run(
                ["sqlsynthgen", "make-generators", self.orm_file_path],
                stdout=file,
                env=env,
                check=True,
            )

        run(["sqlsynthgen", "create-tables", self.orm_file_path], env=env, check=True)
        run(
            ["sqlsynthgen", "create-data", self.orm_file_path, self.ssg_file_path],
            env=env,
            check=True,
        )
1 change: 1 addition & 0 deletions tests/tmp/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*