Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ jobs:
- name: Run Unit Tests
shell: bash
run: |
FUNCTIONAL_TESTS=1 poetry run python -m unittest discover --verbose tests
REQUIRES_DB=1 poetry run python -m unittest discover --verbose tests
5 changes: 3 additions & 2 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ignore-patterns=

# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=0
jobs=1

# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
Expand Down Expand Up @@ -86,7 +86,8 @@ disable=raw-checker-failed,
suppressed-message,
deprecated-pragma,
use-symbolic-message-instead,
too-few-public-methods
too-few-public-methods,
duplicate-code

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
23 changes: 23 additions & 0 deletions sqlsynthgen/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Base generator classes."""
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Any

from sqlalchemy import insert


@dataclass
class FileUploader:
    """Upload the rows stored in a table's CSV file into a database table.

    The file is looked up as ``<table.fullname>.csv``, relative to the current
    working directory.
    """

    # Table object; must expose ``fullname`` and be accepted by ``insert``.
    table: Any

    def load(self, connection: Any) -> None:
        """Insert every data row of the table's CSV file via ``connection``.

        The first line of the file is treated as the header and supplies the
        column names. A file with no data rows (empty or header only) is
        skipped without touching the database.

        Args:
            connection: Object whose ``execute`` method runs the insert
                statement.

        Raises:
            FileNotFoundError: If the CSV file does not exist.
        """
        csv_path = Path(self.table.fullname + ".csv")
        with csv_path.open("r", newline="", encoding="utf-8") as csvfile:
            # Read everything up front so the file is closed before the
            # database is touched.
            rows = list(csv.DictReader(csvfile))

        if not rows:
            # An empty values list does not describe any row to insert, so
            # building a statement from it is not a safe no-op; skip instead.
            return

        connection.execute(insert(self.table).values(rows))
16 changes: 13 additions & 3 deletions sqlsynthgen/create.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Functions and classes to create and populate the target database."""
from typing import Any
from typing import Any, List

from sqlalchemy import create_engine, insert
from sqlalchemy.schema import CreateSchema
Expand All @@ -23,6 +23,16 @@ def create_db_tables(metadata: Any) -> Any:
metadata.create_all(engine)


def create_db_vocab(sorted_vocab: List[Any]) -> None:
    """Upload each vocabulary table to the destination database.

    Args:
        sorted_vocab: Objects exposing ``load(connection)``; they are loaded
            in the order given, all over a single connection.
    """
    dsn = get_settings().dst_postgres_dsn
    engine = create_engine(dsn)

    with engine.connect() as connection:
        for uploader in sorted_vocab:
            uploader.load(connection)


def create_db_data(sorted_tables: list, sorted_generators: list, num_rows: int) -> None:
"""Connect to a database and populate it with data."""
settings = get_settings()
Expand All @@ -39,8 +49,8 @@ def populate(
) -> None:
"""Populate a database schema with dummy data."""

for table, generator in zip(
tables, generators
for table, generator in reversed(
list(zip(reversed(tables), reversed(generators)))
): # Run all the inserts for one table in a transaction
with dst_conn.begin():
for _ in range(num_rows):
Expand Down
9 changes: 8 additions & 1 deletion sqlsynthgen/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import typer
import yaml

from sqlsynthgen.create import create_db_data, create_db_tables
from sqlsynthgen.create import create_db_data, create_db_tables, create_db_vocab
from sqlsynthgen.make import make_generators_from_tables
from sqlsynthgen.settings import get_settings

Expand Down Expand Up @@ -45,6 +45,13 @@ def create_data(
)


@app.command()
def create_vocab(ssg_file: str = typer.Argument(...)) -> None:
    """Load vocabulary tables into the destination database.

    Reads the module at SSG_FILE and uploads every entry of its
    ``sorted_vocab`` list.
    """
    # NOTE: Typer shows this docstring as the command's --help text, so it
    # must describe this command rather than create_tables.
    ssg_module = import_file(ssg_file)
    create_db_vocab(ssg_module.sorted_vocab)


@app.command()
def create_tables(orm_file: str = typer.Argument(...)) -> None:
"""Create tables using the SQLAlchemy file."""
Expand Down
50 changes: 50 additions & 0 deletions sqlsynthgen/make.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
"""Functions to make a module of generator classes."""
import csv
import inspect
from pathlib import Path
from types import ModuleType
from typing import Any, Final, Optional

from mimesis.providers.base import BaseProvider
from sqlalchemy import create_engine, select
from sqlalchemy.sql import sqltypes

from sqlsynthgen import providers
from sqlsynthgen.settings import get_settings

HEADER_TEXT: str = "\n".join(
(
'"""This file was auto-generated by sqlsynthgen but can be edited manually."""',
"from mimesis import Generic",
"from mimesis.locales import Locale",
"from sqlsynthgen.base import FileUploader",
"",
"generic = Generic(locale=Locale.EN)",
"",
Expand Down Expand Up @@ -136,13 +141,28 @@ def _add_generator_for_table(
return content, new_class_name


def _download_table(table: Any, engine: Any) -> None:
    """Dump every row of ``table`` to ``<table.fullname>.csv``.

    The file is written relative to the current working directory: one header
    line holding the column names, followed by one line per row.

    Args:
        table: Table object exposing ``fullname`` and ``columns``.
        engine: Engine used to open the connection the query runs on.
    """
    query = select([table])
    with engine.connect() as connection:
        # Materialise the rows before the connection is closed.
        rows = list(connection.execute(query))

    target = Path(table.fullname + ".csv")
    with target.open("w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle, delimiter=",")
        out.writerow([column.name for column in table.columns])
        out.writerows(rows)


def make_generators_from_tables(
tables_module: ModuleType, generator_config: dict
) -> str:
"""Creates sqlsynthgen generator classes from a sqlacodegen-generated file.

Args:
tables_module: A sqlacodegen-generated module.
generator_config: Configuration to control the generator creation.

Returns:
A string that is a valid Python module, once written to file.
Expand All @@ -154,12 +174,42 @@ def make_generators_from_tables(
new_content += f"\nfrom . import {generator_module_name}"

sorted_generators = "[\n"
sorted_vocab = "[\n"

settings = get_settings()
engine = create_engine(settings.src_postgres_dsn)

for table in tables_module.Base.metadata.sorted_tables:
if table.name in [
x
for x in generator_config.get("tables", {}).keys()
if generator_config["tables"][x].get("vocabulary_table")
]:

orm_class = _orm_class_from_table_name(tables_module, table.fullname)
if not orm_class:
raise RuntimeError(f"Couldn't find {table.fullname} in {tables_module}")
class_name = orm_class.__name__
new_content += (
f"\n\n{class_name.lower()}_vocab "
f"= FileUploader({tables_module.__name__}.{class_name}.__table__)"
)
sorted_vocab += f"{INDENTATION}{class_name.lower()}_vocab,\n"

_download_table(table, engine)

continue

table_config = generator_config.get("tables", {}).get(table.name, {})
new_content, new_generator_name = _add_generator_for_table(
new_content, tables_module, table_config, table
)
sorted_generators += f"{INDENTATION}{new_generator_name},\n"

sorted_generators += "]"
sorted_vocab += "]"

new_content += "\n\n" + "sorted_generators = " + sorted_generators + "\n"
new_content += "\n\n" + "sorted_vocab = " + sorted_vocab + "\n"

return new_content
4 changes: 4 additions & 0 deletions tests/examples/basetable.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
id
1
2
3
2 changes: 1 addition & 1 deletion tests/examples/dst.dump
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

DROP DATABASE IF EXISTS dst;
DROP DATABASE IF EXISTS dst WITH (FORCE);
--
-- Name: dst; Type: DATABASE; Schema: -; Owner: postgres
--
Expand Down
11 changes: 11 additions & 0 deletions tests/examples/example_orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,14 @@ class Entity(Base):
Integer,
primary_key=True,
)


class Concept(Base):
    """ORM mapping for the ``concept`` table in schema ``myschema``."""

    __tablename__ = "concept"
    __table_args__ = {"schema": "myschema"}

    # Integer primary key.
    concept_id = Column(Integer, primary_key=True)
    # Free-text name column.
    concept_name = Column(Text)
2 changes: 2 additions & 0 deletions tests/examples/expected.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id
1
8 changes: 8 additions & 0 deletions tests/examples/expected_ssg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This file was auto-generated by sqlsynthgen but can be edited manually."""
from mimesis import Generic
from mimesis.locales import Locale
from sqlsynthgen.base import FileUploader

generic = Generic(locale=Locale.EN)

Expand All @@ -16,6 +17,8 @@
import tests.examples.example_orm
from . import custom_generators

concept_vocab = FileUploader(tests.examples.example_orm.Concept.__table__)

class entityGenerator:
def __init__(self, src_db_conn, dst_db_conn):
pass
Expand Down Expand Up @@ -44,3 +47,8 @@ def __init__(self, src_db_conn, dst_db_conn):
personGenerator,
hospital_visitGenerator,
]


sorted_vocab = [
concept_vocab,
]
3 changes: 2 additions & 1 deletion tests/examples/generator_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ custom_generators_module: custom_generators
tables:
person:
num_rows_per_pass: 2
vocabulary_table: false
custom_generators:
- name: generic.person.full_name
args: null
Expand All @@ -27,3 +26,5 @@ tables:
- visit_start
- visit_end
- visit_duration_seconds
concept:
vocabulary_table: true
2 changes: 2 additions & 0 deletions tests/examples/mytable.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id
1
2 changes: 1 addition & 1 deletion tests/examples/providers.dump
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

DROP DATABASE IF EXISTS providers;
DROP DATABASE IF EXISTS providers WITH (FORCE);
--
-- Name: providers; Type: DATABASE; Schema: -; Owner: postgres
--
Expand Down
49 changes: 22 additions & 27 deletions tests/examples/src.dump
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

DROP DATABASE IF EXISTS src;
DROP DATABASE IF EXISTS src WITH (FORCE);
--
-- Name: src; Type: DATABASE; Schema: -; Owner: postgres
--
Expand Down Expand Up @@ -44,26 +44,23 @@ SET default_tablespace = '';
SET default_table_access_method = heap;

--
-- Name: hospital_visit; Type: TABLE; Schema: public; Owner: postgres
-- Name: concept; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.hospital_visit (
hospital_visit_id bigint NOT NULL,
person_id integer NOT NULL,
visit_start date NOT NULL,
visit_duration_seconds real NOT NULL,
visit_image bytea NOT NULL
CREATE TABLE public.concept (
concept_id integer NOT NULL PRIMARY KEY,
concept_name text NOT NULL
);


ALTER TABLE public.hospital_visit OWNER TO postgres;
ALTER TABLE public.concept OWNER TO postgres;

--
-- Name: person; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.person (
person_id integer NOT NULL,
person_id integer NOT NULL PRIMARY KEY,
name text NOT NULL,
research_opt_out boolean NOT NULL,
stored_from timestamp with time zone NOT NULL
Expand All @@ -73,36 +70,34 @@ CREATE TABLE public.person (
ALTER TABLE public.person OWNER TO postgres;

--
-- Data for Name: hospital_visit; Type: TABLE DATA; Schema: public; Owner: postgres
-- Name: hospital_visit; Type: TABLE; Schema: public; Owner: postgres
--

COPY public.hospital_visit (hospital_visit_id, person_id, visit_start, visit_duration_seconds, visit_image) FROM stdin;
\.
CREATE TABLE public.hospital_visit (
hospital_visit_id bigint NOT NULL PRIMARY KEY,
person_id integer NOT NULL references public.person(person_id),
visit_start date NOT NULL,
visit_duration_seconds real NOT NULL,
visit_image bytea NOT NULL,
visit_type_concept_id integer NOT NULL references public.concept(concept_id)
);

ALTER TABLE public.hospital_visit OWNER TO postgres;

--
-- Data for Name: person; Type: TABLE DATA; Schema: public; Owner: postgres
-- Data for Name: hospital_visit; Type: TABLE DATA; Schema: public; Owner: postgres
--

COPY public.person (person_id, name, research_opt_out, stored_from) FROM stdin;
COPY public.hospital_visit (hospital_visit_id, person_id, visit_start, visit_duration_seconds, visit_image) FROM stdin;
\.


--
-- Name: hospital_visit hospital_visit_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres
--

ALTER TABLE ONLY public.hospital_visit
ADD CONSTRAINT hospital_visit_pkey PRIMARY KEY (hospital_visit_id);


--
-- Name: person person_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres
-- Data for Name: person; Type: TABLE DATA; Schema: public; Owner: postgres
--

ALTER TABLE ONLY public.person
ADD CONSTRAINT person_pkey PRIMARY KEY (person_id);

COPY public.person (person_id, name, research_opt_out, stored_from) FROM stdin;
\.

--
-- PostgreSQL database dump complete
Expand Down
Loading