# MultiTable

## Quickstart

In [None]:
# End-to-end synthetics example: download the demo SQLite database, extract
# its relational data, train, generate, and save the synthetic tables.

from gretel_trainer.relational.connectors import sqlite_conn
from gretel_trainer.relational.multi_table import MultiTable


# Shell command (requires curl): fetch the demo database into the current directory
!curl -o "ecom_xf.db" "https://gretel-blueprints-pub.s3.us-west-2.amazonaws.com/rdb/ecom_xf.db"


# Open the downloaded file and extract its relational data
connector = sqlite_conn("ecom_xf.db")
relational_data = connector.extract()

# Train on the extracted data, then generate the synthetic tables
model = MultiTable(relational_data)
model.train()
synthetic_tables = model.generate()

# Save through the same connector that read the source data;
# presumably each table is written under a "syn_"-prefixed name -- confirm against the connector docs
connector.save(synthetic_tables, prefix="syn_")

## Detailed walkthrough

### Set up source relational data

In [None]:
# Display the schema diagram of our demo database (image referenced by public URL)

from IPython.display import Image

# As the last expression in the cell, the Image object is rendered as the cell output
Image("https://gretel-blueprints-pub.s3.us-west-2.amazonaws.com/rdb/ecommerce_db.png", width=600, height=600)

In [None]:
# Download the demo database to "ecom_xf.db" in the current working directory
# (shell command; requires curl to be installed)

!curl -o "ecom_xf.db" "https://gretel-blueprints-pub.s3.us-west-2.amazonaws.com/rdb/ecom_xf.db"

In [None]:
# Connect to SQLite database and extract relational data

from gretel_trainer.relational.connectors import sqlite_conn

# Same file name the download cell writes to
ecommerce_db_path = "ecom_xf.db"

sqlite = sqlite_conn(path=ecommerce_db_path)
# `relational_data` is what the MultiTable cell below consumes
relational_data = sqlite.extract()

In [None]:
# Alternatively, build the relational data by hand from CSV files.
# NOTE: this cell binds `rel_data`, whereas the cells below read `relational_data`.

from gretel_trainer.relational.core import RelationalData
import pandas as pd

# Placeholder directory holding one "<table>.csv" file per table
csv_dir = "/path/to/extracted_csvs"
tables = [
    "events",
    "users",
    "distribution_center",
    "products",
    "inventory_items",
    "order_items",
]

rel_data = RelationalData()

# Call shape: rel_data.add_table(table_name, optional_primary_key_column, table_data_as_dataframe)
# Every table in this demo is registered with "id" as its primary key column.
for table in tables:
    rel_data.add_table(
        table,
        "id",
        pd.read_csv(f"{csv_dir}/{table}.csv"),
    )

# Call shape: rel_data.add_foreign_key("table.column", "parent_table.referenced_column")
# Pairs are registered in the order listed.
foreign_keys = [
    ("events.user_id", "users.id"),
    ("order_items.user_id", "users.id"),
    ("order_items.inventory_item_id", "inventory_items.id"),
    ("inventory_items.product_id", "products.id"),
    ("inventory_items.product_distribution_center_id", "distribution_center.id"),
    ("products.distribution_center_id", "distribution_center.id"),
]
for child_column, parent_column in foreign_keys:
    rel_data.add_foreign_key(child_column, parent_column)


### Transform, train, and generate

In [None]:
# Create the MultiTable instance from the extracted relational data.
# The "mtnb" project prefix is the same string the cleanup cell at the end searches for.
from gretel_trainer.relational.multi_table import MultiTable

multitable = MultiTable(relational_data=relational_data, project_prefix="mtnb")

In [None]:
# Transform some tables: `configs` maps a table name to the URL of its policy YAML.
# Only "users" and "events" are listed here.

xform_out = multitable.transform(
    configs={
        "users": "https://gretel-blueprints-pub.s3.amazonaws.com/rdb/users_policy.yaml",
        "events": "https://gretel-blueprints-pub.s3.amazonaws.com/rdb/events_policy.yaml",
    }, 
    # presumably leaves the data held by `multitable` untouched and only returns
    # the transformed tables -- confirm against the MultiTable.transform docs
    in_place=False
)

In [None]:
# Compare original to transformed: first 5 rows of "users" as held by
# `multitable`, followed by the first 5 rows of the transform output

print(multitable.relational_data.get_table_data("users").head(5))
print(xform_out["users"].head(5))

In [None]:
# Train synthetic models for all tables
# (per-table results can be inspected afterwards via `multitable.train_statuses`)

multitable.train()

In [None]:
# Inspect training results (bare expression, so the notebook displays its value)

multitable.train_statuses

If some tables fail to train, you can retry them with modified datasets without discarding and retraining the tables that trained successfully.

In [None]:
# Example retry: take the current "products" and "inventory_items" data,
# modify one column in each, and retrain just those two tables.
original_products = multitable.relational_data.get_table_data("products")
original_inventory_items = multitable.relational_data.get_table_data("inventory_items")

# assumes these are pandas DataFrames, in which case assign() returns a copy
# with the named column set to "000" on every row -- TODO confirm
modified_products = original_products.assign(sku="000")
modified_inventory_items = original_inventory_items.assign(product_sku="000")

# Keys are table names; values are the replacement datasets to train on
multitable.retrain_tables({
    "products": modified_products,
    "inventory_items": modified_inventory_items,
})

In [None]:
# Generate synthetic data; the result is iterated below as a mapping of
# table name -> dataframe and is also what the output connectors save

synthetic_tables = multitable.generate()

In [None]:
# Inspect generation results (bare expression, so the notebook displays its value)

multitable.generate_statuses

In [None]:
# Compare original to synthetic data, table by table: print the table name,
# then the first 5 source rows, then the first 5 synthetic rows

for table_name in synthetic_tables:
    dataframe = synthetic_tables[table_name]
    print(table_name)
    print(multitable.relational_data.get_table_data(table_name).head(5))
    print(dataframe.head(5))

In [None]:
# Write output data to a new SQLite database file ("out.db"), separate from
# the source "ecom_xf.db"

from gretel_trainer.relational.connectors import sqlite_conn

out_db_path = "out.db"

out_db = sqlite_conn(out_db_path)
# presumably each table is written under a "syn_"-prefixed name -- confirm against the connector docs
out_db.save(synthetic_tables, prefix="syn_")

#### Postgres demo via Docker

In [None]:
# Start up a postgres container with docker (shell command; requires Docker).
#   --rm    remove the container automatically once it stops
#   -d      run detached, in the background
#   --name  fixed container name, reused by the exec/stop cells below
#   -e      sets POSTGRES_PASSWORD inside the container
#   -p      publishes container port 5432 on host port 5432

!docker run --rm -d --name multitable_pgdemo -e POSTGRES_PASSWORD=password -p 5432:5432 postgres

In [None]:
# Write synthetic tables to the Postgres db

from gretel_trainer.relational.connectors import postgres_conn

# NOTE(review): positional arguments look like user, password, host, port,
# matching the container started above -- confirm against postgres_conn's signature.
# This rebinds `out_db`, previously the SQLite output connector.
out_db = postgres_conn("postgres", "password", "localhost", 5432)
# No prefix is passed here; the inspection cell below queries a table named "users"
out_db.save(synthetic_tables)


In [None]:
# Inspect the postgres database by running psql inside the container:
# first list the tables (\dt), then show five rows from "users"

!docker exec multitable_pgdemo psql -U postgres -c "\dt"
!docker exec multitable_pgdemo psql -U postgres -c "select * from users limit 5;"

In [None]:
# Tear down the docker container; it was started with --rm, so stopping it
# also removes it

!docker stop multitable_pgdemo

## Cleanup / reset

In [None]:
# Delete projects from Gretel Cloud and remove working directory.
# WARNING: destructive -- every project returned by the search is deleted.

import gretel_client

# "mtnb" is the project_prefix passed to MultiTable earlier in this notebook.
# NOTE(review): the search may also return unrelated projects matching "mtnb" --
# check the results before deleting.
projects = gretel_client.projects.projects.search_projects(query="mtnb") 
for project in projects:
    project.delete()

# Shell command: recursively remove the local "working" directory
# (presumably created by MultiTable during the run -- verify)
!rm -rf working