In [None]:
# Optional setup: uncomment the line below to install or upgrade the
# gretel-trainer package (-U upgrades, -qq silences pip's output).
#!pip install -Uqq gretel-trainer

In [None]:
from gretel_trainer import Trainer

# Remote CSV used as the training data; the next example cell reuses it.
dataset = "https://gretel-public-website.s3-us-west-2.amazonaws.com/datasets/USAdultIncome5k.csv"

# Simplest example: a Trainer built with no arguments (library defaults),
# trained on the dataset, then asked to generate data.
model = Trainer()
model.train(dataset)
# Last expression in the cell, so the notebook displays whatever generate() returns.
model.generate()

In [None]:
# Pick the underlying model explicitly and tune its configuration.
# `config` may be given as a string, a dict, or a path.

from gretel_trainer.models import GretelLSTM, GretelCTGAN

model_type = GretelCTGAN(
    config="synthetics/high-dimensionality",
    max_rows=50_000,
    max_header_clusters=100,
)

# Optional: override individual params on top of the base config
model_type.update_params(dict(epochs=500))

# Same train/generate flow as the simplest example, but with the chosen model.
model = Trainer(model_type=model_type)
model.train(dataset)
model.generate()

In [None]:
# Or, load an existing model and generate data from it.
# Trainer.load() is called with no arguments, so it relies on the library's
# defaults to locate the model -- presumably the one trained above; verify
# against the gretel-trainer documentation.

model = Trainer.load()
# Ask for exactly 42 records rather than relying on generate()'s default count.
model.generate(num_records=42)

In [None]:
# Train a model, then conditionally generate data from seed values

import pandas as pd

DATASET_PATH = 'https://gretel-public-website.s3.amazonaws.com/datasets/mitre-synthea-health.csv'
# Both model objects are constructed; the trailing index selects the one used
# (0 -> GretelLSTM, 1 -> GretelCTGAN).
MODEL_TYPE = [GretelLSTM(), GretelCTGAN()][1]

# Seed rows to autocomplete values for: four rows of one
# (RACE, ETHNICITY, GENDER) combination followed by five rows of another.
seed_df = pd.DataFrame(
    data=[["black", "african", "F"]] * 4 + [["asian", "chinese", "F"]] * 5,
    columns=["RACE", "ETHNICITY", "GENDER"],
)

# The seed frame's column names double as the seed fields passed to training.
seed_fields = seed_df.columns.tolist()
model = Trainer(model_type=MODEL_TYPE)
model.train(DATASET_PATH, seed_fields=seed_fields)
model.generate(seed_df=seed_df)

In [None]:
# Or, load an existing model and conditionally generate data.
# Reuses seed_df built in the previous cell, so that cell must have run first.

model = Trainer.load()
model.generate(seed_df=seed_df)