In [2]:
import os
import yaml
from getpass import getpass

from navigator_helpers import PiiDocsPipeline

In [3]:
# Model list for the pipeline's LLM suite, written as YAML text.
# `api_key` holds a reference to the GRETEL_PROD_API_KEY environment variable
# (the `os.environ/...` form), not the key itself, so no secret is stored in
# the notebook or in the file written below.
# NOTE(review): the `tags` presumably decide which roles (natural language,
# code, judge) this model may fill - confirm against navigator_helpers.
llm_config = """
- model_name: gretelai-mistral-nemo-2407
  litellm_params:
    model: gretelai/gpt-mistral-nemo-2407
    api_key: os.environ/GRETEL_PROD_API_KEY
    api_base: https://api.gretel.ai
  tags:
  - open_license
  - nl
  - judge
  - code
"""

# Save the configuration to a local YAML file.
# The text is parsed first so a malformed config fails here rather than inside
# the pipeline. The encoding is explicit so the bytes written do not depend on
# the platform's locale default, and safe_dump keeps the output to plain YAML
# types, matching the safe_load used to parse it.
llm_config_path = "llm_config.yaml"
with open(llm_config_path, "w", encoding="utf-8") as config_file:
    yaml.safe_dump(yaml.safe_load(llm_config), config_file, default_flow_style=False)

In [4]:
# Pipeline settings as YAML-style text; this string is passed to
# PiiDocsPipeline as its first argument. It is assembled from adjacent string
# literals so individual settings can be annotated without changing the text.
pii_doc_config = (
    "\n"  # the text starts with a newline
    "doc_lang: pii_doc\n"
    # Same word as the `open_license` tag in the LLM config; presumably this
    # selects the models carrying that tag - confirm.
    "llm_suite_type: open_license\n"
    "\n"
    "num_domains: 3\n"
    "num_doctypes_per_domain: 10\n"
    "\n"
    "entity_validation: true\n"
)

In [5]:
# Set the Gretel API key as an environment variable.
# Prompt only when the variable is missing or empty, so re-running this cell
# (or running the whole notebook with the variable already exported) neither
# blocks on input nor overwrites a working key. getpass keeps the key out of
# the notebook source and its saved output.
if not os.environ.get("GRETEL_PROD_API_KEY"):
    os.environ["GRETEL_PROD_API_KEY"] = getpass("Enter your Gretel API key: ")

# Build the pipeline from the in-memory pipeline settings and the path of the
# LLM config file written earlier.
pipe = PiiDocsPipeline(pii_doc_config, llm_config=llm_config_path)

2024-10-17 12:35:11.991 - INFO - ⚙️ Setting up Synthetic Data Pipeline
2024-10-17 12:35:11.995 - INFO - 🦜 Initializing LLM suite
2024-10-17 12:35:11.996 - INFO - 📖 Natural language LLM: gretelai-mistral-nemo-2407
2024-10-17 12:35:11.997 - INFO - 💻 Code LLM: gretelai-mistral-nemo-2407
2024-10-17 12:35:11.998 - INFO - ⚖️ Judge LLM: gretelai-mistral-nemo-2407
2024-10-17 12:35:11.998 - INFO - 📦 Artifact path: pipeline-artifacts/pii_doc


In [6]:
# Domains to generate documents for. Every domain maps to None.
# NOTE(review): None presumably means "no document types supplied; let the
# pipeline choose them" - confirm against set_contextual_tags.
contextual_tags = {
    "domain_and_doctypes": dict.fromkeys(
        ("healthcare", "e-commerce", "education")
    ),
}

In [None]:
# Hand the `contextual_tags` dictionary to the pipeline, then ask the pipeline
# to show the tags it now holds. Order matters: the tags must be set first.
# NOTE(review): the display presumably includes any document types filled in
# for the None entries - confirm.
pipe.set_contextual_tags(contextual_tags)
pipe.show_contextual_tags()

In [None]:
# Run settings, named so they are easy to find and adjust.
NUM_SAMPLES = 5
MAX_WORKERS = 1

# Execute the pipeline and keep its return value for inspection below.
results = pipe.run(num_samples=NUM_SAMPLES, max_workers=MAX_WORKERS)

In [11]:
# Show a sample from the run's results.
# NOTE(review): presumably renders one generated record - confirm.
results.display_sample()