In [None]:
# Development convenience: re-import edited modules automatically before each
# cell executes, so local changes to the client library are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pydantic import BaseModel, Field

from gretel_client.navigator_client import Gretel
from gretel_client.data_designer.judge_rubrics import TEXT_TO_PYTHON_LLM_JUDGE_PROMPT_TEMPLATE, PYTHON_RUBRICS


In [None]:
# Schema for a single fruit entry; used as the element type of `FruitSalad.fruits`.
# Every field is required (Ellipsis default). The description strings are part of
# the model's field metadata, so they are kept verbatim.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies class docstrings into the generated JSON schema, which would
# change what is sent for structured generation.
class Fruit(BaseModel):
    name: str = Field(
        ...,
        description="Name of the fruit.",
    )
    cost: float = Field(
        ...,
        description="Dollar value of the fruit.",
    )
    weight: float = Field(
        ...,
        description="Weight in lbs.",
    )
    flavor: str = Field(
        ...,
        description="Primary flavor profile of the fruit.",
    )
    preparation: str = Field(
        ...,
        description="How to prepare the fruit for a fruit salad.",
    )


# Top-level schema passed to the "fruit_salad" structured column below.
# All scalar fields are required (Ellipsis default); `fruits` has no default and
# no Field metadata, so it is a required list of `Fruit` entries.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies class docstrings into the generated JSON schema, which would
# change what is sent for structured generation.
class FruitSalad(BaseModel):
    total_cost: float = Field(
        ...,
        description="Total cost of all fruits.",
    )
    name: str = Field(
        ...,
        description="Name of this unique fruit salad.",
    )
    haiku: str = Field(
        ...,
        description="A beautiful haiku about this fruit salad.",
    )
    ascii_art: str = Field(
        ...,
        description="A small ASCII art depiction of the fruit salad.",
    )
    fruits: list[Fruit]

In [None]:
# Client for the Gretel API.
# NOTE(review): this points at the *dev* endpoint -- confirm that is intended
# before sharing the notebook outside the team.
gretel = Gretel(endpoint='https://api.dev.gretel.ai')

# Build the Data Designer configuration as one fluent chain. The chain mixes
# sampler columns, expression columns, LLM-generated columns (free text, code,
# and structured output), validation/judge columns, and constraints.
dd = (
    gretel.data_designer.new(model_suite="apache-2.0")
    # Person sampler whose attributes are referenced by the expression columns below.
    .with_person_samplers({"some_dude": {"sex": "Male", "locale": "en_GB"}})
    # --- Sampler / expression columns -------------------------------------
    .add_column(
        name="employee_id",
        type="uuid",
        params={"prefix": "GRETEL_", "short_form": True, "uppercase": True},
    )
    .add_column(name="first_name", type="expression", params={"expr": "some_dude.first_name"})
    .add_column(name="last_name", type="expression", params={"expr": "some_dude.last_name"})
    .add_column(
        name="start_date",
        type="datetime",
        params={"start": "2020-01-01", "end": "2025-01-01"},
        convert_to="%m/%d/%Y",
    )
    .add_column(
        name="end_date",
        type="datetime",
        params={"start": "2020-01-01", "end": "2025-01-01"},
        convert_to="%m/%d/%Y",
    )
    .add_column(
        name="age",
        type="gaussian",
        params={"mean": 30, "stddev": 5},
        # NOTE(review): "pet_type" below only takes the values "dog" and "cat",
        # so this 'reptile' condition can never match as written. Confirm whether
        # it is a deliberate no-match test or the category list is missing a value.
        conditional_params={"pet_type == 'reptile'": {"mean": 25, "stddev": 2}},
        convert_to="int",
    )
    .add_column(
        name="pet_type",
        type="category",
        params={"values": ["dog", "cat"], "weights": [0.8, 0.2]},
    )
    .add_column(
        name="pet_names",
        type="subcategory",
        params={
            "category": "pet_type",
            "values": {"dog": ["fido", "spot"], "cat": ["whiskers", "mittens"]},
        },
    )
    # --- LLM-generated columns --------------------------------------------
    # Prompt wording fixed: the original text was garbled ("does sorts",
    # "nlong", "Wirte ... that described by"), and prompts are sent verbatim.
    .add_column(
        name="instruction",
        prompt="Write a description of Python code that sorts an array in O(n log n) time complexity",
    )
    .add_column(
        name="code_implementation",
        prompt="Write Python code that is described by {instruction}",
        model_alias="code",
        data_config={"type": "code", "params": {"syntax": "python"}},
    )
    .add_column(name="region", type="category", params={"values": ["Thailand", "France", "South Africa"]})
    .add_column(
        name="fruit_salad",
        prompt="Create a description of fruits to go in a regional fruit salad from {region}!",
        data_config={"type": "structured", "params": {"model": FruitSalad}},
    )
    # --- Validation and judging of the generated code ---------------------
    .add_column(
        name="code_validity_result",
        type="code-validation",
        code_lang="python",
        target_column="code_implementation",
    )
    .add_column(
        name="code_judge_result",
        type="llm-judge",
        prompt=TEXT_TO_PYTHON_LLM_JUDGE_PROMPT_TEMPLATE,
        rubrics=PYTHON_RUBRICS,
    )
    # --- Constraints --------------------------------------------------------
    .add_constraint(target_column="age", type="scalar_inequality", params={"operator": "<", "rhs": 35})
    .add_constraint(
        target_column="start_date",
        type="column_inequality",
        params={"operator": "<", "rhs": "end_date"},
    )
    .with_evaluation_report()
)

# Show the assembled configuration as the cell output.
dd

In [None]:
# Typed-object counterpart of the dict-style "pet_type" column definition used
# in the builder chain: the same name, type, values and weights, expressed via
# the `dd.C` / `dd.P` namespaces. The result is only displayed as the cell
# output; it is not assigned or added to `dd` here.
dd.C.SamplerColumn(
    name="pet_type", type="category", params=dd.P.Category(values=["dog", "cat"], weights=[0.8, 0.2])
)

In [None]:
# Run a preview of the configuration with verbose logging enabled and keep the
# result object for inspection in the following cells.
preview_result = dd.preview(verbose_logging=True)

In [None]:
# Render one record from the preview result for a quick visual check.
preview_result.display_sample_record()

In [None]:
# Show the preview dataset's `df` attribute as the cell output
# (presumably a pandas DataFrame -- TODO confirm).
preview_result.dataset.df

In [None]:
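# Disabled by default so that running the whole notebook stops at the preview.
# Uncomment to submit a 100-record generation run named "test_new":
# result = dd.create(num_records=100, workflow_run_name="test_new")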