In [38]:
from gretel_client.navigator import DataDesigner

In [39]:
# System prompt handed to Data Designer for every generation request.
# Built from adjacent string literals (implicit concatenation) rather than
# backslash continuations inside a single literal: a continuation inside the
# quotes keeps the next line's leading indentation as part of the string, which
# previously injected runs of spaces into the prompt.
system_instruction = (
    "You are an expert at writing realistic emails. When given information about a person's job, "
    "personality, and goals, write emails that sound exactly like they would write them. Match "
    "their natural communication style - whether formal or casual, direct or diplomatic. "
    "Include their common phrases and speech patterns. Consider their role, industry, "
    "and relationship with the recipient. Make the tone and length appropriate for their objective. "
    "Avoid generic business language unless it matches how they actually write. "
    "Your emails should be indistinguishable from ones the person would write themselves."
)

In [40]:
# Build the Data Designer client that the following cells attach seed and
# generated columns to. The system prompt defined earlier is supplied as the
# special system instructions.
data_designer = DataDesigner(
    endpoint="https://api.gretel.cloud",
    cache="yes",
    model_suite="apache-2.0",
    special_system_instructions=system_instruction,
)

[19:09:47] [INFO] 🦜 Using apache-2.0 model suite
Logged in as kirit.thadaka@gretel.ai ✅


### Use Structured Outputs to make sure your data is in the right format

You can use Pydantic to define a structure for the messages that are produced by Data Designer.

In [41]:
from typing import Dict, List, Literal, Optional, Union, Any
from pydantic import BaseModel, Field, model_validator

class Email(BaseModel):
    """A single email in the email chain."""
    # All three fields are declared with `Field(...)`, i.e. no default is given.
    # Author of the message; limited to the two participants of the exchange.
    role: Literal["business_owner", "customer"] = Field(..., description="Which role is writing the message.")
    # Subject line of this individual email.
    subject: str = Field(..., description="Email subject.")
    # Body text of this individual email.
    content: str = Field(..., description="Email contents.")


class EmailConversation(BaseModel):
    """An email conversation between two people."""
    # The whole chain; every element is validated as an `Email`.
    # No default is given (`Field(...)`), so the field must be supplied.
    conversation: list[Email] = Field(..., description="List of all messages in the email chain.")

### Use Seeds to define the characteristics of the email conversations

Seed your dataset with the types of characteristics you'd like to see in the email conversations.

In [42]:
# Each business type maps to the concrete example businesses used as its
# `specific_business` subcategory values.
specific_business_by_type = {
    "Food & Beverage": [
        "Family-run Bakery",
        "Local Coffee Shop",
        "Food Truck",
        "Small Catering Business",
        "Specialty Food Store",
    ],
    "Local Retail": [
        "Independent Bookstore",
        "Local Plant Nursery",
        "Vintage Clothing Shop",
        "Craft Supply Store",
        "Local Pet Supply Store",
    ],
    "Skilled Trade": [
        "Local Plumber",
        "Independent Electrician",
        "Small Carpentry Shop",
        "Auto Repair Shop",
        "Local Landscaping Service",
    ],
    "Creative Services": [
        "Freelance Photographer",
        "Independent Graphic Designer",
        "Local Wedding Planner",
        "Custom Jewelry Maker",
        "Independent Interior Designer",
    ],
    "Health & Wellness": [
        "Independent Massage Therapist",
        "Local Yoga Studio",
        "Family Chiropractor",
        "Personal Training Business",
        "Small Wellness Center",
    ],
}

# Seed column for the business type, with a nested `specific_business`
# subcategory keyed by that type.
data_designer.add_categorical_seed_column(
    name="business_type",
    description="The type of small business the owner operates",
    # The top-level categories are exactly the mapping's keys, in declaration order.
    values=list(specific_business_by_type),
    subcategories=[
        {
            "name": "specific_business",
            "values": specific_business_by_type,
            # NOTE(review): presumably the number of extra values generated per
            # business type (the preview log shows one generation step per type) -- confirm.
            "num_new_values_to_generate": 2,
        }
    ],
)

In [43]:
# Communication styles the business owner can be assigned.
owner_personalities = [
    "Professional and Formal",
    "Warm and Personal",
    "Direct and Efficient",
    "Creative and Casual",
    "Patient and Educational",
]

data_designer.add_categorical_seed_column(
    name="owner_personality",
    description="The communication style and personality of the business owner",
    values=owner_personalities,
)

In [44]:
# Communication styles the customer can be assigned.
customer_personalities = [
    "Polite and Clear",
    "Frustrated but Respectful",
    "Demanding and Direct",
    "Detail-oriented",
    "Casual and Friendly",
    "Confused and Seeking Help",
]

data_designer.add_categorical_seed_column(
    name="customer_personality",
    description="The personality type and communication style of the customer",
    values=customer_personalities,
)

In [45]:
# Possible objectives for an email exchange, grouped by theme. The note above
# each entry names the kinds of business it is aimed at.
conversation_goals = [
    # --- Scheduling & Availability ---
    # Services like massage, training, repairs
    "Appointment Scheduling",
    # Bakeries, jewelers, craftspeople
    "Custom Order Discussion",
    # Designers, wellness providers, trades
    "Consultation Request",

    # --- Service-Related ---
    # Trades, creative services, catering
    "Quote Request",
    # Change to an existing booking/order
    "Service Modification",
    # Stock check, service times, special items
    "Availability Check",

    # --- Customer Support ---
    # Ongoing projects, custom orders
    "Progress Update Request",
    # Emergency scheduling, order modifications
    "Last-Minute Changes",
    # Quality concerns, timing issues
    "Service Issue Resolution",

    # --- Business Development ---
    # Local business partnerships, events
    "Collaboration Inquiry",
    # Private bookings, workshops, classes
    "Special Event Planning",
    # Creative services, skilled trades
    "Custom Project Proposal",

    # --- Customer Experience ---
    # Post-service check-ins, maintenance tips
    "Follow-up Care",
    # Care instructions, product use guidance
    "Detailed Instructions",
    # Product/service suggestions based on history
    "Personal Recommendations",

    # --- Administrative ---
    # Deposits, payment plans, invoicing
    "Payment Arrangement",
    # Hours, booking policies, COVID protocols
    "Business Policy Question",
    # Delivery zones, timing, special handling
    "Local Delivery Options",
]

data_designer.add_categorical_seed_column(
    name="conversation_goal",
    description="The primary objective of the email exchange in a small business context",
    values=conversation_goals,
)

In [46]:
# Overall tones an email exchange can take.
conversation_tones = [
    "Professional",
    "Friendly",
    "Formal",
    "Apologetic",
    "Appreciative",
    "Resolution-focused",
]

data_designer.add_categorical_seed_column(
    name="conversation_tone",
    description="The overall tone of the email exchange",
    values=conversation_tones,
)

In [47]:
# Urgency / timing contexts, ordered from most immediate to follow-up work.
# The note above each entry gives example situations.
urgency_levels = [
    # --- Same Day / Immediate ---
    # Urgent repairs, last-minute catering cancelation
    "Emergency Service Needed",
    # Changes to today's appointments/orders
    "Same-Day Modification",
    # Questions about today's service/product
    "Time-Critical Question",

    # --- Near-Term ---
    # Scheduling for this week, stock inquiries
    "This Week Required",
    # Quote needed within 2 days, upcoming appointment
    "48-Hour Response Needed",
    # Planning for weekend events/services
    "Weekend Preparation",

    # --- Standard Timing ---
    # Normal appointment scheduling
    "Regular Booking",
    # Typical product orders/inquiries
    "Standard Order Process",
    # General questions about services/products
    "Routine Inquiry",

    # --- Future Planning ---
    # Events, large projects, seasonal orders
    "Advance Booking",
    # Custom work, renovations, design projects
    "Project Planning",
    # Holiday orders, seasonal services
    "Seasonal Planning",

    # --- Follow-up Based ---
    # After service completion
    "Post-Service Check",
    # Regular service planning
    "Maintenance Schedule",
    # Progress updates on ongoing work
    "Project Update",
]

data_designer.add_categorical_seed_column(
    name="urgency_level",
    description="The urgency level and timing context of the small business email exchange",
    values=urgency_levels,
)

In [48]:
# Kinds of customer-business relationship, grouped by how established they are.
# The note above each entry describes the customer it stands for.
business_relationships = [
    # --- New Relationships ---
    # Local resident trying the service for the first time
    "First-Time Local Customer",
    # Referred by an existing customer
    "Word-of-Mouth Referral",
    # Found the business through local social media
    "Social Media Discovery",
    # Met at a farmer's market/community event
    "Local Event Introduction",

    # --- Established Relationships ---
    # E.g. standing yoga class, weekly bread order
    "Weekly Regular",
    # Regular maintenance, monthly appointments
    "Monthly Service Client",
    # Holiday orders, seasonal services
    "Seasonal Customer",
    # Ongoing renovation, wedding planning
    "Project-Based Client",

    # --- Community Connections ---
    # Another business owner in the community
    "Fellow Local Business Owner",
    # Family with multiple service needs
    "Local Family Customer",
    # Lives/works nearby, frequent casual visits
    "Neighborhood Regular",
    # Collaborated on local events
    "Community Event Partner",

    # --- Special Circumstances ---
    # Family has used the business for years
    "Multi-Generation Customer",
    # Coming back after moving or a break
    "Former Regular Returning",
    # Requires specific accommodations
    "Special Needs Client",
    # Regular custom/specialized orders
    "Custom Order Regular",
]

data_designer.add_categorical_seed_column(
    name="business_relationship",
    description="The nature of the customer-business relationship in a small business context",
    values=business_relationships,
)

In [49]:

# Candidate conversation lengths: the integers 2 through 6 inclusive.
# The email_contents prompt consumes this as {email_length}.
email_counts = list(range(2, 7))

data_designer.add_categorical_seed_column(
    name="email_length",
    description="Number of email exchanges in the conversation",
    values=email_counts,
)

In [50]:
# Prompt template for the generated business name; the {placeholders} name
# the business_type seed column and its specific_business subcategory.
business_name_prompt = (
    "Generate a name for a small business. "
    "The business type is: {business_type}, specifically: {specific_business}"
)

data_designer.add_generated_data_column(
    name="business_name",
    generation_prompt=business_name_prompt,
)

In [51]:
# The objective prompt is assembled from four sections that are separated by a
# blank line in the final text (each section but the last ends in "\n", and the
# sections are joined with one more "\n").
objective_context = (
    "Based on this small business context:\n"
    "- Business Type: {business_type} (Specifically: {specific_business})\n"
    "- Conversation Goal: {conversation_goal}\n"
    "- Customer Type: {business_relationship}\n"
)
objective_task = (
    "Generate ONE clear, specific reason why a customer needs to email this small business. "
    "The objective should:\n"
    "- Focus on a single, coherent need or request\n"
    "- Include relevant details that matter to the business owner\n"
    "- Be realistic for this specific type of business\n"
    "- Match the conversation goal\n"
)
objective_examples = (
    "Examples:\n"
    "- For a bakery: 'Need to order a gluten-free birthday cake for 15 people, decorated with a garden theme, required for next Saturday afternoon.'\n"
    "- For a plumber: 'Kitchen sink is backing up and making gurgling sounds. Need assessment and repair quote, preferably within next two days.'\n"
    "- For a yoga studio: 'Interested in private yoga sessions for a group of 4 beginners, looking to schedule weekly classes starting next month.'\n"
)
objective_closing = (
    "Only provide the objective, no additional context or explanation. Keep it focused on one clear purpose."
)
email_objective_prompt = "\n".join(
    [objective_context, objective_task, objective_examples, objective_closing]
)

# Generated column: the single, specific reason the customer is writing.
data_designer.add_generated_data_column(
    name="email_objective",
    generation_prompt=email_objective_prompt,
)

In [52]:

# Subject-line prompt, written one output line per list entry; an empty entry
# produces a blank line once the entries are joined with "\n".
email_subject_prompt = "\n".join(
    [
        "Create an email subject line based on:",
        "- Email Objective: {email_objective}",
        "- Urgency Level: {urgency_level}",
        "- Conversation Tone: {conversation_tone}",
        "",
        "The subject line should:",
        "- Be one line only",
        "- Reflect the urgency when needed",
        "- Be specific enough to be useful",
        "- Match how real people write email subjects to small businesses",
        "- Sound natural, not corporate",
        "",
        "Only provide the subject line, no quotes or additional context.",
    ]
)

# Generated column: a subject line derived from the objective, urgency and tone.
data_designer.add_generated_data_column(
    name="email_subject",
    generation_prompt=email_subject_prompt,
)

In [53]:

# The conversation prompt is assembled from five sections that are separated by
# a blank line in the final text (each section but the last ends in "\n", and
# the sections are joined with one more "\n").
conversation_intro = (
    "Generate an email conversation between a small business owner and customer with these parameters:\n"
)
conversation_context = (
    "CONTEXT:\n"
    "- Business Type: {business_type} (Specifically: {specific_business})\n"
    "- Owner's Style: {owner_personality}\n"
    "- Customer's Style: {customer_personality}\n"
    "- Relationship: {business_relationship}\n"
    "- Conversation Tone: {conversation_tone}\n"
    "- Business Name: {business_name}\n"
)
conversation_objective = (
    "OBJECTIVE:\n"
    "{email_objective}\n"
)
conversation_subject = (
    "SUBJECT LINE:\n"
    "{email_subject}\n"
)
conversation_requirements = (
    "REQUIREMENTS:\n"
    "- The conversation must have {email_length} number of emails and reach a conclusion by the end of it.\n"
    "- Start with the customer's initial email\n"
    "- Each email should feel like a real small business interaction\n"
    "- Make sure you reflect the communication styles of both owner and customer throughout the email chain\n"
    "- Include natural details and follow-ups\n"
    "- End when the objective is achieved or an appropriate next step is established\n"
    "- Use realistic email formatting\n"
    "- Keep the tone authentic to a small business interaction\n"
    "- Give the business owner and customer realistic names, sign each email with the person's name.\n"
    "- Make sure the names used are consistent across the entire email chain."
)
email_contents_prompt = "\n".join(
    [
        conversation_intro,
        conversation_context,
        conversation_objective,
        conversation_subject,
        conversation_requirements,
    ]
)

# Generated column holding the full email chain, requested as structured output
# shaped by the EmailConversation model.
data_designer.add_generated_data_column(
    name="email_contents",
    generation_prompt=email_contents_prompt,
    data_config={"type": "structured", "params": {"model": EmailConversation}},
    # NOTE(review): the "judge" LLM type is used for generation here -- presumably
    # deliberate for structured output; confirm against the Data Designer docs.
    llm_type="judge",
)

In [54]:
# Run a preview generation over the columns configured above, with verbose
# logging switched on, and keep the result for inspection in the next cell.
preview = data_designer.generate_dataset_preview(verbose_logging=True)

[19:09:48] [INFO] 🚀 Generating dataset preview
[19:09:49] [INFO] 🦜 Step 1: Generate seed category values
[19:09:49] [INFO]   |   |-- ✨ Generating values for seed subcategory `specific_business` when `business_type` is Food & Beverage
[19:09:49] [INFO]   |   |-- ✨ Generating values for seed subcategory `specific_business` when `business_type` is Local Retail
[19:09:50] [INFO]   |   |-- ✨ Generating values for seed subcategory `specific_business` when `business_type` is Skilled Trade
[19:09:50] [INFO]   |   |-- ✨ Generating values for seed subcategory `specific_business` when `business_type` is Creative Services
[19:09:50] [INFO]   |   |-- ✨ Generating values for seed subcategory `specific_business` when `business_type` is Health & Wellness
[19:09:51] [INFO]   |-- Model usage: [{"model": "Qwen/Qwen2.5-7B-Instruct", "prompt_tokens": 2261, "completion_tokens": 95, "request_count": 5, "total_tokens": 2356}]
[19:09:51] [INFO] 🎲 Step 2: Sample data seeds
[19:09:51] [INFO]   |-- 🎲 Randomly sam

In [55]:
# Show a sample record from the preview result for a quick visual check.
preview.display_sample_record()