In [1]:
from dataclasses import asdict
import json
from dotenv import load_dotenv
from scripts.OpenAIService import OpenAIService
import numpy as np
from classes import Content, DialOption, Segment, SegmentType, Output, OutputField, GoalsOutput
from utils import generate_unique_id

load_dotenv()
service = OpenAIService()

In [2]:
def generateSegment(max_width, max_depth, parent_reference=None) -> Segment:
    add_depth: bool = np.random.choice([True])
    if max_depth <= 0 or not add_depth:
        return Segment(type=SegmentType.holdTheLine, contents=[Content(text=f"{parent_reference}:0", fileID=generate_unique_id())], id=generate_unique_id())
    else:
        return generateDialOptions(max_width, max_depth, parent_reference)

def generateDialOptions(max_width, max_depth, parent_reference=None) -> Segment:
    width = np.random.randint(2, max_width + 1)
    dial_options = []
    for i in range(1, width+1):
        reference = f"{parent_reference}:{i}"
        dialOption = DialOption(text=reference, index=i, nextSegment=generateSegment(max_width, max_depth - 1, reference), fileID=generate_unique_id())
        dial_options.append(dialOption)
        
    return Segment(type=SegmentType.dialOptions, contents=dial_options, id=generate_unique_id())

def generateCallTree(max_width, max_depth) -> Segment:
    root_reference = "ROOT"
    segment = Segment(type=SegmentType.welcome, contents=[Content(text=root_reference, fileID=generate_unique_id())], nextSegment=generateDialOptions(max_width, max_depth, root_reference), id=generate_unique_id())
    return segment
    
call_tree = generateCallTree(2, 2)
# save generated call tree
with open("callTree.json", "w") as f:
    json.dump(asdict(call_tree), f, indent=4)

In [39]:
EXAMPLE = Segment(
    type=SegmentType.welcome,
    contents=[Content(text="REF:0", fileID=generate_unique_id())],
    nextSegment=Segment(
        type=SegmentType.dialOptions,
        contents=[
            DialOption(index=1, text="REF:1", fileID=generate_unique_id(),  nextSegment=Segment(
                type=SegmentType.info, contents=[Content(text="REF:2", fileID=generate_unique_id())], nextSegment=Segment(
                    type=SegmentType.dialOptions, contents=[
                        DialOption(text="REF:3", fileID=generate_unique_id(), index=1, nextSegment=Segment(
                            type=SegmentType.holdTheLine, contents=[Content(text="REF:4", fileID=generate_unique_id())]
                        ) ),
                        DialOption(text="REF:5", fileID=generate_unique_id(), index=2, nextSegment=Segment(
                            type=SegmentType.holdTheLine, contents=[Content(text="REF:6", fileID=generate_unique_id())]
                        ) ),
                    ]
                )
                )),
            DialOption(index=2, text="REF:7", fileID=generate_unique_id(), nextSegment=Segment(
                type=SegmentType.holdTheLine, contents=[Content(text="REF:8", fileID=generate_unique_id())]
            )),
            DialOption(index=3, text="REF:9", fileID=generate_unique_id(), nextSegment=Segment(
                type=SegmentType.holdTheLine, contents=[Content(text="REF:10", fileID=generate_unique_id())]
            ))
            
        ]
    )
)

OUTPUT_EXAMPLE = Output(
    fields=[
        OutputField(reference="REF:42", text="Welcome to Company X customer support."),
        OutputField(reference="REF:42", text="To speak to a representative, press [index]."),
        OutputField(reference="REF:42", text="Thank you for calling the sales.com, goodbye."),
        OutputField(reference="REF:42", text="Did you know that you can also visit our website at www.companyx.com for more information about y?"),
    ]
)

# COMPANY_DESCRIPTION = "A tech support company specializing in computer and software troubleshooting for small businesses. Callers can navigate the IVR to reach hardware specialists, network engineers, or cybersecurity experts."
COMPANY_NAME = "CoInspect"
COMPANY_LOCATION = "San Francisco"
COMPANY_TAGS = "Food"
COMPANY_DESCRIPTION = "CoInspect App is an all-in-one solution that offers food safety compliance, brand standard audits, supply chain audits easily. Request a FREE Demo."

FULL_COMPANY_DESCRIPTION = '''
Name: {name}
Location: {location}
Tags: {tags}
Description: {description}'''.format(
    name=COMPANY_NAME,
    location=COMPANY_LOCATION,
    tags=COMPANY_TAGS,
    description=COMPANY_DESCRIPTION
)

SYSTEM_PROMPT = '''
You are a helpful and precise assistant.
Given a description of a company. Fill out the marked text fields of a fictive IVR call menu.
I will use your output directly to generate fictive IVR call audio with TTS, 
therefore, please make sure that the text is speakable.

Please be creative based on the company description and make sure that the text is speakable and makes sense in relation to the fictive company.
If it is a dial option, make sure to mention that they should press '[INDEX]' to choose that menu.
I will replace it with the correct index in the output, so you are only allowed to write '[INDEX]' in the text.
A dial option can never be an offer to call back, returning to the menu or visit a website, it should always be a menu option leading to a new segment.

I have already created a json structure containing the marked text fields that you should fill out in the output. 
This is marked with 'ROOT:<reference_index>':
{call_tree}

The welcome intro should be one sentence only!
The number of references in the output should match the number of references in the input.
It's very important that you fill out all the references in the output!!!
'''.format(call_tree=call_tree)

USER_PROMPT = '''
Here is the company information that the output should be specific to:
--------{company_description}
--------
'''.format(company_description=FULL_COMPANY_DESCRIPTION)
print(SYSTEM_PROMPT)

print(USER_PROMPT)


You are a helpful and precise assistant.
Given a description of a company. Fill out the marked text fields of a fictive IVR call menu.
I will use your output directly to generate fictive IVR call audio with TTS, 
therefore, please make sure that the text is speakable.

Please be creative based on the company description and make sure that the text is speakable and makes sense in relation to the fictive company.
If it is a dial option, make sure to mention that they should press '[INDEX]' to choose that menu.
I will replace it with the correct index in the output, so you are only allowed to write '[INDEX]' in the text.
A dial option can never be an offer to call back, returning to the menu or visit a website, it should always be a menu option leading to a new segment.

I have already created a json structure containing the marked text fields that you should fill out in the output. 
This is marked with 'ROOT:<reference_index>':
Segment(type=<SegmentType.welcome: 'welcome'>, contents=[Co

In [40]:
response: Output = service.parse_information(SYSTEM_PROMPT, USER_PROMPT, Output)
print(response.json())

with open("output2.json", "w") as f:
    json.dump(response.model_dump(), f, indent=4)

Tokens used: 2775
{"fields":[{"reference":"ROOT","text":"Welcome to CoInspect, your all-in-one food safety compliance, brand standards, and supply chain audit solution in San Francisco."},{"reference":"ROOT:1","text":"For a free demo of CoInspect’s food safety and audit features, press [INDEX]."},{"reference":"ROOT:2","text":"For customer support or general inquiries, press [INDEX]."},{"reference":"ROOT:1:1","text":"To schedule a live demo with our specialists, press [INDEX]."},{"reference":"ROOT:1:2","text":"To sign up for a free self-service trial, press [INDEX]."},{"reference":"ROOT:1:1:0","text":"Please hold while we connect you to schedule your live demo."},{"reference":"ROOT:1:2:0","text":"Please hold while we connect you to set up your self-service trial."},{"reference":"ROOT:2:1","text":"For technical assistance with the CoInspect platform, press [INDEX]."},{"reference":"ROOT:2:2","text":"For billing or account questions, press [INDEX]."},{"reference":"ROOT:2:1:0","text":"One m

/var/folders/wq/k5hy5l4j6pbd_js2dbk6vhfc0000gn/T/ipykernel_36095/496640878.py:2: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(response.json())


In [None]:
reference_texts = {}
for field in response.fields:
    reference_texts[field.reference] = field.text
print(reference_texts)

def get_reference_text(reference):
    result = reference_texts.get(reference)
    assert result is not None, f"Reference {reference} not found in output"
    return result

def replace_references(segment: Segment) -> Segment:
    new_contents = []
    for content in segment.contents:
        content_text:str = get_reference_text(content.text)
        if hasattr(content, "index"):
            content_text = content_text.replace("[INDEX]", str(content.index))
            new_contents.append(DialOption(index=content.index, text=content_text, nextSegment=replace_references(content.nextSegment), fileID=content.fileID))
        else:
            new_contents.append(Content(text=content_text, fileID=content.fileID))
            
    next_segment = None
    if segment.nextSegment is not None:
        next_segment = replace_references(segment.nextSegment)
        
    new_segment: Segment = Segment(type=segment.type, contents=new_contents, nextSegment=next_segment, id=segment.id)
    return new_segment

call_tree_replaced = replace_references(call_tree)
print(call_tree_replaced)

with open("callTreeReplaced.json", "w") as f:
    json.dump(asdict(call_tree_replaced), f, indent=4)

{'ROOT': 'Thank you for calling CoInspect in San Francisco, your partner in food safety compliance and audit solutions.', 'ROOT:1': 'For sales, pricing, and to request a free demo, press [INDEX].', 'ROOT:1:1': 'To request a free CoInspect demo and explore our all-in-one food safety, brand standards, and supply chain audit solution, press [INDEX].', 'ROOT:1:1:0': 'Please hold while we connect you to our demo specialist.', 'ROOT:1:2': 'To speak with a sales representative about pricing and package options, press [INDEX].', 'ROOT:1:2:0': 'Please hold while we connect you to a sales representative.', 'ROOT:2': 'For technical support, training, or billing inquiries, press [INDEX].', 'ROOT:2:1': 'For technical support with the CoInspect app, press [INDEX].', 'ROOT:2:1:0': 'Please hold while we connect you to technical support.', 'ROOT:2:2': 'For account or billing assistance, press [INDEX].', 'ROOT:2:2:0': 'Please hold while we connect you to our billing team.'}
Segment(type=<SegmentType.wel

In [6]:
# get unique routes
def get_unique_segment_sequences(segment: Segment) -> list[list[str]]:
    result = []
    
    if segment.type == SegmentType.holdTheLine:
        return [[content.text for content in segment.contents] + [segment.id]]
    elif segment.type == SegmentType.dialOptions:
        for content in segment.contents:
            sub_sequences = get_unique_segment_sequences(content.nextSegment)
            for sub_sequence in sub_sequences:
                result.append([content.text] + sub_sequence)
    elif segment.type == SegmentType.welcome:
        sub_sequences = get_unique_segment_sequences(segment.nextSegment)
        for sub_sequence in sub_sequences:
            result.append([content.text for content in segment.contents] + sub_sequence)
        
        
    return result
    
unique_sequences = get_unique_segment_sequences(call_tree_replaced)
unique_sequences_map = {}
for sequence in unique_sequences:
    unique_sequences_map[sequence[-1]] = sequence[:-1]
        
print(unique_sequences_map)

{'5289468b8cdd461facb5dbdf862352b2': ['Welcome to 23andMe South Bay, your gateway to personalized ancestry, health, and trait reports.', 'For information about your health and trait reports, press 1.', 'To learn about your health predisposition reports, press 1.', 'Please hold while we connect you with a health reports specialist.'], '0a2a539ea3294ba095fe87f88519f2c3': ['Welcome to 23andMe South Bay, your gateway to personalized ancestry, health, and trait reports.', 'For information about your health and trait reports, press 1.', 'For details on your carrier status reports, press 2.', 'Please hold while we connect you with a carrier status specialist.'], '8f6acc96b70b427ab595472b0f65b32c': ['Welcome to 23andMe South Bay, your gateway to personalized ancestry, health, and trait reports.', 'For ancestry and family history services, press 2.', 'To explore your ancestry composition report, press 1.', 'Please hold while we connect you with an ancestry specialist.'], '2de25856115341348f929e

In [None]:
PERSONAS = [
    "A busy professional who values efficiency and quick resolutions.",
    "A tech-savvy individual who likes to use technical terms.",
    "A dyslexic mother of two young children.",
    "An elderly person who may need extra assistance.",
    "A first-time caller unfamiliar with the company's services.",
    "A customer with a specific complaint or issue.",
    "An ex-military veteran seeking support.",
    "A well-formulated student of physics.",
    "A small business owner with limited funds.",
    "A refugee from Ukraine who speaks English as a second language."
]

GOAL_PROMPT = '''
I'm training an LLM agent to navigate a fictive IVR call menu. However, this specific "LLM agent" is not you.
You are a helpful and precise assistant that helps me generate fake user goals that the "LLM agent" should base its navigation on.

A real scenario use of the service I'm building will be as follows:
1. The user picks up a phone and opens the Hold The Line app.
2. The users dials a number. A chat box opens.
3. The "LLM agent" ask the user what they need help with.
4. The user respond with their intended goal.

Your job is, based on the provided persona description, to generate realistic user goals that could be used in this scenario.
The input sequences corresponds to the relevant information that the "LLM agent" would use to navigate based on the goal that you generate.

Rules:
- The goals should be specific to the unique segment sequences.
- Two goals should never overlap. The goals should be as orthogonal to each other as possible.
- The goals should always be from the perspective of the user of the Hold The Line app. Short, simple and realistic.
- The users goal formulation should be based on the persona description.
- Always generate a unique goal for each unique segment sequence in the input.
- Try to avoid using the exact formulations of the input in the goals. However, this might not always be possible.
- The output should be a list of unique goals where each goal corresponds to the unique segment sequence in the input.
'''

GOAL_USER_PROMPT = '''
Here is the persona description of the user that you should imitate:
"{persona}"

Here is the list of unique sequences that you should generate goals for:
{unique_sequences}
'''.format(persona=np.random.choice(PERSONAS), unique_sequences=unique_sequences)

print(GOAL_PROMPT)
print(GOAL_USER_PROMPT)


I'm training an LLM agent to navigate a fictive IVR call menu. However, this specific "LLM agent" is not you.
You are a helpful and precise assistant that helps me generate fake user goals that the "LLM agent" should base its navigation on.

A real scenario use of the service I'm building will be as follows:
1. The user picks up a phone and opens the Hold The Line app.
2. The users dials a number. A chat box opens.
3. The "LLM agent" ask the user what they need help with.
4. The user respond with their intended goal.

Your job is, based on the provided persona description, to generate realistic user goals that could be used in this scenario.
The input sequences corresponds to the relevant information that the "LLM agent" would use to navigate based on the goal that you generate.

Rules:
- The goals should be specific to the unique segment sequences.
- Two goals should never overlap. The goals should be as orthogonal to each other as possible.
- The goals should always be from the pers

In [33]:
goal_response: GoalsOutput = service.parse_information(GOAL_PROMPT, GOAL_USER_PROMPT, GoalsOutput)
print(goal_response.json())

# save generated goals
with open("goals.json", "w") as f:
    json.dump(goal_response.model_dump(), f, indent=4)

Tokens used: 1218
{"goals":["I have questions about my genetic health risk report and want to discuss my predisposition to certain conditions.","I’d like to review my carrier screening results to see if I’m a carrier for any inherited disorders.","I want a detailed breakdown of my ancestry composition by region.","I’m interested in using the DNA relatives feature to connect with genetic matches and build my family tree."]}


/var/folders/wq/k5hy5l4j6pbd_js2dbk6vhfc0000gn/T/ipykernel_36095/3684674894.py:2: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(goal_response.json())
