In [1]:
from dataclasses import asdict
import json
from dotenv import load_dotenv
from scripts.OpenAIService import OpenAIService
import numpy as np
from classes import Content, DialOption, Segment, SegmentType, Output, OutputField 
from utils import generate_unique_id

load_dotenv()
service = OpenAIService()


In [None]:
def generateSegment(max_width, max_depth, parent_reference=None) -> Segment:
    add_depth: bool = np.random.choice([True, False])
    if max_depth <= 0 or not add_depth:
        return Segment(type=SegmentType.holdTheLine, contents=[Content(text=f"{parent_reference}:0", fileID=generate_unique_id())], id=generate_unique_id())
    else:
        return generateDialOptions(max_width, max_depth, parent_reference)

def generateDialOptions(max_width, max_depth, parent_reference=None) -> Segment:
    width = np.random.randint(2, max_width + 1)
    dial_options = []
    for i in range(1, width+1):
        reference = f"{parent_reference}:{i}"
        dialOption = DialOption(text=reference, index=i, nextSegment=generateSegment(max_width, max_depth - 1, reference), fileID=generate_unique_id())
        dial_options.append(dialOption)
        
    return Segment(type=SegmentType.dialOptions, contents=dial_options, id=generate_unique_id())

def generateCallTree(max_width, max_depth) -> Segment:
    root_reference = "ROOT"
    segment = Segment(type=SegmentType.welcome, contents=[Content(text=root_reference, fileID=generate_unique_id())], nextSegment=generateDialOptions(max_width, max_depth, root_reference), id=generate_unique_id())
    return segment
    
call_tree = generateCallTree(2, 2)
# save generated call tree
with open("callTree.json", "w") as f:
    json.dump(asdict(call_tree), f, indent=4)

In [3]:
EXAMPLE = Segment(
    type=SegmentType.welcome,
    contents=[Content(text="REF:0", fileID=generate_unique_id())],
    nextSegment=Segment(
        type=SegmentType.dialOptions,
        contents=[
            DialOption(index=1, text="REF:1", fileID=generate_unique_id(),  nextSegment=Segment(
                type=SegmentType.info, contents=[Content(text="REF:2", fileID=generate_unique_id())], nextSegment=Segment(
                    type=SegmentType.dialOptions, contents=[
                        DialOption(text="REF:3", fileID=generate_unique_id(), index=1, nextSegment=Segment(
                            type=SegmentType.holdTheLine, contents=[Content(text="REF:4", fileID=generate_unique_id())]
                        ) ),
                        DialOption(text="REF:5", fileID=generate_unique_id(), index=2, nextSegment=Segment(
                            type=SegmentType.holdTheLine, contents=[Content(text="REF:6", fileID=generate_unique_id())]
                        ) ),
                    ]
                )
                )),
            DialOption(index=2, text="REF:7", fileID=generate_unique_id(), nextSegment=Segment(
                type=SegmentType.holdTheLine, contents=[Content(text="REF:8", fileID=generate_unique_id())]
            )),
            DialOption(index=3, text="REF:9", fileID=generate_unique_id(), nextSegment=Segment(
                type=SegmentType.holdTheLine, contents=[Content(text="REF:10", fileID=generate_unique_id())]
            ))
            
        ]
    )
)

OUTPUT_EXAMPLE = Output(
    fields=[
        OutputField(reference="REF:42", text="Welcome to Company X customer support."),
        OutputField(reference="REF:42", text="To speak to a representative, press [index]."),
        OutputField(reference="REF:42", text="Thank you for calling the sales.com, goodbye."),
        OutputField(reference="REF:42", text="Did you know that you can also visit our website at www.companyy.com for more information about y?"),
    ]
)

COMPANY_DESCRIPTION = "A tech support company specializing in computer and software troubleshooting for small businesses. Callers can navigate the IVR to reach hardware specialists, network engineers, or cybersecurity experts."

SYSTEM_PROMPT = f'''
You are a helpful and precise assistant.
Given a description of a company. Fill out the marked text fields of a fictive IVR call menu.
I will use your output directly to generate fictive IVR call audio with TTS, 
therefore, please make sure that the text is speakable.

Please be creative based on the company description and make sure that the text is speakable and makes sense in relation to the fictive company.
If it is a dial option, make sure to mention that they should press '[INDEX]' to choose that menu.
I will replace it with the correct index in the output, so you are only allowed to write '[INDEX]' in the text.
A dial option can never be an offer to call back, returning to the menu or visit a website, it should always be a menu option leading to a new segment.

I have already created a json structure containing the marked text fields that you should fill out in the output. 
This is marked with 'REF:<reference_index>':
{call_tree}

The number of references in the output should match the number of references in the input.
It's very important that you fill out all the references in the output!!!
'''

USER_PROMPT = f'''
Here is the company description that the output should be specific to:
"{COMPANY_DESCRIPTION}"
'''
print(SYSTEM_PROMPT)

print(USER_PROMPT)


You are a helpful and precise assistant.
Given a description of a company. Fill out the marked text fields of a fictive IVR call menu.
I will use your output directly to generate fictive IVR call audio with TTS, 
therefore, please make sure that the text is speakable.

Please be creative based on the company description and make sure that the text is speakable and makes sense in relation to the fictive company.
If it is a dial option, make sure to mention that they should press '[INDEX]' to choose that menu.
I will replace it with the correct index in the output, so you are only allowed to write '[INDEX]' in the text.
A dial option can never be an offer to call back, returning to the menu or visit a website, it should always be a menu option leading to a new segment.

I have already created a json structure containing the marked text fields that you should fill out in the output. 
This is marked with 'REF:<reference_index>':
Segment(type=<SegmentType.welcome: 'welcome'>, contents=[Con

In [4]:
response: Output = service.parse_information(SYSTEM_PROMPT, USER_PROMPT, Output)
print(response.json())

with open("output2.json", "w") as f:
    json.dump(response.model_dump(), f, indent=4)

{"fields":[{"reference":"ROOT","text":"Welcome to TechSolve, your trusted partner in computer and software troubleshooting for small businesses. We are ready to help you with all your technology challenges."},{"reference":"ROOT:1","text":"For specialized technical support covering hardware, networks, and cybersecurity, please press [INDEX]."},{"reference":"ROOT:1:1","text":"If you need assistance with computer hardware and device troubleshooting, please press [INDEX] to speak with our hardware specialists."},{"reference":"ROOT:1:1:0","text":"Please hold while we connect you to a hardware specialist."},{"reference":"ROOT:1:2","text":"For support with network issues or connectivity problems, please press [INDEX] to reach our network engineers."},{"reference":"ROOT:1:2:0","text":"Please hold while we connect you to a network engineer."},{"reference":"ROOT:1:3","text":"If you require cybersecurity assistance such as virus removal or data protection, please press [INDEX] to speak with a cyb

/var/folders/wq/k5hy5l4j6pbd_js2dbk6vhfc0000gn/T/ipykernel_51339/496640878.py:2: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(response.json())


In [5]:
reference_texts = {}
for field in response.fields:
    reference_texts[field.reference] = field.text
print(reference_texts)

def get_reference_text(reference):
    result = reference_texts.get(reference)
    assert result is not None, f"Reference {reference} not found in output"
    return result

def replace_references(segment: Segment) -> Segment:
    new_contents = []
    for content in segment.contents:
        content_text:str = get_reference_text(content.text)
        if hasattr(content, "index"):
            content_text = content_text.replace("[INDEX]", str(content.index))
            new_contents.append(DialOption(index=content.index, text=content_text, nextSegment=replace_references(content.nextSegment), fileID=content.fileID))
        else:
            new_contents.append(Content(text=content_text, fileID=content.fileID))
            
    next_segment = None
    if segment.nextSegment is not None:
        next_segment = replace_references(segment.nextSegment)
        
    new_segment: Segment = Segment(type=segment.type, contents=new_contents, nextSegment=next_segment)
    return new_segment

call_tree_replaced = replace_references(call_tree)
print(call_tree_replaced)

with open("callTreeReplaced.json", "w") as f:
    json.dump(asdict(call_tree_replaced), f, indent=4)

{'ROOT': 'Welcome to TechSolve, your trusted partner in computer and software troubleshooting for small businesses. We are ready to help you with all your technology challenges.', 'ROOT:1': 'For specialized technical support covering hardware, networks, and cybersecurity, please press [INDEX].', 'ROOT:1:1': 'If you need assistance with computer hardware and device troubleshooting, please press [INDEX] to speak with our hardware specialists.', 'ROOT:1:1:0': 'Please hold while we connect you to a hardware specialist.', 'ROOT:1:2': 'For support with network issues or connectivity problems, please press [INDEX] to reach our network engineers.', 'ROOT:1:2:0': 'Please hold while we connect you to a network engineer.', 'ROOT:1:3': 'If you require cybersecurity assistance such as virus removal or data protection, please press [INDEX] to speak with a cybersecurity expert.', 'ROOT:1:3:0': 'Please hold while we connect you to a cybersecurity expert.', 'ROOT:2': 'For general inquiries, including b

In [None]:
import torch
from transformers import AutoProcessor, BarkModel
from utils import get_best_device

processor = AutoProcessor.from_pretrained("suno/bark")
device = get_best_device()
model = BarkModel.from_pretrained("suno/bark", torch_dtype=torch.float16)
# model = model.to(device)
# enable CPU offload
# model.enable_cpu_offload()

In [None]:
voice_preset = "v2/en_speaker_6"

def generate_audio(text):
    inputs = processor(text, voice_preset=voice_preset, return_tensors="pt")
    # inputs = {k: v.to(device) for k, v in inputs.items()}
    audio_array = model.generate(**inputs, semantic_max_new_tokens=100)
    audio_array = audio_array.cpu().numpy().squeeze()
    return audio_array

text = "Hello, my name is test. I am generating audio from text."

audio = generate_audio(text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:10000 for open-end generation.


In [None]:
import soundfile as sf
import numpy as np
import IPython.display as ipd
# convert to 16 bit PCM
audio2 = (audio * 22050).astype(np.int16)
sf.write("test.wav", audio2, samplerate=22050)
ipd.Audio(audio, rate=22050)  # load a NumPy array

In [None]:
def extract_text(past_text, object):
    if object == None:
        print("object is none")
        return past_text
    if isinstance(object, Segment):
        if object.type == SegmentType.dialOptions:
            
            past_text += "\n"
            for content in object.contents:
                past_text += content.text
                past_text += "\n"
            n_dial_options = len(object.contents)
            random_dial_pick_index = np.random.randint(0, n_dial_options)
            dial_option_picked = object.contents[random_dial_pick_index]
            past_text += f"[DIAL:{dial_option_picked.index}]\n\n"
            return extract_text(past_text, dial_option_picked.nextSegment)
        else:
            for content in object.contents:
                past_text += content.text
                # past_text += "\n"
            return extract_text(past_text, object.nextSegment)
    
    
print(extract_text("--------------\n", call_tree_replaced))
print("--------------")
 

object is none
--------------
Welcome to TechShield Support, your reliable partner in tech assistance for small businesses. We are dedicated to resolving your computer and software issues quickly and efficiently.
If you are calling for computer and software troubleshooting, please press 1.
If you require assistance with network or security support, please press 2.
[DIAL:1]


For hardware and device issues, please press 1.
For software installation and performance concerns, please press 2.
For operating system errors and update issues, please press 3.
[DIAL:1]

Connecting you now to a hardware specialist. Please hold for assistance.
--------------


In [None]:
response

Output(fields=[OutputField(reference='REF:0', text='Welcome to TechAid Solutions, your trusted partner in tech support for small businesses. We specialize in computer and software troubleshooting to keep your operations running smoothly.'), OutputField(reference='REF:1', text='For expert assistance with computer and software issues, please press [index].'), OutputField(reference='REF:2', text='If you are experiencing hardware glitches or connectivity problems, we can help direct you to the right specialist. Please choose from the options below.'), OutputField(reference='REF:3', text='To speak with one of our hardware specialists, please press [index].'), OutputField(reference='REF:4', text='Thank you for calling TechAid Solutions. A hardware specialist will be with you shortly.'), OutputField(reference='REF:5', text='For network engineering support, please press [index].'), OutputField(reference='REF:6', text='Thank you for calling TechAid Solutions. A network engineer will assist you 