# Setup

In [122]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [191]:
# Standard library imports
from pydantic import BaseModel
from copy import deepcopy
import threading
import warnings
import queue
import json
import os

warnings.filterwarnings("ignore")
JWT_SECRET_API = !echo $(aws --profile "chat-prod_ro" secretsmanager get-secret-value --secret-id "arn:aws:secretsmanager:us-west-2:242659714806:secret:shared/cresta-server-jwt_secret-VDn5My" --query SecretString --output text) # type: ignore
os.environ["JWT_SECRET_API"] = json.loads(JWT_SECRET_API[0])["jwt-secret"]
os.environ["CONFIG_SERVICE_ADDR"] = "auth.chat-prod.internal.cresta.ai:443"
os.environ["CONFIG_USE_SECURE_CHANNEL"] = "true"

# Third party imports

from sentence_transformers import SentenceTransformer, util
from retry import retry

# Greyparrot imports
from greyparrot.llm.prompting import prompts as prompts_utils
from greyparrot.conversations.db import ConversationsDBConn
from greyparrot.multi_tenancy.v3_config import V3Config
from greyparrot.conversations.utils import get_chats
from greyparrot.chats_common import PartialChat
from greyparrot.common import get_logger


logger = get_logger(__name__)

# Local imports
from llm_proxy_client import LLMProxyDevClient

In [124]:
embedder = SentenceTransformer("all-mpnet-base-v2")

In [125]:
customer_id = "brinks"
profile_id = "care-voice"
usecase_id = "care-voice"
language_code = "en-US"

In [126]:
def get_chats_with_ids(chat_ids: list[str]) -> list[PartialChat]:
    customer_name: str = V3Config.short_name_from_ids(customer_id, profile_id)
    conv_db_conn: ConversationsDBConn = ConversationsDBConn.from_customer_name(customer_name)
    chats: list[PartialChat] = conv_db_conn.get_detailed_chats(
        customer_id=customer_id,
        profile_id=profile_id,
        usecase_id=usecase_id,
        language_code=language_code,
        conversation_ids=chat_ids,
        is_dev_user=False,
    )
    return chats

In [127]:
LLM_ENGINE: str = "gpt-4o-mini"
CONCURRENCY: int = 10

In [128]:
@retry(tries=1, delay=60, backoff=2, logger=logger)
def chat_completion(**kwargs):
    return LLMProxyDevClient("openai").beta.chat.completions.parse(**kwargs)

# Discovery

In [129]:
# TODO remove this after fixing speaker_role flips
flips: dict[str, str] = {"agent": "visitor", "visitor": "agent"}

In [130]:
def chat_to_prompt_text(chat: PartialChat) -> str:
    return "\n".join([
        f"{prompts_utils.speaker_role_str_for_prompts(flips[m.speaker_role.value]).capitalize()}: {m.text}"
        for m in chat.messages
    ])

In [131]:
SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY = """### Context and data description
You are a conversation analyst working for Brinks Home Security Call Center.

You will be given 1 conversation at a time. Each conversation is between a Brinks Call Center Agent and a Customer. Your primary goal is to extract workflows of steps which the Agent takes in **the given conversation** to help resolve the Customer's needs related to their home security system and services.

The primary use case of these workflows is to create a troubleshooting template to address similar customer needs in the future.

Each workflow should be a list of steps which the Agent needs to take.

For each workflow, return:
- Product: Specific Brinks product/service involved (e.g., Security Panel, Door/Window Sensor, Motion Detector, Security Camera, Doorbell Camera, Smart Lock, Mobile App, Alarm.com Account)
- Issue: Specific customer problem (e.g., False Alarms, Device Offline, Camera Not Recording, App Login Issues, Billing Questions, Account Changes)
- Steps: Detailed troubleshooting or resolution steps the Agent follows

Make sure the product is specific to Brinks' security equipment and services, not general categories.
Make sure the issue clearly describes the exact problem the customer is experiencing.
Make sure the steps are detailed and follow Brinks' standard operating procedures.
Make sure to only return the agent troubleshooting steps, not the customer's requests or other information.

Common scenarios include:
- Security system troubleshooting (panel issues, sensor malfunctions, connectivity problems)
- Camera and video recording issues
- Account management (billing, autopay, contact updates)
- Mobile app and Alarm.com portal assistance
- Service changes (moving, upgrading, cancellation)
- Installation and maintenance appointments

**Important**: There could be more than 1 workflow in a single conversation. There could also be no workflows in a single conversation. The workflows will be used to create troubleshooting guides to address similar customer needs in the future."""

In [132]:
class Flow(BaseModel):
    product: str
    issue: str
    steps: list[str]

class Flows(BaseModel):
    flows: list[Flow]

In [133]:
def discover_flow_in_chat(chat: PartialChat,
                          llm_engine: str = LLM_ENGINE) -> list[Flow]:
    logger.info(f"Discovering Agent flow in chat {chat.chat_name}")
    messages: list[dict[str, str]] = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY
        },
        {
            "role": "user",
            "content": chat_to_prompt_text(chat)
        },
    ]

    return chat_completion(model=llm_engine,
                            messages=messages,
                            response_format=Flows).choices[0].message.parsed.flows

In [134]:
def extract_flows_from_chats(chats: list[PartialChat],
                                concurrency: int=10) -> dict[str, list[Flow]]:
    lock: threading.Lock = threading.Lock()
    indexes: queue.Queue = queue.Queue()

    workflows: dict[str, list[Flow]] = {}
    for idx in range(len(chats)):
        indexes.put(idx)

    def workflow_labeler_worker():
        while True:
            try:
                idx = indexes.get(block=False)
            except queue.Empty:
                return
            chat = chats[idx]
            try:
                extracted_workflows: list[Flow] = discover_flow_in_chat(chat)
                with lock:
                    workflows[str(chat)] = extracted_workflows
                    if len(workflows) % 10 == 0:
                        print(f"Workflows from {len(workflows)} chats extracted!")
            except Exception as e:
                logger.warning(e, str(chat))
            indexes.task_done()

    logger.info(
        f"Starting processing {len(chats)} chats with {concurrency} workers")
    workers = [
        threading.Thread(target=workflow_labeler_worker)
        for _ in range(concurrency)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Finished processing all {len(chats)} chats")

    return workflows

In [135]:
test_chat: PartialChat = get_chats_with_ids(["0843c54c-6487-45ce-946a-cc6257484f54"])[0]
workflows: list[Flow] = discover_flow_in_chat(test_chat)



cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice-FKrGHU -r
cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice -r


{"message": "Discovering Agent flow in chat 0843c54c-6487-45ce-946a-cc6257484f54", "name": "__main__", "asctime": "2025-02-13 23:06:11", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "MainThread", "status": "INFO"}


In [136]:
print(chat_to_prompt_text(test_chat))

Customer: Technical support
Agent: Thank you for being the best part of Brinks Home. My name is Juan. Who do I have the pleasure of speaking with?
Customer: Debbie. Deborah Terryya.
Agent: Good morning, missus Deborah. How can I assist you?
Customer: Okay. On December 16th, I had Brinks installed. I had one. Camera put on my front door, and I had one put on the tireport, and then I went out of town for a couple weeks. And only my carport has been working since December 6th eight. And I clicked on the troubleshoot for my front door, and it just keeps coming up that there's a problem. I mean, I did I did everything it told me to do. I unplugged the power cord, and told me to wait 5 minutes. And it was trying to find what the problem was. And it is still not responding my front door.
Agent: The front door. The other
Customer: Which would be by my court
Agent: Okay. Gotcha. Let me get into the account real quick. And let me just double check what is happening to that sensor.
Customer: Yeah

In [137]:
print(f"Found {len(workflows)} workflows\n")
for i, flow in enumerate(workflows):
    print(f"[Agent Workflow #{i + 1}]")
    print(f"Product: {flow.product}")
    print(f"Issue: {flow.issue}")
    print("Steps:", end="\n* ")
    print("\n* ".join(flow.steps))
    if i < len(workflows) - 1:
        print("-" * 100)

Found 1 workflows

[Agent Workflow #1]
Product: Security Camera
Issue: Camera Not Recording / Device Offline
Steps:
* Verify customer account details including phone number, address, and verbal password.
* Instruct customer to access front door camera and report LED color.
* Determine if there is no LED light present on the front door camera.
* Guide customer to unplug the power supply for the front door camera and wait for 30 seconds to 1 minute before reconnecting it.
* Instruct customer to reconnect the power supply for the front door camera and check the LED indicator again.
* If the LED is still off, instruct customer to test the power outlet with another device to confirm if it is supplying power.
* Determine that the power outlet is functioning based on the customer's checks (i.e., using a phone charger).
* Identify the power supply for the camera is faulty since no power is reaching the camera.
* Provide customer with the contact number for the technician to resolve the issue (

# 200 Recent Chats

In [138]:
data_set = get_chats(
    'brinks-care-voice', 
    '2024-06-01',
    '2024-12-01',
    200,)

{"message": "Reading from cache_chats-6cdad05445414399627e4b4e59113f84.pkl", "name": "cache-utils", "asctime": "2025-02-13 23:06:14", "levelname": "INFO", "filename": "cache.py", "funcName": "wrapper", "threadName": "MainThread", "status": "INFO"}
{"message": "Loading dataset from cache_chats-6cdad05445414399627e4b4e59113f84.pkl", "name": "dataset-common", "asctime": "2025-02-13 23:06:14", "levelname": "INFO", "filename": "common.py", "funcName": "load", "threadName": "MainThread", "status": "INFO"}


{"message": "Loading dataset succeeded!", "name": "dataset-common", "asctime": "2025-02-13 23:06:15", "levelname": "INFO", "filename": "common.py", "funcName": "load", "threadName": "MainThread", "status": "INFO"}


In [139]:
sampled_chat_ids = list(set([
    chat.chat_name
    for chat in (item if not isinstance(item, tuple) else item[0] for item in data_set)
    if hasattr(chat, 'chat_name')
]))
print(len(sampled_chat_ids))

200


# Agent Workflow Discovery

In [140]:
sampled_chats = get_chats_with_ids(sampled_chat_ids)



cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice-FKrGHU -r
cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice -r


In [141]:
workflows = extract_flows_from_chats(sampled_chats)

{"message": "Starting processing 199 chats with 10 workers", "name": "__main__", "asctime": "2025-02-13 23:06:19", "levelname": "INFO", "filename": "<ipython-input-134-e96ef876503f>", "funcName": "extract_flows_from_chats", "threadName": "MainThread", "status": "INFO"}
{"message": "Discovering Agent flow in chat 000c68d2-6c21-4ddf-ab75-fb32b2ce0fc8", "name": "__main__", "asctime": "2025-02-13 23:06:19", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0017ec90-12c7-44dc-932c-f10b38ce0b9a", "name": "__main__", "asctime": "2025-02-13 23:06:19", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-224 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0039e60e-247f-4a8d-80d1-18d380344dce", "name": "__main__

{"message": "Discovering Agent flow in chat 01086912-66f0-4cdc-b38b-9941fedaca8d", "name": "__main__", "asctime": "2025-02-13 23:06:20", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 01124bf9-4f07-4bf1-a86d-30c4fc6278d6", "name": "__main__", "asctime": "2025-02-13 23:06:21", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 01532d84-1f4b-407f-84aa-54a29374b496", "name": "__main__", "asctime": "2025-02-13 23:06:21", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0167a38e-2c

Workflows from 10 chats extracted!


{"message": "Discovering Agent flow in chat 0297c7d2-a6a2-4559-944a-4727df5b9b6f", "name": "__main__", "asctime": "2025-02-13 23:06:24", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-231 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 034f5b00-e216-401d-8bd1-67a6914ee4e5", "name": "__main__", "asctime": "2025-02-13 23:06:24", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 037b2032-8cff-4f06-9af9-c6f35c789ee8", "name": "__main__", "asctime": "2025-02-13 23:06:25", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 03af99f2-aa

Workflows from 20 chats extracted!


{"message": "Discovering Agent flow in chat 044696fe-ba32-4ce6-a705-c25ce7a923ff", "name": "__main__", "asctime": "2025-02-13 23:06:26", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 04758b07-a186-42e1-b88d-13b644c905cc", "name": "__main__", "asctime": "2025-02-13 23:06:26", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 04c6199a-0ec4-4c9e-b4e0-0c448bdffb6e", "name": "__main__", "asctime": "2025-02-13 23:06:27", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 04d8ed2b-e6

Workflows from 30 chats extracted!


{"message": "Discovering Agent flow in chat 05cf80cf-6e4f-46ce-9edd-5e9018674062", "name": "__main__", "asctime": "2025-02-13 23:06:29", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-228 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05e95ce9-95a5-4b36-8694-05ba68d2a950", "name": "__main__", "asctime": "2025-02-13 23:06:29", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-224 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05eedf8d-8f6e-49e0-a793-1203aef1dd4e", "name": "__main__", "asctime": "2025-02-13 23:06:30", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05f3239c-59

Workflows from 40 chats extracted!


{"message": "Discovering Agent flow in chat 071c6c6d-4294-4f0e-9581-ad621e1871fc", "name": "__main__", "asctime": "2025-02-13 23:06:32", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-229 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 073a30a5-2e0d-4b55-8658-5b704954a90b", "name": "__main__", "asctime": "2025-02-13 23:06:32", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 074c00d3-1ca4-4b2d-98ca-6b401a69f66a", "name": "__main__", "asctime": "2025-02-13 23:06:33", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 081008a0-d8

Workflows from 50 chats extracted!


{"message": "Discovering Agent flow in chat 096c8148-f2c5-4d32-96e7-80e6e3610740", "name": "__main__", "asctime": "2025-02-13 23:06:34", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-227 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 096d88a3-edbe-4a62-acb5-205724910f54", "name": "__main__", "asctime": "2025-02-13 23:06:34", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 09896ad0-ed6d-41e8-9bc0-22c0633a38a9", "name": "__main__", "asctime": "2025-02-13 23:06:35", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 09c7e94d-b2

Workflows from 60 chats extracted!


{"message": "Discovering Agent flow in chat 0ad0f9fd-9d59-4446-9c85-d887a208d532", "name": "__main__", "asctime": "2025-02-13 23:06:36", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-227 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0adb8781-21a8-44f2-b55e-bfa532d6fb28", "name": "__main__", "asctime": "2025-02-13 23:06:37", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0ade3b59-78ec-4429-b533-b6ffad4eaf1f", "name": "__main__", "asctime": "2025-02-13 23:06:37", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0ae9aff6-6d

Workflows from 70 chats extracted!


{"message": "Discovering Agent flow in chat 0b43df94-92e9-47dd-9662-69ee9e71eaa0", "name": "__main__", "asctime": "2025-02-13 23:06:38", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0b597f44-f2b8-48fa-b2d0-3ec09df0da23", "name": "__main__", "asctime": "2025-02-13 23:06:39", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0b5d1101-12bd-4e85-8882-ab0ee9726e60", "name": "__main__", "asctime": "2025-02-13 23:06:39", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0becb6e2-b2

Workflows from 80 chats extracted!


{"message": "Discovering Agent flow in chat 0d143a65-8721-48f5-bb61-cc82ab8d85e4", "name": "__main__", "asctime": "2025-02-13 23:06:41", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d2acb25-43d6-4981-8b75-4f80a2e7f4c3", "name": "__main__", "asctime": "2025-02-13 23:06:41", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d2dfdb2-3f23-4b28-ad9f-a6d2ef4aeb36", "name": "__main__", "asctime": "2025-02-13 23:06:41", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d48ae17-2d

Workflows from 90 chats extracted!


{"message": "Discovering Agent flow in chat 0e3c8794-f10b-4b90-ae0e-0f963f428790", "name": "__main__", "asctime": "2025-02-13 23:06:44", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e3e206d-4abc-47a8-b7f7-fffd5603a90f", "name": "__main__", "asctime": "2025-02-13 23:06:44", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e536744-ac0c-4712-bb80-7183fb5d1c56", "name": "__main__", "asctime": "2025-02-13 23:06:44", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e759e20-11

Workflows from 100 chats extracted!


{"message": "Discovering Agent flow in chat 0f311baa-f57d-4034-ab10-f18d0c39ea7e", "name": "__main__", "asctime": "2025-02-13 23:06:46", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-228 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f343b4f-8a53-47a8-a259-85dc4f7e4fbc", "name": "__main__", "asctime": "2025-02-13 23:06:46", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f3db87a-0d0c-4622-8d4b-8d8bd98ecc4d", "name": "__main__", "asctime": "2025-02-13 23:06:46", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f71b41f-b4

Workflows from 110 chats extracted!


{"message": "Discovering Agent flow in chat 0fe55e1f-eb48-4ada-800d-906f043c1a45", "name": "__main__", "asctime": "2025-02-13 23:06:49", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-230 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0feba987-84d7-4c44-8a55-8ae2f8ae144c", "name": "__main__", "asctime": "2025-02-13 23:06:49", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 101c2671-b555-4353-8b81-4905ea10ba04", "name": "__main__", "asctime": "2025-02-13 23:06:49", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-231 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 102afab8-8e

Workflows from 120 chats extracted!


{"message": "Discovering Agent flow in chat 118edf23-2285-4dc5-8f63-5d8bf2e1bab1", "name": "__main__", "asctime": "2025-02-13 23:06:51", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 11de0e64-d791-408d-97e1-57940f9c80e1", "name": "__main__", "asctime": "2025-02-13 23:06:51", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 11e32ae8-5a81-4497-a1d5-2ef8acf2f581", "name": "__main__", "asctime": "2025-02-13 23:06:52", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1203f20d-13

Workflows from 130 chats extracted!


{"message": "Discovering Agent flow in chat 136bdd10-1b56-4e84-8787-4bd2b54572ad", "name": "__main__", "asctime": "2025-02-13 23:06:54", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 137b7ca8-e316-41c1-b932-1d4831564e6b", "name": "__main__", "asctime": "2025-02-13 23:06:54", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-227 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 139404a3-d45a-4d69-b0ab-7409f32ac216", "name": "__main__", "asctime": "2025-02-13 23:06:54", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-229 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 13960ccc-3a

Workflows from 140 chats extracted!


{"message": "Discovering Agent flow in chat 14102315-6189-4a15-94c2-3ee33be3dba6", "name": "__main__", "asctime": "2025-02-13 23:06:55", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1449dd9d-3137-45d0-895e-300d9657e223", "name": "__main__", "asctime": "2025-02-13 23:06:55", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14535af2-554b-4d0a-9aed-562c37ca2f5d", "name": "__main__", "asctime": "2025-02-13 23:06:56", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-231 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14544e26-60

Workflows from 150 chats extracted!


{"message": "Discovering Agent flow in chat 14b4b61e-2394-4850-9698-a4a117c4380e", "name": "__main__", "asctime": "2025-02-13 23:06:57", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14dab401-786b-43d6-adaa-c3c8ad5107bc", "name": "__main__", "asctime": "2025-02-13 23:06:58", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-229 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14e47b7b-7284-49de-8f11-dbcea6e688a4", "name": "__main__", "asctime": "2025-02-13 23:06:58", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-225 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1512e71c-00

Workflows from 160 chats extracted!


{"message": "Discovering Agent flow in chat 15d5bbab-2bf6-4d46-a685-9254945f7705", "name": "__main__", "asctime": "2025-02-13 23:06:59", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-227 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 15f0c3bc-64b4-4f77-9010-4073df40c41f", "name": "__main__", "asctime": "2025-02-13 23:07:00", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-231 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 160939cf-9a8f-4383-9799-e1631115d33f", "name": "__main__", "asctime": "2025-02-13 23:07:00", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-228 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 16145eaf-c1

Workflows from 170 chats extracted!


{"message": "Discovering Agent flow in chat 1769dc8b-7c1d-43cc-ad42-055239339176", "name": "__main__", "asctime": "2025-02-13 23:07:02", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-228 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 176bdf21-0a06-490f-ad78-d506636940cc", "name": "__main__", "asctime": "2025-02-13 23:07:02", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-229 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 17819caa-4212-4fc3-bf14-26a2fdcdb719", "name": "__main__", "asctime": "2025-02-13 23:07:02", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 17bf716e-01

Workflows from 180 chats extracted!


{"message": "Discovering Agent flow in chat 1919f981-b0dc-4584-ab49-364bbc943425", "name": "__main__", "asctime": "2025-02-13 23:07:05", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-226 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 195f3467-6a11-47cb-a831-45ff25c30f92", "name": "__main__", "asctime": "2025-02-13 23:07:05", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-232 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 19747617-8b80-44a3-b8a5-4fd05510c6de", "name": "__main__", "asctime": "2025-02-13 23:07:06", "levelname": "INFO", "filename": "<ipython-input-133-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-223 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 199d729d-92

Workflows from 190 chats extracted!


{"message": "Finished processing all 199 chats", "name": "__main__", "asctime": "2025-02-13 23:07:11", "levelname": "INFO", "filename": "<ipython-input-134-e96ef876503f>", "funcName": "extract_flows_from_chats", "threadName": "MainThread", "status": "INFO"}


In [144]:
sum([len(w) for w in workflows.values()])

277

In [145]:
all_workflows = []
for w in workflows.values():
    all_workflows.extend(w)

# Grouping Workflows

In [148]:
def group_workflows_by_product(workflows: list[Flow], embedder: SentenceTransformer, semantic_threshold: float) -> dict[str, list[Flow]]:
    
    products = [w.product for w in workflows]
    embeddings = embedder.encode(products, convert_to_tensor=True)
        
    product_groups, solo_products = [], []
    for idx, _ in enumerate(products):
        existing_groups = [group for group in product_groups if idx in group]
        if existing_groups:
            assert len(
                existing_groups) == 1, "A workflow should only be in 1 group"
            continue

        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = [
            i for i, score in enumerate(scores) if score > semantic_threshold
        ]
        filtered_matches = [i for i in matches if i != idx]
        if filtered_matches:
            outstanding_groups = []
            new_group = filtered_matches + [idx]
            for group in product_groups:
                if set(filtered_matches).intersection(set(group)):
                    new_group.extend(group)
                else:
                    outstanding_groups.append(group)
            product_groups = outstanding_groups + [list(set(new_group))]
        else:
            solo_products.append(idx)

    print(f"Found {len(solo_products)} # of Solo Products.")
    print(f"Found {len(product_groups)} # Groups of Products.")

    # Create mapping of indices to product names
    idx_to_product = {i: p for i, p in enumerate(products)}
    
    retval = {}

    for group in product_groups:
        group_key = tuple(set([idx_to_product[idx] for idx in group]))
        retval[group_key] = [workflows[idx] for idx in group]

    for idx in solo_products:
        retval[(workflows[idx].product,)] = [workflows[idx]]

    return retval

In [149]:
def group_workflows_by_issues(workflows: list[Flow], embedder: SentenceTransformer, semantic_threshold: float) -> dict[tuple[str], list[Flow]]:
    
    issues = [w.issue for w in workflows]
    embeddings = embedder.encode(issues, convert_to_tensor=True)
        
    issue_groups, solo_issues = [], []
    for idx, _ in enumerate(issues):
        existing_groups = [group for group in issue_groups if idx in group]
        if existing_groups:
            assert len(
                existing_groups) == 1, "A workflow should only be in 1 group"
            continue

        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = [
            i for i, score in enumerate(scores) if score > semantic_threshold
        ]
        filtered_matches = [i for i in matches if i != idx]
        if filtered_matches:
            outstanding_groups = []
            new_group = filtered_matches + [idx]
            for group in issue_groups:
                if set(filtered_matches).intersection(set(group)):
                    new_group.extend(group)
                else:
                    outstanding_groups.append(group)
            issue_groups = outstanding_groups + [list(set(new_group))]
        else:
            solo_issues.append(idx)

    print(f"Found {len(solo_issues)} # of Solo Issues.")
    print(f"Found {len(issue_groups)} # Groups of Issues.")

    # Create mapping of indices to product names
    idx_to_issue = {i: p for i, p in enumerate(issues)}
    
    retval = {}

    for group in issue_groups:
        group_key = tuple(set([idx_to_issue[idx] for idx in group]))
        retval[group_key] = [workflows[idx] for idx in group]

    for idx in solo_issues:
        retval[(workflows[idx].issue,)] = [workflows[idx]]

    return retval

In [194]:
tree = group_workflows_by_product(all_workflows, embedder, semantic_threshold=0.7)

Found 33 # of Solo Products.
Found 18 # Groups of Products.


In [195]:
for product in tree.keys():
    tree[product] = group_workflows_by_issues(tree[product], embedder, semantic_threshold=0.7)

Found 11 # of Solo Issues.
Found 2 # Groups of Issues.
Found 1 # of Solo Issues.
Found 1 # Groups of Issues.
Found 4 # of Solo Issues.
Found 1 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 5 # of Solo Issues.
Found 1 # Groups of Issues.
Found 2 # of Solo Issues.
Found 1 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 5 # of Solo Issues.
Found 0 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 2 # of Solo Issues.
Found 2 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 27 # of Solo Issues.
Found 4 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 20 # of Solo Issues.
Found 3 # Groups of Issues.
Found 13 # of Solo Issues.
Found 5 # Groups of Issues.
Found 24 # of Solo Issues.
Found 5 # Groups of Issues.
Found 8 # of Solo Issues.
Found 1 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 1 # of Solo Issue

In [196]:
leafs = 0
for product in tree.keys():
    for issue in tree[product].keys():
        leafs += 1
print(f"Found {leafs} leafs in the tree")

Found 191 leafs in the tree


In [None]:
"""
{
    "camera": [camera_flow1, camera_flow2, camera_flow3],
    "alarm": [alarm_flow4, alarm_flow5, alarm_flow6],
    "TV": [tv_flow1]
}
"""

"""
{
    "camera": {
        "issue1": [camera_issue1_flow1, camera_issue1_flow2, camera_issue1_flow3],
        "issue2": [camera_issue2_flow1, camera_issue2_flow2, camera_issue2_flow3],
    },
    "alarm": {
        "issue1": [alarm_issue1_flow1, alarm_issue1_flow2, alarm_issue1_flow3],
        "issue2": [alarm_issue2_flow1, alarm_issue2_flow2, alarm_issue2_flow3],
    },
}
"""

"""
{
    "camera": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
    "alarm": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
}
"""

# Generate Troubleshooting Guide for each leaf in the tree

In [197]:
class TroubleshootingGuide(BaseModel):
    steps: list[str]

In [198]:
def concatenate_flows(flows: list[Flow]) -> str:
    conversations = []
    for i, flow in enumerate(flows):
        conversation = f"Conversation {i+1}:\n" + "\n".join(f"* {step}" for step in flow.steps)
        conversations.append(conversation)
    return "\n\n".join(conversations)


In [199]:
GENERATE_TROUBLESHOOTING_GUIDE_PROMPT = """\
You are a conversation analyst working for Brinks Home Security Call Center.

You will be given the troubleshooting steps taken by a call center agent of 1 or more conversations at a time. Your primary goal is to generate an overall troubleshooting guide for the given product and issue.

The troubleshooting guide must consider all scenarios and steps from the given conversations. Do not produce any steps that are not present in the given conversations. Only produce an overall troubleshooting guide after gaining information from all the conversations.

You must produce an ordered list of steps that the agent can follow to help resolve the customer's needs related to the product and issue.
"""

In [200]:
def generate_troubleshooting_guide(product: str, issue: str, llm_engine: str = LLM_ENGINE) -> str:
    messages: list[dict[str, str]] = [
        {
            "role": "system",
            "content": GENERATE_TROUBLESHOOTING_GUIDE_PROMPT
        },
        {
            "role": "user",
            "content": concatenate_flows(tree[product][issue])
        },
    ]

    return chat_completion(model=llm_engine,
                            messages=messages,
                            response_format=TroubleshootingGuide).choices[0].message.parsed.steps

In [219]:
def generate_troubleshooting_guides(tree: dict[str, dict[str, list[Flow]]], llm_engine: str = LLM_ENGINE, concurrency: int=10) -> None:
    lock: threading.Lock = threading.Lock()
    indexes: queue.Queue = queue.Queue()
    
    # Create flat list of (product, issue) pairs to process
    tasks = []
    for product in tree.keys():
        for issue in tree[product].keys():
            tasks.append((product, issue))
            
    for idx in range(len(tasks)):
        indexes.put(idx)

    def troubleshooting_guide_worker():
        while True:
            try:
                idx = indexes.get(block=False)
            except queue.Empty:
                return
                
            product, issue = tasks[idx]
            try:
                guide = generate_troubleshooting_guide(product, issue, llm_engine)
                with lock:
                    tree[product][issue] = guide
                if idx % 10 == 0:
                    print(f"Generated guides for {idx} product/issue pairs")
            except Exception as e:
                logger.warning(f"Error generating guide for {product}/{issue}: {e}")
            indexes.task_done()

    logger.info(f"Starting guide generation for {len(tasks)} product/issue pairs")
    workers = [
        threading.Thread(target=troubleshooting_guide_worker)
        for _ in range(concurrency)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info("Finished generating all guides")
    print("Finished generating all guides")

In [220]:
troubleshooting_guide = deepcopy(tree)

In [221]:
generate_troubleshooting_guides(troubleshooting_guide)

{"message": "Starting guide generation for 191 product/issue pairs", "name": "__main__", "asctime": "2025-02-13 23:45:34", "levelname": "INFO", "filename": "<ipython-input-219-f882fd4f0c39>", "funcName": "generate_troubleshooting_guides", "threadName": "MainThread", "status": "INFO"}


Generated guides for 0 product/issue pairs
Generated guides for 10 product/issue pairs
Generated guides for 20 product/issue pairs
Generated guides for 30 product/issue pairs
Generated guides for 40 product/issue pairs
Generated guides for 50 product/issue pairs
Generated guides for 60 product/issue pairs
Generated guides for 70 product/issue pairs
Generated guides for 80 product/issue pairs
Generated guides for 90 product/issue pairs
Generated guides for 100 product/issue pairs
Generated guides for 110 product/issue pairs
Generated guides for 120 product/issue pairs
Generated guides for 140 product/issue pairs
Generated guides for 130 product/issue pairs
Generated guides for 160 product/issue pairs
Generated guides for 150 product/issue pairs
Generated guides for 180 product/issue pairs
Generated guides for 170 product/issue pairs


{"message": "Finished generating all guides", "name": "__main__", "asctime": "2025-02-13 23:46:07", "levelname": "INFO", "filename": "<ipython-input-219-f882fd4f0c39>", "funcName": "generate_troubleshooting_guides", "threadName": "MainThread", "status": "INFO"}


Generated guides for 190 product/issue pairs
Finished generating all guides


In [231]:
for product in troubleshooting_guide.keys():
    print(f"Product: {product}")
    print(f"{'-'*100}\n{'-'*100}")
    for issue in troubleshooting_guide[product].keys():
        print(f"**Issue: {issue}**")
        print("\n".join(troubleshooting_guide[product][issue]))
        print("-"*100)
    print("-"*100)

Product: ('Alarm.com Account',)
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
**Issue: ('App Login Issues',)**
Verify the customer's identity by confirming account details such as phone number, address, or account code word.
Confirm the email address associated with the customer's account.
If the customer cannot remember the password, initiate the password reset process and ask them to check their email for the reset link.
Instruct the customer to enter the new password after they receive the reset link and create a strong password.
If applicable, guide the customer to set up two-factor authentication using text messages or email.
Advise the customer to log in using the updated credentials on their phone or computer.
If login issues persist, instruct the customer to remove any old information saved in the app settings.
-------------