# Setup

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Standard library imports
from pydantic import BaseModel
from copy import deepcopy
import threading
import warnings
import queue
import json
import os

warnings.filterwarnings("ignore")
JWT_SECRET_API = !echo $(aws --profile "chat-prod_ro" secretsmanager get-secret-value --secret-id "arn:aws:secretsmanager:us-west-2:242659714806:secret:shared/cresta-server-jwt_secret-VDn5My" --query SecretString --output text) # type: ignore
os.environ["JWT_SECRET_API"] = json.loads(JWT_SECRET_API[0])["jwt-secret"]
os.environ["CONFIG_SERVICE_ADDR"] = "auth.chat-prod.internal.cresta.ai:443"
os.environ["CONFIG_USE_SECURE_CHANNEL"] = "true"

# Third party imports

from sentence_transformers import SentenceTransformer, util
from retry import retry

# Greyparrot imports
from greyparrot.llm.prompting import prompts as prompts_utils
from greyparrot.conversations.db import ConversationsDBConn
from greyparrot.multi_tenancy.v3_config import V3Config
from greyparrot.conversations.utils import get_chats
from greyparrot.chats_common import PartialChat
from greyparrot.common import get_logger


logger = get_logger(__name__)

# Local imports
from llm_proxy_client import LLMProxyDevClient

In [14]:
embedder = SentenceTransformer("all-mpnet-base-v2")

In [15]:
customer_id = "brinks"
profile_id = "care-voice"
usecase_id = "care-voice"
language_code = "en-US"

In [16]:
def get_chats_with_ids(chat_ids: list[str]) -> list[PartialChat]:
    customer_name: str = V3Config.short_name_from_ids(customer_id, profile_id)
    conv_db_conn: ConversationsDBConn = ConversationsDBConn.from_customer_name(customer_name)
    chats: list[PartialChat] = conv_db_conn.get_detailed_chats(
        customer_id=customer_id,
        profile_id=profile_id,
        usecase_id=usecase_id,
        language_code=language_code,
        conversation_ids=chat_ids,
        is_dev_user=False,
    )
    return chats

In [17]:
LLM_ENGINE: str = "gpt-4o-mini"
CONCURRENCY: int = 10

In [18]:
@retry(tries=1, delay=60, backoff=2, logger=logger)
def chat_completion(**kwargs):
    return LLMProxyDevClient("openai").beta.chat.completions.parse(**kwargs)

# Discovery

In [19]:
# TODO remove this after fixing speaker_role flips
flips: dict[str, str] = {"agent": "visitor", "visitor": "agent"}

In [20]:
def chat_to_prompt_text(chat: PartialChat) -> str:
    return "\n".join([
        f"{prompts_utils.speaker_role_str_for_prompts(flips[m.speaker_role.value]).capitalize()}: {m.text}"
        for m in chat.messages
    ])

In [21]:
SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY = """### Context and data description
You are a conversation analyst working for Brinks Home Security Call Center.

You will be given 1 conversation at a time. Each conversation is between a Brinks Call Center Agent and a Customer. Your primary goal is to extract workflows of steps which the Agent takes in **the given conversation** to help resolve the Customer's needs related to their home security system and services.

The primary use case of these workflows is to create a troubleshooting template to address similar customer needs in the future.

Each workflow should be a list of steps which the Agent needs to take.

For each workflow, return:
- Product: Specific Brinks product/service involved (e.g., Security Panel, Door/Window Sensor, Motion Detector, Security Camera, Doorbell Camera, Smart Lock, Mobile App, Alarm.com Account)
- Issue: Specific customer problem (e.g., False Alarms, Device Offline, Camera Not Recording, App Login Issues, Billing Questions, Account Changes)
- Steps: Detailed troubleshooting or resolution steps the Agent follows

Make sure the product is specific to Brinks' security equipment and services, not general categories.
Make sure the issue clearly describes the exact problem the customer is experiencing.
Make sure the steps are detailed and follow Brinks' standard operating procedures.
Make sure to only return the agent troubleshooting steps, not the customer's requests or other information.

Common scenarios include:
- Security system troubleshooting (panel issues, sensor malfunctions, connectivity problems)
- Camera and video recording issues
- Account management (billing, autopay, contact updates)
- Mobile app and Alarm.com portal assistance
- Service changes (moving, upgrading, cancellation)
- Installation and maintenance appointments

**Important**: There could be more than 1 workflow in a single conversation. There could also be no workflows in a single conversation. The workflows will be used to create troubleshooting guides to address similar customer needs in the future."""

In [22]:
class Flow(BaseModel):
    product: str
    issue: str
    steps: list[str]

class Flows(BaseModel):
    flows: list[Flow]

In [23]:
def discover_flow_in_chat(chat: PartialChat,
                          llm_engine: str = LLM_ENGINE) -> list[Flow]:
    logger.info(f"Discovering Agent flow in chat {chat.chat_name}")
    messages: list[dict[str, str]] = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY
        },
        {
            "role": "user",
            "content": chat_to_prompt_text(chat)
        },
    ]

    return chat_completion(model=llm_engine,
                            messages=messages,
                            response_format=Flows).choices[0].message.parsed.flows

In [24]:
def extract_flows_from_chats(chats: list[PartialChat],
                                concurrency: int=10) -> dict[str, list[Flow]]:
    lock: threading.Lock = threading.Lock()
    indexes: queue.Queue = queue.Queue()

    workflows: dict[str, list[Flow]] = {}
    for idx in range(len(chats)):
        indexes.put(idx)

    def workflow_labeler_worker():
        while True:
            try:
                idx = indexes.get(block=False)
            except queue.Empty:
                return
            chat = chats[idx]
            try:
                extracted_workflows: list[Flow] = discover_flow_in_chat(chat)
                with lock:
                    workflows[str(chat)] = extracted_workflows
                    if len(workflows) % 10 == 0:
                        print(f"Workflows from {len(workflows)} chats extracted!")
            except Exception as e:
                logger.warning(e, str(chat))
            indexes.task_done()

    logger.info(
        f"Starting processing {len(chats)} chats with {concurrency} workers")
    workers = [
        threading.Thread(target=workflow_labeler_worker)
        for _ in range(concurrency)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Finished processing all {len(chats)} chats")

    return workflows

In [None]:
test_chat: PartialChat = get_chats_with_ids(["0843c54c-6487-45ce-946a-cc6257484f54"])[0]
workflows: list[Flow] = discover_flow_in_chat(test_chat)

In [None]:
print(chat_to_prompt_text(test_chat))

In [None]:
print(f"Found {len(workflows)} workflows\n")
for i, flow in enumerate(workflows):
    print(f"[Agent Workflow #{i + 1}]")
    print(f"Product: {flow.product}")
    print(f"Issue: {flow.issue}")
    print("Steps:", end="\n* ")
    print("\n* ".join(flow.steps))
    if i < len(workflows) - 1:
        print("-" * 100)

# 200 Recent Chats

In [None]:
data_set = get_chats(
    'brinks-care-voice', 
    '2024-06-01',
    '2024-12-01',
    200,)

In [None]:
sampled_chat_ids = list(set([
    chat.chat_name
    for chat in (item if not isinstance(item, tuple) else item[0] for item in data_set)
    if hasattr(chat, 'chat_name')
]))
print(len(sampled_chat_ids))

# Agent Workflow Discovery

In [None]:
sampled_chats = get_chats_with_ids(sampled_chat_ids)

In [31]:
workflows = extract_flows_from_chats(sampled_chats)

{"message": "Discovering Agent flow in chat 460615cb-a884-4114-ba43-83e5681bb876", "name": "__main__", "asctime": "2025-02-14 06:04:55", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 4764688a-81b2-40dc-8381-d0002ef7530e", "name": "__main__", "asctime": "2025-02-14 06:04:55", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-16 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 4afec988-d114-4adf-b18f-55d0fb833426", "name": "__main__", "asctime": "2025-02-14 06:04:55", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-7 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 4b523c26-42be-4de3-

Workflows from 50 chats extracted!


{"message": "Discovering Agent flow in chat 4dc793cd-6416-47f0-8256-9b66ae20dcf6", "name": "__main__", "asctime": "2025-02-14 06:04:57", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 520c4622-29fd-46cc-9d9a-a33d7ff75b60", "name": "__main__", "asctime": "2025-02-14 06:04:57", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 53464d50-93b6-4318-9888-dfdc67c2511c", "name": "__main__", "asctime": "2025-02-14 06:04:57", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-15 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 544bad03-f8eb-4c2

Workflows from 60 chats extracted!


{"message": "Discovering Agent flow in chat 56ba8f74-a2cc-4c40-ad47-76fe0c536493", "name": "__main__", "asctime": "2025-02-14 06:04:59", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-13 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 57beb88b-5483-48bd-a22e-5b3c9ef56e09", "name": "__main__", "asctime": "2025-02-14 06:04:59", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 582a04a4-a27c-4a10-8974-2fa6622c711e", "name": "__main__", "asctime": "2025-02-14 06:04:59", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-14 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 599c38a0-fea7-47b

Workflows from 70 chats extracted!


{"message": "Discovering Agent flow in chat 65466ee6-c6dd-4bb2-bdfc-4f70dded41ff", "name": "__main__", "asctime": "2025-02-14 06:05:01", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-16 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 66ba422c-cce1-4af1-b082-decc73a69ebb", "name": "__main__", "asctime": "2025-02-14 06:05:01", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-8 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 67c5b46c-8d34-4a24-bb0b-acf8a4e11e42", "name": "__main__", "asctime": "2025-02-14 06:05:02", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 6874fa47-31b7-4050-

Workflows from 80 chats extracted!


{"message": "Discovering Agent flow in chat 713d713d-41fa-44d8-8056-26bc00eab629", "name": "__main__", "asctime": "2025-02-14 06:05:03", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-8 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 713f2b9c-3767-4235-bc18-19693a0914e4", "name": "__main__", "asctime": "2025-02-14 06:05:04", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 716f1e01-d056-44a7-ae5c-dec6131fec1a", "name": "__main__", "asctime": "2025-02-14 06:05:04", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 71a5d990-6405-431f

Workflows from 90 chats extracted!


{"message": "Discovering Agent flow in chat 77d7492d-327d-41c2-844d-e9ac81b422d7", "name": "__main__", "asctime": "2025-02-14 06:05:06", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-8 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 784a9189-a031-4390-9836-81bb04aa5ab9", "name": "__main__", "asctime": "2025-02-14 06:05:06", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 788f8be8-19ec-4b83-a6c0-279fd1acab21", "name": "__main__", "asctime": "2025-02-14 06:05:06", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-16 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 79d7d848-1e90-4ae5

Workflows from 100 chats extracted!


{"message": "Discovering Agent flow in chat 8361455f-b2a9-47e1-b335-982822804980", "name": "__main__", "asctime": "2025-02-14 06:05:09", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-11 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 84b2e6e5-54ca-4ec7-88af-6e44cd6367dd", "name": "__main__", "asctime": "2025-02-14 06:05:09", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-15 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 84ff7952-79f9-4d48-a233-44b04f61e2c4", "name": "__main__", "asctime": "2025-02-14 06:05:09", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 888de472-d87b-479

Workflows from 110 chats extracted!


{"message": "Discovering Agent flow in chat 8e9de433-ee33-403a-99ca-0fb2be228d21", "name": "__main__", "asctime": "2025-02-14 06:05:10", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-13 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 90fb6284-7f4a-46af-a5f6-d0eb80426d53", "name": "__main__", "asctime": "2025-02-14 06:05:10", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 91b0d2b9-5df3-4ae9-960a-c1de460ca73a", "name": "__main__", "asctime": "2025-02-14 06:05:10", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-8 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 92637af6-777a-43f1

Workflows from 120 chats extracted!


{"message": "Discovering Agent flow in chat 991c9453-3405-41c2-a4a6-cc271a3a4d79", "name": "__main__", "asctime": "2025-02-14 06:05:12", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 9a1da8a4-e3cd-4671-9d88-71a9e4ee769c", "name": "__main__", "asctime": "2025-02-14 06:05:12", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 9fd3aab1-48f7-4fdb-8f09-3cbe0dc19418", "name": "__main__", "asctime": "2025-02-14 06:05:13", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat a1173c5a-2a8c-411e

Workflows from 130 chats extracted!


{"message": "Discovering Agent flow in chat a928d323-eb8b-4ad3-8be1-4e2f6065a38a", "name": "__main__", "asctime": "2025-02-14 06:05:14", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-15 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat a9d7ca5e-7148-4c93-9b37-e2e8a86e9d58", "name": "__main__", "asctime": "2025-02-14 06:05:15", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat aab58f7a-4c91-4da8-8d69-a5ec623e7a23", "name": "__main__", "asctime": "2025-02-14 06:05:15", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat aac426c2-1298-4e81

Workflows from 140 chats extracted!


{"message": "Discovering Agent flow in chat c2154d4e-6395-46fb-9cfd-634917a75b46", "name": "__main__", "asctime": "2025-02-14 06:05:16", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-15 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat c35d724c-b6ce-4957-b45a-99486ec26b3e", "name": "__main__", "asctime": "2025-02-14 06:05:16", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-13 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat c554e2c2-ab13-462f-8fcf-7800555f7ce5", "name": "__main__", "asctime": "2025-02-14 06:05:17", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-8 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat c5f85bf0-0cc1-43b2

Workflows from 150 chats extracted!


{"message": "Discovering Agent flow in chat cace7c9b-e599-4ca7-b61e-34c23239f988", "name": "__main__", "asctime": "2025-02-14 06:05:19", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat cc240657-7fba-459b-b4f3-2a3688ce2fea", "name": "__main__", "asctime": "2025-02-14 06:05:19", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat cd0268f5-a83f-4aef-ad08-532809010d80", "name": "__main__", "asctime": "2025-02-14 06:05:19", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-15 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat cd0b1a4f-bc78-405

Workflows from 160 chats extracted!


{"message": "Discovering Agent flow in chat d9f034ce-c787-41d8-bbbb-9013612e809c", "name": "__main__", "asctime": "2025-02-14 06:05:22", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-10 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat db35cf01-8e10-444e-bd05-0e8d1e8aa4ed", "name": "__main__", "asctime": "2025-02-14 06:05:22", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat df6829db-f0ea-4acb-98d4-70f1a5b3302f", "name": "__main__", "asctime": "2025-02-14 06:05:22", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-14 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat dfe46e66-9057-46a4

Workflows from 170 chats extracted!


{"message": "Discovering Agent flow in chat eed43b95-33f2-4f44-b30b-40b535091305", "name": "__main__", "asctime": "2025-02-14 06:05:24", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-16 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat eee368bc-7a19-4a59-a079-ac9d65a587b6", "name": "__main__", "asctime": "2025-02-14 06:05:24", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-12 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat ef843f0a-c659-4af7-9dda-b9fc14980e36", "name": "__main__", "asctime": "2025-02-14 06:05:24", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat efc02a9d-467d-4166

Workflows from 180 chats extracted!


{"message": "Discovering Agent flow in chat f3611706-6a1c-4efc-a11b-7051d41d086d", "name": "__main__", "asctime": "2025-02-14 06:05:26", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-11 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat f3b8cf83-cab2-4d56-b258-7f47dc680f05", "name": "__main__", "asctime": "2025-02-14 06:05:26", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-9 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat f5165b8a-691d-4d3a-8a4f-206577235511", "name": "__main__", "asctime": "2025-02-14 06:05:26", "levelname": "INFO", "filename": "<ipython-input-23-ea4193a4eca2>", "funcName": "discover_flow_in_chat", "threadName": "Thread-13 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat f62dd888-a2de-4018

Workflows from 190 chats extracted!


{"message": "Finished processing all 199 chats", "name": "__main__", "asctime": "2025-02-14 06:05:33", "levelname": "INFO", "filename": "<ipython-input-24-e96ef876503f>", "funcName": "extract_flows_from_chats", "threadName": "MainThread", "status": "INFO"}


In [32]:
sum([len(w) for w in workflows.values()])

249

In [33]:
all_workflows = []
for w in workflows.values():
    all_workflows.extend(w)

# Grouping Workflows

In [34]:
def group_workflows_by_product(workflows: list[Flow], embedder: SentenceTransformer, semantic_threshold: float) -> dict[str, list[Flow]]:
    
    products = [w.product for w in workflows]
    embeddings = embedder.encode(products, convert_to_tensor=True)
        
    product_groups, solo_products = [], []
    for idx, _ in enumerate(products):
        existing_groups = [group for group in product_groups if idx in group]
        if existing_groups:
            assert len(
                existing_groups) == 1, "A workflow should only be in 1 group"
            continue

        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = [
            i for i, score in enumerate(scores) if score > semantic_threshold
        ]
        filtered_matches = [i for i in matches if i != idx]
        if filtered_matches:
            outstanding_groups = []
            new_group = filtered_matches + [idx]
            for group in product_groups:
                if set(filtered_matches).intersection(set(group)):
                    new_group.extend(group)
                else:
                    outstanding_groups.append(group)
            product_groups = outstanding_groups + [list(set(new_group))]
        else:
            solo_products.append(idx)

    print(f"Found {len(solo_products)} # of Solo Products.")
    print(f"Found {len(product_groups)} # Groups of Products.")

    # Create mapping of indices to product names
    idx_to_product = {i: p for i, p in enumerate(products)}
    
    retval = {}

    for group in product_groups:
        group_key = tuple(set([idx_to_product[idx] for idx in group]))
        retval[group_key] = [workflows[idx] for idx in group]

    for idx in solo_products:
        retval[(workflows[idx].product,)] = [workflows[idx]]

    return retval

In [35]:
def group_workflows_by_issues(workflows: list[Flow], embedder: SentenceTransformer, semantic_threshold: float) -> dict[tuple[str], list[Flow]]:
    
    issues = [w.issue for w in workflows]
    embeddings = embedder.encode(issues, convert_to_tensor=True)
        
    issue_groups, solo_issues = [], []
    for idx, _ in enumerate(issues):
        existing_groups = [group for group in issue_groups if idx in group]
        if existing_groups:
            assert len(
                existing_groups) == 1, "A workflow should only be in 1 group"
            continue

        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = [
            i for i, score in enumerate(scores) if score > semantic_threshold
        ]
        filtered_matches = [i for i in matches if i != idx]
        if filtered_matches:
            outstanding_groups = []
            new_group = filtered_matches + [idx]
            for group in issue_groups:
                if set(filtered_matches).intersection(set(group)):
                    new_group.extend(group)
                else:
                    outstanding_groups.append(group)
            issue_groups = outstanding_groups + [list(set(new_group))]
        else:
            solo_issues.append(idx)

    print(f"Found {len(solo_issues)} # of Solo Issues.")
    print(f"Found {len(issue_groups)} # Groups of Issues.")

    # Create mapping of indices to product names
    idx_to_issue = {i: p for i, p in enumerate(issues)}
    
    retval = {}

    for group in issue_groups:
        group_key = tuple(set([idx_to_issue[idx] for idx in group]))
        retval[group_key] = [workflows[idx] for idx in group]

    for idx in solo_issues:
        retval[(workflows[idx].issue,)] = [workflows[idx]]

    return retval

In [36]:
tree = group_workflows_by_product(all_workflows, embedder, semantic_threshold=0.7)

Found 24 # of Solo Products.
Found 17 # Groups of Products.


In [37]:
for product in tree.keys():
    tree[product] = group_workflows_by_issues(tree[product], embedder, semantic_threshold=0.7)

Found 1 # of Solo Issues.
Found 1 # Groups of Issues.
Found 3 # of Solo Issues.
Found 0 # Groups of Issues.
Found 4 # of Solo Issues.
Found 0 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 1 # of Solo Issues.
Found 1 # Groups of Issues.
Found 5 # of Solo Issues.
Found 2 # Groups of Issues.
Found 5 # of Solo Issues.
Found 1 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 23 # of Solo Issues.
Found 3 # Groups of Issues.
Found 0 # of Solo Issues.
Found 1 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 0 # of Solo Issues.
Found 2 # Groups of Issues.
Found 13 # of Solo Issues.
Found 6 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 2 # of Solo Issues.
Found 0 # Groups of Issues.
Found 42 # of Solo Issues.
Found 9 # Groups of Issues.
Found 1 # of Solo Issues.
Found 0 # Groups of Issues.
Found 1 # of Solo Issues.

In [38]:
leafs = 0
for product in tree.keys():
    for issue in tree[product].keys():
        leafs += 1
print(f"Found {leafs} leafs in the tree")

Found 156 leafs in the tree


In [39]:
"""
{
    "camera": [camera_flow1, camera_flow2, camera_flow3],
    "alarm": [alarm_flow4, alarm_flow5, alarm_flow6],
    "TV": [tv_flow1]
}
"""

"""
{
    "camera": {
        "issue1": [camera_issue1_flow1, camera_issue1_flow2, camera_issue1_flow3],
        "issue2": [camera_issue2_flow1, camera_issue2_flow2, camera_issue2_flow3],
    },
    "alarm": {
        "issue1": [alarm_issue1_flow1, alarm_issue1_flow2, alarm_issue1_flow3],
        "issue2": [alarm_issue2_flow1, alarm_issue2_flow2, alarm_issue2_flow3],
    },
}
"""

"""
{
    "camera": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
    "alarm": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
}
"""

'\n{\n    "camera": {\n        "issue1": troubleshooting_steps,\n        "issue2": troubleshooting_steps,\n    },\n    "alarm": {\n        "issue1": troubleshooting_steps,\n        "issue2": troubleshooting_steps,\n    },\n}\n'

# Generate Troubleshooting Guide for each leaf in the tree

In [40]:
class TroubleshootingGuide(BaseModel):
    steps: list[str]

In [41]:
def concatenate_flows(flows: list[Flow]) -> str:
    conversations = []
    for i, flow in enumerate(flows):
        conversation = f"Conversation {i+1}:\n" + "\n".join(f"* {step}" for step in flow.steps)
        conversations.append(conversation)
    return "\n\n".join(conversations)


In [42]:
GENERATE_TROUBLESHOOTING_GUIDE_PROMPT = """\
You are a conversation analyst working for Brinks Home Security Call Center.

You will be given the troubleshooting steps taken by a call center agent of 1 or more conversations at a time. Your primary goal is to generate an overall troubleshooting guide for the given product and issue.

The troubleshooting guide must consider all scenarios and steps from the given conversations. Do not produce any steps that are not present in the given conversations. Only produce an overall troubleshooting guide after gaining information from all the conversations.

You must produce an ordered list of steps that the agent can follow to help resolve the customer's needs related to the product and issue.
"""

In [43]:
def generate_troubleshooting_guide(product: str, issue: str, llm_engine: str = LLM_ENGINE) -> str:
    messages: list[dict[str, str]] = [
        {
            "role": "system",
            "content": GENERATE_TROUBLESHOOTING_GUIDE_PROMPT
        },
        {
            "role": "user",
            "content": concatenate_flows(tree[product][issue])
        },
    ]

    return chat_completion(model=llm_engine,
                            messages=messages,
                            response_format=TroubleshootingGuide).choices[0].message.parsed.steps

In [44]:
def generate_troubleshooting_guides(tree: dict[str, dict[str, list[Flow]]], llm_engine: str = LLM_ENGINE, concurrency: int=10) -> None:
    lock: threading.Lock = threading.Lock()
    indexes: queue.Queue = queue.Queue()
    
    # Create flat list of (product, issue) pairs to process
    tasks = []
    for product in tree.keys():
        for issue in tree[product].keys():
            tasks.append((product, issue))
            
    for idx in range(len(tasks)):
        indexes.put(idx)

    def troubleshooting_guide_worker():
        while True:
            try:
                idx = indexes.get(block=False)
            except queue.Empty:
                return
                
            product, issue = tasks[idx]
            try:
                guide = generate_troubleshooting_guide(product, issue, llm_engine)
                with lock:
                    tree[product][issue] = guide
                if idx % 10 == 0:
                    print(f"Generated guides for {idx} product/issue pairs")
            except Exception as e:
                logger.warning(f"Error generating guide for {product}/{issue}: {e}")
            indexes.task_done()

    logger.info(f"Starting guide generation for {len(tasks)} product/issue pairs")
    workers = [
        threading.Thread(target=troubleshooting_guide_worker)
        for _ in range(concurrency)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info("Finished generating all guides")
    print("Finished generating all guides")

In [45]:
troubleshooting_guide = deepcopy(tree)

In [46]:
generate_troubleshooting_guides(troubleshooting_guide)

{"message": "Starting guide generation for 156 product/issue pairs", "name": "__main__", "asctime": "2025-02-14 06:05:35", "levelname": "INFO", "filename": "<ipython-input-44-f882fd4f0c39>", "funcName": "generate_troubleshooting_guides", "threadName": "MainThread", "status": "INFO"}


Generated guides for 0 product/issue pairs
Generated guides for 10 product/issue pairs
Generated guides for 30 product/issue pairs
Generated guides for 20 product/issue pairs
Generated guides for 50 product/issue pairs
Generated guides for 40 product/issue pairs
Generated guides for 60 product/issue pairs
Generated guides for 70 product/issue pairs
Generated guides for 80 product/issue pairs
Generated guides for 90 product/issue pairs
Generated guides for 100 product/issue pairs
Generated guides for 110 product/issue pairs
Generated guides for 120 product/issue pairs
Generated guides for 130 product/issue pairs
Generated guides for 140 product/issue pairs
Generated guides for 150 product/issue pairs


{"message": "Finished generating all guides", "name": "__main__", "asctime": "2025-02-14 06:06:12", "levelname": "INFO", "filename": "<ipython-input-44-f882fd4f0c39>", "funcName": "generate_troubleshooting_guides", "threadName": "MainThread", "status": "INFO"}


Finished generating all guides


In [47]:
for product in troubleshooting_guide.keys():
    print(f"Product: {product}")
    print(f"{'-'*100}\n{'-'*100}")
    for issue in troubleshooting_guide[product].keys():
        print(f"**Issue: {issue}**")
        print("\n".join(troubleshooting_guide[product][issue]))
        print("-"*100)
    print("-"*100)

Product: ('Service Technician', 'Technical Support Service', 'Technician Service')
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
**Issue: ('Scheduling a virtual technician appointment', 'Appointment scheduling for technician visit')**
Get confirmation of the customer's preferred email address.
Confirm with the customer about the technician visit fee and service plan coverage.
Gather the customer's preferred contact number for the technician to reach them before the visit.
Provide available time slots for scheduling a virtual technician call or technician visit.
Check and offer available dates for the appointment.
Confirm if the customer prefers a morning or afternoon appointment.
Record the request for a sooner appointment if available.
Get the customer's choice of time for the appointment.
Schedule the appointment for the selected 

In [None]:
# Function to export troubleshooting guide to Markdown
def export_to_markdown(troubleshooting_guide, output_file="troubleshooting_guide.md"):
    with open(output_file, "w") as f:
        for product in troubleshooting_guide.keys():
            f.write(f"# Product: {product}\n")
            for issue in troubleshooting_guide[product].keys():
                # Write issue and steps
                f.write(f"## **Issue: {issue}**\n\n")
                f.write("\n".join([f"- {step}" for step in troubleshooting_guide[product][issue]]) + "\n")

# Example: Export the existing troubleshooting_guide
export_to_markdown(troubleshooting_guide)


Markdown file 'troubleshooting_guide.md' created successfully.
