# Setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Standard library imports
from pydantic import BaseModel
from typing import Literal
import threading
import warnings
import random
import queue
import json
import csv
import os

warnings.filterwarnings("ignore")
JWT_SECRET_API = !echo $(aws --profile "chat-prod_ro" secretsmanager get-secret-value --secret-id "arn:aws:secretsmanager:us-west-2:242659714806:secret:shared/cresta-server-jwt_secret-VDn5My" --query SecretString --output text) # type: ignore
os.environ["JWT_SECRET_API"] = json.loads(JWT_SECRET_API[0])["jwt-secret"]
os.environ["CONFIG_SERVICE_ADDR"] = "auth.chat-prod.internal.cresta.ai:443"
os.environ["CONFIG_USE_SECURE_CHANNEL"] = "true"

# Third party imports

from sentence_transformers import SentenceTransformer, util
from retry import retry

# Greyparrot imports
from greyparrot.llm.prompting import prompts as prompts_utils
from greyparrot.conversations.db import ConversationsDBConn
from greyparrot.multi_tenancy.v3_config import V3Config
from greyparrot.chats_common import PartialChat
from greyparrot.dataset_common import Dataset
from greyparrot.common import get_logger
from greyparrot.conversations.utils import get_chats


logger = get_logger(__name__)

# Local imports
from llm_proxy_client import LLMProxyDevClient

Using local gitdb: /src/config/


In [3]:
embedder = SentenceTransformer("all-mpnet-base-v2")

In [4]:
# Tenant scope for this notebook. get_chats_with_ids reads these module-level
# values: customer_id/profile_id resolve the customer short name, and all four
# are passed as filters to ConversationsDBConn.get_detailed_chats.
customer_id = "brinks"
profile_id = "care-voice"
usecase_id = "care-voice"
language_code = "en-US"

In [5]:
def get_chats_with_ids(chat_ids: list[str]) -> list[PartialChat]:
    """Fetch the detailed chats for the given conversation ids.

    The lookup is scoped by the module-level ``customer_id``, ``profile_id``,
    ``usecase_id`` and ``language_code`` settings.

    Args:
        chat_ids: Conversation ids to load.

    Returns:
        Whatever ``ConversationsDBConn.get_detailed_chats`` returns for those
        ids (annotated as a list of ``PartialChat``).
    """
    # Resolve the short name first; the DB connection is keyed on it.
    short_name: str = V3Config.short_name_from_ids(customer_id, profile_id)
    db: ConversationsDBConn = ConversationsDBConn.from_customer_name(short_name)

    query = {
        "customer_id": customer_id,
        "profile_id": profile_id,
        "usecase_id": usecase_id,
        "language_code": language_code,
        "conversation_ids": chat_ids,
        "is_dev_user": False,
    }
    return db.get_detailed_chats(**query)

In [6]:
# Default model name; discover_flow_in_chat uses it as the default for its
# ``llm_engine`` parameter and forwards it as ``model`` to chat_completion.
LLM_ENGINE: str = "gpt-4o-mini"
# NOTE(review): not referenced by any cell visible in this review --
# extract_flows_from_chats hardcodes its own default of 10. Confirm whether
# that default was meant to come from this constant.
CONCURRENCY: int = 10

In [7]:
# ``tries`` is the total number of attempts, so the previous ``tries=1`` meant
# "never retry" and left ``delay``/``backoff`` without effect. Three attempts
# gives waits of 60s and then 120s between failures before the error propagates.
@retry(tries=3, delay=60, backoff=2, logger=logger)
def chat_completion(**kwargs):
    """Call the structured-output chat completion endpoint via the LLM proxy.

    Args:
        **kwargs: Forwarded unchanged to
            ``LLMProxyDevClient("openai").beta.chat.completions.parse``
            (callers in this notebook pass ``model``, ``messages``,
            ``temperature`` and ``response_format``).

    Returns:
        The parsed completion object returned by the client.

    Raises:
        Exception: The last error raised by the client once all attempts
            have failed.
    """
    return LLMProxyDevClient("openai").beta.chat.completions.parse(**kwargs)

# Discovery prompts

In [8]:
# TODO remove this after fixing speaker_role flips
# TODO remove this after fixing speaker_role flips
# Maps each speaker role value to the opposite one. chat_to_prompt_text looks
# every message's role up here before labelling it, i.e. roles are swapped
# when the prompt text is rendered.
flips: dict[str, str] = {"agent": "visitor", "visitor": "agent"}

In [9]:
def chat_to_prompt_text(chat: PartialChat) -> str:
    """Render a chat as prompt text, one ``Speaker: text`` line per message.

    Each message's speaker role is swapped through ``flips`` before being
    converted to its prompt label, and the label is capitalized.

    Args:
        chat: Chat whose ``messages`` are rendered in order.

    Returns:
        The rendered lines joined with newlines.
    """
    rendered: list[str] = []
    for message in chat.messages:
        swapped_role = flips[message.speaker_role.value]
        label = prompts_utils.speaker_role_str_for_prompts(swapped_role).capitalize()
        rendered.append(f"{label}: {message.text}")
    return "\n".join(rendered)

In [23]:
# System prompt sent by discover_flow_in_chat as the "system" message.
# It asks the model to extract, per conversation, zero or more agent workflows
# described by Product / Issue / Steps -- the same three fields as the Flow
# model used for the structured response. The text is Brinks-specific; editing
# it changes what the model is asked to do.
SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY = """### Context and data description
You are a conversation analyst working for Brinks Home Security Call Center.

You will be given 1 conversation at a time. Each conversation is between a Brinks Call Center Agent and a Customer. Your primary goal is to extract workflows of steps which the Agent takes in **the given conversation** to help resolve the Customer's needs related to their home security system and services.

The primary use case of these workflows is to create a troubleshooting template to address similar customer needs in the future.

Each workflow should be a list of steps which the Agent needs to take.

For each workflow, return:
- Product: Specific Brinks product/service involved (e.g., Security Panel, Door/Window Sensor, Motion Detector, Security Camera, Doorbell Camera, Smart Lock, Mobile App, Alarm.com Account)
- Issue: Specific customer problem (e.g., False Alarms, Device Offline, Camera Not Recording, App Login Issues, Billing Questions, Account Changes)
- Steps: Detailed troubleshooting or resolution steps the Agent follows

Make sure the product is specific to Brinks' security equipment and services, not general categories.
Make sure the issue clearly describes the exact problem the customer is experiencing.
Make sure the steps are detailed and follow Brinks' standard operating procedures.
Make sure to only return the agent troubleshooting steps, not the customer's requests or other information.

Common scenarios include:
- Security system troubleshooting (panel issues, sensor malfunctions, connectivity problems)
- Camera and video recording issues
- Account management (billing, autopay, contact updates)
- Mobile app and Alarm.com portal assistance
- Service changes (moving, upgrading, cancellation)
- Installation and maintenance appointments

**Important**: There could be more than 1 workflow in a single conversation. There could also be no workflows in a single conversation. The workflows will be used to create troubleshooting guides to address similar customer needs in the future."""

In [24]:
# One agent workflow extracted from a conversation. The fields mirror the
# Product / Issue / Steps items requested in
# SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY.
# (Plain comments are used instead of a docstring so the model's generated
# schema is left untouched.)
class Flow(BaseModel):
    # Product or service the workflow concerns.
    product: str
    # Customer problem being addressed.
    issue: str
    # Ordered steps the agent takes.
    steps: list[str]

# Structured-response wrapper: discover_flow_in_chat passes this class as
# ``response_format`` and returns the parsed object's ``flows`` list.
class Flows(BaseModel):
    # All workflows found in one conversation (the prompt allows zero or more).
    flows: list[Flow]

In [25]:
def discover_flow_in_chat(chat: PartialChat,
                          llm_engine: str = LLM_ENGINE) -> list[Flow]:
    """Ask the LLM to extract the agent workflows present in one chat.

    Args:
        chat: Conversation to analyse; it is rendered with
            ``chat_to_prompt_text`` and sent as the user message.
        llm_engine: Model name forwarded as ``model`` to ``chat_completion``.

    Returns:
        The ``flows`` list of the parsed ``Flows`` response.
    """
    logger.info(f"Discovering Agent flow in chat {chat.chat_name}")

    system_message: dict[str, str] = {
        "role": "system",
        "content": SYSTEM_PROMPT_AGENT_WORKFLOW_DISCOVERY,
    }
    user_message: dict[str, str] = {
        "role": "user",
        "content": chat_to_prompt_text(chat),
    }

    # Note: temperature=0.1 to allow for some exploration
    completion = chat_completion(
        model=llm_engine,
        messages=[system_message, user_message],
        temperature=0.1,
        response_format=Flows,
    )
    parsed = completion.choices[0].message.parsed
    return parsed.flows

In [34]:
def extract_flows_from_chats(chats: list[PartialChat],
                             concurrency: int = 10) -> dict[str, list[Flow]]:
    """Run workflow discovery over many chats using a pool of threads.

    Each worker thread pulls chats from a shared queue and calls
    ``discover_flow_in_chat`` on them until the queue is empty.

    Args:
        chats: Chats to analyse.
        concurrency: Number of worker threads to start.

    Returns:
        Mapping from ``str(chat)`` to the flows extracted from that chat.
        Chats whose extraction raised are logged as warnings and omitted, so
        the result may have fewer entries than ``chats``.
    """
    lock: threading.Lock = threading.Lock()
    pending: queue.Queue = queue.Queue()
    for chat in chats:
        pending.put(chat)

    workflows: dict[str, list[Flow]] = {}

    def workflow_labeler_worker():
        while True:
            try:
                chat = pending.get(block=False)
            except queue.Empty:
                return
            try:
                extracted_workflows: list[Flow] = discover_flow_in_chat(chat)
            except Exception as e:
                # Best effort: one failing chat must not stop the batch.
                # Use %-style placeholders; the earlier ``warning(e, str(chat))``
                # handed the chat to a message with no placeholder for it.
                logger.warning(
                    "Failed to extract workflows from chat %s: %s", chat, e)
            else:
                # The result dict and the progress counter are shared state.
                with lock:
                    workflows[str(chat)] = extracted_workflows
                    if len(workflows) % 10 == 0:
                        print(f"Workflows from {len(workflows)} chats extracted!")
            finally:
                pending.task_done()

    logger.info(
        f"Starting processing {len(chats)} chats with {concurrency} workers")
    workers = [
        threading.Thread(target=workflow_labeler_worker)
        for _ in range(concurrency)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Finished processing all {len(chats)} chats")

    return workflows

In [27]:
# Smoke test: load one known conversation and run discovery on it alone.
# Indexing with [0] assumes the lookup returns at least one chat.
test_chat: PartialChat = get_chats_with_ids(["0843c54c-6487-45ce-946a-cc6257484f54"])[0]
# NOTE: ``workflows`` is a list[Flow] here; a later cell rebinds the same name
# to the dict returned by extract_flows_from_chats.
workflows: list[Flow] = discover_flow_in_chat(test_chat)



cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice-FKrGHU -r
cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice -r


{"message": "Discovering Agent flow in chat 0843c54c-6487-45ce-946a-cc6257484f54", "name": "__main__", "asctime": "2025-02-13 21:56:13", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "MainThread", "status": "INFO"}


In [28]:
print(chat_to_prompt_text(test_chat))

Customer: Technical support
Agent: Thank you for being the best part of Brinks Home. My name is Juan. Who do I have the pleasure of speaking with?
Customer: Debbie. Deborah Terryya.
Agent: Good morning, missus Deborah. How can I assist you?
Customer: Okay. On December 16th, I had Brinks installed. I had one. Camera put on my front door, and I had one put on the tireport, and then I went out of town for a couple weeks. And only my carport has been working since December 6th eight. And I clicked on the troubleshoot for my front door, and it just keeps coming up that there's a problem. I mean, I did I did everything it told me to do. I unplugged the power cord, and told me to wait 5 minutes. And it was trying to find what the problem was. And it is still not responding my front door.
Agent: The front door. The other
Customer: Which would be by my court
Agent: Okay. Gotcha. Let me get into the account real quick. And let me just double check what is happening to that sensor.
Customer: Yeah

In [29]:
# Pretty-print the workflows found for the single test chat, with a rule
# between consecutive workflows (but not after the last one).
separator = "-" * 100
total = len(workflows)
print(f"Found {total} workflows\n")
for position, flow in enumerate(workflows, start=1):
    heading = [
        f"[Agent Workflow #{position}]",
        f"Product: {flow.product}",
        f"Issue: {flow.issue}",
    ]
    print("\n".join(heading))
    # Each step is rendered as a "* " bullet on its own line.
    print("Steps:", end="\n* ")
    print("\n* ".join(flow.steps))
    if position < total:
        print(separator)

Found 1 workflows

[Agent Workflow #1]
Product: Security Camera
Issue: Camera Not Recording
Steps:
* Verify customer account details including phone number, address, and verbal password.
* Instruct customer to check the LED status on the front door camera.
* If no light is visible, guide customer to unplug the camera from the power supply.
* Instruct customer to wait for 30 seconds to 1 minute before reconnecting the power supply.
* Confirm with the customer that they are reconnecting the correct power supply for the front door camera.
* After reconnecting, ask the customer to check the LED status again.
* If still no light, instruct customer to test the power outlet by plugging in a phone charger to ensure it is supplying power.
* If the outlet is working, conclude that the power supply for the camera is faulty and inform the customer that a technician will need to be dispatched to fix the issue.


# Relevant chats (from KA-QE trainset)

In [30]:
# Pull a sample of chats to run workflow discovery on.
# NOTE(review): the positional arguments look like (customer name, start date,
# end date, max number of chats) -- confirm against
# greyparrot.conversations.utils.get_chats.
data_set = get_chats(
    'brinks-care-voice', 
    '2024-06-01',
    '2024-12-01',
    200,)



cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice-FKrGHU -r
cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice -r


200it [00:01, 109.25it/s]
{"message": "Caching to cache_chats-6cdad05445414399627e4b4e59113f84.pkl", "name": "cache-utils", "asctime": "2025-02-13 21:56:38", "levelname": "INFO", "filename": "cache.py", "funcName": "wrapper", "threadName": "MainThread", "status": "INFO"}
{"message": "Saving dataset to cache_chats-6cdad05445414399627e4b4e59113f84.pkl", "name": "dataset-common", "asctime": "2025-02-13 21:56:38", "levelname": "INFO", "filename": "common.py", "funcName": "save", "threadName": "MainThread", "status": "INFO"}


In [31]:
# Collect the distinct chat names from the sample. An item may be the chat
# itself or a tuple whose first element is the chat; objects without a
# ``chat_name`` attribute are skipped. Going through a set removes duplicates,
# so the resulting list order is not meaningful.
sampled_chat_ids = list(set([
    chat.chat_name
    for chat in (item if not isinstance(item, tuple) else item[0] for item in data_set)
    if hasattr(chat, 'chat_name')
]))
print(len(sampled_chat_ids))

200


# Agent Workflow Discovery

In [32]:
sampled_chats = get_chats_with_ids(sampled_chat_ids)



cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice-FKrGHU -r
cmd: cresta-cli connstring -i voice-prod voice-prod brinks-care-voice -r


In [35]:
workflows = extract_flows_from_chats(sampled_chats)

{"message": "Starting processing 199 chats with 10 workers", "name": "__main__", "asctime": "2025-02-13 21:57:55", "levelname": "INFO", "filename": "<ipython-input-34-e96ef876503f>", "funcName": "extract_flows_from_chats", "threadName": "MainThread", "status": "INFO"}
{"message": "Discovering Agent flow in chat 000c68d2-6c21-4ddf-ab75-fb32b2ce0fc8", "name": "__main__", "asctime": "2025-02-13 21:57:55", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0017ec90-12c7-44dc-932c-f10b38ce0b9a", "name": "__main__", "asctime": "2025-02-13 21:57:55", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-20 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0039e60e-247f-4a8d-80d1-18d380344dce", "name": "__main__", "a

{"message": "Discovering Agent flow in chat 01086912-66f0-4cdc-b38b-9941fedaca8d", "name": "__main__", "asctime": "2025-02-13 21:57:56", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 01124bf9-4f07-4bf1-a86d-30c4fc6278d6", "name": "__main__", "asctime": "2025-02-13 21:57:57", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 01532d84-1f4b-407f-84aa-54a29374b496", "name": "__main__", "asctime": "2025-02-13 21:57:58", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0167a38e-2cbd-42e

Workflows from 10 chats extracted!


{"message": "Discovering Agent flow in chat 023640bb-240c-4819-9c5a-d9b41ecddff7", "name": "__main__", "asctime": "2025-02-13 21:57:59", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0257d209-a041-47cc-baa8-70a785504964", "name": "__main__", "asctime": "2025-02-13 21:58:00", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0297c7d2-a6a2-4559-944a-4727df5b9b6f", "name": "__main__", "asctime": "2025-02-13 21:58:00", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 034f5b00-e216-401

Workflows from 20 chats extracted!


{"message": "Discovering Agent flow in chat 040d4804-0ef6-4a89-a4da-f08d92734bff", "name": "__main__", "asctime": "2025-02-13 21:58:02", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 04203a12-c613-4cd4-a6fd-f01c4a6f40a2", "name": "__main__", "asctime": "2025-02-13 21:58:02", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 044696fe-ba32-4ce6-a705-c25ce7a923ff", "name": "__main__", "asctime": "2025-02-13 21:58:02", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-20 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 04758b07-a186-42e

Workflows from 30 chats extracted!


{"message": "Discovering Agent flow in chat 05cf80cf-6e4f-46ce-9edd-5e9018674062", "name": "__main__", "asctime": "2025-02-13 21:58:04", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05e95ce9-95a5-4b36-8694-05ba68d2a950", "name": "__main__", "asctime": "2025-02-13 21:58:04", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05eedf8d-8f6e-49e0-a793-1203aef1dd4e", "name": "__main__", "asctime": "2025-02-13 21:58:04", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 05f3239c-596b-4ce

Workflows from 40 chats extracted!


{"message": "Discovering Agent flow in chat 06f04275-4678-47af-b15d-28cba56043e3", "name": "__main__", "asctime": "2025-02-13 21:58:06", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-28 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 071c6c6d-4294-4f0e-9581-ad621e1871fc", "name": "__main__", "asctime": "2025-02-13 21:58:06", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 073a30a5-2e0d-4b55-8658-5b704954a90b", "name": "__main__", "asctime": "2025-02-13 21:58:07", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 074c00d3-1ca4-4b2

Workflows from 50 chats extracted!


{"message": "Discovering Agent flow in chat 08b28c6d-793b-4fb9-8b2b-7001ed3026aa", "name": "__main__", "asctime": "2025-02-13 21:58:09", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 096c8148-f2c5-4d32-96e7-80e6e3610740", "name": "__main__", "asctime": "2025-02-13 21:58:09", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-21 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 096d88a3-edbe-4a62-acb5-205724910f54", "name": "__main__", "asctime": "2025-02-13 21:58:09", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 09896ad0-ed6d-41e

Workflows from 60 chats extracted!


{"message": "Discovering Agent flow in chat 0ad0f9fd-9d59-4446-9c85-d887a208d532", "name": "__main__", "asctime": "2025-02-13 21:58:12", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-25 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0adb8781-21a8-44f2-b55e-bfa532d6fb28", "name": "__main__", "asctime": "2025-02-13 21:58:12", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-26 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0ade3b59-78ec-4429-b533-b6ffad4eaf1f", "name": "__main__", "asctime": "2025-02-13 21:58:12", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0ae9aff6-6d8e-4b3

Workflows from 70 chats extracted!


{"message": "Discovering Agent flow in chat 0b5d1101-12bd-4e85-8882-ab0ee9726e60", "name": "__main__", "asctime": "2025-02-13 21:58:14", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0becb6e2-b21d-4f97-93c0-e41ffaa77222", "name": "__main__", "asctime": "2025-02-13 21:58:14", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0c176c4f-2e3a-40c7-80cc-33d19ff95336", "name": "__main__", "asctime": "2025-02-13 21:58:15", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-25 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0c1907e0-1e99-4a7

Workflows from 80 chats extracted!


{"message": "Discovering Agent flow in chat 0d143a65-8721-48f5-bb61-cc82ab8d85e4", "name": "__main__", "asctime": "2025-02-13 21:58:16", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d2acb25-43d6-4981-8b75-4f80a2e7f4c3", "name": "__main__", "asctime": "2025-02-13 21:58:16", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d2dfdb2-3f23-4b28-ad9f-a6d2ef4aeb36", "name": "__main__", "asctime": "2025-02-13 21:58:17", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-28 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0d48ae17-2d7b-4fd

Workflows from 90 chats extracted!


{"message": "Discovering Agent flow in chat 0e3c8794-f10b-4b90-ae0e-0f963f428790", "name": "__main__", "asctime": "2025-02-13 21:58:19", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-20 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e3e206d-4abc-47a8-b7f7-fffd5603a90f", "name": "__main__", "asctime": "2025-02-13 21:58:19", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-21 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e536744-ac0c-4712-bb80-7183fb5d1c56", "name": "__main__", "asctime": "2025-02-13 21:58:19", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0e759e20-116c-459

Workflows from 100 chats extracted!


{"message": "Discovering Agent flow in chat 0f311baa-f57d-4034-ab10-f18d0c39ea7e", "name": "__main__", "asctime": "2025-02-13 21:58:21", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-26 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f343b4f-8a53-47a8-a259-85dc4f7e4fbc", "name": "__main__", "asctime": "2025-02-13 21:58:21", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f3db87a-0d0c-4622-8d4b-8d8bd98ecc4d", "name": "__main__", "asctime": "2025-02-13 21:58:21", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-28 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 0f71b41f-b45b-4a9

Workflows from 110 chats extracted!


{"message": "Discovering Agent flow in chat 0feba987-84d7-4c44-8a55-8ae2f8ae144c", "name": "__main__", "asctime": "2025-02-13 21:58:24", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-25 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 101c2671-b555-4353-8b81-4905ea10ba04", "name": "__main__", "asctime": "2025-02-13 21:58:24", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 102afab8-8e7f-476c-a496-63abf2f5f092", "name": "__main__", "asctime": "2025-02-13 21:58:24", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-28 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1067f3fc-5df6-42d

Workflows from 120 chats extracted!


{"message": "Discovering Agent flow in chat 118edf23-2285-4dc5-8f63-5d8bf2e1bab1", "name": "__main__", "asctime": "2025-02-13 21:58:27", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 11de0e64-d791-408d-97e1-57940f9c80e1", "name": "__main__", "asctime": "2025-02-13 21:58:27", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 11e32ae8-5a81-4497-a1d5-2ef8acf2f581", "name": "__main__", "asctime": "2025-02-13 21:58:27", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1203f20d-139f-43e

Workflows from 130 chats extracted!


{"message": "Discovering Agent flow in chat 136bdd10-1b56-4e84-8787-4bd2b54572ad", "name": "__main__", "asctime": "2025-02-13 21:58:29", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-26 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 137b7ca8-e316-41c1-b932-1d4831564e6b", "name": "__main__", "asctime": "2025-02-13 21:58:29", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-20 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 139404a3-d45a-4d69-b0ab-7409f32ac216", "name": "__main__", "asctime": "2025-02-13 21:58:29", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 13960ccc-3adc-449

Workflows from 140 chats extracted!


{"message": "Discovering Agent flow in chat 1449dd9d-3137-45d0-895e-300d9657e223", "name": "__main__", "asctime": "2025-02-13 21:58:31", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14535af2-554b-4d0a-9aed-562c37ca2f5d", "name": "__main__", "asctime": "2025-02-13 21:58:31", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-21 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14544e26-6023-4756-b897-46555b00ee1e", "name": "__main__", "asctime": "2025-02-13 21:58:31", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 146407b6-78bd-4b6

Workflows from 150 chats extracted!


{"message": "Discovering Agent flow in chat 14dab401-786b-43d6-adaa-c3c8ad5107bc", "name": "__main__", "asctime": "2025-02-13 21:58:34", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-21 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 14e47b7b-7284-49de-8f11-dbcea6e688a4", "name": "__main__", "asctime": "2025-02-13 21:58:34", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1512e71c-00fc-455d-929d-71f191699fc2", "name": "__main__", "asctime": "2025-02-13 21:58:34", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-26 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 151380f0-10b0-48e

Workflows from 160 chats extracted!


{"message": "Discovering Agent flow in chat 15d5bbab-2bf6-4d46-a685-9254945f7705", "name": "__main__", "asctime": "2025-02-13 21:58:36", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 15f0c3bc-64b4-4f77-9010-4073df40c41f", "name": "__main__", "asctime": "2025-02-13 21:58:36", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-23 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 160939cf-9a8f-4383-9799-e1631115d33f", "name": "__main__", "asctime": "2025-02-13 21:58:36", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-27 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 16145eaf-c159-4f8

Workflows from 170 chats extracted!


{"message": "Discovering Agent flow in chat 1769dc8b-7c1d-43cc-ad42-055239339176", "name": "__main__", "asctime": "2025-02-13 21:58:38", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-19 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 176bdf21-0a06-490f-ad78-d506636940cc", "name": "__main__", "asctime": "2025-02-13 21:58:38", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-28 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 17819caa-4212-4fc3-bf14-26a2fdcdb719", "name": "__main__", "asctime": "2025-02-13 21:58:39", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-20 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 17bf716e-010e-45c

Workflows from 180 chats extracted!


{"message": "Discovering Agent flow in chat 1913f1d4-0f13-4177-819a-b342ecf2c421", "name": "__main__", "asctime": "2025-02-13 21:58:41", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-24 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 1919f981-b0dc-4584-ab49-364bbc943425", "name": "__main__", "asctime": "2025-02-13 21:58:41", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-22 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 195f3467-6a11-47cb-a831-45ff25c30f92", "name": "__main__", "asctime": "2025-02-13 21:58:41", "levelname": "INFO", "filename": "<ipython-input-25-6a60bf4efb16>", "funcName": "discover_flow_in_chat", "threadName": "Thread-25 (workflow_labeler_worker)", "status": "INFO"}
{"message": "Discovering Agent flow in chat 19747617-8b80-44a

Workflows from 190 chats extracted!


{"message": "Finished processing all 199 chats", "name": "__main__", "asctime": "2025-02-13 21:58:51", "levelname": "INFO", "filename": "<ipython-input-34-e96ef876503f>", "funcName": "extract_flows_from_chats", "threadName": "MainThread", "status": "INFO"}


In [45]:
sum([len(w) for w in workflows.values()])

274

In [46]:
# Flatten the per-chat results into one list of flows, keeping dict order.
all_workflows = [
    flow
    for chat_flows in workflows.values()
    for flow in chat_flows
]

# Grouping Workflows

In [62]:
def group_semantically_similar_workflows(
    workflows: list[Flow],
    embedder: SentenceTransformer,
    group_on: Literal["product", "issue"],
    semantic_threshold: float = 0.9,
) -> tuple[list[Flow], list[list[Flow]]]:
    """Split workflows into singletons and groups of semantically similar ones.

    The ``group_on`` field of every workflow is embedded with ``embedder``.
    Two workflows are linked when the cosine similarity of their embeddings
    is strictly greater than ``semantic_threshold``. When a workflow links to
    members of already-built groups, those groups are merged into one.

    Args:
        workflows: Workflows to group. Plain strings are also accepted and
            are embedded as they are.
        embedder: Model used to turn the selected text into embeddings.
        group_on: Name of the workflow attribute whose text is compared.
        semantic_threshold: Cosine-similarity cut-off (exclusive).

    Returns:
        A ``(solo, groups)`` tuple: ``solo`` holds the workflows that matched
        nothing else, ``groups`` holds one list of workflows per group.
        Members of a group are ordered by their position in ``workflows``.
    """
    # ``encode`` works on text, so pull out the field we were asked to compare
    # instead of handing it the workflow objects themselves.
    texts = [
        workflow if isinstance(workflow, str) else getattr(workflow, group_on)
        for workflow in workflows
    ]
    embeddings = embedder.encode(texts, convert_to_tensor=True)

    workflow_groups: list[list[int]] = []
    solo_workflows: list[int] = []
    # Every index that already belongs to some group; kept in sync with
    # ``workflow_groups`` so membership is a set lookup, not a scan of groups.
    grouped: set[int] = set()

    for idx, _ in enumerate(workflows):
        if idx in grouped:
            continue

        print(f"Finding similar Workflows for Workflow#{idx}")

        # Compare one row against everything to avoid an n x n matrix.
        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = {
            i for i, score in enumerate(scores)
            if i != idx and score > semantic_threshold
        }
        if not matches:
            solo_workflows.append(idx)
            continue

        merged = matches | {idx}
        untouched_groups = []
        for group in workflow_groups:
            if matches.intersection(group):
                # Shares a member with the new matches: absorb it.
                merged.update(group)
            else:
                untouched_groups.append(group)
        # Sorted so member order is deterministic rather than set-iteration order.
        workflow_groups = untouched_groups + [sorted(merged)]
        grouped.update(merged)

    print(f"Found {len(solo_workflows)} # of Solo Workflows..")
    print(f"Found {len(workflow_groups)} # Groups of Workflows..")

    solo = [workflows[idx] for idx in solo_workflows]
    groups = [[workflows[idx] for idx in group] for group in workflow_groups]
    return solo, groups

In [83]:
def group_workflows_by_product(workflows: list[Flow], embedder: SentenceTransformer, semantic_threshold: float=0.8) -> dict[tuple[str, ...], list[Flow]]:
    """Bucket workflows by product, merging semantically similar product names.

    Each workflow's ``product`` is embedded with ``embedder``. Two workflows
    are linked when the cosine similarity of their product embeddings is
    strictly greater than ``semantic_threshold``. When a workflow links to
    members of already-built groups, those groups are merged into one.

    Args:
        workflows: Workflows to bucket; each must expose a ``product``.
        embedder: Model used to embed the product names.
        semantic_threshold: Cosine-similarity cut-off (exclusive).

    Returns:
        A dict whose keys are tuples of the distinct product names in a
        bucket (alphabetically sorted, so keys are reproducible across runs)
        and whose values are the workflows in that bucket. A workflow whose
        product matched nothing else gets a one-element key.
    """
    products = [w.product for w in workflows] # ["camera", "brinks alarm", "alarm", "tv"]
    embeddings = embedder.encode(products, convert_to_tensor=True)

    product_groups: list[list[int]] = []
    solo_products: list[int] = []
    # Every index that already belongs to some group; kept in sync with
    # ``product_groups`` so membership is a set lookup, not a scan of groups.
    grouped: set[int] = set()

    for idx, _ in enumerate(products):
        if idx in grouped:
            continue

        # Compare one row against everything to avoid an n x n matrix.
        scores = util.cos_sim(embeddings[idx:idx + 1],
                              embeddings).cpu().tolist()[0]
        matches = {
            i for i, score in enumerate(scores)
            if i != idx and score > semantic_threshold
        }
        if not matches:
            solo_products.append(idx)
            continue

        merged = matches | {idx}
        untouched_groups = []
        for group in product_groups:
            if matches.intersection(group):
                # Shares a member with the new matches: absorb it.
                merged.update(group)
            else:
                untouched_groups.append(group)
        # Sorted so member order is deterministic rather than set-iteration order.
        product_groups = untouched_groups + [sorted(merged)]
        grouped.update(merged)

    print(f"Found {len(solo_products)} # of Solo Products.")
    print(f"Found {len(product_groups)} # Groups of Products.")

    retval: dict[tuple[str, ...], list[Flow]] = {}

    for group in product_groups:
        # Iterating a set of strings depends on hash randomization; sorting
        # keeps the key identical from one interpreter session to the next.
        group_key = tuple(sorted({products[idx] for idx in group}))
        retval[group_key] = [workflows[idx] for idx in group]

    for idx in solo_products:
        retval[(products[idx],)] = [workflows[idx]]

    return retval

In [84]:
# Bucket every extracted workflow under its product key.
tree = group_workflows_by_product(workflows=all_workflows, embedder=embedder)

Found 37 # of Solo Products.
Found 30 # Groups of Products.


In [87]:
# Show how many workflows landed in the first bucket only.
first_bucket = next(iter(tree.values()), None)
if first_bucket is not None:
    print(len(first_bucket))

18


In [None]:
# NOTE(review): the three literals below are bare string expressions used as
# design notes; none of them is assigned or parsed. They appear to sketch the
# successive shapes the grouped data is meant to take.

# Sketch 1: product name -> list of flows for that product.
"""
{
    "camera": [camera_flow1, camera_flow2, camera_flow3],
    "alarm": [alarm_flow4, alarm_flow5, alarm_flow6],
    "TV": [tv_flow1]
}
"""

# Sketch 2: product name -> issue -> list of flows for that product and issue.
"""
{
    "camera": {
        "issue1": [camera_issue1_flow1, camera_issue1_flow2, camera_issue1_flow3],
        "issue2": [camera_issue2_flow1, camera_issue2_flow2, camera_issue2_flow3],
    },
    "alarm": {
        "issue1": [alarm_issue1_flow1, alarm_issue1_flow2, alarm_issue1_flow3],
        "issue2": [alarm_issue2_flow1, alarm_issue2_flow2, alarm_issue2_flow3],
    },
}
"""

# Sketch 3: product name -> issue -> a single set of troubleshooting steps.
"""
{
    "camera": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
    "alarm": {
        "issue1": troubleshooting_steps,
        "issue2": troubleshooting_steps,
    },
}
"""

In [None]:
# Level 1: product key -> workflows. `group_workflows_by_product` is the helper
# that returns a dict; `group_semantically_similar_workflows` returns a
# `(solo, groups)` tuple, which has no `.items()`.
product_groups = group_workflows_by_product(all_workflows, embedder)

# Level 2: within each product bucket, split the workflows by issue. Each value
# is the `(solo, groups)` tuple returned by the grouping helper.
issues = {
    product: group_semantically_similar_workflows(product_group, embedder, "issue")
    for product, product_group in product_groups.items()
}

In [None]:
# Dump every group: a separator, the group size, then one member per line.
separator = "-" * 50
for group in grouped_workflows:
    print(separator)
    print(len(group))
    for member in group:
        print(member)

In [None]:
# Keep every solo title plus one randomly chosen representative per group,
# then look the selected titles back up to get the workflows themselves.
group_representatives = list(map(random.choice, grouped_workflows))
unique_workflow_titles = solo_workflows + group_representatives
unique_workflows = [title_to_workflow[title] for title in unique_workflow_titles]
len(unique_workflows)

In [None]:
# skip too small workflows: keep only those with more than 3 steps
filtered_unique_workflows = list(
    filter(lambda workflow: len(workflow["steps"]) > 3, unique_workflows)
)
len(filtered_unique_workflows)

In [None]:
# Display the first workflow that survived the step-count filter.
filtered_unique_workflows[0]

In [None]:
# Print the titles of the first ten filtered workflows.
preview = filtered_unique_workflows[:10]
for workflow in preview:
    print(workflow["title"])

In [None]:
# Pretty-print every filtered workflow: list-valued fields become bullet
# lists, every other field is printed as "Name: value".
for workflow in filtered_unique_workflows:
    print("-" * 100)
    print("\n\n")
    for field, value in workflow.items():
        if not isinstance(value, list):
            print(f"{field.capitalize()}: {value}")
            continue
        bullets = "\n* ".join(value)
        print(f"{field.capitalize()}:\n* {bullets}")

In [None]:
# Titles of all filtered workflows, in order.
[workflow["title"] for workflow in filtered_unique_workflows]

In [None]:
# Number of workflows remaining after the step-count filter.
len(filtered_unique_workflows)