In [3]:
import json

In [4]:
import dotenv

In [5]:
# Load settings from a local .env file; the `True` output reports that one was found.
# NOTE(review): presumably this supplies the OpenAI key/base URL that ChatOpenAI picks up later — confirm.
dotenv.load_dotenv()

True

In [6]:
from langchain_community.chat_models import ChatOpenAI

In [7]:
# Re-import edited modules before each cell execution, so changes to the
# locally developed chatsky_llm_autoconfig package apply without a kernel restart.
%load_ext autoreload
%autoreload 2

In [8]:
import chatsky_llm_autoconfig.metrics.llm_metrics as llm_metrics

In [9]:
from chatsky_llm_autoconfig.graph import Graph

In [10]:
from tqdm import tqdm

In [11]:
def read_json(path, encoding="utf-8"):
    """Read a JSON file from disk and return the decoded value.

    Args:
        path: Location of the JSON file.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that files containing non-ASCII utterances are read the same way
            on every platform instead of depending on the locale default.

    Returns:
        The parsed JSON content (for example a list or a dict).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, mode="r", encoding=encoding) as file:
        # Parse straight from the file handle; no intermediate string needed.
        return json.load(file)

In [10]:
# Show the kernel's working directory; the relative data paths used below resolve against it.
%pwd

'/Users/yuriypeshkichev/Projects/ipavlov/chatsky-llm-autoconfig/experiments/2024.11.14_dialog2graph'

In [12]:
# Evaluation cases for the first run; the cells below read the "topic",
# "graph" and "dialogs" entries of each case.
data = read_json(
    path="../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/new_data.json"
)

In [11]:
# LLM used as the judge by every llm_metrics call in this notebook.
model = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

In [12]:
from chatsky_llm_autoconfig.graph import BaseGraph
from chatsky_llm_autoconfig.dialog import Dialog

In [16]:
def all_utterances_present(G: BaseGraph, dialogs: list[Dialog]) -> bool:
    """
    Check that every utterance stored in the graph occurs in the given dialogs.

    The utterances of all nodes and edges are pooled into one set and compared
    with the pooled message texts of all dialogs, so an utterance counts as
    present if any of the dialogs contains it.

    Args:
        G: BaseGraph object containing the dialog graph; its ``graph``
            attribute must expose ``nodes(data=True)`` and ``edges(data=True)``
            with an ``"utterances"`` entry in each data dict
        dialogs: List of Dialog objects to check against; each message must
            expose a ``text`` attribute

    Returns:
        bool: True if all graph utterances are present in the dialogs
        (trivially True for a graph without utterances), False otherwise
    """

    def _collect(target: set, utterances) -> None:
        # "utterances" may be a collection of strings or a single bare string.
        # A bare string must be added whole: set.update() would otherwise
        # split it into individual characters.
        if isinstance(utterances, (list, tuple, set, frozenset)):
            target.update(utterances)
        else:
            target.add(utterances)

    # Pool the utterances of every node and edge in the graph
    graph_utterances = set()
    for _, node_data in G.graph.nodes(data=True):
        _collect(graph_utterances, node_data["utterances"])
    for _, _, edge_data in G.graph.edges(data=True):
        _collect(graph_utterances, edge_data["utterances"])

    # Pool the text of every message across all dialogs
    dialog_utterances = set()
    for dialog in dialogs:
        dialog_utterances.update(utt.text for utt in dialog.messages)

    # For debugging, graph_utterances - dialog_utterances lists what is missing
    return graph_utterances.issubset(dialog_utterances)

In [15]:
import pandas as pd

In [18]:
# Show the kernel's working directory; relative input paths and the CSV outputs resolve against it.
%pwd

'/Users/yuriypeshkichev/Projects/ipavlov/chatsky-llm-autoconfig/experiments/2024.11.14_dialog2graph'

In [14]:
# Build a Graph from the first case's graph dict and display its repr as a sanity check.
gr = Graph(data[0]["graph"])
gr

Graph(graph_dict={'nodes': [{'id': 1, 'label': 'welcome and ask', 'is_start': True, 'utterances': ['Hello! This is tech support. How can I help you today?']}, {'id': 2, 'label': 'reload', 'is_start': False, 'utterances': ['Try to reload your device.']}, {'id': 3, 'label': 'check if helped', 'is_start': False, 'utterances': ['Has this resolved the issue for you?']}, {'id': 4, 'label': 'verification', 'is_start': False, 'utterances': ['Try cleaning browser cache.']}, {'id': 5, 'label': 'ask about additional problem', 'is_start': False, 'utterances': ['What is the other problem that you are experiencing?']}, {'id': 6, 'label': 'solution for balance', 'is_start': False, 'utterances': ['Try logging out of the website and then re-logging in.']}, {'id': 7, 'label': 'end', 'is_start': False, 'utterances': ['Thank you for contacting us, have a nice day.']}], 'edges': [{'source': 1, 'target': 2, 'utterances': ['The website loading takes too long']}, {'source': 2, 'target': 3, 'utterances': ['Sur

In [18]:
# Inspect the node list (id, label, is_start, utterances) of the first graph.
gr.graph_dict["nodes"]

[{'id': 1,
  'label': 'welcome and ask',
  'is_start': True,
  'utterances': ['Hello! This is tech support. How can I help you today?']},
 {'id': 2,
  'label': 'reload',
  'is_start': False,
  'utterances': ['Try to reload your device.']},
 {'id': 3,
  'label': 'check if helped',
  'is_start': False,
  'utterances': ['Has this resolved the issue for you?']},
 {'id': 4,
  'label': 'verification',
  'is_start': False,
  'utterances': ['Try cleaning browser cache.']},
 {'id': 5,
  'label': 'ask about additional problem',
  'is_start': False,
  'utterances': ['What is the other problem that you are experiencing?']},
 {'id': 6,
  'label': 'solution for balance',
  'is_start': False,
  'utterances': ['Try logging out of the website and then re-logging in.']},
 {'id': 7,
  'label': 'end',
  'is_start': False,
  'utterances': ['Thank you for contacting us, have a nice day.']}]

In [16]:
# Inspect the full raw graph dict (nodes and edges) of the first graph.
gr.graph_dict

{'nodes': [{'id': 1,
   'label': 'welcome and ask',
   'is_start': True,
   'utterances': ['Hello! This is tech support. How can I help you today?']},
  {'id': 2,
   'label': 'reload',
   'is_start': False,
   'utterances': ['Try to reload your device.']},
  {'id': 3,
   'label': 'check if helped',
   'is_start': False,
   'utterances': ['Has this resolved the issue for you?']},
  {'id': 4,
   'label': 'verification',
   'is_start': False,
   'utterances': ['Try cleaning browser cache.']},
  {'id': 5,
   'label': 'ask about additional problem',
   'is_start': False,
   'utterances': ['What is the other problem that you are experiencing?']},
  {'id': 6,
   'label': 'solution for balance',
   'is_start': False,
   'utterances': ['Try logging out of the website and then re-logging in.']},
  {'id': 7,
   'label': 'end',
   'is_start': False,
   'utterances': ['Thank you for contacting us, have a nice day.']}],
 'edges': [{'source': 1,
   'target': 2,
   'utterances': ['The website loading 

In [None]:
# Score every case in `data` with the two LLM-judged graph metrics and the
# utterance-coverage check, collecting one result row per case.
results = []
for case in tqdm(data):
    # Build the graph once per case and share it between all three checks
    # instead of constructing it again for every call.
    graph = Graph(case["graph"])

    triplets = llm_metrics.are_triplets_valid(G=graph, model=model)
    themes = llm_metrics.is_theme_valid(G=graph, topic=case["topic"], model=model)
    utterances_covered = all_utterances_present(
        G=graph,
        dialogs=[Dialog(messages=x["messages"]) for x in case["dialogs"]],
    )

    case_results = {
        "topic": case["topic"],
        "are_triplets_valid": triplets["value"],
        "are_triplets_valid_details": triplets["description"],
        "is_theme_valid": themes["value"],
        "is_theme_valid_details": themes["description"],
        "all_utterances_present": utterances_covered,
    }
    results.append(case_results)

# Tabulate, show and persist the per-case results.
df = pd.DataFrame(results)
print(df)
df.to_csv("results.csv")

In [23]:
# Per-case theme verdicts from `results`.
[case_result["is_theme_valid"] for case_result in results]

[True, True, True, True, True, True, True, True, True, True]

In [24]:
# Per-case utterance-coverage verdicts from `results`.
[case_result["all_utterances_present"] for case_result in results]

[True, False, True, True, True, True, True, True, True, False]

In [None]:
# Second evaluation set: the complex-graph cases.
test_data = read_json(
    path="../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/complex_graphs.json"
)

In [30]:
# Inspect the dialogs of `case`, the loop variable left over from whichever evaluation loop ran last.
case["dialogs"]

[[{'text': 'Good evening, how can I help?', 'participant': 'assistant'},
  {'text': 'I want to book a duplex room', 'participant': 'user'},
  {'text': 'How long are you going to stay?', 'participant': 'assistant'},
  {'text': 'One night', 'participant': 'user'},
  {'text': 'We have a vacant room. Do you need anything else?',
   'participant': 'assistant'},
  {'text': 'No, thanks', 'participant': 'user'},
  {'text': 'Okay, now I need your ID card.', 'participant': 'assistant'},
  {'text': 'Here it is', 'participant': 'user'},
  {'text': 'Thank you. This is your key, have a good stay!',
   'participant': 'assistant'}],
 [{'text': 'Good evening, how can I help?', 'participant': 'assistant'},
  {'text': 'I want to book a duplex room', 'participant': 'user'},
  {'text': 'How long are you going to stay?', 'participant': 'assistant'},
  {'text': 'Three nights', 'participant': 'user'},
  {'text': 'Unfortunately we do not have a vacant room for these dates, but we can offer two separate single 

In [None]:
# Display the loaded complex-graph cases in full.
test_data

In [None]:
# Score every complex-graph case in `test_data` with the two LLM-judged graph
# metrics and the utterance-coverage check, collecting one row per case.
comp_results = []
for case in tqdm(test_data):
    # Build the graph once per case and share it between all three checks
    # instead of constructing it again for every call.
    graph = Graph(case["graph"])

    triplets = llm_metrics.are_triplets_valid(G=graph, model=model)
    themes = llm_metrics.is_theme_valid(G=graph, topic=case["topic"], model=model)
    utterances_covered = all_utterances_present(
        G=graph,
        dialogs=[Dialog(messages=x["messages"]) for x in case["dialogs"]],
    )

    case_results = {
        "topic": case["topic"],
        "are_triplets_valid": triplets["value"],
        "are_triplets_valid_details": triplets["description"],
        "is_theme_valid": themes["value"],
        "is_theme_valid_details": themes["description"],
        "all_utterances_present": utterances_covered,
    }
    comp_results.append(case_results)

# Tabulate, show and persist the per-case results.
df = pd.DataFrame(comp_results)
print(df)
df.to_csv("comp_results.csv")

In [34]:
# Per-case theme verdicts from `comp_results`.
[case_result["is_theme_valid"] for case_result in comp_results]

[True, True, True, True, True]

In [35]:
# Per-case triplet verdicts from `comp_results`.
[case_result["are_triplets_valid"] for case_result in comp_results]

[True, False, True, True, False]

In [36]:
# Per-case utterance-coverage verdicts from `comp_results`.
[case_result["all_utterances_present"] for case_result in comp_results]

[True, True, True, True, True]

### gpt-4o-mini

In [None]:
# Re-run the complex-graph evaluation on `test_data` with the current `model`
# and keep the rows separately in `comp_mini_results`.
comp_mini_results = []
for case in tqdm(test_data):
    # Build the graph once per case and share it between all three checks
    # instead of constructing it again for every call.
    graph = Graph(case["graph"])

    triplets = llm_metrics.are_triplets_valid(G=graph, model=model)
    themes = llm_metrics.is_theme_valid(G=graph, topic=case["topic"], model=model)
    utterances_covered = all_utterances_present(
        G=graph,
        dialogs=[Dialog(messages=x["messages"]) for x in case["dialogs"]],
    )

    case_results = {
        "topic": case["topic"],
        "are_triplets_valid": triplets["value"],
        "are_triplets_valid_details": triplets["description"],
        "is_theme_valid": themes["value"],
        "is_theme_valid_details": themes["description"],
        "all_utterances_present": utterances_covered,
    }
    comp_mini_results.append(case_results)

# Tabulate, show and persist the per-case results.
df = pd.DataFrame(comp_mini_results)
print(df)
df.to_csv("comp_mini_results.csv")

In [40]:
# Per-case theme verdicts from `comp_mini_results`.
[case_result["is_theme_valid"] for case_result in comp_mini_results]

[True, True, True, True, True]

In [41]:
# Per-case triplet verdicts from `comp_mini_results`.
[case_result["are_triplets_valid"] for case_result in comp_mini_results]

[False, False, True, False, False]

In [45]:
# Display the judge model's configuration to confirm which model name and API base are in effect.
model

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x139406000>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x139406ba0>, root_client=<openai.OpenAI object at 0x1394041a0>, root_async_client=<openai.AsyncOpenAI object at 0x139404350>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://193.187.173.33:8002/api/providers/openai/v1')

In [None]:
# Re-run the evaluation on `data` with the current `model` and keep the rows
# separately in `mini_results`.
mini_results = []
for case in tqdm(data):
    # Build the graph once per case and share it between all three checks
    # instead of constructing it again for every call.
    graph = Graph(case["graph"])

    triplets = llm_metrics.are_triplets_valid(G=graph, model=model)
    themes = llm_metrics.is_theme_valid(G=graph, topic=case["topic"], model=model)
    utterances_covered = all_utterances_present(
        G=graph,
        dialogs=[Dialog(messages=x["messages"]) for x in case["dialogs"]],
    )

    case_results = {
        "topic": case["topic"],
        "are_triplets_valid": triplets["value"],
        "are_triplets_valid_details": triplets["description"],
        "is_theme_valid": themes["value"],
        "is_theme_valid_details": themes["description"],
        "all_utterances_present": utterances_covered,
    }
    mini_results.append(case_results)

# Tabulate, show and persist the per-case results.
df = pd.DataFrame(mini_results)
print(df)
df.to_csv("mini_results.csv")

In [43]:
# Per-case theme verdicts from `mini_results`.
[case_result["is_theme_valid"] for case_result in mini_results]

[True, True, True, True, True, True, True, True, True, True]

In [44]:
# Per-case triplet verdicts from `mini_results`.
[case_result["are_triplets_valid"] for case_result in mini_results]

[True, False, True, True, False, True, True, True, False, False]

In [None]:
# Ask the LLM judge about every dialog of every case in `data`;
# one result row per dialog, tagged with the topic of its case.
dialog_results = []
for case in tqdm(data):
    for dialog in case["dialogs"]:
        res = llm_metrics.is_dialog_valid(dialog["messages"], model=model)
        case_results = {
            "topic": case["topic"],
            "is_dialog_valid": res["value"],
            "is_dialog_valid_details": res["description"],
        }
        dialog_results.append(case_results)

In [14]:
# Display the raw per-dialog verdicts together with the judge's explanations.
dialog_results

[{'topic': 'Tech Support',
  'is_dialog_valid': True,
  'is_dialog_valid_details': "The first message from the assistant naturally starts the dialog by offering help, which is appropriate for a tech support scenario. The final message thanks the user for contacting support and wishes them a nice day, which logically concludes the conversation after the user's issue has been resolved. The dialog appears to be logically finished."},
 {'topic': 'Tech Support',
  'is_dialog_valid': True,
  'is_dialog_valid_details': 'The first message naturally starts the dialog by introducing tech support and offering assistance. The final message logically connects to the previous dialog by thanking the user after their issue was resolved, indicating a successful conclusion to the conversation.'},
 {'topic': 'Tech Support',
  'is_dialog_valid': True,
  'is_dialog_valid_details': 'The first message from the assistant naturally starts the dialog by offering help, which is appropriate for a tech support sce

In [16]:
# Tabulate the per-dialog verdicts, print them and save them to dialog_results.csv
# in the working directory (the DataFrame index is written as the first column).
df = pd.DataFrame(dialog_results)
print(df)
df.to_csv("dialog_results.csv")

                              topic  is_dialog_valid  \
0                      Tech Support               True   
1                      Tech Support               True   
2                      Tech Support               True   
3                      Tech Support               True   
4                      Tech Support               True   
5                      Tech Support               True   
6                     food delivery               True   
7                     food delivery               True   
8                     food delivery               True   
9                     food delivery               True   
10                    food delivery               True   
11      Medical Appointment Booking               True   
12      Medical Appointment Booking               True   
13      Medical Appointment Booking               True   
14      Medical Appointment Booking               True   
15                          library               True   
16              

In [None]:
# NOTE: this rebinds `data` to the complex-graph cases, so earlier cells that
# read `data` will see different cases if they are re-run afterwards.
data = read_json(
    path="../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/complex_graphs.json"
)

# Ask the LLM judge about every dialog of every complex-graph case;
# one result row per dialog, tagged with the topic of its case.
dialog_results = []
for case in tqdm(data):
    for dialog in case["dialogs"]:
        res = llm_metrics.is_dialog_valid(dialog["messages"], model=model)
        case_results = {
            "topic": case["topic"],
            "is_dialog_valid": res["value"],
            "is_dialog_valid_details": res["description"],
        }
        dialog_results.append(case_results)

# Tabulate, show and persist the per-dialog results.
df = pd.DataFrame(dialog_results)
print(df)
df.to_csv("dialog_complex_results.csv")