In [1]:
import json

In [2]:
import dotenv

In [3]:
dotenv.load_dotenv()

True

In [4]:
from langchain_openai  import ChatOpenAI

In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
import chatsky_llm_autoconfig.metrics.llm_metrics as llm_metrics


In [7]:
from chatsky_llm_autoconfig.graph import Graph

In [8]:
from tqdm import tqdm

In [9]:
def read_json(path):
    """Load and parse a JSON file.

    Args:
        path: Location of the JSON file (anything accepted by ``open``).

    Returns:
        The decoded JSON document (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which would garble non-ASCII utterances on non-UTF-8 locales.
    with open(path, mode="r", encoding="utf-8") as file:
        return json.load(file)

In [10]:
%pwd

'/Users/yuriypeshkichev/Projects/ipavlov/chatsky-llm-autoconfig/experiments/2024.11.14_dialogue2graph'

In [10]:
# Evaluation cases; the loops in this notebook read "topic", "graph" and "dialogues" from each case.
data = read_json("../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/new_data.json")

In [11]:
# Chat model handed to every llm_metrics call in this notebook.
# NOTE(review): temperature=0 is presumably chosen to keep metric verdicts as repeatable as possible — confirm.
# NOTE(review): the repr of `model` further down shows a custom openai_api_base that is not set here;
# presumably it comes from the environment loaded by dotenv — confirm.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [12]:
from chatsky_llm_autoconfig.graph import BaseGraph
from chatsky_llm_autoconfig.dialogue import Dialogue

In [16]:
def all_utterances_present(G: BaseGraph, dialogues: list[Dialogue]) -> bool:
    """
    Check that every utterance stored in the graph occurs in at least one dialogue.

    Utterances are taken from the ``"utterances"`` attribute of every node and
    every edge of ``G.graph`` and compared, by exact string equality, with the
    ``text`` of every message of every dialogue.

    Args:
        G: BaseGraph object containing the dialogue graph
        dialogues: List of Dialogue objects to check against

    Returns:
        bool: True if all graph utterances are present in at least one dialogue
        (trivially True when the graph holds no utterances), False otherwise.

    Raises:
        KeyError: If a node or an edge has no ``"utterances"`` attribute.
    """

    def _as_collection(utterances):
        # A node/edge may carry either one utterance or a collection of them.
        # A bare string must be kept whole: feeding it to ``set.update`` would
        # split it into single characters.
        if isinstance(utterances, (list, tuple, set, frozenset)):
            return utterances
        return (utterances,)

    graph_utterances = set()

    # Node utterances (assistant side) and edge utterances (user side) are
    # normalised the same way so both accept a string or a collection.
    for _, node_data in G.graph.nodes(data=True):
        graph_utterances.update(_as_collection(node_data["utterances"]))
    for _, _, edge_data in G.graph.edges(data=True):
        graph_utterances.update(_as_collection(edge_data["utterances"]))

    # Every message text that appears anywhere in the supplied dialogues.
    dialogue_utterances = {
        message.text for dialogue in dialogues for message in dialogue.messages
    }

    return graph_utterances.issubset(dialogue_utterances)

In [15]:
import pandas as pd

In [18]:
%pwd

'/Users/yuriypeshkichev/Projects/ipavlov/chatsky-llm-autoconfig/experiments/2024.11.14_dialogue2graph'

In [21]:
# Score every case in `data`: two model-backed graph metrics plus the
# utterance-coverage check, one record per case.
results = []
for case in tqdm(data):
    case_results = {"topic": case["topic"]}

    # Transition validity; the metric result is read through "value" / "description".
    triplets = llm_metrics.are_triplets_valid(G=Graph(case["graph"]), model=model)
    case_results.update(
        are_triplets_valid=triplets["value"],
        are_triplets_valid_details=triplets["description"],
    )

    # Topic adherence, same result layout.
    themes = llm_metrics.is_theme_valid(G=Graph(case["graph"]), topic=case["topic"], model=model)
    case_results.update(
        is_theme_valid=themes["value"],
        is_theme_valid_details=themes["description"],
    )

    # Coverage of the graph's utterances by the case's own dialogues.
    case_results["all_utterances_present"] = all_utterances_present(
        G=Graph(case["graph"]),
        dialogues=[Dialogue(messages=entry["messages"]) for entry in case["dialogues"]],
    )

    results.append(case_results)

# Tabulate the records, show them, and write them to the working directory.
df = pd.DataFrame(results)
print(df)
df.to_csv("results.csv")

  0%|          | 0/10 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http

                             topic  are_triplets_valid  \
0                     Tech Support                True   
1                    food delivery               False   
2      Medical Appointment Booking                True   
3                          library                True   
4                        auto care                True   
5             booking a hotel room                True   
6                abstract purchase                True   
7  chatting with a smart assistant                True   
8          taking a loan in a bank               False   
9                      coffee shop               False   

                          are_triplets_valid_details  is_theme_valid  \
0                         All transitions are valid.            True   
1  Invalid transition: The sequence is invalid be...            True   
2                         All transitions are valid.            True   
3                         All transitions are valid.            True   
4




In [23]:
[r['is_theme_valid'] for r in results]

[True, True, True, True, True, True, True, True, True, True]

In [24]:
[r['all_utterances_present'] for r in results]

[True, False, True, True, True, True, True, True, True, False]

In [31]:
# Second evaluation set (complex_graphs.json), kept under its own name so `data` stays untouched here.
test_data = read_json("../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/complex_graphs.json")

In [30]:
case['dialogues']

[[{'text': 'Good evening, how can I help?', 'participant': 'assistant'},
  {'text': 'I want to book a duplex room', 'participant': 'user'},
  {'text': 'How long are you going to stay?', 'participant': 'assistant'},
  {'text': 'One night', 'participant': 'user'},
  {'text': 'We have a vacant room. Do you need anything else?',
   'participant': 'assistant'},
  {'text': 'No, thanks', 'participant': 'user'},
  {'text': 'Okay, now I need your ID card.', 'participant': 'assistant'},
  {'text': 'Here it is', 'participant': 'user'},
  {'text': 'Thank you. This is your key, have a good stay!',
   'participant': 'assistant'}],
 [{'text': 'Good evening, how can I help?', 'participant': 'assistant'},
  {'text': 'I want to book a duplex room', 'participant': 'user'},
  {'text': 'How long are you going to stay?', 'participant': 'assistant'},
  {'text': 'Three nights', 'participant': 'user'},
  {'text': 'Unfortunately we do not have a vacant room for these dates, but we can offer two separate single 

In [32]:
# Score every case in `test_data`: two model-backed graph metrics plus the
# utterance-coverage check, one record per case.
comp_results = []
for case in tqdm(test_data):
    case_results = {"topic": case["topic"]}

    # Transition validity; the metric result is read through "value" / "description".
    triplets = llm_metrics.are_triplets_valid(G=Graph(case["graph"]), model=model)
    case_results.update(
        are_triplets_valid=triplets["value"],
        are_triplets_valid_details=triplets["description"],
    )

    # Topic adherence, same result layout.
    themes = llm_metrics.is_theme_valid(G=Graph(case["graph"]), topic=case["topic"], model=model)
    case_results.update(
        is_theme_valid=themes["value"],
        is_theme_valid_details=themes["description"],
    )

    # Coverage of the graph's utterances by the case's own dialogues.
    case_results["all_utterances_present"] = all_utterances_present(
        G=Graph(case["graph"]),
        dialogues=[Dialogue(messages=entry["messages"]) for entry in case["dialogues"]],
    )

    comp_results.append(case_results)

# Tabulate the records, show them, and write them to the working directory.
df = pd.DataFrame(comp_results)
print(df)
df.to_csv("comp_results.csv")

  0%|          | 0/5 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/o

                             topic  are_triplets_valid  \
0             booking a hotel room                True   
1                abstract purchase               False   
2  chatting with a smart assistant                True   
3          taking a loan in a bank                True   
4                      coffee shop               False   

                          are_triplets_valid_details  is_theme_valid  \
0                         All transitions are valid.            True   
1  Invalid transition: The transition is invalid ...            True   
2                         All transitions are valid.            True   
3                         All transitions are valid.            True   
4  Invalid transition: The sequence is invalid be...            True   

                              is_theme_valid_details  all_utterances_present  
0  The dialog stays on the expected topic of book...                    True  
1  The dialog stays on the expected topic of abst...        




In [34]:
[r['is_theme_valid'] for r in comp_results]

[True, True, True, True, True]

In [35]:
[r['are_triplets_valid'] for r in comp_results]

[True, False, True, True, False]

In [36]:
[r['all_utterances_present'] for r in comp_results]

[True, True, True, True, True]

### gpt-4o-mini

In [39]:
# Score every case in `test_data` with the currently bound `model`; records
# go to `comp_mini_results` so they can be compared with other runs.
comp_mini_results = []
for case in tqdm(test_data):
    case_results = {"topic": case["topic"]}

    # Transition validity; the metric result is read through "value" / "description".
    triplets = llm_metrics.are_triplets_valid(G=Graph(case["graph"]), model=model)
    case_results.update(
        are_triplets_valid=triplets["value"],
        are_triplets_valid_details=triplets["description"],
    )

    # Topic adherence, same result layout.
    themes = llm_metrics.is_theme_valid(G=Graph(case["graph"]), topic=case["topic"], model=model)
    case_results.update(
        is_theme_valid=themes["value"],
        is_theme_valid_details=themes["description"],
    )

    # Coverage of the graph's utterances by the case's own dialogues.
    case_results["all_utterances_present"] = all_utterances_present(
        G=Graph(case["graph"]),
        dialogues=[Dialogue(messages=entry["messages"]) for entry in case["dialogues"]],
    )

    comp_mini_results.append(case_results)

# Tabulate the records, show them, and write them to the working directory.
df = pd.DataFrame(comp_mini_results)
print(df)
df.to_csv("comp_mini_results.csv")

  0%|          | 0/5 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Invalid transition: The assistant's first response offers an alternative (two separate single rooms) after stating that there are no vacant rooms. The user's response indicates that this alternative does not suit them, which logically leads to a conclusion 

                             topic  are_triplets_valid  \
0             booking a hotel room               False   
1                abstract purchase               False   
2  chatting with a smart assistant                True   
3          taking a loan in a bank               False   
4                      coffee shop               False   

                          are_triplets_valid_details  is_theme_valid  \
0  Invalid transition: The assistant's first resp...            True   
1  Invalid transition: The assistant's first resp...            True   
2                         All transitions are valid.            True   
3  Invalid transition: The assistant's question a...            True   
4  Invalid transition: The assistant's question '...            True   

                              is_theme_valid_details  all_utterances_present  
0  The dialog consistently revolves around the to...                    True  
1  The dialog stays on the expected topic of abst...        




In [40]:
[r['is_theme_valid'] for r in comp_mini_results]

[True, True, True, True, True]

In [41]:
[r['are_triplets_valid'] for r in comp_mini_results]

[False, False, True, False, False]

In [45]:
model

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x139406000>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x139406ba0>, root_client=<openai.OpenAI object at 0x1394041a0>, root_async_client=<openai.AsyncOpenAI object at 0x139404350>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://193.187.173.33:8002/api/providers/openai/v1')

In [42]:
# Score every case in `data` with the currently bound `model`; records go to
# `mini_results` so they can be compared with other runs.
mini_results = []
for case in tqdm(data):
    case_results = {"topic": case["topic"]}

    # Transition validity; the metric result is read through "value" / "description".
    triplets = llm_metrics.are_triplets_valid(G=Graph(case["graph"]), model=model)
    case_results.update(
        are_triplets_valid=triplets["value"],
        are_triplets_valid_details=triplets["description"],
    )

    # Topic adherence, same result layout.
    themes = llm_metrics.is_theme_valid(G=Graph(case["graph"]), topic=case["topic"], model=model)
    case_results.update(
        is_theme_valid=themes["value"],
        is_theme_valid_details=themes["description"],
    )

    # Coverage of the graph's utterances by the case's own dialogues.
    case_results["all_utterances_present"] = all_utterances_present(
        G=Graph(case["graph"]),
        dialogues=[Dialogue(messages=entry["messages"]) for entry in case["dialogues"]],
    )

    mini_results.append(case_results)

# Tabulate the records, show them, and write them to the working directory.
# NOTE(review): the saved output of this cell ends in FileNotFoundError for
# this relative path — presumably the kernel's working directory no longer
# existed at that point; confirm before re-running.
df = pd.DataFrame(mini_results)
print(df)
df.to_csv("mini_results.csv")

  0%|          | 0/10 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http

                             topic  are_triplets_valid  \
0                     Tech Support                True   
1                    food delivery               False   
2      Medical Appointment Booking                True   
3                          library                True   
4                        auto care               False   
5             booking a hotel room                True   
6                abstract purchase                True   
7  chatting with a smart assistant                True   
8          taking a loan in a bank               False   
9                      coffee shop               False   

                          are_triplets_valid_details  is_theme_valid  \
0                         All transitions are valid.            True   
1  Invalid transition: The assistant's first resp...            True   
2                         All transitions are valid.            True   
3                         All transitions are valid.            True   
4

FileNotFoundError: [Errno 2] No such file or directory: 'mini_results.csv'

In [43]:
[r['is_theme_valid'] for r in mini_results]

[True, True, True, True, True, True, True, True, True, True]

In [44]:
[r['are_triplets_valid'] for r in mini_results]

[True, False, True, True, False, True, True, True, False, False]

In [None]:
# Judge each individual dialogue of each case with is_dialogue_valid,
# producing one record per dialogue (the topic is repeated on every record).
dialogue_results = []
for case in tqdm(data):
    for dialogue in case["dialogues"]:
        case_results = {"topic": case["topic"]}
        res = llm_metrics.is_dialogue_valid(dialogue["messages"], model=model)
        # The metric result is read through "value" / "description".
        case_results.update(
            is_dialogue_valid=res["value"],
            is_dialogue_valid_details=res["description"],
        )
        dialogue_results.append(case_results)


  0%|          | 0/10 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
 10%|█         | 1/10 [00:09<01:27,  9.71s/it]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/

NameError: name 'pd' is not defined

In [14]:
dialogue_results

[{'topic': 'Tech Support',
  'is_dialogue_valid': True,
  'is_dialogue_valid_details': "The first message from the assistant naturally starts the dialogue by offering help, which is appropriate for a tech support scenario. The final message thanks the user for contacting support and wishes them a nice day, which logically concludes the conversation after the user's issue has been resolved. The dialogue appears to be logically finished."},
 {'topic': 'Tech Support',
  'is_dialogue_valid': True,
  'is_dialogue_valid_details': 'The first message naturally starts the dialogue by introducing tech support and offering assistance. The final message logically connects to the previous dialogue by thanking the user after their issue was resolved, indicating a successful conclusion to the conversation.'},
 {'topic': 'Tech Support',
  'is_dialogue_valid': True,
  'is_dialogue_valid_details': 'The first message from the assistant naturally starts the dialogue by offering help, which is appropriate 

In [16]:
# Tabulate the per-dialogue verdicts, show them, and write them to the
# current working directory.
df = pd.DataFrame(data=dialogue_results)
print(df)
df.to_csv(path_or_buf="dialogue_results.csv")

                              topic  is_dialogue_valid  \
0                      Tech Support               True   
1                      Tech Support               True   
2                      Tech Support               True   
3                      Tech Support               True   
4                      Tech Support               True   
5                      Tech Support               True   
6                     food delivery               True   
7                     food delivery               True   
8                     food delivery               True   
9                     food delivery               True   
10                    food delivery               True   
11      Medical Appointment Booking               True   
12      Medical Appointment Booking               True   
13      Medical Appointment Booking               True   
14      Medical Appointment Booking               True   
15                          library               True   
16            

In [17]:
# Per-dialogue validity for the complex_graphs set.
# NOTE: this rebinds `data`, which an earlier cell loaded from new_data.json;
# any cell run after this one that iterates `data` sees the complex set.
data = read_json(
    "../../dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/complex_graphs.json"
)

dialogue_results = []
for case in tqdm(data):
    for dialogue in case["dialogues"]:
        case_results = {"topic": case["topic"]}
        res = llm_metrics.is_dialogue_valid(dialogue["messages"], model=model)
        # The metric result is read through "value" / "description".
        case_results.update(
            is_dialogue_valid=res["value"],
            is_dialogue_valid_details=res["description"],
        )
        dialogue_results.append(case_results)

# Tabulate the verdicts, show them, and write them to the working directory.
df = pd.DataFrame(data=dialogue_results)
print(df)
df.to_csv(path_or_buf="dialogue_complex_results.csv")

  0%|          | 0/5 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
 20%|██        | 1/5 [00:07<00:30,  7.60s/it]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
 40%|████      | 2/5 [00:13<00:19,  6.34s/it]INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/

                              topic  is_dialogue_valid  \
0              booking a hotel room               True   
1              booking a hotel room               True   
2              booking a hotel room               True   
3                 abstract purchase               True   
4                 abstract purchase               True   
5                 abstract purchase              False   
6   chatting with a smart assistant              False   
7   chatting with a smart assistant               True   
8   chatting with a smart assistant               True   
9   chatting with a smart assistant               True   
10          taking a loan in a bank               True   
11          taking a loan in a bank              False   
12          taking a loan in a bank               True   
13          taking a loan in a bank              False   
14                      coffee shop              False   
15                      coffee shop               True   
16            


