In [1]:
import json
import dotenv

In [2]:
# Enable IPython's autoreload extension in mode 2 so edited project modules
# are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
from dialogue2graph import metrics
from dialogue2graph.pipelines.core.graph import Graph
from dialogue2graph.pipelines.model_storage import ModelStorage
from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType

In [4]:
# Load environment variables from a .env file; the recorded `True` is the
# call's return value. Presumably this supplies the LLM API credentials — confirm.
dotenv.load_dotenv()

True

In [5]:
# Model registry: filled with named models in the next cell and passed to
# every pipeline below as `model_storage`.
ms = ModelStorage()

In [None]:
# Register every model this notebook uses. Each row is
# (storage key, model type, config); the pipelines refer to models by key.
_MODEL_SPECS = (
    ("grouping_llm", "llm", {"model_name": "chatgpt-4o-latest", "temperature": 0}),
    ("extending_llm", "llm", {"model_name": "chatgpt-4o-latest", "temperature": 0}),
    ("filling_llm", "llm", {"model_name": "o3-mini", "temperature": 1}),
    ("formatting_llm", "llm", {"model_name": "gpt-4o-mini", "temperature": 0}),
    # Embedding model (type "emb"); its config names the device "cuda:0".
    ("sim_model", "emb", {"model_name": "BAAI/bge-m3", "device": "cuda:0"}),
)
for _key, _model_type, _config in _MODEL_SPECS:
    ms.add(key=_key, config=_config, model_type=_model_type)

In [25]:
from dialogue2graph.datasets.complex_dialogues import generation

In [29]:
# Sampler object invoked on the reference graph in a later cell.
# Presumably it samples dialogues by traversing a graph — confirm against the class.
sampler = generation.RecursiveDialogueSampler()

In [27]:
# Load the transfer example; later cells read its "true_graph" and "dialogs" keys.
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding (the other JSON loads in this notebook do the same).
with open("transfer_2.json", encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Run the sampler on the reference graph with the model stored under "grouping_llm".
# NOTE(review): the meaning of the positional `15` is not visible here —
# confirm against RecursiveDialogueSampler.invoke.
sampler.invoke(Graph(data["true_graph"]), ms.storage["grouping_llm"].model, 15)

### D2G Light pipeline

In [7]:
from dialogue2graph.pipelines.d2g_light.pipeline import D2GLightPipeline

# Build the Light pipeline. The string arguments are keys of models registered
# in `ms`; both evaluation hooks are set to metrics.DGEvalBase.
# `pipeline` is rebound by later sections, so cells must be run in order.
pipeline = D2GLightPipeline(
    name="d2g_light",
    model_storage=ms,
    filling_llm="filling_llm",
    formatting_llm="formatting_llm",
    sim_model="sim_model",
    step2_evals=metrics.DGEvalBase,
    end_evals=metrics.DGEvalBase,
)

In [None]:
# Input layout 1: a single dialogue given as a flat list of message dicts,
# each with a "text" and a "participant" ("assistant" or "user").
data = [
    {"text": "Hey there! How can I help you today?", "participant": "assistant"},
    {"text": "I need to book a ride to the airport.", "participant": "user"},
    {
        "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
        "participant": "assistant",
    },
    {"text": "Do you have any other options?", "participant": "user"},
    {
        "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
        "participant": "assistant",
    },
    {"text": "No, I'll manage on my own.", "participant": "user"},
    {"text": "No worries! Feel free to reach out anytime.", "participant": "assistant"},
    {"text": "Alright, thanks anyway.", "participant": "user"},
    {"text": "You're welcome! Have a fantastic trip!", "participant": "assistant"},
]
# Wrap the dialogue, run the current pipeline, and draw the resulting graph
# under the title "transfer". `graph` and `report` are inspected by later cells.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Display the dictionary form of the graph returned by the latest pipeline run.
graph.graph_dict

In [None]:
# Input layout 2: several dialogues given as a list of message lists.
# The two dialogues share their first five turns and diverge from the
# user's third message onward.
data = [
    [
        {"text": "Hey there! How can I help you today?", "participant": "assistant"},
        {"text": "I need to book a ride to the airport.", "participant": "user"},
        {
            "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
            "participant": "assistant",
        },
        {"text": "Do you have any other options?", "participant": "user"},
        {
            "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
            "participant": "assistant",
        },
        {"text": "Actually, never mind.", "participant": "user"},
        {
            "text": "Alright, let me know if you need help later. Have a great day!",
            "participant": "assistant",
        },
        {"text": "Okay, have a great day!", "participant": "user"},
        {"text": "Glad to help! Safe travels.", "participant": "assistant"},
    ],
    [
        {"text": "Hey there! How can I help you today?", "participant": "assistant"},
        {"text": "I need to book a ride to the airport.", "participant": "user"},
        {
            "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
            "participant": "assistant",
        },
        {"text": "Do you have any other options?", "participant": "user"},
        {
            "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
            "participant": "assistant",
        },
        {"text": "No, I'll manage on my own.", "participant": "user"},
        {
            "text": "No worries! Feel free to reach out anytime.",
            "participant": "assistant",
        },
        {"text": "Alright, thanks anyway.", "participant": "user"},
        {"text": "You're welcome! Have a fantastic trip!", "participant": "assistant"},
    ],
]
# Run the current pipeline on both dialogues at once and draw the result.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Display the dictionary form of the graph built from the two dialogues.
graph.graph_dict

In [None]:
# Input layout 3: a list of dicts, each holding one dialogue's message list
# under the "messages" key.
data = [
    {
        "messages": [
            {
                "text": "Hey there! How can I help you today?",
                "participant": "assistant",
            },
            {"text": "I need to book a ride to the airport.", "participant": "user"},
            {
                "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
                "participant": "assistant",
            },
            {"text": "Do you have any other options?", "participant": "user"},
            {
                "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
                "participant": "assistant",
            },
            {"text": "No, I'll manage on my own.", "participant": "user"},
            {
                "text": "No worries! Feel free to reach out anytime.",
                "participant": "assistant",
            },
            {"text": "Alright, thanks anyway.", "participant": "user"},
            {
                "text": "You're welcome! Have a fantastic trip!",
                "participant": "assistant",
            },
        ]
    }
]
# Run the current pipeline on this layout and draw the result.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Display the current value of `data` (whatever the last executed cell bound it to).
data

In [None]:
# Display the dictionary form of the graph returned by the latest pipeline run.
graph.graph_dict

In [None]:
# Show the reference ("true") graph of the transfer example for comparison
# with the generated graph.
# The graph is loaded here rather than read from `data`: on a top-to-bottom
# run `data` holds the in-memory dialogue list from the cells above, so
# indexing it with "true_graph" would raise TypeError.
# NOTE(review): transfer_2.json is assumed to be the intended source, since it
# is the file the other "transfer" cells read "true_graph" from — confirm.
with open("transfer_2.json", encoding="utf-8") as f:
    transfer_example = json.load(f)
gr = Graph(transfer_example["true_graph"])
gr.graph_dict

In [47]:
# Display the report returned alongside the graph by the latest pipeline.invoke call.
report

PipelineReport(service='d2g_light', properties={'time': 2.1800806522369385, 'simple_graph_comparison': True}, subreports=[{}])

In [None]:
# Display the dictionary form of the graph returned by the latest pipeline run.
graph.graph_dict

In [None]:
# Run the current pipeline on the dialogues stored in transfer_2.json,
# without supplying a reference graph.
# Decode explicitly as UTF-8 so the load does not depend on the platform's
# default text encoding.
with open("transfer_2.json", encoding="utf-8") as f:
    data = json.load(f)

raw_data = PipelineRawDataType(dialogs=data["dialogs"])
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Run the current pipeline on the dialogues from transfer_2.json and also pass
# the file's reference graph as `true_graph`.
# Decode explicitly as UTF-8 so the load does not depend on the platform's
# default text encoding.
with open("transfer_2.json", encoding="utf-8") as f:
    data = json.load(f)

raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [36]:
# Display the report of the latest run; the recorded output carries the
# 'time' and 'simple_graph_comparison' properties.
report

PipelineReport(service='d2g_light', properties={'time': 35.30027651786804, 'simple_graph_comparison': False}, subreports=[{}])

In [None]:
# Same example, but both `dialogs` and `true_graph` are given as the path of
# the JSON file instead of already-loaded objects.
raw_data = PipelineRawDataType(dialogs="transfer_2.json", true_graph="transfer_2.json")
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [24]:
# Display the report of the latest run.
report

PipelineReport(service='d2g_light', properties={'time': 37.24576377868652, 'simple_graph_comparison': True}, subreports=[{}])

In [None]:
# Invoke the pipeline again on the unchanged `raw_data` and redraw the graph.
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Load the calendar-sync example and draw its reference graph.
# `data` stays bound to this file's content for the cells that follow.
with open("test_graph_1.json", encoding="utf-8") as f:
    data = json.load(f)
Graph(data["true_graph"]).visualise_short(
    "Fixing a calendar sync delay with time zones"
)

In [None]:
# Run the current pipeline on the calendar-sync example, passing the JSON
# file path as `dialogs`, and draw the generated graph under the same title.
raw_data = PipelineRawDataType(dialogs="test_graph_1.json")
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("Fixing a calendar sync delay with time zones")

In [None]:
# Run again with the reference graph attached and enable_evals=True, then
# display the report. Presumably this flag activates the step2_evals/end_evals
# configured on the pipeline — confirm against the pipeline's invoke().
raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data, enable_evals=True)
report

### D2G LLM pipeline

In [64]:
from dialogue2graph.pipelines.d2g_llm.pipeline import D2GLLMPipeline

# Rebind `pipeline` to the LLM pipeline; every cell in this section uses it.
# Compared with the Light pipeline it takes one more model key, grouping_llm.
# All string arguments are keys of models registered in `ms`.
pipeline = D2GLLMPipeline(
    name="d2g_llm",
    model_storage=ms,
    grouping_llm="grouping_llm",
    filling_llm="filling_llm",
    formatting_llm="formatting_llm",
    sim_model="sim_model",
    step2_evals=metrics.DGEvalBase,
    end_evals=metrics.DGEvalBase,
)

In [None]:
# Run the current pipeline with `dialogs` given as the path "transfer.json"
# and draw the resulting graph.
raw_data = PipelineRawDataType(dialogs="transfer.json")
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input layout 1: a single dialogue given as a flat list of message dicts,
# each with a "text" and a "participant" ("assistant" or "user").
data = [
    {"text": "Hey there! How can I help you today?", "participant": "assistant"},
    {"text": "I need to book a ride to the airport.", "participant": "user"},
    {
        "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
        "participant": "assistant",
    },
    {"text": "Do you have any other options?", "participant": "user"},
    {
        "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
        "participant": "assistant",
    },
    {"text": "No, I'll manage on my own.", "participant": "user"},
    {"text": "No worries! Feel free to reach out anytime.", "participant": "assistant"},
    {"text": "Alright, thanks anyway.", "participant": "user"},
    {"text": "You're welcome! Have a fantastic trip!", "participant": "assistant"},
]
# Wrap the dialogue, run the current pipeline, and draw the resulting graph.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input layout 2: several dialogues given as a list of message lists.
# The two dialogues share their first five turns and diverge from the
# user's third message onward.
data = [
    [
        {"text": "Hey there! How can I help you today?", "participant": "assistant"},
        {"text": "I need to book a ride to the airport.", "participant": "user"},
        {
            "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
            "participant": "assistant",
        },
        {"text": "Do you have any other options?", "participant": "user"},
        {
            "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
            "participant": "assistant",
        },
        {"text": "Actually, never mind.", "participant": "user"},
        {
            "text": "Alright, let me know if you need help later. Have a great day!",
            "participant": "assistant",
        },
        {"text": "Okay, have a great day!", "participant": "user"},
        {"text": "Glad to help! Safe travels.", "participant": "assistant"},
    ],
    [
        {"text": "Hey there! How can I help you today?", "participant": "assistant"},
        {"text": "I need to book a ride to the airport.", "participant": "user"},
        {
            "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
            "participant": "assistant",
        },
        {"text": "Do you have any other options?", "participant": "user"},
        {
            "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
            "participant": "assistant",
        },
        {"text": "No, I'll manage on my own.", "participant": "user"},
        {
            "text": "No worries! Feel free to reach out anytime.",
            "participant": "assistant",
        },
        {"text": "Alright, thanks anyway.", "participant": "user"},
        {"text": "You're welcome! Have a fantastic trip!", "participant": "assistant"},
    ],
]
# Run the current pipeline on both dialogues at once and draw the result.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input layout 3: a list of dicts, each holding one dialogue's message list
# under the "messages" key.
data = [
    {
        "messages": [
            {
                "text": "Hey there! How can I help you today?",
                "participant": "assistant",
            },
            {"text": "I need to book a ride to the airport.", "participant": "user"},
            {
                "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
                "participant": "assistant",
            },
            {"text": "Do you have any other options?", "participant": "user"},
            {
                "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
                "participant": "assistant",
            },
            {"text": "No, I'll manage on my own.", "participant": "user"},
            {
                "text": "No worries! Feel free to reach out anytime.",
                "participant": "assistant",
            },
            {"text": "Alright, thanks anyway.", "participant": "user"},
            {
                "text": "You're welcome! Have a fantastic trip!",
                "participant": "assistant",
            },
        ]
    }
]
# Run the current pipeline on this layout and draw the result.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Load transfer.json and pass the parsed content as `dialogs` unchanged.
# Decode explicitly as UTF-8 so the load does not depend on the platform's
# default text encoding.
with open("transfer.json", encoding="utf-8") as f:
    data = json.load(f)
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Run the current pipeline on the calendar-sync example with its reference
# graph attached and enable_evals=True, then display the report.
# Presumably the flag activates the evals configured on the pipeline — confirm.
with open("test_graph_1.json", encoding="utf-8") as f:
    data = json.load(f)
raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data, enable_evals=True)
report

### D2G LLM extender

In [30]:
from dialogue2graph.pipelines.d2g_extender.pipeline import D2GExtenderPipeline

# Rebind `pipeline` to the extender pipeline; every cell in this section uses it.
# All string arguments are keys of models registered in `ms`.
# `extending_llm` points at the dedicated "extending_llm" entry registered in
# the model storage; it previously reused the "filling_llm" key, which left
# the registered extending model unused.
# Step-1 and extender stages use PreDGEvalBase; step-2 and final stages use DGEvalBase.
pipeline = D2GExtenderPipeline(
    name="d2g_ext",
    model_storage=ms,
    extending_llm="extending_llm",
    filling_llm="filling_llm",
    formatting_llm="formatting_llm",
    sim_model="sim_model",
    step1_evals=metrics.PreDGEvalBase,
    extender_evals=metrics.PreDGEvalBase,
    step2_evals=metrics.DGEvalBase,
    end_evals=metrics.DGEvalBase,
)

In [None]:
# Input layout 1: a single dialogue given as a flat list of message dicts,
# each with a "text" and a "participant" ("assistant" or "user").
data = [
    {"text": "Hey there! How can I help you today?", "participant": "assistant"},
    {"text": "I need to book a ride to the airport.", "participant": "user"},
    {
        "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
        "participant": "assistant",
    },
    {"text": "Do you have any other options?", "participant": "user"},
    {
        "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
        "participant": "assistant",
    },
    {"text": "No, I'll manage on my own.", "participant": "user"},
    {"text": "No worries! Feel free to reach out anytime.", "participant": "assistant"},
    {"text": "Alright, thanks anyway.", "participant": "user"},
    {"text": "You're welcome! Have a fantastic trip!", "participant": "assistant"},
]
# Wrap the dialogue, run the current pipeline, and draw the resulting graph.
# The next cell passes this `graph` on as its `supported_graph`.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# One further dialogue (wrapped in an outer list) whose ending differs from
# the dialogue used in the previous cell.
data = [
    [
        {"text": "Hey there! How can I help you today?", "participant": "assistant"},
        {"text": "I need to book a ride to the airport.", "participant": "user"},
        {
            "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
            "participant": "assistant",
        },
        {"text": "Do you have any other options?", "participant": "user"},
        {
            "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
            "participant": "assistant",
        },
        {"text": "Actually, never mind.", "participant": "user"},
        {
            "text": "Alright, let me know if you need help later. Have a great day!",
            "participant": "assistant",
        },
        {"text": "Okay, have a great day!", "participant": "user"},
        {"text": "Glad to help! Safe travels.", "participant": "assistant"},
    ]
]
# `supported_graph` is the dict form of the graph returned by the previous
# pipeline run, so this cell depends on `graph` from the cell before it.
# Presumably the pipeline extends that graph with the new dialogue — confirm.
raw_data = PipelineRawDataType(dialogs=data, supported_graph=graph.graph_dict)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input layout 3: a list of dicts, each holding one dialogue's message list
# under the "messages" key. No supported graph is passed here.
data = [
    {
        "messages": [
            {
                "text": "Hey there! How can I help you today?",
                "participant": "assistant",
            },
            {"text": "I need to book a ride to the airport.", "participant": "user"},
            {
                "text": "Sure! I can help with that. When is your flight, and where are you departing from?",
                "participant": "assistant",
            },
            {"text": "Do you have any other options?", "participant": "user"},
            {
                "text": "If you'd prefer, I can send you options for ride-share services instead. Would you like that?",
                "participant": "assistant",
            },
            {"text": "No, I'll manage on my own.", "participant": "user"},
            {
                "text": "No worries! Feel free to reach out anytime.",
                "participant": "assistant",
            },
            {"text": "Alright, thanks anyway.", "participant": "user"},
            {
                "text": "You're welcome! Have a fantastic trip!",
                "participant": "assistant",
            },
        ]
    }
]
# Run the current pipeline on this layout and draw the result.
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [17]:
# Display the reference graph of the calendar-sync example as a dict.
# The example is loaded in this cell because, on a top-to-bottom run, `data`
# still holds the in-memory dialogue list from the preceding cell and
# indexing it with "true_graph" would raise TypeError.
with open("test_graph_1.json", encoding="utf-8") as f:
    data = json.load(f)
Graph(data["true_graph"]).graph_dict

{'edges': [{'source': 1,
   'target': 2,
   'utterances': ["I'm having trouble with my calendar not syncing properly."]},
  {'source': 2,
   'target': 3,
   'utterances': ["Yes, I checked but it still doesn't sync."]},
  {'source': 3,
   'target': 4,
   'utterances': ['Alright, please guide me through the steps.']},
  {'source': 4,
   'target': 6,
   'utterances': ["I've followed the steps and it seems to be working now."]},
  {'source': 2,
   'target': 8,
   'utterances': ["I think I'll handle it later. Thanks anyway."]},
  {'source': 4,
   'target': 7,
   'utterances': ['I want to change the time zone instead.']},
  {'source': 7,
   'target': 4,
   'utterances': ["Sure, let's adjust the time zone settings."]},
  {'source': 5,
   'target': 6,
   'utterances': ["After adjusting, it still doesn't sync."]},
  {'source': 3,
   'target': 5,
   'utterances': ['Time zone settings are correct, but the issue persists.']},
  {'source': 6,
   'target': 8,
   'utterances': ["Actually, it's still 

In [None]:
# Load the calendar-sync example and draw its reference graph.
# `data` stays bound to this file's content for the cells that follow.
with open("test_graph_1.json", encoding="utf-8") as f:
    data = json.load(f)
Graph(data["true_graph"]).visualise_short(
    "Fixing a calendar sync delay with time zones"
)

In [None]:
# Run the current pipeline on the calendar-sync dialogues with the reference
# graph attached and enable_evals=True, then display the report.
# Presumably the flag activates the evals configured on the pipeline — confirm.
raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data, enable_evals=True)
report

In [18]:
# Display the generated graph as a dict; the recorded output has 'edges' and 'nodes' keys.
graph.graph_dict

{'edges': [{'source': 1,
   'target': 4,
   'utterances': ['alright, please guide me through the steps.']},
  {'source': 2,
   'target': 3,
   'utterances': ["i'm having trouble with my calendar not syncing properly."]},
  {'source': 3,
   'target': 1,
   'utterances': ["yes, i checked but it still doesn't sync."]},
  {'source': 4,
   'target': 5,
   'utterances': ['i want to change the time zone instead.']},
  {'source': 4,
   'target': 6,
   'utterances': ["i've followed the steps and it seems to be working now."]},
  {'source': 5,
   'target': 4,
   'utterances': ["sure, let's adjust the time zone settings."]},
  {'source': 6,
   'target': 7,
   'utterances': ["actually, it's still not syncing. i want to try something else."]},
  {'source': 1,
   'target': 8,
   'utterances': ['time zone settings are correct, but the issue persists.']},
  {'source': 8,
   'target': 6,
   'utterances': ["after adjusting, it still doesn't sync."]}],
 'nodes': [{'id': 1,
   'label': '',
   'is_start': 

In [None]:
# Draw the generated graph under the same title used for the reference graph.
graph.visualise_short("Fixing a calendar sync delay with time zones")

In [50]:
# Load the metrics test data into `data_2`; later cells index it by position
# (data_2[2]) and read that entry's "dialogues" key.
with open("test_metrics_data.json", encoding="utf-8") as f:
    data_2 = json.load(f)

In [None]:
# Display the third entry of the metrics test data.
data_2[2]

In [None]:
# Run the current pipeline with evals enabled on the dialogues of the third
# metrics-data entry, then display the report.
# NOTE(review): the dialogues come from `data_2` (test_metrics_data.json) but
# `true_graph` comes from `data`, which the cells above load from
# test_graph_1.json — confirm this pairing is intended and that the reference
# graph should not be taken from data_2[2] as well.
raw_data = PipelineRawDataType(
    dialogs=data_2[2]["dialogues"], true_graph=data["true_graph"]
)
graph, report = pipeline.invoke(raw_data, enable_evals=True)
report