In [1]:
import json
import os
import dotenv

In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to library sources are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from dialogue2graph import metrics
from dialogue2graph.pipelines.core.graph import Graph
from dialogue2graph.pipelines.model_storage import ModelStorage
from dialogue2graph.pipelines.helpers.parse_data import PipelineRawDataType

In [4]:
# Load variables from a .env file into the process environment (python-dotenv).
# Presumably this is where the OPENAI_* variables read via os.getenv in the
# model-registration cell are defined -- confirm against your local .env.
dotenv.load_dotenv()

True

In [5]:
# Shared model registry: the next cell fills it via ms.add(...), and every
# Pipeline below receives it as `model_storage`.
ms = ModelStorage()

In [None]:
# Register the four chat models. The entries differ only in storage key,
# model name and temperature; the API key and base URL are read from the
# environment for each of them.
for llm_key, llm_name, llm_temperature in (
    ("grouping_llm", "chatgpt-4o-latest", 0),
    ("extending_llm", "chatgpt-4o-latest", 0),
    ("filling_llm", "o3-mini", 1),
    ("formatting_llm", "gpt-4o-mini", 0),
):
    ms.add(
        key=llm_key,
        config={
            "model_name": llm_name,
            "temperature": llm_temperature,
            "api_key": os.getenv("OPENAI_API_KEY"),
            "base_url": os.getenv("OPENAI_BASE_URL"),
        },
        model_type="llm",
    )

# Embedding model, registered with model_type="emb" and device "cuda:0".
ms.add(
    key="sim_model",
    model_type="emb",
    config={"model_name": "BAAI/bge-m3", "device": "cuda:0"},
)

### D2G Algo pipeline

In [None]:
from dialogue2graph.pipelines.d2g_light.pipeline import Pipeline

# Build the d2g_light pipeline on top of the shared model storage `ms`.
pipeline = Pipeline(
    name="d2g_light",
    model_storage=ms,
    # each model argument is given the storage key of the same name
    **{slot: slot for slot in ("filling_llm", "formatting_llm", "sim_model")},
    # the same evaluation class is passed for both step2_evals and end_evals
    **dict.fromkeys(("step2_evals", "end_evals"), metrics.DGEvalBase),
)

In [None]:
# Input form 1: a single dialog given as a flat list of message dicts.
# Turns are written as (participant, text) pairs and expanded into
# {"text": ..., "participant": ...} dicts.
data = [
    {"text": utterance, "participant": speaker}
    for speaker, utterance in [
        ("assistant", "Hey there! How can I help you today?"),
        ("user", "I need to book a ride to the airport."),
        ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
        ("user", "Do you have any other options?"),
        ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
        ("user", "No, I'll manage on my own."),
        ("assistant", "No worries! Feel free to reach out anytime."),
        ("user", "Alright, thanks anyway."),
        ("assistant", "You're welcome! Have a fantastic trip!"),
    ]
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input form 2: several dialogs, each one a list of message dicts.
# The two dialogs share their first five turns and then diverge.
data = [
    [{"text": utterance, "participant": speaker} for speaker, utterance in dialog]
    for dialog in [
        [
            ("assistant", "Hey there! How can I help you today?"),
            ("user", "I need to book a ride to the airport."),
            ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
            ("user", "Do you have any other options?"),
            ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
            ("user", "Actually, never mind."),
            ("assistant", "Alright, let me know if you need help later. Have a great day!"),
            ("user", "Okay, have a great day!"),
            ("assistant", "Glad to help! Safe travels."),
        ],
        [
            ("assistant", "Hey there! How can I help you today?"),
            ("user", "I need to book a ride to the airport."),
            ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
            ("user", "Do you have any other options?"),
            ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
            ("user", "No, I'll manage on my own."),
            ("assistant", "No worries! Feel free to reach out anytime."),
            ("user", "Alright, thanks anyway."),
            ("assistant", "You're welcome! Have a fantastic trip!"),
        ],
    ]
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input form 3: a list of dialog objects, each holding its turns under the
# "messages" key.
data = [
    {
        "messages": [
            {"text": utterance, "participant": speaker}
            for speaker, utterance in [
                ("assistant", "Hey there! How can I help you today?"),
                ("user", "I need to book a ride to the airport."),
                ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
                ("user", "Do you have any other options?"),
                ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
                ("user", "No, I'll manage on my own."),
                ("assistant", "No worries! Feel free to reach out anytime."),
                ("user", "Alright, thanks anyway."),
                ("assistant", "You're welcome! Have a fantastic trip!"),
            ]
        ]
    }
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Input form 4: dialogs parsed from a JSON file on disk.
# The encoding is stated explicitly, as for the other JSON files opened in
# this notebook, so the read does not depend on the platform's locale default.
with open("transfer.json", encoding="utf-8") as f:
    data = json.load(f)

raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Read the test fixture and draw its reference graph ("true_graph").
# `data` is kept: a later cell reads its "dialogs" and "true_graph" entries.
with open("test_graph_1.json", encoding="utf-8") as graph_file:
    data = json.loads(graph_file.read())
Graph(data["true_graph"]).visualise_short("Fixing a calendar sync delay with time zones")

In [None]:
# Input form 5: `dialogs` is given as a file name instead of in-memory data.
# Presumably PipelineRawDataType / the pipeline reads the dialogs from that
# file -- confirm in dialogue2graph.pipelines.helpers.parse_data.
raw_data = PipelineRawDataType(dialogs="test_graph_1.json")
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("Fixing a calendar sync delay with time zones")

In [None]:
# Run with evaluations switched on. `data` must still hold the contents of
# test_graph_1.json loaded two cells above; its "true_graph" entry is passed
# along with the dialogs.
raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data, enable_evals=True)
# Show the report returned by invoke() as the cell output.
report

### D2G LLM pipeline

In [13]:
from dialogue2graph.pipelines.d2g_llm.pipeline import Pipeline

# Build the d2g_llm pipeline; from here on `pipeline` and `Pipeline` refer to
# this variant rather than to d2g_light.
pipeline = Pipeline(
    name="d2g_llm",
    model_storage=ms,
    # each model argument is given the storage key of the same name
    **{
        slot: slot
        for slot in ("grouping_llm", "filling_llm", "formatting_llm", "sim_model")
    },
    # the same evaluation class is passed for both step2_evals and end_evals
    **dict.fromkeys(("step2_evals", "end_evals"), metrics.DGEvalBase),
)

In [None]:
# Single dialog as a flat list of message dicts, run through the current
# `pipeline`. Turns are written as (participant, text) pairs.
data = [
    {"text": utterance, "participant": speaker}
    for speaker, utterance in [
        ("assistant", "Hey there! How can I help you today?"),
        ("user", "I need to book a ride to the airport."),
        ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
        ("user", "Do you have any other options?"),
        ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
        ("user", "No, I'll manage on my own."),
        ("assistant", "No worries! Feel free to reach out anytime."),
        ("user", "Alright, thanks anyway."),
        ("assistant", "You're welcome! Have a fantastic trip!"),
    ]
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Two dialogs, each a list of message dicts; they share the first five turns
# and then diverge.
data = [
    [{"text": utterance, "participant": speaker} for speaker, utterance in dialog]
    for dialog in [
        [
            ("assistant", "Hey there! How can I help you today?"),
            ("user", "I need to book a ride to the airport."),
            ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
            ("user", "Do you have any other options?"),
            ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
            ("user", "Actually, never mind."),
            ("assistant", "Alright, let me know if you need help later. Have a great day!"),
            ("user", "Okay, have a great day!"),
            ("assistant", "Glad to help! Safe travels."),
        ],
        [
            ("assistant", "Hey there! How can I help you today?"),
            ("user", "I need to book a ride to the airport."),
            ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
            ("user", "Do you have any other options?"),
            ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
            ("user", "No, I'll manage on my own."),
            ("assistant", "No worries! Feel free to reach out anytime."),
            ("user", "Alright, thanks anyway."),
            ("assistant", "You're welcome! Have a fantastic trip!"),
        ],
    ]
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# One dialog object whose turns are stored under the "messages" key.
data = [
    {
        "messages": [
            {"text": utterance, "participant": speaker}
            for speaker, utterance in [
                ("assistant", "Hey there! How can I help you today?"),
                ("user", "I need to book a ride to the airport."),
                ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
                ("user", "Do you have any other options?"),
                ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
                ("user", "No, I'll manage on my own."),
                ("assistant", "No worries! Feel free to reach out anytime."),
                ("user", "Alright, thanks anyway."),
                ("assistant", "You're welcome! Have a fantastic trip!"),
            ]
        ]
    }
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Dialogs parsed from a JSON file on disk.
# The encoding is stated explicitly, as for the other JSON files opened in
# this notebook, so the read does not depend on the platform's locale default.
with open("transfer.json", encoding="utf-8") as f:
    data = json.load(f)
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Load the fixture holding both the dialogs and their reference graph, then
# run the current `pipeline` with evaluations enabled and display the report.
with open("test_graph_1.json", encoding="utf-8") as fixture_file:
    data = json.loads(fixture_file.read())
raw_data = PipelineRawDataType(
    dialogs=data["dialogs"],
    true_graph=data["true_graph"],
)
graph, report = pipeline.invoke(raw_data, enable_evals=True)
report

### D2G LLM extender pipeline

In [16]:
from dialogue2graph.pipelines.d2g_extender.pipeline import Pipeline

# Build the d2g_ext pipeline; from here on `pipeline` and `Pipeline` refer to
# the extender variant.
pipeline = Pipeline(
    name="d2g_ext",
    model_storage=ms,
    # each model argument is given the storage key of the same name
    **{
        slot: slot
        for slot in ("extending_llm", "filling_llm", "formatting_llm", "sim_model")
    },
    # PreDGEvalBase is passed for step1_evals and extender_evals,
    # DGEvalBase for step2_evals and end_evals
    **dict.fromkeys(("step1_evals", "extender_evals"), metrics.PreDGEvalBase),
    **dict.fromkeys(("step2_evals", "end_evals"), metrics.DGEvalBase),
)

In [None]:
# Single dialog as a flat list of message dicts. The `graph` produced here is
# read by the next cell (as graph.graph_dict).
data = [
    {"text": utterance, "participant": speaker}
    for speaker, utterance in [
        ("assistant", "Hey there! How can I help you today?"),
        ("user", "I need to book a ride to the airport."),
        ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
        ("user", "Do you have any other options?"),
        ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
        ("user", "No, I'll manage on my own."),
        ("assistant", "No worries! Feel free to reach out anytime."),
        ("user", "Alright, thanks anyway."),
        ("assistant", "You're welcome! Have a fantastic trip!"),
    ]
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# One further dialog (wrapped in an outer list) that diverges from the
# previous one after the fifth turn.
data = [
    [{"text": utterance, "participant": speaker} for speaker, utterance in dialog]
    for dialog in [
        [
            ("assistant", "Hey there! How can I help you today?"),
            ("user", "I need to book a ride to the airport."),
            ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
            ("user", "Do you have any other options?"),
            ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
            ("user", "Actually, never mind."),
            ("assistant", "Alright, let me know if you need help later. Have a great day!"),
            ("user", "Okay, have a great day!"),
            ("assistant", "Glad to help! Safe travels."),
        ],
    ]
]
# The graph returned by the previous invoke() is handed over as
# `supported_graph`, so this cell requires that an earlier cell has set `graph`.
raw_data = PipelineRawDataType(dialogs=data, supported_graph=graph.graph_dict)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# One dialog object whose turns are stored under the "messages" key.
data = [
    {
        "messages": [
            {"text": utterance, "participant": speaker}
            for speaker, utterance in [
                ("assistant", "Hey there! How can I help you today?"),
                ("user", "I need to book a ride to the airport."),
                ("assistant", "Sure! I can help with that. When is your flight, and where are you departing from?"),
                ("user", "Do you have any other options?"),
                ("assistant", "If you'd prefer, I can send you options for ride-share services instead. Would you like that?"),
                ("user", "No, I'll manage on my own."),
                ("assistant", "No worries! Feel free to reach out anytime."),
                ("user", "Alright, thanks anyway."),
                ("assistant", "You're welcome! Have a fantastic trip!"),
            ]
        ]
    }
]
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Dialogs parsed from a JSON file on disk.
# The encoding is stated explicitly, as for the other JSON files opened in
# this notebook, so the read does not depend on the platform's locale default.
with open("transfer.json", encoding="utf-8") as f:
    data = json.load(f)
raw_data = PipelineRawDataType(dialogs=data)
graph, report = pipeline.invoke(raw_data)
graph.visualise_short("transfer")

In [None]:
# Read the test fixture and draw its reference graph ("true_graph").
# `data` is kept: the following cells read its "dialogs" and "true_graph".
with open("test_graph_1.json", encoding="utf-8") as graph_file:
    data = json.loads(graph_file.read())
Graph(data["true_graph"]).visualise_short("Fixing a calendar sync delay with time zones")

In [None]:
# Run with evaluations switched on. `data` must still hold the contents of
# test_graph_1.json loaded in the previous cell; its "true_graph" entry is
# passed along with the dialogs.
raw_data = PipelineRawDataType(dialogs=data["dialogs"], true_graph=data["true_graph"])
graph, report = pipeline.invoke(raw_data, enable_evals=True)
# Show the report returned by invoke() as the cell output.
report

In [23]:
# Second fixture, stored under its own name so that `data` (test_graph_1.json)
# stays available to later cells.
with open("test_metrics_data.json", encoding="utf-8") as metrics_file:
    data_2 = json.loads(metrics_file.read())

In [None]:
# Display the record at index 2 of test_metrics_data.json; the next cell
# feeds its "dialogues" entry to the pipeline.
data_2[2]

In [None]:
# Evaluate the pipeline on the dialogs of record 2 from test_metrics_data.json
# (note the key there is "dialogues", not "dialogs").
# NOTE(review): true_graph is taken from `data` (test_graph_1.json, loaded in
# an earlier cell), not from data_2[2] -- confirm that this pairing of dialogs
# and reference graph is intended.
raw_data = PipelineRawDataType(
    dialogs=data_2[2]["dialogues"], true_graph=data["true_graph"]
)
graph, report = pipeline.invoke(raw_data, enable_evals=True)
# Show the report returned by invoke() as the cell output.
report