In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to local .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# A literal "~" is only expanded by shells, not by Python file APIs, so the
# home directory is resolved here before the path is published to the
# environment for the settings loader.
os.environ["PATH_TO_ENV"] = os.path.expanduser(
    "~/projects/chatsky-llm-autoconfig/.env"
)

In [None]:
!python -m spacy download en_core_web_sm

In [None]:
from dialogue2graph.datasets.complex_dialogues.generation import LoopedGraphGenerator
from three_stages_embedder import ThreeStagesGraphGenerator as GrouperGenerator

In [None]:
from dialogue2graph.pipelines.core.graph import Graph
from dialogue2graph.pipelines.core.dialogue import Dialogue
from dialogue2graph.pipelines.core.dialogue_sampling import RecursiveDialogueSampler
from dialogue2graph.metrics.llm_metrics import compare_graphs

In [5]:
from settings import EnvSettings

# Project settings object; OPENAI_API_KEY and OPENAI_BASE_URL are read from it
# when the chat models are built below.
# NOTE(review): presumably populated from the file named by PATH_TO_ENV — confirm.
env_settings = EnvSettings()

In [6]:
from langchain.chat_models import ChatOpenAI

In [None]:
!conda install pygraphviz --y

In [7]:
# Sampler used further down (via sampler.invoke) to draw dialogues from a Graph.
sampler = RecursiveDialogueSampler()

### Generating graph from topic

In [None]:
# Connection settings shared by all three chat models.
_openai_connection = {
    "api_key": env_settings.OPENAI_API_KEY,
    "base_url": env_settings.OPENAI_BASE_URL,
}

# Model that writes the graph; sampled at temperature 1.
generation_model = ChatOpenAI(
    model="chatgpt-4o-latest", temperature=1, **_openai_connection
)

# Both validators use the same model at temperature 0.
theme_validation_model = ChatOpenAI(
    model="gpt-3.5-turbo", temperature=0, **_openai_connection
)
validation_model = ChatOpenAI(
    model="gpt-3.5-turbo", temperature=0, **_openai_connection
)

In [9]:
# Models are handed over positionally, in this order:
# generation, validation, theme validation.
_generator_models = (generation_model, validation_model, theme_validation_model)
topic_generator = LoopedGraphGenerator(*_generator_models)

In [None]:
# Generate graph data for the given topic. The result is indexed below as
# graph[0]["graph"] and graph[0]["dialogues"], i.e. it is a sequence of records
# rather than a single graph.
# NOTE(review): the generation model runs at temperature=1, so the output
# presumably differs between runs — confirm before comparing with fixed data.
graph = topic_generator.invoke("Arranging airport transfers")

In [25]:
# Display the nodes of the first generated record.
graph[0]["graph"]["nodes"]

[{'id': 1,
  'label': 'start',
  'is_start': True,
  'utterances': ['Hey there! How can I help you today?']},
 {'id': 2,
  'label': 'ask_transfer_details',
  'is_start': False,
  'utterances': ['Got it! Which airport are you flying from or landing at, and what’s your destination?',
   'Sure! From which airport and to where do you need the transfer?']},
 {'id': 3,
  'label': 'ask_number_of_passengers',
  'is_start': False,
  'utterances': ['Alright, how many passengers will there be?',
   'Got it! How many people will be traveling?']},
 {'id': 4,
  'label': 'ask_vehicle_preference',
  'is_start': False,
  'utterances': ['Do you have a specific type of vehicle in mind? I can check availability.',
   'Would you prefer a private car, a shared shuttle, or something else?']},
 {'id': 5,
  'label': 'confirm_price_and_availability',
  'is_start': False,
  'utterances': ['Here are your options: Private car for $60, shared shuttle for $20 per person. Which do you prefer?',
   'I found a few opti

In [26]:
# Display the edges of the first generated record.
graph[0]["graph"]["edges"]

[{'source': 1,
  'target': 2,
  'utterances': ['Hey! I need an airport transfer.',
   'Hi, can I book a ride from the airport?',
   'Hello! I need help with transportation from the airport.']},
 {'source': 2,
  'target': 3,
  'utterances': ['I’m arriving at JFK and need a ride to Manhattan.',
   'Flying into LAX, heading to Santa Monica.',
   'I need a transfer from Heathrow to the city center.']},
 {'source': 3,
  'target': 4,
  'utterances': ['It’s just me.',
   'We are a group of four.',
   'There are two of us.']},
 {'source': 4,
  'target': 5,
  'utterances': ['I’d prefer a private car.',
   'A shared shuttle sounds good.',
   'Something affordable, maybe a shuttle?']},
 {'source': 5,
  'target': 6,
  'utterances': ['I’ll take the private car.',
   'I’ll go with the shared shuttle.',
   'Let’s do the private ride for $50.']},
 {'source': 6,
  'target': 7,
  'utterances': ['Name is John, flight number AA123, arriving at 2 PM.',
   'Flight BA456, landing at 6 PM, name’s Sarah.',
   

In [9]:
# Hard-coded graph in the same nodes/edges dictionary layout as
# graph[0]["graph"]: 8 nodes (ids 1-8, node 1 is the start node) and 12 edges.
# It is wrapped in Graph(...) below for visualisation, dialogue sampling and as
# the reference side of compare_graphs.
graph_generated = {
    "nodes": [
        {
            "id": 1,
            "label": "start",
            "is_start": True,
            "utterances": ["Hey there! How can I help you today?"],
        },
        {
            "id": 2,
            "label": "ask_transfer_details",
            "is_start": False,
            "utterances": [
                "Got it! Which airport are you flying from or landing at, and what’s your destination?",
                "Sure! From which airport and to where do you need the transfer?",
            ],
        },
        {
            "id": 3,
            "label": "ask_number_of_passengers",
            "is_start": False,
            "utterances": [
                "Alright, how many passengers will there be?",
                "Got it! How many people will be traveling?",
            ],
        },
        {
            "id": 4,
            "label": "ask_vehicle_preference",
            "is_start": False,
            "utterances": [
                "Do you have a specific type of vehicle in mind? I can check availability.",
                "Would you prefer a private car, a shared shuttle, or something else?",
            ],
        },
        {
            "id": 5,
            "label": "confirm_price_and_availability",
            "is_start": False,
            "utterances": [
                "Here are your options: Private car for $60, shared shuttle for $20 per person. Which do you prefer?",
                "I found a few options for you. A private car would be $50, and a shared shuttle is $25 per person. Which one works best?",
            ],
        },
        {
            "id": 6,
            "label": "ask_booking_details",
            "is_start": False,
            "utterances": [
                "Awesome choice! Could you provide your flight number and preferred pickup time?",
                "Great! I’ll need your name and flight details to confirm the booking.",
            ],
        },
        {
            "id": 7,
            "label": "confirm_booking",
            "is_start": False,
            "utterances": [
                "Your airport transfer is booked! The reference number is XFR67890. Need anything else?",
                "You’re all set! Your booking reference is TRF12345. Anything else I can help with?",
            ],
        },
        {
            "id": 8,
            "label": "closing",
            "is_start": False,
            "utterances": [
                "All set! Have a great day!",
                "Glad to help! Safe travels!",
                "You’re welcome! Have a great trip!",
            ],
        },
    ],
    "edges": [
        {
            "source": 1,
            "target": 2,
            "utterances": [
                "Hey! I need an airport transfer.",
                "Hi, can I book a ride from the airport?",
                "Hello! I need help with transportation from the airport.",
            ],
        },
        {
            "source": 2,
            "target": 3,
            "utterances": [
                "I’m arriving at JFK and need a ride to Manhattan.",
                "Flying into LAX, heading to Santa Monica.",
                "I need a transfer from Heathrow to the city center.",
            ],
        },
        {
            "source": 3,
            "target": 4,
            "utterances": [
                "It’s just me.",
                "We are a group of four.",
                "There are two of us.",
            ],
        },
        {
            "source": 4,
            "target": 5,
            "utterances": [
                "I’d prefer a private car.",
                "A shared shuttle sounds good.",
                "Something affordable, maybe a shuttle?",
            ],
        },
        {
            "source": 5,
            "target": 6,
            "utterances": [
                "I’ll take the private car.",
                "I’ll go with the shared shuttle.",
                "Let’s do the private ride for $50.",
            ],
        },
        {
            "source": 6,
            "target": 7,
            "utterances": [
                "Name is John, flight number AA123, arriving at 2 PM.",
                "Flight BA456, landing at 6 PM, name’s Sarah.",
                "Delta 789, getting in around 10 AM.",
            ],
        },
        {
            "source": 7,
            "target": 8,
            "utterances": [
                "Nope, that’s all. Thanks!",
                "Thanks a lot! That’s all I needed.",
                "No, I’m good. Thank you!",
            ],
        },
        {
            "source": 4,
            "target": 3,
            "utterances": [
                "Actually, can I change the number of passengers?",
                "Wait, I need to update the passenger count.",
                "Sorry, I need to adjust how many people are coming.",
            ],
        },
        {
            "source": 5,
            "target": 4,
            "utterances": [
                "Can I change my vehicle choice?",
                "Actually, can I switch to a private car instead?",
                "Wait, I think I want a different vehicle.",
            ],
        },
        {
            "source": 6,
            "target": 5,
            "utterances": [
                "Can we go over the prices again?",
                "Wait, what were the transfer options?",
                "Actually, can you repeat the pricing details?",
            ],
        },
        {
            "source": 2,
            "target": 8,
            "utterances": [
                "Never mind, I’ll figure it out myself.",
                "I changed my mind, thanks anyway.",
                "I don’t need a transfer anymore, but thanks!",
            ],
        },
        {
            "source": 4,
            "target": 8,
            "utterances": [
                "Actually, I’ll make other arrangements. Thanks!",
                "I’ll check some other options instead, thanks.",
                "You know what, never mind. Appreciate the help!",
            ],
        },
    ],
}

In [None]:
# Display the dialogues that came back with the first generated record.
graph[0]["dialogues"]

In [None]:
# Draw the freshly generated graph.
# NOTE(review): the string argument is presumably the plot title — confirm.
Graph(graph[0]["graph"]).visualise_short("Arranging airport transfers")

In [None]:
# Draw the hard-coded reference graph with the same string argument as the
# generated one above, for side-by-side comparison.
Graph(graph_generated).visualise_short("Arranging airport transfers")

In [10]:
# GrouperGenerator is the ThreeStagesGraphGenerator imported from
# three_stages_embedder; it is invoked on the dialogues further down.
generator = GrouperGenerator()

In [33]:
# Build one Dialogue per generated record from its "messages" list.
generated_records = graph[0]["dialogues"]
dialogues = [Dialogue().from_list(record["messages"]) for record in generated_records]

In [14]:
# Sample dialogues from the hard-coded reference graph so the dialogues used
# downstream belong to the same graph that compare_graphs checks against.
# The result used to be bound to the misspelled name `dailogues`, which no
# other cell reads, so the sampled dialogues were silently discarded.
# NOTE(review): 15 is the sampler's second positional argument — presumably an
# upper limit; confirm against RecursiveDialogueSampler.invoke.
dialogues = sampler.invoke(Graph(graph_generated), 15)

Graph has all the dialogues


In [11]:
import json

In [33]:
# Persist the dialogues as pretty-printed UTF-8 JSON (non-ASCII kept verbatim).
with open("dialogues.json", "w", encoding="utf-8") as out_file:
    payload = [dialogue.model_dump() for dialogue in dialogues]
    json.dump(payload, out_file, ensure_ascii=False, indent=4)

In [12]:
# Read the file with the encoding it was written in. The dump above uses
# UTF-8 with ensure_ascii=False, so the text contains raw non-ASCII characters
# (e.g. typographic apostrophes) that a non-UTF-8 locale default would
# mis-decode or reject.
with open("dialogues.json", encoding="utf-8") as f:
    dialogues = json.load(f)

In [None]:
# Display the records loaded from dialogues.json.
dialogues

In [13]:
# Turn the loaded records into Dialogue objects. The cell rebinds the name it
# reads from, so items that are already Dialogue instances are kept as they
# are; this makes re-running the cell a no-op instead of an error.
dialogues = [
    item if isinstance(item, Dialogue) else Dialogue().from_list(item["messages"])
    for item in dialogues
]

### Generating graph from dialogues with embedding grouper

In [None]:
# Rebuild a graph from the dialogues with the embedder-based generator.
# NOTE(review): `temp=1` is presumably the LLM temperature — confirm.
res_graph = generator.invoke(dialogues, temp=1)

In [None]:
# Draw the graph produced by the embedder-based generator.
res_graph.visualise_short("Generated from graph dialogues")

In [18]:
# Compare the embedder-based result with the hard-coded reference graph.
compare_graphs(res_graph, Graph(graph_generated))

{'value': False, 'description': 'Numbers of nodes do not match: 14 != 8'}

### Generating graph from dialogues with algo grouper

In [14]:
from three_stages_algo import ThreeStagesGraphGenerator as AlgoGrouper

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-m3


In [15]:
# AlgoGrouper is the ThreeStagesGraphGenerator imported from three_stages_algo.
algo_grouper = AlgoGrouper()

In [None]:
# Rebuild a graph from the dialogues with the algo-based generator.
# Note that this rebinds res_graph, replacing the embedder-based result.
res_graph = algo_grouper.invoke(dialogues, model_name="chatgpt-4o-latest", temp=0)

In [None]:
# Draw the graph produced by the algo-based generator.
res_graph.visualise_short("Generated from graph dialogues")

In [None]:
# Compare the algo-based result with the hard-coded reference graph.
compare_graphs(res_graph, Graph(graph_generated))

### Generating graph from dialogues with LLM grouper

In [22]:
# NOTE(review): this section is titled "LLM grouper", but the object created
# here is another AlgoGrouper — the same class as algo_grouper above — and it
# is invoked with identical arguments. Confirm whether a different generator
# class was meant to be used.
llm_generator = AlgoGrouper()

In [None]:
# Rebuild a graph from the dialogues with llm_generator.
# Note that this rebinds res_graph, replacing the previous result.
res_graph = llm_generator.invoke(dialogues, model_name="chatgpt-4o-latest", temp=0)

In [None]:
# Draw the graph produced by llm_generator.
res_graph.visualise_short("Generated with LLM from graph dialogues")

In [25]:
# Compare the llm_generator result with the hard-coded reference graph.
compare_graphs(res_graph, Graph(graph_generated))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'value': True,
 'description': 'Nodes similarity: 0.9841386079788208, Nodes+edges similarity: 0.9515432119369507'}