In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
# Load environment variables with python-dotenv before any client is built;
# OPENAI_API_KEY and OPENAI_BASE_URL are read later through os.getenv.
load_dotenv() 

True

## Cycle Graph Generation

In [2]:
from chatsky_llm_autoconfig.algorithms.dialogue_augmentation import DialogAugmentation
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.algorithms.topic_graph_generation import CycleGraphGenerator
from chatsky_llm_autoconfig.metrics.llm_metrics import are_triplets_valid
from typing import List, Dict, Any
import os
import json
from datetime import datetime
from pathlib import Path

def generate_dialogues(
    topics: List[str], 
    num_augmentations: int = 2,
    model_name: str = "gpt-4o-mini"
) -> List[Dict[str, Any]]:
    """
    Build one dataset entry per topic whose generated graph passes validation.

    For every topic a graph is generated and checked with
    ``are_triplets_valid``. Topics whose check result has a falsy ``"value"``
    produce no entry, so the returned list may be shorter than ``topics``.
    For an accepted graph, the first dialogue returned by
    ``DialogueSampler.invoke(graph, 1, -1)`` is taken as the base dialogue and
    is augmented ``num_augmentations`` times.

    Args:
        topics: Conversation topics to process, in order.
        num_augmentations: How many augmented dialogues to add after the
            sampled one for each accepted topic.
        model_name: Name of the chat model handed to the graph validator.

    Returns:
        A list of dicts with the keys ``"graph"`` (the graph's
        ``graph_dict``), ``"topic"`` and ``"dialogues"`` (the dumped sampled
        dialogue followed by the dumped augmented dialogues).
    """
    # Pipeline components; constructed in the same order on every call.
    cycle_generator = CycleGraphGenerator()
    dialogue_sampler = DialogueSampler()
    dialogue_augmenter = DialogAugmentation()

    # This model is only passed to the validation metric below.
    validator_llm = ChatOpenAI(
        model=model_name,
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
        temperature=0,
    )

    collected = []

    for topic in topics:
        topic_graph = cycle_generator.invoke(topic=topic)
        verdict = are_triplets_valid(topic_graph, validator_llm, topic)

        # Rejected graphs contribute nothing to the dataset.
        if not verdict["value"]:
            continue

        print(f"Graph: {topic_graph}")
        base_dialogue = dialogue_sampler.invoke(topic_graph, 1, -1)[0]

        print(f"Sampled dialogue: {base_dialogue}")

        # The sampled dialogue always comes first, then its augmentations.
        variants = [base_dialogue.model_dump()]
        variants.extend(
            dialogue_augmenter.invoke(dialogue=base_dialogue, topic=topic).model_dump()
            for _ in range(num_augmentations)
        )

        collected.append(
            {
                "graph": topic_graph.graph_dict,
                "topic": topic,
                "dialogues": variants,
            }
        )

    return collected

def save_dataset(data: List[Dict[str, Any]], num_augmentations: int):
    """
    Write the collected entries to a JSON file inside ``./dataset``.

    The directory is created when missing. The file name encodes the number
    of entries and the number of dialogues per entry (one sampled dialogue
    plus ``num_augmentations`` augmented ones). It carries no timestamp, so a
    later call with the same counts overwrites the earlier file.

    Args:
        data: Entries to store. Each must provide the keys "graph", "topic"
            and "dialogues"; any other keys are not written.
        num_augmentations: Number of augmented dialogues generated per entry;
            only used to build the file name.

    Returns:
        Path of the JSON file that was written.
    """
    output_dir = Path("./dataset")
    output_dir.mkdir(parents=True, exist_ok=True)

    dialogues_per_topic = 1 + num_augmentations
    target = output_dir / f"dialogue_dataset_{len(data)}_topics_{dialogues_per_topic}.json"

    # Keep exactly these three fields, in this order, for every entry.
    kept_keys = ("graph", "topic", "dialogues")
    payload = [{key: item[key] for key in kept_keys} for item in data]

    # ensure_ascii=False keeps non-ASCII utterances readable in the file.
    with open(target, 'w', encoding='utf-8') as out_file:
        json.dump(payload, out_file, indent=2, ensure_ascii=False)

    print(f"Dataset saved to: {target}")
    return target
# Example usage: build a small dataset for a handful of service-booking topics.
# The trailing comment on each topic names its domain.
topics = [
    "house cleaning service booking",          # Home Services
    "online therapy session booking",          # Mental Health
    "language course enrollment",              # Education
    "covid-19 test scheduling",               # Healthcare
    "food delivery subscription",              # Food Service
    "remote work tech support"                # Technical Support
]

# Number of augmentations per topic; each saved entry holds the sampled
# dialogue plus this many augmented ones (num_augmentations + 1 in total).
num_augmentations = 2

# Collect dialogues. Topics whose generated graph fails validation are
# skipped by generate_dialogues, so this list may be shorter than `topics`.
dialogue_collection = generate_dialogues(topics, num_augmentations=num_augmentations)

# Write the collection to ./dataset; the file name records how many topics
# were actually kept.
saved_path = save_dataset(dialogue_collection, num_augmentations)


INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Invalid transition from ['Great! Could you please provide your address?'] to ['Do you have any pets we should be aware of?'] via edge '["Here's my address"]': The transition is invalid because the assistant's request for the user's address does not logically connect to the user's response of providing an address, followed by the assistant's question about pets. The assistant's question about pets seems unre

Graph: graph_dict={'edges': [{'source': 1, 'target': 2, 'utterances': ["I'd like to schedule a COVID-19 test"]}, {'source': 2, 'target': 3, 'utterances': ['I prefer a drive-through test']}, {'source': 3, 'target': 4, 'utterances': ['Yes, that location works for me']}, {'source': 4, 'target': 5, 'utterances': ["I'll be there at 10 AM"]}, {'source': 5, 'target': 2, 'utterances': ['I need to schedule another test']}], 'nodes': [{'id': 1, 'label': 'welcome', 'is_start': True, 'utterances': ['Welcome! How can I assist you with COVID-19 testing today?']}, {'id': 2, 'label': 'ask_test_type', 'is_start': False, 'utterances': ['What type of COVID-19 test would you like to schedule?']}, {'id': 3, 'label': 'confirm_location', 'is_start': False, 'utterances': ['We have a drive-through test available at our main center. Does that work for you?']}, {'id': 4, 'label': 'confirm_time', 'is_start': False, 'utterances': ['Great! What time would you like to schedule your test?']}, {'id': 5, 'label': 'comp

INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:root:Invalid transition from ['Thank you for your payment. Would yo

Dataset saved to: dataset/dialogue_dataset_1_topics_3.json
