In [14]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
load_dotenv() 

True

## Cycle Graph Generation

In [15]:
from chatsky_llm_autoconfig.algorithms.dialogue_augmentation import DialogAugmentation
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.algorithms.topic_graph_generation import CycleGraphGenerator
from chatsky_llm_autoconfig.metrics.llm_metrics import are_triplets_valid
from typing import List, Dict, Any
import os
import json
from datetime import datetime
from pathlib import Path

def generate_dialogues(
    topics: List[str], 
    num_augmentations: int = 2,
    model_name: str = "gpt-4o-mini"
) -> List[Dict[str, Any]]:
    """
    Collects dialogues for each topic and returns them in a structured format.
    
    Args:
        topics: List of conversation topics
        num_augmentations: Number of augmented dialogues to generate per topic
        model_name: Name of the model to use
        
    Returns:
        List of dictionaries containing graph, topic, and dialogues for each topic
    """
    # Initialize components
    graph_generator = CycleGraphGenerator()
    sampler = DialogueSampler()
    augmenter = DialogAugmentation()
    
    model = ChatOpenAI(
        model=model_name, 
        api_key=os.getenv("OPENAI_API_KEY"), 
        base_url=os.getenv("OPENAI_BASE_URL"), 
        temperature=0
    )
    
    results = []
    
    for topic in topics:
        # Generate graph and validate
        graph = graph_generator.invoke(topic=topic)
        is_valid = are_triplets_valid(graph, model, topic)
        
        if is_valid["value"]:
            print(f"Graph: {graph}")
            sampled_dialogue = sampler.invoke(graph, 1, -1)
            
            print(f"Sampled dialogue: {sampled_dialogue}")
            
            # Initialize dialogues array with the sampled dialogue
            dialogues = [sampled_dialogue.model_dump()]
            
            # Generate augmented versions
            for _ in range(num_augmentations):
                augmented = augmenter.invoke(dialogue=sampled_dialogue, topic=topic)
                dialogues.append(augmented.model_dump())
            
            # Create result entry
            result_entry = {
                "graph": graph.graph_dict,
                "topic": topic,
                "dialogues": dialogues
            }
            
            results.append(result_entry)
            
    return results

def save_dataset(data: List[Dict[str, Any]], num_augmentations: int):
    """
    Saves the dataset with a structured filename.
    
    Args:
        data: List of dictionaries containing graph, topic, and dialogues
        num_augmentations: Number of augmented dialogues generated
    """
    # Create dataset directory if it doesn't exist
    dataset_dir = Path("./dataset")
    dataset_dir.mkdir(parents=True, exist_ok=True)
    
    # Generate filename with timestamp and dataset info
    total_dialogues = 1 + num_augmentations
    filename = f"dialogue_dataset_{len(data)}_topics_{total_dialogues}.json"
    
    # Full path for the file
    file_path = dataset_dir / filename
    
    # Convert Graph objects to their dictionary representation
    serializable_data = []
    for entry in data:
        serializable_entry = {
            "graph": entry["graph"].graph_dict,  # Use graph_dict instead of Graph object
            "topic": entry["topic"],
            "dialogues": entry["dialogues"]
        }
        serializable_data.append(serializable_entry)
    
    # Save the data
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(serializable_data, f, indent=2, ensure_ascii=False)
    
    print(f"Dataset saved to: {file_path}")
    return file_path
# Example usage
topics = [
    # "booking a hotel room",                    # Travel/Accommodation
    # "scheduling a doctor's appointment",       # Healthcare
    # "ordering food delivery",                  # Food Service
    # "tech support for a laptop issue",         # Technical Support
    # "buying movie tickets",                    # Entertainment
    "gym membership registration",             # Fitness
    # "car maintenance service",                 # Automotive
    # "flight reservation",                      # Travel
    # "pizza delivery customization",            # Food Service
    # "hair salon appointment booking",          # Personal Care
    # "internet service installation",           # Utilities
    # "banking account assistance",              # Financial Services
    # "pet grooming appointment",               # Pet Services
    # "mobile phone plan upgrade",              # Telecommunications
    # "house cleaning service booking"           # Home Services
]

# Number of augmentations (total dialogues will be num_augmentations + 1)
num_augmentations = 2

# Collect dialogues
dialogue_collection = generate_dialogues(topics, num_augmentations=num_augmentations)

# Save dataset
# saved_path = save_dataset(dialogue_collection, num_augmentations)
dialogue_collection

INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"


Graph: graph_dict={'edges': [{'source': 1, 'target': 2, 'utterances': ["Hi, I'm interested in registering for a gym membership"]}, {'source': 2, 'target': 3, 'utterances': ["I'd like a monthly membership"]}, {'source': 3, 'target': 4, 'utterances': ['Yes, I have my ID with me']}, {'source': 4, 'target': 5, 'utterances': ["Here's my payment information"]}, {'source': 5, 'target': 2, 'utterances': ["I'd like to register another membership"]}], 'nodes': [{'id': 1, 'label': 'welcome', 'is_start': True, 'utterances': ['Welcome to our gym! How can I assist you with membership registration today?']}, {'id': 2, 'label': 'ask_membership_type', 'is_start': False, 'utterances': ['What type of membership are you interested in?']}, {'id': 3, 'label': 'confirm_id', 'is_start': False, 'utterances': ['A monthly membership. Do you have your ID for verification?']}, {'id': 4, 'label': 'payment', 'is_start': False, 'utterances': ['Great! Please provide your payment information to proceed.']}, {'id': 5, '

AttributeError: 'list' object has no attribute 'model_dump'