In [2]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
# Load variables from a local .env file into the process environment; later
# cells read OPENAI_API_KEY and OPENAI_BASE_URL through os.getenv.
load_dotenv() 

True

## Test Graph Creation

In [6]:
from langchain.prompts import PromptTemplate

# Prompt used to ask the LLM for a dialogue graph about `{topic}`.
# Literal JSON braces are doubled ({{ }}) so that the template engine treats
# only {topic} as a variable. The format example is kept self-consistent:
# every edge source/target refers to a node id that is listed under "nodes".
simple_graph_prompt = PromptTemplate.from_template(
    """
Create a dialogue graph for a {topic} conversation that follows these rules:

1. Each assistant message (node) must be a clear question or statement that expects a specific type of response
2. Each user message (edge) must directly answer or respond to the previous assistant message
3. Include these basic flows:
   - Main success path (completing the primary task)
   - Early exit path (user decides not to proceed)
   - Return path (user wants to modify earlier choice)

Example of correct flow:
Assistant: "What type of coffee would you like?"
User: "A latte please"
Assistant: "Would you like that hot or iced?"
User: "Hot please"

Example of incorrect flow:
Assistant: "What type of coffee would you like?"
User: "No thank you" (This response doesn't match the question)

Format:
{{
    "edges": [
        {{ "source": 1, "target": 2, "utterances": ["I'd like a coffee"] }},
        {{ "source": 2, "target": 3, "utterances": ["A latte please"] }}
    ],
    "nodes": [
        {{ "id": 1, "label": "welcome", "is_start": true, "utterances": ["Welcome! How can I help?"] }},
        {{ "id": 2, "label": "ask_drink", "is_start": false, "utterances": ["What would you like to drink?"] }},
        {{ "id": 3, "label": "ask_temperature", "is_start": false, "utterances": ["Would you like that hot or iced?"] }}
    ]
}}

Return ONLY the JSON without any additional text.
"""
)

In [7]:
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.algorithms.topic_graph_generation import CycleGraphGenerator
from chatsky_llm_autoconfig.metrics.llm_metrics import are_triplets_valid
from langchain_openai import ChatOpenAI
from pathlib import Path
import os
import json
from datetime import datetime


def generate_valid_dialogues(topics: list[str], output_dir: str = "generated_datasets") -> None:
    """Generate a dialogue graph per topic, keep the ones that validate, and save them.

    For each topic a graph is generated with ``CycleGraphGenerator`` (using the
    notebook-level ``simple_graph_prompt``) and checked with
    ``are_triplets_valid``. When the validation result has a truthy ``"value"``,
    dialogues are sampled from the graph with ``DialogueSampler`` and the entry
    is collected. All collected entries are written to one timestamped
    ``valid_dialogues_<YYYYmmdd_HHMMSS>.json`` file inside ``output_dir``.

    Any exception raised while handling a topic is printed and that topic is
    skipped, so one failing topic does not stop the run.

    Args:
        topics: Conversation topics to generate graphs for.
        output_dir: Directory for the output file. It is created, including
            missing parent directories, if it does not exist.

    Returns:
        None. Progress is printed; no file is written when no graph validates.
    """
    out_dir = Path(output_dir)
    # parents=True so nested output paths (e.g. "runs/today") work as well.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Nothing to generate: skip building the generator and the API clients.
    if not topics:
        return

    graph_generator = CycleGraphGenerator(prompt=simple_graph_prompt)
    sampler = DialogueSampler()

    # Model that generates the graphs.
    gen_model = ChatOpenAI(
        model='o1-mini',
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
    )

    # Model that judges whether the generated transitions are valid.
    valid_model = ChatOpenAI(
        model='gpt-4o',
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
    )

    valid_results = []

    # Generate and validate graphs
    for topic in topics:
        try:
            graph = graph_generator.invoke(topic=topic, model=gen_model)
            validation_result = are_triplets_valid(graph, valid_model)

            print(f"Topic: {topic}")
            print(f"Validation result: {validation_result}")

            # If valid, sample dialogue and save
            if validation_result["value"]:
                sampled_dialogues = sampler.invoke(graph, 1, -1)
                valid_results.append({
                    "graph": graph.graph_dict,
                    "topic": topic,
                    "dialogues": [d.model_dump() for d in sampled_dialogues],
                    "validation_result": validation_result
                })
                print(f"✅ Valid dialogue generated for topic: {topic}")
            else:
                print(f"❌ Invalid dialogue for topic: {topic}")

        except Exception as e:
            # Best effort: report the failure and move on to the next topic.
            print(f"Error processing {topic}: {str(e)}")

    # Save results
    if valid_results:
        output_file = out_dir / f"valid_dialogues_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(valid_results, f, ensure_ascii=False, indent=2)
        print(f"\nSaved {len(valid_results)} valid dialogues to: {output_file}")



                

In [4]:
# Topics for which dialogue graphs should be generated.
topics: list[str] = [
    "medical appointment scheduling",
    "food delivery service",
    "fitness membership registration",
    "apartment rental application",
    "tech support assistance",
    "travel package booking",
    "insurance policy purchase",
    "pet grooming service",
    "moving service arrangement",
    "home cleaning service",
]

# Generation is left disabled here; uncomment the call below to run it.
# It sends LLM requests for every topic in the list.
# generate_valid_dialogues(topics)

In [5]:
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.graph import Graph
import json
from pathlib import Path

# Re-sample and print dialogues for every graph stored under generated_datasets/.

# Initialize sampler
sampler = DialogueSampler()

# Get all JSON files in the directory (sorted so the output order is stable between runs)
dataset_dir = Path("generated_datasets")
json_files = sorted(dataset_dir.glob("*.json"))

if not json_files:
    # Make an empty or missing dataset directory visible instead of printing nothing.
    print(f"No JSON files found in: {dataset_dir}")

for json_path in json_files:
    print(f"\nProcessing file: {json_path.name}")
    print("=" * 80)

    # Load JSON file. The datasets are written as UTF-8 with ensure_ascii=False,
    # so read them back as UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Process each graph
    for entry in data:
        print(f"\nTopic: {entry['topic']}")
        graph = Graph(entry["graph"])

        # Sample dialogues
        dialogues = sampler.invoke(graph, 1, -1)

        # Print all dialogues
        for i, dialogue in enumerate(dialogues, 1):
            print(f"\nDialogue {i}:")
            for message in dialogue.messages:
                print(f"- {message}")

        print("-" * 50)

In [10]:
import json
import os
from datetime import datetime
from pathlib import Path

from langchain_openai import ChatOpenAI

from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.graph import BaseGraph, Graph
from chatsky_llm_autoconfig.metrics.llm_metrics import are_triplets_valid


def check_existing_graphs_and_sample(
    datasets_dir: str = "graph_gen",
    output_dir: str = "valid_complex_graphs",
) -> None:
    """Validate stored graphs and sample dialogues from the valid ones.

    Every ``*.json`` file in ``datasets_dir`` is loaded as an array of entries
    that each carry a ``"topic"`` and a ``"graph"``. Each graph is checked with
    ``are_triplets_valid``; when the result has a truthy ``"value"``, dialogues
    are sampled with ``DialogueSampler`` and the entry is collected. Collected
    entries are written to one timestamped
    ``valid_dialogues_<YYYYmmdd_HHMMSS>.json`` file inside ``output_dir``.

    Errors are reported with ``print`` and do not stop the run: a failure while
    handling one graph skips only that graph, and a failure while loading a
    file skips only that file.

    Args:
        datasets_dir: Directory holding the JSON files with stored graphs.
        output_dir: Directory for the result file. It is created, including
            missing parent directories, only when there is something to save.

    Returns:
        None. Progress is printed; no file is written when no graph validates.
    """
    source_dir = Path(datasets_dir)
    if not source_dir.is_dir():
        # Without this check a missing directory would silently yield no files.
        print(f"Dataset directory not found: {source_dir}")
        return

    sampler = DialogueSampler()

    # Model that judges whether the stored transitions are valid.
    valid_model = ChatOpenAI(
        model='gpt-4o',
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
    )

    valid_results = []

    # Sorted so files are processed in a stable order between runs.
    for file_path in sorted(source_dir.glob("*.json")):
        try:
            # Load existing graph array
            with open(file_path, 'r', encoding='utf-8') as f:
                graphs_data = json.load(f)

            print(f"\nProcessing graphs from: {file_path}")

            # Process each graph in the array
            for graph_data in graphs_data:
                try:
                    print(f"\nValidating graph with topic: {graph_data['topic']}")

                    graph_obj: BaseGraph = Graph(graph_data['graph'])

                    # Validate triplets
                    validation_result = are_triplets_valid(graph_obj, valid_model)
                    print(f"Validation result: {validation_result}")

                    if validation_result["value"]:
                        sampled_dialogues = sampler.invoke(graph_obj, 1, -1)
                        valid_results.append({
                            "graph": graph_data['graph'],
                            "topic": graph_data['topic'],
                            "dialogues": [d.model_dump() for d in sampled_dialogues],
                            "validation_result": validation_result
                        })
                        print(f"✅ Valid dialogue generated for topic: {graph_data['topic']}")
                    else:
                        print(f"❌ Invalid dialogue for topic: {graph_data['topic']}")
                except Exception as e:
                    # One bad entry must not discard the remaining graphs in this file.
                    print(f"Error processing a graph in {file_path}: {str(e)}")

        except Exception as e:
            # Unreadable or malformed file: report it and continue with the next one.
            print(f"Error processing {file_path}: {str(e)}")

    # Save results
    if valid_results:
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

        output_file = out_dir / f"valid_dialogues_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(valid_results, f, ensure_ascii=False, indent=2)
        print(f"\nSaved {len(valid_results)} valid dialogues to: {output_file}")


# In a notebook kernel __name__ is "__main__", so running this cell executes
# the check immediately (its printed output follows the cell).
if __name__ == "__main__":
   check_existing_graphs_and_sample()


Processing graphs from: graph_gen/graph_gen.json

Validating graph with topic: library


INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/o

Validation result: {'value': True, 'description': 'All transitions are valid.'}
✅ Valid dialogue generated for topic: library

Saved 1 valid dialogues to: valid_complex_graphs/valid_dialogues_20241217_172116.json
