In [4]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os
# Load variables from a .env file into the process environment; later cells
# read OPENAI_API_KEY and OPENAI_BASE_URL from it via os.getenv.
load_dotenv() 

True

## Test Graph Creation

In [5]:
from langchain.prompts import PromptTemplate

# Prompt for generating a cyclic dialogue graph in which every node and edge
# carries a single utterance (see the JSON example inside the template).
# The only template variable is {topic}; the literal JSON braces in the example
# are doubled ({{ }}) so that template formatting leaves them intact.
cycle_graph_one_utter = PromptTemplate.from_template(
    '''
    Create a natural dialogue graph that represents any business conversation with logical cycles.

    Here's an example of a clothing store conversation flow:

    ```mermaid
    graph TD
        1[Welcome] -->|"Looking for jeans"| 2[Ask Preferences]
        2 -->|"Slim fit, blue"| 3[Offer Options]
        3 -->|"Like the first one"| 4[Size Check]
        4 -->|"Size 32"| 5[Location]
        5 -->|"Try in store"| 6[Complete]
        6 -->|"Look for something else"| 2
    ```

    Example JSON structure:
    {{
        "edges": [
            {{ "source": 1, "target": 2, "utterances": ["I'm looking for jeans"] }},
            {{ "source": 2, "target": 3, "utterances": ["I'd like slim fit in blue"] }},
            {{ "source": 3, "target": 4, "utterances": ["I like the first option"] }},
            {{ "source": 4, "target": 5, "utterances": ["Size 32 please"] }},
            {{ "source": 5, "target": 6, "utterances": ["I'll try them in store"] }},
            {{ "source": 6, "target": 2, "utterances": ["I'd like to look for another pair"] }}
        ],
        "nodes": [
            {{ "id": 1, "label": "welcome", "is_start": true,
              "utterances": ["Welcome to Style Store! How can I help you today?"] }},
            {{ "id": 2, "label": "ask_preferences", "is_start": false,
              "utterances": ["What style and color are you interested in?"] }},
            {{ "id": 3, "label": "offer_options", "is_start": false,
              "utterances": ["I have these slim fit blue jeans available. Would you like to see them?"] }},
            {{ "id": 4, "label": "check_size", "is_start": false,
              "utterances": ["What size would you like?"] }},
            {{ "id": 5, "label": "location_preference", "is_start": false,
              "utterances": ["Would you like to try them in store or have them delivered?"] }},
            {{ "id": 6, "label": "complete", "is_start": false,
              "utterances": ["Perfect! The jeans are ready for you. Would you like to look for anything else?"] }}
        ]
    }}

    Key points:
    1. Keep responses natural and contextual
    2. Each state should serve a clear purpose
    3. Return cycles should make conversational sense
    4. Nodes are assistant phrases, edges are user responses. We start with assistant node. We end with assistant node.

    Create a dialogue graph for topic: {topic}
    Return only valid JSON matching the format above.
    '''
)


# Prompt for generating a cyclic dialogue graph in which every node and edge
# carries exactly two paraphrased utterances (see "Key points" in the template).
# The only template variable is {topic}; the literal JSON braces in the example
# are doubled ({{ }}) so that template formatting leaves them intact.
cycle_graph_two_utter = PromptTemplate.from_template(
    '''
    Create a natural dialogue graph that represents any business conversation with logical cycles.
    Each node and edge should contain exactly 2 utterances that mean the same thing but are phrased differently.

    Here's an example of an ordering conversation flow:

    ```mermaid
    graph TD
        1[Welcome] -->|"Want to order"| 2[Ask Item]
        2 -->|"Pizza please"| 3[Size Options]
        3 -->|"Large size"| 4[Confirm]
        4 -->|"That's correct"| 5[Complete]
        5 -->|"More items"| 2
    ```

    Example JSON structure:
    {{
        "edges": [
            {{ "source": 1, "target": 2, "utterances": [
                "I need to make an order",
                "I want to order from you"
            ]}},
            {{ "source": 2, "target": 3, "utterances": [
                "I'd like a pizza",
                "A pizza please"
            ]}},
            {{ "source": 3, "target": 4, "utterances": [
                "Large size please",
                "I want it large"
            ]}},
            {{ "source": 4, "target": 5, "utterances": [
                "Yes, that's right",
                "Yes, that's correct"
            ]}},
            {{ "source": 5, "target": 2, "utterances": [
                "I want to order more",
                "I'd like to add something else"
            ]}}
        ],
        "nodes": [
            {{ "id": 1, "label": "welcome", "is_start": true,
              "utterances": [
                "How can I help you today?",
                "What can I do for you today?"
            ]}},
            {{ "id": 2, "label": "ask_item", "is_start": false,
              "utterances": [
                "What would you like to order?",
                "What can I get for you?"
            ]}},
            {{ "id": 3, "label": "size_options", "is_start": false,
              "utterances": [
                "What size would you like?",
                "Which size do you prefer?"
            ]}},
            {{ "id": 4, "label": "confirm", "is_start": false,
              "utterances": [
                "So that's a large pizza, is that correct?",
                "You want a large pizza, right?"
            ]}},
            {{ "id": 5, "label": "complete", "is_start": false,
              "utterances": [
                "Done. Anything else?",
                "Okay. Anything else?"
            ]}}
        ]
    }}

    Key points:
    1. Each node and edge must have exactly 2 utterances
    2. The two utterances must mean the same thing, just phrased differently
    3. Keep responses natural and contextual
    4. Each state should serve a clear purpose
    5. Return cycles should make conversational sense
    6. Nodes are assistant phrases, edges are user responses
    7. We start with assistant node and end with assistant node

    Create a dialogue graph for topic: {topic}
    Return only valid JSON matching the format above.
    '''
)

In [6]:
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.algorithms.topic_graph_generation import CycleGraphGenerator
from chatsky_llm_autoconfig.metrics.llm_metrics import are_triplets_valid
from langchain_openai import ChatOpenAI
from typing import List, Dict, Any, Tuple
import os
import json
from datetime import datetime
from pathlib import Path

def generate_and_validate_dialogues(
    topics: List[str],
    output_dir: str = "generated_datasets_two_utter"
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Generate a two-utterance dialogue graph per topic, validate it and save the results.

    For every topic a graph is generated with the ``cycle_graph_two_utter``
    prompt, checked with ``are_triplets_valid`` and one dialogue is sampled
    from it. Entries are split by the validator's ``"value"`` flag and each
    group is written to its own timestamped JSON file in ``output_dir``.

    Parameters:
        topics: Topics to generate dialogue graphs for.
        output_dir: Directory for the JSON files; created if it does not exist.

    Returns:
        A ``(valid_results, invalid_results)`` tuple. Each entry is a dict with
        the keys ``"graph"``, ``"topic"``, ``"dialogues"`` and
        ``"validation_result"``.

    Note:
        A topic whose processing raises is reported on stdout and skipped
        (best effort), so it appears in neither list.
    """
    # Create output directory if it doesn't exist
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Initialize components with two utterance prompt
    graph_generator = CycleGraphGenerator(prompt=cycle_graph_two_utter)
    sampler = DialogueSampler()

    model = ChatOpenAI(
        model='gpt-4',
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
        temperature=0
    )

    valid_results = []
    invalid_results = []

    for topic in topics:
        try:
            # Generate graph and validate
            graph = graph_generator.invoke(topic=topic)
            validation_result = are_triplets_valid(graph, model, topic)

            print(f"Topic: {topic}")
            print(f"Validation result: {validation_result}")

            # Sample dialogue from the graph
            # NOTE(review): the meaning of the (1, -1) arguments is defined by
            # DialogueSampler.invoke - confirm against its signature.
            sampled_dialogue = sampler.invoke(graph, 1, -1)[0]

            result_entry = {
                "graph": graph.graph_dict,
                "topic": topic,
                "dialogues": [sampled_dialogue.model_dump()],
                "validation_result": validation_result
            }

            # Sort into valid/invalid collections
            if validation_result["value"]:
                valid_results.append(result_entry)
                print(f"✅ Valid dialogue generated for topic: {topic} (double utterance)")
            else:
                invalid_results.append(result_entry)
                print(f"❌ Invalid dialogue generated for topic: {topic} (double utterance)")

        except Exception as e:
            # Deliberate best effort: one failing topic must not abort the batch.
            print(f"Error processing topic {topic}: {str(e)}")
            continue

    # Save results to JSON files with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    valid_file = output_path / f"valid_dialogues_two_utter_{timestamp}.json"
    invalid_file = output_path / f"invalid_dialogues_two_utter_{timestamp}.json"

    with open(valid_file, 'w', encoding='utf-8') as f:
        json.dump(valid_results, f, ensure_ascii=False, indent=2)

    with open(invalid_file, 'w', encoding='utf-8') as f:
        json.dump(invalid_results, f, ensure_ascii=False, indent=2)

    print("\nGeneration Summary (Double Utterance):")
    print(f"Valid dialogues: {len(valid_results)}")
    print(f"Invalid dialogues: {len(invalid_results)}")
    print("\nResults saved to:")
    print(f"Valid: {valid_file}")
    print(f"Invalid: {invalid_file}")

    # Return the local list; the name `invalid_dialogues` does not exist in this
    # scope and only resolved when a stale notebook global happened to be set.
    return valid_results, invalid_results

# Extended list of topics.
# Every entry needs its own trailing comma: adjacent string literals without a
# comma are silently concatenated into a single (bogus) topic.
topics = [
    "library card registration",
    "dental appointment scheduling",
    "mobile phone plan subscription",
    "concert ticket reservation",
    "passport renewal application",
    "driving lesson booking",
    "moving service scheduling",
    "internet installation setup",
    "visa application consultation",
    "home cleaning service booking",
    "beauty salon appointment",
    "bank account opening",
]

# Generate and save the datasets.
# Runs generation, validation and sampling for every entry in `topics`; the
# function writes its JSON files itself and its two-element result is unpacked
# here for use in later cells.
valid_dialogues, invalid_dialogues = generate_and_validate_dialogues(topics)

INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"


KeyboardInterrupt: 

In [17]:
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.graph import Graph
import json
from pathlib import Path

# Initialize sampler
sampler = DialogueSampler()

# Get all JSON files in the directory. glob() yields entries in arbitrary
# order, so sort them to keep the printed output stable between runs.
dataset_dir = Path("generated_datasets_two_utter")
json_files = sorted(dataset_dir.glob("*.json"))

for json_path in json_files:
    print(f"\nProcessing file: {json_path.name}")
    print("=" * 80)

    # Load JSON file. The datasets are written as UTF-8 with ensure_ascii=False,
    # so read them back with the same encoding instead of the platform default.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Process each graph
    for entry in data:
        print(f"\nTopic: {entry['topic']}")
        graph = Graph(entry["graph"])

        # Sample different paths
        for i in range(3):
            dialogue = sampler.invoke(graph, 1, -1)[0]
            print(f"\nPath {i+1}:")
            for message in dialogue.messages:
                print(f"- {message}")
        print("-" * 50)


Processing file: invalid_dialogues_two_utter_20241127_221403.json

Topic: library card registration

Path 1:
- text='Welcome to the library. How can I assist you?' participant='assistant'
- text='I want to register for a library card' participant='user'
- text='Do you have a valid ID with you?' participant='assistant'
- text='I have my ID with me' participant='user'
- text='Are you a resident of the city?' participant='assistant'
- text='I am a city resident' participant='user'
- text="You're registered. Do you need anything else?" participant='assistant'
- text="No, that's it" participant='user'
- text='Thank you for registering. Have a great day!' participant='assistant'
- text='I need to register another card' participant='user'

Path 2:
- text='Hello! What can I help you with today?' participant='assistant'
- text="I'd like to sign up for a library card" participant='user'
- text='Do you have a valid ID with you?' participant='assistant'
- text='I have my ID with me' participant='

In [19]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import json
import logging

from langchain_core.language_models.chat_models import BaseChatModel

def are_triplets_valid_new(G: Graph, model: BaseChatModel) -> dict[str, object]:
    """
    Validates dialogue graph structure and logical transitions between nodes.

    Every edge is checked as a (source node, edge, target node) triplet: an
    edge that references an unknown node id is reported directly, every other
    edge is sent to the LLM, which decides whether the three messages form a
    logical sequence.

    Parameters:
        G (Graph): The dialogue graph to validate; its ``graph_dict`` must
            provide "nodes" (with "id" and "utterances") and "edges"
            (with "source", "target" and "utterances")
        model (BaseChatModel): The LLM model to use for validation

    Returns:
        dict: {'value': bool, 'description': str} - 'value' is True only if no
        problem was found; 'description' joins all problem descriptions, or is
        "All transitions are valid."

    Errors raised by the model call or by parsing its reply are not caught
    here and propagate to the caller.
    """
    # Define validation result model
    class TransitionValidationResult(BaseModel):
        isValid: bool = Field(description="Whether the transition is valid or not")
        description: str = Field(description="Explanation of why it's valid or invalid")

    # Create prompt template
    triplet_validate_prompt_template = """
    You are evaluating if dialog transitions make logical sense.
    
    Given this conversation graph in JSON:
    {json_graph}
    
    For the current transition:
    Source (Assistant): {source_utterances}
    User Response: {edge_utterances}
    Target (Assistant): {target_utterances}

    EVALUATE: Do these three messages form a logical sequence in the conversation?
    Consider:
    1. Does the assistant's first response naturally lead to the user's response?
    2. Does the user's response logically connect to the assistant's next message?
    3. Is the overall flow natural and coherent?

    Reply in JSON format:
    {{"isValid": true/false, "description": "Brief explanation of why it's valid or invalid"}}
    """

    triplet_validate_prompt = PromptTemplate(
        input_variables=["json_graph", "source_utterances", "edge_utterances", "target_utterances"],
        template=triplet_validate_prompt_template
    )

    parser = PydanticOutputParser(pydantic_object=TransitionValidationResult)

    # The chain does not depend on the edge being checked, so build it once
    # instead of once per loop iteration.
    triplet_check_chain = triplet_validate_prompt | model | parser

    # Convert graph to JSON string
    graph_json = json.dumps(G.graph_dict)

    # Create node mapping
    node_map = {node["id"]: node for node in G.graph_dict["nodes"]}

    overall_valid = True
    descriptions = []

    for edge in G.graph_dict["edges"]:
        source_id = edge["source"]
        target_id = edge["target"]

        # Structural check: both endpoints must exist before asking the LLM.
        if source_id not in node_map or target_id not in node_map:
            description = f"Invalid edge: missing node reference {source_id} -> {target_id}"
            overall_valid = False
            logging.info(description)
            descriptions.append(description)
            continue

        # Prepare input for validation
        input_data = {
            "json_graph": graph_json,
            "source_utterances": node_map[source_id]["utterances"],
            "edge_utterances": edge["utterances"],
            "target_utterances": node_map[target_id]["utterances"]
        }

        # Run validation
        response = triplet_check_chain.invoke(input_data)

        if not response.isValid:
            overall_valid = False
            description = f"Invalid transition: {response.description}"
            logging.info(description)
            descriptions.append(description)

    result = {
        "value": overall_valid,
        "description": " ".join(descriptions) if descriptions else "All transitions are valid."
    }

    return result

In [20]:
from chatsky_llm_autoconfig.algorithms.dialogue_generation import DialogueSampler
from chatsky_llm_autoconfig.graph import Graph
from langchain_openai import ChatOpenAI
from pathlib import Path
import json
from typing import Dict, Any, List
from datetime import datetime

def evaluate_dataset(file_path: Path, model: ChatOpenAI) -> Dict[str, Any]:
    """Evaluate a single dataset file and return metrics.

    Each entry of the JSON file (a list of dicts with "topic" and "graph") is
    re-validated with ``are_triplets_valid_new``.

    Parameters:
        file_path: Path of the dataset JSON file.
        model: Chat model passed through to the validator.

    Returns:
        Dict with 'file_name', 'total_graphs', 'valid_graphs',
        'invalid_graphs', 'validity_rate' (0 for an empty file),
        'results_by_topic' and 'invalid_transitions'.

    Note:
        'results_by_topic' is keyed by topic, so if a topic occurs more than
        once only its last result is kept there; the counters still cover
        every entry.
    """
    # The datasets are written as UTF-8 with ensure_ascii=False; read them back
    # with the same encoding rather than the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_graphs = len(data)
    valid_count = 0
    invalid_transitions = []
    results_by_topic = {}

    for entry in data:
        topic = entry['topic']
        graph = Graph(entry['graph'])

        # Run new validation
        validation_result = are_triplets_valid_new(graph, model)

        # Store results by topic
        results_by_topic[topic] = {
            'is_valid': validation_result['value'],
            'description': validation_result['description']
        }

        if validation_result['value']:
            valid_count += 1
        else:
            invalid_transitions.append({
                'topic': topic,
                'description': validation_result['description']
            })

    metrics = {
        'file_name': file_path.name,
        'total_graphs': total_graphs,
        'valid_graphs': valid_count,
        'invalid_graphs': total_graphs - valid_count,
        # Guard against division by zero for an empty dataset file.
        'validity_rate': valid_count / total_graphs if total_graphs > 0 else 0,
        'results_by_topic': results_by_topic,
        'invalid_transitions': invalid_transitions
    }

    return metrics

def compare_datasets(dataset_dir="generated_datasets", timestamp="20241127_215908"):
    """Re-validate a saved valid/invalid dataset pair and report the agreement.

    Both ``valid_dialogues_<timestamp>.json`` and
    ``invalid_dialogues_<timestamp>.json`` from ``dataset_dir`` are evaluated
    with ``evaluate_dataset``. The report is printed as a summary and written
    to ``validation_report_<now>.json`` in the same directory.

    Parameters:
        dataset_dir: Directory holding the dataset files (str or Path).
        timestamp: Timestamp suffix of the dataset pair to compare.

    Returns:
        dict: The report, with 'timestamp', 'valid_dataset_metrics',
        'invalid_dataset_metrics' and 'comparison'. In 'comparison',
        'agreement_rate_valid' is the validity rate measured on the valid file,
        'agreement_rate_invalid' is one minus the validity rate measured on the
        invalid file, and 'overall_agreement' is the unweighted mean of the two.
    """
    # This cell does not import `os` itself; import it here so the function
    # does not depend on a name leaked from an earlier cell.
    import os

    # Initialize model
    model = ChatOpenAI(
        model='gpt-4',
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
        temperature=0
    )

    # Get both dataset files
    dataset_dir = Path(dataset_dir)
    valid_file = dataset_dir / f"valid_dialogues_{timestamp}.json"
    invalid_file = dataset_dir / f"invalid_dialogues_{timestamp}.json"

    # Evaluate both datasets
    valid_metrics = evaluate_dataset(valid_file, model)
    invalid_metrics = evaluate_dataset(invalid_file, model)

    # Create comparison report
    agreement_valid = valid_metrics['validity_rate']
    agreement_invalid = 1 - invalid_metrics['validity_rate']
    timestamp_now = datetime.now().strftime("%Y%m%d_%H%M%S")
    report = {
        'timestamp': timestamp_now,
        'valid_dataset_metrics': valid_metrics,
        'invalid_dataset_metrics': invalid_metrics,
        'comparison': {
            'agreement_rate_valid': agreement_valid,
            'agreement_rate_invalid': agreement_invalid,
            'overall_agreement': (agreement_valid + agreement_invalid) / 2
        }
    }

    # Save report
    report_file = dataset_dir / f"validation_report_{timestamp_now}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    # Print summary
    print("\nValidation Summary:")
    print("=" * 50)
    print(f"Valid Dataset ({valid_file.name}):")
    print(f"- Total graphs: {valid_metrics['total_graphs']}")
    print(f"- Valid graphs: {valid_metrics['valid_graphs']}")
    print(f"- Validity rate: {valid_metrics['validity_rate']:.2%}")
    print(f"\nInvalid Dataset ({invalid_file.name}):")
    print(f"- Total graphs: {invalid_metrics['total_graphs']}")
    print(f"- Valid graphs: {invalid_metrics['valid_graphs']}")
    print(f"- Validity rate: {invalid_metrics['validity_rate']:.2%}")
    print(f"\nOverall Agreement: {report['comparison']['overall_agreement']:.2%}")
    print(f"\nDetailed report saved to: {report_file}")

    return report

# Run comparison
# The guard keeps the (LLM-backed, file-writing) comparison from running if
# this code is ever imported as a module; when the cell is executed directly
# the call runs and `report` holds the returned comparison report.
if __name__ == "__main__":
    report = compare_datasets()

INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://193.187.173.33:8002/api/providers/o

RateLimitError: Error code: 429 - {'error': {'code': '429', 'message': '[BricksLLM] too many requests', 'type': ''}}