In [1]:
import logging
import asyncio
from pathlib import Path
import time
import datetime

# Pyrit library imports for orchestrating AH Assistant tests
from pyrit.common import IN_MEMORY, initialize_pyrit
from pyrit.prompt_target import OpenAIChatTarget, HTTPTarget
from pyrit.score.evaluator import Evaluator
from pyrit.orchestrator import AHAssistantOrchestrator
from pyrit.orchestrator.multi_turn.red_teaming_orchestrator import RTOSystemPromptPaths
from pyrit.common.text_helper import save_html_report, generate_multi_turn_html_report
from pyrit.prompt_target.http_target.parsers.AHAssistantResponseParser import AHAssistantResponseParser

# Configure the root logger with a WARNING threshold so that DEBUG/INFO
# records are filtered out and the cell output stays readable.
logging.basicConfig(level=logging.WARNING)

# ------------------------------------------------------------------------------
# Setup: Define Member ID, URL, and HTTP Request Template
# ------------------------------------------------------------------------------

# Connection settings. They are intentionally blank here and must be filled in
# before running; avoid committing a real token.
member_id = ""
url = ""
token = ""

# Raw HTTP POST request template. Each line is spelled out explicitly so the
# leading newline, the four-space indentation, and the blank line separating
# headers from the JSON body are all visible. "{PROMPT}" is the literal
# placeholder that is later substituted with the prompt text.
start_chat_request_raw = (
    "\n"
    f"    POST {url}\n"
    "    Content-Type: application/json\n"
    f"    X-Authorization: {token}\n"
    "    Accept: text/event-stream\n"
    "    x-test-mode: true\n"
    "\n"
    "    {\n"
    '        "data": "{PROMPT}"\n'
    "    }\n"
)

# ------------------------------------------------------------------------------
# Configuration: Strategy, Scorer, and Report Paths
# ------------------------------------------------------------------------------

# File-system locations used by the run:
#   scorer_path - YAML configuration loaded by the evaluator (scorer)
#   report_path - directory that receives the generated HTML report
scorer_path = "assets/AH_Evaluators/chat_evaluator.yaml"
report_path = "tests/E2E/reports/CustomerAutomation"

# System prompt strategy for AH Assistant testing (customer variant),
# taken from the orchestrator's predefined prompt paths.
strategy_path = RTOSystemPromptPaths.AH_ASSISTANT_CUSTOMER.value

# ------------------------------------------------------------------------------
# Define Objectives for Customer Automation Testing
# ------------------------------------------------------------------------------

# Each entry is one customer objective; main() runs one conversation per entry.
objectives = [
    # General recipe requests
    (
        "As a home cook, I want a quick and easy pasta recipe for a busy weeknight. "
        "I live gluten free and I have a nut allergy. I like meat"
    ),
]

# ------------------------------------------------------------------------------
# Main Asynchronous Routine for Running Tests and Generating Report
# ------------------------------------------------------------------------------

async def _run_objective_async(objective: str) -> tuple:
    """Run one multi-turn conversation for a single objective.

    Args:
        objective: The customer objective text. It is passed to the evaluator
            as the ``objective`` variable and to the orchestrator as the
            attack objective.

    Returns:
        A ``(report, elapsed_seconds)`` tuple: the conversation report taken
        from the attack result, and the duration of the
        ``run_attack_async`` call only (setup and report retrieval are not
        timed).
    """
    # Re-initialize Pyrit with an in-memory database for every objective so
    # each conversation starts from a clean environment.
    initialize_pyrit(memory_db_type=IN_MEMORY)

    # Chat target that drives the simulated (adversarial) side of the chat.
    chat_target = OpenAIChatTarget()

    # HTTP target that sends each prompt to the assistant service; the raw
    # response is handed to AHAssistantResponseParser.parse_response.
    http_prompt_target = HTTPTarget(
        http_request=start_chat_request_raw,
        prompt_regex_string="{PROMPT}",
        timeout=60.0,
        callback_function=AHAssistantResponseParser.parse_response,
    )

    # Evaluator (scorer) configured from the YAML file, scoring on a float
    # scale. It gets its own chat target, separate from the adversarial one.
    chat_scorer = Evaluator(
        chat_target=OpenAIChatTarget(),
        evaluator_yaml_path=Path(scorer_path),
        additional_evaluator_variables={"objective": objective},
        scorer_type="float_scale",
    )

    # Orchestrator wiring:
    # - adversarial_chat: chat target generating the simulated user's turns
    # - adversarial_chat_system_prompt_path: system prompt strategy for that
    #   chat (presumably the customer persona - confirm against the strategy
    #   file)
    # - objective_target: HTTP target for the assistant under test
    # - objective_scorer: evaluator assessing the responses
    # - evaluate_chat / max_turns / use_score_as_feedback: enable chat
    #   evaluation, cap the conversation at 10 turns, and feed the score back
    #   into prompt generation
    orchestrator = AHAssistantOrchestrator(
        adversarial_chat=chat_target,
        adversarial_chat_system_prompt_path=strategy_path,
        objective_target=http_prompt_target,
        objective_scorer=chat_scorer,
        verbose=True,
        evaluate_chat=True,
        max_turns=10,
        use_score_as_feedback=True,
    )

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments during a long run.
    started = time.perf_counter()
    result = await orchestrator.run_attack_async(objective=objective)
    elapsed = time.perf_counter() - started

    report = await result.get_conversation_report_async()
    return report, elapsed


async def main() -> None:
    """Run every objective and write one HTML report covering all of them.

    For each entry in ``objectives`` a fresh conversation is executed and its
    report collected. The ``execution_time`` passed to the report is the sum
    of the attack durations of all objectives. Previously only the last
    objective's duration was reported, and an empty ``objectives`` list
    failed with ``UnboundLocalError`` because the variable was never
    assigned.

    When ``objectives`` is empty, a warning is logged and no report is
    written.
    """
    if not objectives:
        logging.getLogger(__name__).warning(
            "No objectives configured; skipping run and report generation."
        )
        return

    # Conversation reports, one per objective, in the order they were run.
    results = []
    # Accumulated attack duration across all objectives, in seconds.
    execution_time = 0.0

    for objective in objectives:
        report, elapsed = await _run_objective_async(objective)
        results.append(report)
        execution_time += elapsed

    # --------------------------------------------------------------------------
    # Report Generation: Save the Conversation Results as an HTML Report
    # --------------------------------------------------------------------------

    # Resolve the absolute report directory and create it if it does not exist.
    report_dir = Path(report_path).resolve()
    report_dir.mkdir(parents=True, exist_ok=True)

    description = (
        "This report provides an overview of multi-turn customer simulations, "
        "highlighting how the assistant engages in realistic, task-focused "
        "conversations. Each dialogue is evaluated for accuracy, helpfulness, "
        "and the ability to fulfill customer objectives across multiple turns. "
        "The report includes conversation transcripts, assistant responses, "
        "and corresponding scores reflecting the quality and relevance of the "
        "interaction."
    )

    # Generate and save the HTML report from the accumulated results.
    save_html_report(
        results=results,
        is_chat_evaluation=True,
        report_generator=generate_multi_turn_html_report,
        directory=str(report_dir),
        description=description,
        execution_time=execution_time,
    )

# ------------------------------------------------------------------------------
# Entry Point: Run the Main Asynchronous Function
# ------------------------------------------------------------------------------


# NOTE: top-level `await` is only valid where the host already runs an event
# loop and supports it (e.g. a Jupyter/IPython cell). When this code is run as
# a plain Python script, this line is a SyntaxError; use
# `asyncio.run(main())` there instead.
await main()


[1m[92m
Starting new chat...

[1m[94mUser:[0m Ik zoek een snel en makkelijk pasta recept voor een drukke doordeweekse avond. Heb je suggesties?

[1m[92mAssistant:[0m {'text_message': 'Ik heb verschillende pastarecepten voor je gevonden. Pasta is altijd een goede keuze, of je nu zin hebt in iets romigs, pittigs of lichts. Geniet van de variatie en de heerlijke smaken die deze recepten te bieden hebben. Veel plezier met deze recepten!', 'data_message': '{"type":"RECIPE_LANE","data":{"title":null,"items":[{"id":1196159,"title":"Volkorenfusilli met kalkoen in tomatensaus","time":{"cook":15,"oven":0,"wait":0}', 'suggestion_pills': {'chips': ['Snelle pasta met tomatensaus', 'Makkelijke pasta carbonara', 'Pasta met pesto en kip', 'Hoe maak je verse pasta?']}}

Extracted Thread ID:  thread_jX7YnZjTXAAZwEWFtB8ySjxx
[1m[91mScore: [91m0.4 : [22mLast Turn Score:
The assistant provided a general response mentioning different types of pasta dishes but did not address the user's specific 

CancelledError: 