<a href="https://colab.research.google.com/github/A-Vamshi/deepeval/blob/main/examples/dag-examples/conversational_dag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install deepeval

Collecting deepeval
  Downloading deepeval-3.4.7-py3-none-any.whl.metadata (18 kB)
Collecting anthropic (from deepeval)
  Downloading anthropic-0.66.0-py3-none-any.whl.metadata (27 kB)
Collecting ollama (from deepeval)
  Downloading ollama-0.5.3-py3-none-any.whl.metadata (4.3 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc<2.0.0,>=1.24.0 (from deepeval)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.36.0-py3-none-any.whl.metadata (2.4 kB)
Collecting portalocker (from deepeval)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting posthog<7.0.0,>=6.3.0 (from deepeval)
  Downloading posthog-6.7.4-py3-none-any.whl.metadata (6.0 kB)
Collecting pyfiglet (from deepeval)
  Downloading pyfiglet-1.0.4-py3-none-any.whl.metadata (7.4 kB)
Collecting pytest-asyncio (from deepeval)
  Downloading pytest_asyncio-1.1.0-py3-none-any.whl.metadata (4.1 kB)
Collecting pytest-repeat (from deepeval)
  Downloading pytest_repeat-0.9.4-py3-none-any.whl.metadata (4.9 kB)
Col

In [None]:
import getpass
import os

# Keep a key that is already exported in the environment; otherwise ask for it
# interactively so the secret is never written into the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Want to use other evaluation models? [Click here](https://deepeval.com/integrations/models/openai) to see all supported models and their usage instructions.

In [None]:
from deepeval.test_case import ConversationalTestCase, Turn

# A scripted weather chat: three user messages, each followed by an assistant
# reply. The (role, content) pairs below are expanded into Turn objects in order.
test_case = ConversationalTestCase(
    scenario="User asks about weather",
    expected_outcome="Assistant provides weather info in a playful tone.",
    turns=[
        Turn(role=speaker, content=message)
        for speaker, message in (
            ("user", "what's the weather like today?"),
            ("assistant", "Where do you live bro? T~T"),
            ("user", "Just tell me the weather in Paris"),
            ("assistant", "The weather in Paris today is sunny and 24°C."),
            ("user", "Should I take an umbrella?"),
            ("assistant", "You trying to be stylish? I don't recommend it."),
        )
    ],
)

In [None]:
from deepeval.metrics.dag import DeepAcyclicGraph
from deepeval.metrics.conversational_dag import (
    ConversationalTaskNode,
    ConversationalBinaryJudgementNode,
    ConversationalNonBinaryJudgementNode,
    ConversationalVerdictNode,
)
from deepeval.test_case import TurnParams

# The graph is declared leaf-first because every parent node receives its
# child node(s) as a constructor argument.

# Tone judgement: classify the assistant's behaviour into one of three verdicts.
# "Playful" carries the highest of the three scores, in line with the playful
# tone the test case's expected_outcome asks for.
non_binary_node = ConversationalNonBinaryJudgementNode(
    criteria="How was the assistant's behaviour towards user?",
    evaluation_params=[TurnParams.ROLE, TurnParams.CONTENT],
    children=[
        ConversationalVerdictNode(verdict="Rude", score=0),
        ConversationalVerdictNode(verdict="Neutral", score=5),
        ConversationalVerdictNode(verdict="Playful", score=10),
    ],
)

# Yes/no gate: a False verdict carries score 0; a True verdict has no score of
# its own and points at the tone judgement above via `child`.
binary_node = ConversationalBinaryJudgementNode(
    criteria="Do the assistant's replies satisfy user's questions?",
    children=[
        ConversationalVerdictNode(verdict=False, score=0),
        ConversationalVerdictNode(verdict=True, child=non_binary_node),
    ],
)

# Root task: request a summary of the conversation (labelled "Summary"), with
# the yes/no gate as its only child.
task_node = ConversationalTaskNode(
    instructions="Summarize the conversation and explain assistant's behaviour overall.",
    output_label="Summary",
    evaluation_params=[TurnParams.ROLE, TurnParams.CONTENT],
    children=[binary_node],
)

# Wrap the single root node into the graph object consumed by the metric.
dag = DeepAcyclicGraph(root_nodes=[task_node])

In [None]:
from deepeval.metrics import ConversationalDAGMetric

# Metric named "Playful Chatbot", backed by the decision graph built above.
playful_chatbot_metric = ConversationalDAGMetric(dag=dag, name="Playful Chatbot")

In [None]:
from deepeval import evaluate

# Run the single conversational test case against the single DAG metric.
evaluate(
    test_cases=[test_case],
    metrics=[playful_chatbot_metric],
)