In [1]:
import os
import json
from typing import Dict, List, Any
from airops.defaults import make_chain, make_agent
from pydantic import BaseModel, Field
from tavily import TavilyClient


# Shared Tavily client used by the search/extract helpers below.
# No credentials are passed here; presumably they come from the environment
# -- TODO confirm.
TAVILY = TavilyClient()

# Raw text of the workflow specification document.
with open('./context/workflow_spec.md', 'r') as f:
    WORKFLOW_SPEC = f.read()

# Catalogue of integration actions, ordered by their 'integration' value.
with open('./context/integration_actions.json', 'r') as f:
    INTEGRATION_ACTIONS = sorted(
        json.load(f),
        key=lambda entry: entry['integration'],
    )

# Strip every input field whose interface is "integration" from each action's
# schema, keeping all other fields in their original order.
for action in INTEGRATION_ACTIONS:
    action["inputs_schema"] = [
        schema_field
        for schema_field in action["inputs_schema"]
        if schema_field["interface"] != "integration"
    ]

# Parse every file in the sample-workflow directory as JSON and collect the
# results. os.listdir() returns names in arbitrary order, so the names are
# sorted to keep the resulting list order reproducible across runs/machines.
SAMPLE_WORKFLOW_CONTEXTS = []
for file_name in sorted(os.listdir('./context/sample_workflows/')):
    fp = os.path.join('./context/sample_workflows/', file_name)
    # Use a separate name for the open handle so the loop variable holding
    # the file name is not rebound to a file object.
    with open(fp, 'r') as workflow_file:
        SAMPLE_WORKFLOW_CONTEXTS.append(json.load(workflow_file))

I do not understand what all of these integrations do or which fields are required, and I don't need to: the agent/LLM is going to take care of that. All you need to do is generate relatively straightforward workflows, then ask an LLM to generate synthetic user requests for the integrations that are applicable to each workflow. Each test case should include the desired integration selection for the input (this is used for the evaluation).

Question: how do I evaluate that the payload is correct?

3 test steps:
1. check that the chosen integration is correct (simple pass or fail)
2. check that the payload matches the input schema for the integration (simple pass or fail)
3. check that the choices for the payload values make sense given the user's request (GEval)

In [2]:
class TestCase(BaseModel):
    # One synthetic evaluation example: a user request paired with the
    # integration action payload that request is expected to produce.
    # Documented with comments rather than a docstring so the model's
    # generated schema is left exactly as it was.
    # NOTE(review): IntegrationActionPayload is not defined in the visible
    # cells -- confirm it is in scope before this cell runs.
    user_request: str = Field(
        ...,
        description="the synthetic user request that should trigger the integration action",
    )
    expected_integration_action_payload: IntegrationActionPayload = Field(
        ...,
        # Whitespace is spelled out explicitly so the description text is
        # reproduced exactly, including its leading and trailing padding.
        description=(
            "\n"
            "        the desired integration action and properly-formatted payload.\n"
            "    "
        ),
    )

In [15]:
def tavily_search(query: str) -> List[Dict]:
    """
    Run an internet search for a plain-text query using the shared Tavily client.

    Returns whatever the search response holds under its 'results' key.
    """
    search_response = TAVILY.search(query)
    return search_response['results']

def tavily_extract(url: str) -> str:
    """
    Given the supplied URL, extracts all text

    Returns the 'raw_content' of the first entry under the extract response's
    'results' key. If 'results' is an empty list (presumably when the
    extraction failed -- confirm against the Tavily response format), the
    lookup raises IndexError.
    """
    extraction = TAVILY.extract(url)
    # Hand the text back to the caller; without this return the function
    # would always yield None despite its `-> str` annotation.
    return extraction['results'][0]['raw_content']

In [None]:
def create_test_cases_from_context(context: Dict, num_requests: int = 5):
    """
    Create test cases from a sample workflow context (not yet implemented).

    Args:
        context: a workflow context dictionary.
        num_requests: how many synthetic user requests to generate;
            defaults to 5.

    The body is currently a placeholder, so the function returns None.
    """
    # use an agent to create test cases - the agent will research what each action does to understand it
    ...