In [None]:
import os
import json
from airops.defaults import make_chain, make_agent
from airops import models, prompts
from airops.tools import get_action_details, tavily_search, tavily_extract
from pydantic import BaseModel, Field
from typing import Dict, List, Any
from langfuse.decorators import observe, langfuse_context
from langfuse.callback import CallbackHandler
from tqdm import tqdm
import joblib


# Workflow specification, kept as the raw text of the markdown file.
with open('./context/workflow_spec.md', 'r', encoding='utf-8') as f:
    WORKFLOW_SPEC = f.read()

# Integration action catalogue, ordered by integration name.
with open('./context/integration_actions.json', 'r', encoding='utf-8') as f:
    INTEGRATION_ACTIONS = sorted(json.load(f), key=lambda x: x['integration'])

# Drop every input field whose interface is "integration" from each action's
# schema. This runs after the file is closed; it does not need the handle.
for action in INTEGRATION_ACTIONS:
    action["inputs_schema"] = [
        field for field in action["inputs_schema"] if field["interface"] != "integration"
    ]

# Lightweight (integration, action) pairs used to drive test case generation.
AVAILABLE_INTEGRATION_ACTIONS = [
    {'integration': ia['integration'], 'action': ia['action']} for ia in INTEGRATION_ACTIONS
]

# Sample workflow contexts, one parsed JSON document per file in the directory.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the list order (and any index derived from it) reproducible across runs.
SAMPLE_WORKFLOWS_DIR = './context/sample_workflows/'
SAMPLE_WORKFLOW_CONTEXTS = []
for file_name in sorted(os.listdir(SAMPLE_WORKFLOWS_DIR)):
    fp = os.path.join(SAMPLE_WORKFLOWS_DIR, file_name)
    # Distinct names for the directory entry and the open handle; the original
    # reused `file` for both, shadowing the loop variable.
    with open(fp, 'r', encoding='utf-8') as sample_file:
        SAMPLE_WORKFLOW_CONTEXTS.append(json.load(sample_file))

In [None]:
@observe()
def create_test_case(workflow_context, integration, action):
    """Run the test-case-creation agent for one workflow context and action.

    A fresh agent is built on every call with the action-details and Tavily
    search/extract tools, the CREATE_TEST_CASE_PROMPT template and
    models.TestCase as its output model. The current Langfuse LangChain
    handler is passed as a callback to the invocation.

    Args:
        workflow_context: Sample workflow context handed to the agent as-is.
        integration: Integration identifier handed to the agent as-is.
        action: Action identifier handed to the agent as-is.

    Returns:
        Whatever the agent's ``invoke`` returns (presumably a models.TestCase
        instance, given the configured output model -- TODO confirm).
    """
    agent_inputs = {
        "workflow_context": workflow_context,
        "integration": integration,
        "action": action,
    }
    agent = make_agent(
        tools=[get_action_details, tavily_search, tavily_extract],
        prompt_template=prompts.CREATE_TEST_CASE_PROMPT,
        output_model=models.TestCase,
    )
    run_config = {"callbacks": [langfuse_context.get_current_langchain_handler()]}
    return agent.invoke(agent_inputs, config=run_config)

In [None]:
def create_test_cases(target_fp):
    """Build (or load cached) test cases for every action / sample context pair.

    If ``target_fp`` already exists, its contents are loaded and returned
    without generating anything. Otherwise ``create_test_case`` is called for
    each entry of AVAILABLE_INTEGRATION_ACTIONS combined with each entry of
    SAMPLE_WORKFLOW_CONTEXTS, and the collected results are dumped to
    ``target_fp`` before being returned.

    Args:
        target_fp: Path of the joblib file used as the cache.

    Returns:
        A list of dicts, each merging the action's keys, a ``'context'`` key
        holding the sample context, and the keys of ``test_case.dict()``.

    Note:
        Failures for individual pairs are reported and skipped, so the cached
        file may hold only a subset of pairs; delete it to regenerate.
    """
    if os.path.exists(target_fp):
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code -- only point this at cache files you produced.
        return joblib.load(target_fp)

    # Create the output directory up front so the dump at the end cannot fail
    # on a missing directory after the whole generation loop has already run.
    target_dir = os.path.dirname(target_fp)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    test_cases = []
    for action in tqdm(AVAILABLE_INTEGRATION_ACTIONS):
        for idx, sample in enumerate(SAMPLE_WORKFLOW_CONTEXTS):
            try:
                test_case = create_test_case(sample, action['integration'], action['action'])
                test_cases.append({
                    **action,
                    'context': sample,
                    **test_case.dict()
                })
            except Exception as e:
                # Best-effort: keep going, but surface the cause instead of
                # silently discarding the exception.
                print(f'unable to create test case for {action["integration"]}, {action["action"]}, {idx}: {e!r}')
    joblib.dump(test_cases, target_fp)
    return test_cases

In [None]:
# Generate the evaluation test cases, or reuse the file at this path if it
# already exists.
test_cases = create_test_cases(target_fp='./eval/test_cases.joblib')

In [None]:
# Show the first test case as this cell's output.
test_cases[0]