In [488]:
!pip install pydantic anytree networkx matplotlib inflect openapi-core jsonref prance datamodel-code-generator


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [489]:
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

In [490]:
import json

def parse_spec(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)

In [491]:
# filepath = "../openapi_specs/resolved/stripe-08-10-24.json"
filepath = "../openapi_specs/resolved/sgp-09-21-24.json"

spec = parse_spec(filepath)

In [579]:
tgt = spec["paths"]["/v4/evaluations"]["post"]

tgt

{'tags': ['Evaluations'],
 'summary': 'Create Evaluation',
 'description': '### Description\nCreates a evaluation\n\n### Details\nThis API can be used to create a evaluation. To use this API, review the request schema and pass in all fields that are required to create a evaluation.',
 'operationId': 'POST-V4-/evaluations',
 'parameters': [{'required': False,
   'schema': {'type': 'string', 'title': 'X-Selected-Account-Id'},
   'name': 'x-selected-account-id',
   'in': 'header'}],
 'requestBody': {'content': {'application/json': {'schema': {'oneOf': [{'properties': {'name': {'type': 'string',
         'title': 'Name'},
        'description': {'type': 'string', 'title': 'Description'},
        'application_spec_id': {'type': 'string',
         'title': 'Application Spec Id'},
        'application_variant_id': {'type': 'string',
         'title': 'Application Variant Id'},
        'tags': {'type': 'object', 'title': 'Tags'},
        'evaluation_config': {'type': 'object', 'title': 'Evalua

In [517]:
base_url = spec['components']['securitySchemes']
base_url

{'APIKeyHeader': {'type': 'apiKey', 'in': 'header', 'name': 'x-api-key'}}

In [587]:
# !datamodel-codegen  --input "../openapi_specs/resolved/sgp-09-21-24.json" --input-file-type openapi --output ./models/

In [493]:
resource_blacklist = ['delete', 'query', 'cancel', 'batch', 'verify', 'process', 'validate', 'approve', 'publish', 'history', 'approve-batch', 'batch-delete', 'process', 'claim-task']

In [494]:
import re
from inflect import engine


def standardize(name: str) -> str:
    # ignored words for singularization
    invariant_words = {'synthesis', 'analysis', 'basis', 'thesis', 'process'}

    # Convert to lowercase, kebab case
    name = name.lower().replace('_', '-')
    name = name.lower().replace(' ', '-')
    name = re.sub(r'[^a-z0-9-]', '', name)
    parts = name.split('-')

    # singularize
    p = engine()
    parts = [part if part in invariant_words else (p.singular_noun(part) or part) for part in parts]

    standardized_name = '-'.join(parts)
    standardized_name = re.sub(r'-ids?$', '', standardized_name) # strip id
    return standardized_name

In [495]:
from collections import defaultdict
from typing import Dict, Any

def extract_resources(openapi: Dict[str, Any]):
    resources: List[str] = []

    for path, methods in openapi['paths'].items():
        if not set(methods.keys()) & {'get', 'post'}:
            continue

        resource = find_resource(path)
        if not resource or resource in resource_blacklist:
            continue
        
        resources.append(resource)

    return resources

def find_resource(path: str) -> str:
    elements = path.split('/')
    for element in reversed(elements):
        if not (element.startswith('{') and element.endswith('}')):
            return standardize(element)
    return ''

In [496]:
# extraction = extract_resources(spec)
resources = extract_resources(spec)

resources

['knowledge-base',
 'knowledge-base',
 'async-job',
 'chunk',
 'upload-file',
 'upload',
 'upload',
 'artifact',
 'artifact',
 'rank',
 'synthesis',
 'execute',
 'completion',
 'chat-completion',
 'execute',
 'embedding',
 'reranking',
 'completion',
 'chat-completion',
 'deployment',
 'usage-statistic',
 'model-deployment',
 'usage-statistic',
 'model',
 'usage-statistic',
 'user-info',
 'user',
 'account',
 'question-set',
 'contributor-metric',
 'contributor-metric',
 'evaluation-metric',
 'hybrid-eval-metric',
 'evaluation-config',
 'evaluation-config',
 'evaluation-dataset',
 'evaluation-dataset',
 'evaluation-dataset-version',
 'evaluation-dataset-version',
 'test-case',
 'test-case',
 'studio-project',
 'studio-project',
 'application-spec',
 'application-spec',
 'evaluation',
 'evaluation',
 'test-case-result',
 'test-case-result',
 'question',
 'question',
 'question-set',
 'knowledge-base-datum-source',
 'knowledge-base-datum-source',
 'upload-schedule',
 'upload-schedule',
 

In [497]:
print(len(resources))

94


In [553]:
# - path
# - [method -> description]
# - resources

def organize_routes(openapi, resources):
    route_list = []
    for path, methods in openapi['paths'].items():
        defn = {"path": path, "methods": {}, "resources": []}
        
        for m, details in methods.items():
            defn["methods"][m] = {"description": details.get('description', 'No description available')}
                
        path_segments = [standardize(seg) for seg in path.split('/') if seg]
        resource_stack = [seg for seg in path_segments if seg in resources]
        defn["resources"] = resource_stack
        
        route_list.append(defn)
        
    return route_list
        

In [554]:
rt = organize_routes(spec, resources)

rt

[{'path': '/v4/knowledge-bases',
  'methods': {'get': {'description': '### Description\nLists all knowledge bases owned by the authorized user.\n\n### Details\nThis API can be used to list all knowledge bases that have been created by the user.         This API will return the details of all knowledge bases including their IDs, names, the         embedding models they use, any metadata associated with the knowledge bases, and the         timestamps for their creation, last-updated time.\n\n#### Backwards Compatibility\nV2 and V1 Knowledge Bases are entirely separate and not backwards compatible. Users who         have existing V1 knowledge bases will need to migrate their data to V2 knowledge bases.'},
   'post': {'description': '### Description\nCreates an EGP knowledge base.\n\n### Details\nA knowledge base is a storage device for all data that needs to be accessible to EGP models.         Users can upload data from a variety of data sources into a knowledge base, and then query the 

In [562]:
import textwrap


def format_route_list(objects):
    output = []
    for obj in objects:
        path = obj['path']
        methods = obj.get('methods', {})
        
        # Format methods
        method_str = ' '.join(f"[{m.upper()}]" for m in methods.keys())
        
        # Combine methods and path
        route_line = f"{method_str} {path}"
        output.append(route_line)
        
        # Add descriptions
        for method, details in methods.items():
            description = details.get('description', 'No description available')
            
            # Wrap the description
            wrapped_description = textwrap.fill(
                description,
                width=80,  # Subtract 4 for the indentation
                initial_indent="    ",
                subsequent_indent="    "
            )
            
            output.append(f"    {method}:")
            output.append(wrapped_description)
        
        # Add a blank line between path entries for readability
        output.append("")
    
    return "\n".join(output).strip()

print(format_route_list(rt))

[GET] [POST] /v4/knowledge-bases
    get:
    ### Description Lists all knowledge bases owned by the authorized user.  ###
    Details This API can be used to list all knowledge bases that have been
    created by the user.         This API will return the details of all
    knowledge bases including their IDs, names, the         embedding models
    they use, any metadata associated with the knowledge bases, and the
    timestamps for their creation, last-updated time.  #### Backwards
    Compatibility V2 and V1 Knowledge Bases are entirely separate and not
    backwards compatible. Users who         have existing V1 knowledge bases
    will need to migrate their data to V2 knowledge bases.
    post:
    ### Description Creates an EGP knowledge base.  ### Details A knowledge base
    is a storage device for all data that needs to be accessible to EGP models.
    Users can upload data from a variety of data sources into a knowledge base,
    and then query the         knowledge base fo

In [498]:
from typing import List, Tuple


# GOAL: create resource dependency tree
# Thesis: to create a resource you need to do POST, dependents are ids and other resource names

def organize_resources(openapi):
    edges = [] # list(parent, child)
    
    def extract_param_names(obj) -> List[str]:
        param_names = []
        
        def recursive_extract(current_obj):
            if isinstance(current_obj, dict):
                if 'properties' in current_obj:
                    param_names.extend(current_obj['properties'].keys())
                else:
                    for value in current_obj.values():
                        recursive_extract(value)
            elif isinstance(current_obj, list):
                for item in current_obj:
                    recursive_extract(item)
        
        recursive_extract(obj)
        return list(set(param_names))

    for path, methods in openapi['paths'].items():
        # not a _real_ resource
        if 'post' not in methods.keys():
            continue
        
        path_segments = [standardize(seg) for seg in path.split('/') if seg]
        resource_stack = [seg for seg in path_segments if seg in resources]

        if not resource_stack:
            continue
            
        parent = resource_stack[0]
        if len(resource_stack) > 1:
            for r in resource_stack[1:]:
                edges.append((r, parent))
            
        # edges from request shape
        def is_in_main_list(string, main_list):
            return string in main_list
        
        def filter_strings(main_list, filter_list):
            conditions = [
                is_in_main_list,
            ]
            
            return [
                string for string in filter_list 
                if any(condition(string, main_list) for condition in conditions)
            ]
        
        params = []
        if 'requestBody' in methods['post']:
            params.extend(extract_param_names(methods['post']['requestBody']))
        if 'parameters' in methods['post']:
            params.extend(extract_param_names(methods['post']['parameters']))
        if not params:
            params = extract_param_names(methods['post'])
        
        params = [standardize(p) for p in params]
        for r in filter_strings(params, resources):
            edges.append((parent, r))
    
    def clean_edges(inp_e: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        return [(e1, e2) for e1, e2 in inp_e if e1 and e2 and e1 != e2]
    
    return clean_edges(edges)

In [499]:
edgs = organize_resources(spec)
edgs = list(set(edgs))

edgs

[('evaluation', 'evaluation-dataset-version'),
 ('model', 'account'),
 ('application-test-case-output', 'application-variant'),
 ('model', 'model-group'),
 ('theme', 'account'),
 ('evaluation-config', 'question-set'),
 ('application-variant-report', 'account'),
 ('install-async', 'account'),
 ('model-group', 'model-template'),
 ('application-variant', 'account'),
 ('application-variant-report', 'evaluation-dataset'),
 ('install', 'account'),
 ('evaluation-dataset-version', 'evaluation-dataset'),
 ('question-set', 'question'),
 ('deployment', 'model'),
 ('studio-project', 'account'),
 ('execute', 'message'),
 ('upload-schedule', 'knowledge-base'),
 ('test-case', 'evaluation-dataset'),
 ('generation-job', 'evaluation-dataset'),
 ('application-spec', 'account'),
 ('evaluation', 'account'),
 ('completion', 'model'),
 ('chat-completion', 'model'),
 ('evaluation', 'evaluation-dataset'),
 ('application-spec', 'theme'),
 ('fine-tuning-job', 'account'),
 ('thread', 'application-variant'),
 ('au

In [500]:
len(edgs)

74

In [501]:
from networkx import DiGraph
import networkx as nx


def build_dependency_tree(edges: []) -> DiGraph:
    graph = nx.DiGraph()
    graph.add_edges_from(edges)

    return graph

In [502]:
g = build_dependency_tree(edgs)

In [533]:
from io import StringIO
from anytree import Node, RenderTree
from collections import deque

def resource_subtree(graph: nx.DiGraph, root_node: str) -> str:
    if root_node not in graph.nodes():
        return f"Node '{root_node}' not found in the graph."

    node_dict = {}
    visited = set()

    def create_tree(nx_node):
        queue = deque([(nx_node, None)])
        while queue:
            current_node, parent = queue.popleft()
            
            if current_node in visited:
                continue
            visited.add(current_node)
            
            node_name = str(current_node).lstrip('/')
            
            if current_node not in node_dict:
                node_dict[current_node] = Node(node_name, parent=parent)
            elif parent:
                node_dict[current_node] = Node(f"{node_name}_dup", parent=parent)
            
            for child in graph.successors(current_node):
                if child not in visited:
                    queue.append((child, node_dict[current_node]))

    create_tree(root_node)

    output = StringIO()
    output.write(f"Tree rooted at {root_node}:\n")
    for pre, _, node in RenderTree(node_dict[root_node]):
        output.write(f"{pre}{node.name}\n")

    return output.getvalue()

In [538]:
print(resource_subtree(g, 'application-variant'))

Tree rooted at application-variant:
application-variant
├── account
└── application-spec
    └── theme



In [548]:
from io import StringIO
from anytree import Node, RenderTree
import networkx as nx

def resource_subtree(graph: nx.DiGraph, root_node: str) -> str:
    if root_node not in graph.nodes():
        return f"Node '{root_node}' not found in the graph."

    node_dict = {}
    output = StringIO()
    output.write(f"Tree rooted at {root_node}:\n")

    def topological_sort_dfs(node):
        WHITE, GRAY, BLACK = 0, 1, 2
        colors = {n: WHITE for n in graph.nodes()}
        sorted_nodes = []
        has_cycle = False

        def dfs(current):
            nonlocal has_cycle
            if colors[current] == BLACK:
                return
            if colors[current] == GRAY:
                has_cycle = True
                return

            colors[current] = GRAY

            for neighbor in graph.successors(current):
                dfs(neighbor)

            colors[current] = BLACK
            sorted_nodes.append(current)

        dfs(node)
        return sorted_nodes[::-1], has_cycle

    sorted_nodes, has_cycle = topological_sort_dfs(root_node)

    if has_cycle:
        output.write("Warning: Cycle detected in the graph\n")

    def create_tree(nodes):
        for node in nodes:
            node_name = str(node).lstrip('/')
            if node not in node_dict:
                parent = next((node_dict[parent] for parent in graph.predecessors(node) 
                               if parent in node_dict), None)
                node_dict[node] = Node(node_name, parent=parent)
            else:
                parent = next((node_dict[parent] for parent in graph.predecessors(node) 
                               if parent in node_dict), None)
                if parent and parent is not node_dict[node].parent:
                    Node(f"{node_name}_dup", parent=parent)

    create_tree(sorted_nodes)

    for pre, _, node in RenderTree(node_dict[root_node]):
        output.write(f"{pre}{node.name}\n")

    return output.getvalue()

In [549]:
print(resource_subtree(g, 'evaluation'))

Tree rooted at evaluation:
evaluation
├── application-variant
│   └── application-spec
│       └── theme
│           └── account
├── test-case
├── evaluation-config
│   ├── studio-project
│   └── question-set
│       └── question
└── evaluation-dataset-version
    └── evaluation-dataset
        ├── knowledge-base
        │   ├── chunk
        │   ├── knowledge-base-datum-source
        │   ├── upload
        │   └── artifact
        └── autogenerated-draft-test-case



In [550]:
import networkx as nx
from typing import List, Tuple

def find_cycles(graph: nx.DiGraph) -> List[Tuple[str, ...]]:
    def dfs_cycles(node: str, path: List[str], visited: set) -> List[Tuple[str, ...]]:
        if node in visited:
            start = path.index(node)
            return [tuple(path[start:])]
        
        visited.add(node)
        path.append(node)
        cycles = []
        
        for neighbor in graph.successors(node):
            if neighbor in path or neighbor not in visited:
                cycles.extend(dfs_cycles(neighbor, path, visited))
        
        path.pop()
        visited.remove(node)
        return cycles

    all_cycles = []
    visited = set()

    for node in graph.nodes():
        if node not in visited:
            cycles = dfs_cycles(node, [], visited)
            all_cycles.extend(cycles)

    # Remove duplicates and sort cycles for consistent output
    unique_cycles = list(set(tuple(sorted(cycle)) for cycle in all_cycles))
    return sorted(unique_cycles)

def print_cycles(graph: nx.DiGraph) -> str:
    cycles = find_cycles(graph)
    if not cycles:
        return "No cycles found in the graph."
    
    output = f"Found {len(cycles)} cycle(s) in the graph:\n"
    for i, cycle in enumerate(cycles, 1):
        output += f"{i}. {' -> '.join(cycle)} -> {cycle[0]}\n"
    return output

In [551]:
print(print_cycles(g))


Found 4 cycle(s) in the graph:
1. artifact -> knowledge-base -> artifact
2. autogenerated-draft-test-case -> evaluation-dataset -> autogenerated-draft-test-case
3. knowledge-base -> knowledge-base-datum-source -> knowledge-base
4. knowledge-base -> upload -> knowledge-base



In [592]:
plan_prompt = """You are an agent to help users interact with an API. This make include tasks like creating resources 
or executing workflows for a REST API. A user will provide you with some information on what they would like to run 
and you will be provided with content about the API.

Here is the user query: 
{} 
"""

In [591]:
plan_prompt

'You are an agent to help users interact with an API. This make include tasks like creating resources \nor executing workflows for a REST API. A user will provide you with some information on what they would like to run  \nand you will be provided with content about the API.'

In [597]:
from openai.types import ChatModel as OpenAIModel


OPENAI_MODELS = set(OpenAIModel.__args__)

OPENAI_MODELS

{'gpt-3.5-turbo',
 'gpt-3.5-turbo-0125',
 'gpt-3.5-turbo-0301',
 'gpt-3.5-turbo-0613',
 'gpt-3.5-turbo-1106',
 'gpt-3.5-turbo-16k',
 'gpt-3.5-turbo-16k-0613',
 'gpt-4',
 'gpt-4-0125-preview',
 'gpt-4-0314',
 'gpt-4-0613',
 'gpt-4-1106-preview',
 'gpt-4-32k',
 'gpt-4-32k-0314',
 'gpt-4-32k-0613',
 'gpt-4-turbo',
 'gpt-4-turbo-2024-04-09',
 'gpt-4-turbo-preview',
 'gpt-4-vision-preview',
 'gpt-4o',
 'gpt-4o-2024-05-13'}

In [598]:
'gpt-4' in OPENAI_MODELS

True

In [599]:
from anthropic.types import Model as AnthropicModel


ANTHROPIC_MODELS = set(AnthropicModel.__args__)

ANTHROPIC_MODELS

{str,
 typing.Literal['claude-3-5-sonnet-20240620', 'claude-3-opus-20240229', 'claude-3-sonnet-20240229', 'claude-3-haiku-20240307', 'claude-2.1', 'claude-2.0', 'claude-instant-1.2']}

In [600]:
from typing import List, Optional, Union, TypedDict, Dict

class ModelGatewayInput(TypedDict, total=False):
    model: str
    messages: List[Dict[str, str]]
    timeout: Optional[Union[float, str]]
    temperature: Optional[float]
    top_p: Optional[float]
    n: Optional[int]
    stream: Optional[bool]
    stream_options: Optional[Dict[str, any]]
    stop: Optional[Union[str, List[str]]]
    max_tokens: Optional[int]
    presence_penalty: Optional[float]
    frequency_penalty: Optional[float]
    logit_bias: Optional[Dict[str, float]]
    user: Optional[str]

    # OpenAI specific parameters
    response_format: Optional[Union[Dict[str, str], Dict[str, any]]]
    seed: Optional[int]
    tools: Optional[List[Dict[str, any]]]
    tool_choice: Optional[Union[str, Dict[str, any]]]
    logprobs: Optional[bool]
    top_logprobs: Optional[int]
    parallel_tool_calls: Optional[bool]
    deployment_id: Optional[str]

    # Configuration parameters
    base_url: Optional[str]
    api_version: Optional[str]
    api_key: Optional[str]
    model_list: Optional[List[Dict[str, any]]]

    # Extra parameters
    extra_headers: Optional[Dict[str, str]]

    # Alias for compatibility
    max_completion_tokens: Optional[int]

In [None]:
def inference(openning: ModelGatewayInput):
    return "hi"

