In [41]:
!pip install pydantic anytree networkx matplotlib inflect openapi-core jsonref


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [42]:
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

In [43]:
# parse openapi
import json
import jsonref

def parse_spec(file_path):
    with open(file_path, 'r') as file:
        return jsonref.load(file)

In [44]:
from pydantic import BaseModel
from typing import List, Optional


class GraphNode(BaseModel):
    asset_name: str
    dependent_assets: List[str]
    inputs: List[str]
    route: str
    method: str
    tag: Optional[str]


In [45]:
# helpers
import re
import json
from collections import defaultdict
import re
from inflect import engine

def resolve_schema_ref(spec, schema):
    if isinstance(schema, dict):
        # if '$ref' in schema:
        #     ref = schema['$ref']
        #     parts = ref.split('/')
        #     current = spec
        #     for part in parts[1:]:  # Skip the first '#' part
        #         current = current[part]
        #     return resolve_schema_ref(spec, current)

        resolved_schema = {}
        for key, value in schema.items():
            if key in ['oneOf', 'allOf', 'anyOf']:
                resolved_schema[key] = [resolve_schema_ref(spec, item) for item in value]
            elif isinstance(value, dict):
                resolved_schema[key] = resolve_schema_ref(spec, value)
            elif isinstance(value, list):
                resolved_schema[key] = [resolve_schema_ref(spec, item) if isinstance(item, dict) else item for item in value]
            else:
                resolved_schema[key] = value
        return resolved_schema
    elif isinstance(schema, list):
        return [resolve_schema_ref(spec, item) for item in schema]
    else:
        return schema

def standardize_asset_name(name: str) -> str:
    # Initialize inflect engine for handling plurals
    p = engine()

    # Convert to lowercase and replace underscores with dashes
    name = name.lower().replace('_', '-')
    name = name.lower().replace(' ', '-')

    # Remove any non-alphanumeric characters (except dashes)
    name = re.sub(r'[^a-z0-9-]', '', name)

    # Split the name into parts
    parts = name.split('-')

    # Singularize each part
    invariant_words = {'synthesis', 'analysis', 'basis', 'thesis'}

    parts = [part if part in invariant_words else (p.singular_noun(part) or part) for part in parts]

    # Join the parts back together
    standardized_name = '-'.join(parts)

    # Remove 'id' or 'ids' if it's at the end of the name
    standardized_name = re.sub(r'-ids?$', '', standardized_name)
    return standardized_name

def get_last_route_segment(route: str) -> str:
    # Split the route by '/' and get the last non-empty segment
    segments = route.strip('/').split('/')
    last_segment = segments[-1] if segments else ''

    # If the last segment is surrounded by curly braces, remove them
    return re.sub(r'^\{(.*)\}$', r'\1', last_segment)

def last_part_has_id(route: str) -> bool:
    # Split the route into parts
    parts = route.strip('/').split('/')

    # Get the last part
    last_part = parts[-1] if parts else ''

    # Check if the last part is enclosed in curly braces or ends with '_id'
    return bool(re.match(r'^\{.*\}$', last_part) or last_part.endswith('_id'))

In [46]:

def organize_resources(spec):
    node_registry = {}

    get_assets = set()
    dependent_assets = set()
    all_tags = set()

    for path, methods in spec['paths'].items():
        if last_part_has_id(path):
            continue

        for method, details in methods.items():
            if method in ["get"]:
                get_assets.add(standardize_asset_name(get_last_route_segment(path)))

            if method not in ["post"] or last_part_has_id(path) : # maybe add `patch`
                continue

            tags = details.get("tags")

            dependents = set()
            inputs = set()

            bad_ids = ["account_id", "x-selected-account-id"]

            def handle_properties(_properties):
                for _input in _properties:
                    handle_input(_input)


            def handle_input(_input):
                if "id" in _input and _input not in bad_ids:
                    dependents.add(standardize_asset_name(_input))

                inputs.add(_input)

            # get ids from request body
            body = details.get("requestBody")
            if body:
                try:
                    content = details["requestBody"]["content"]
                    pointer = content.get("application/json", None)
                    if not pointer:
                        pointer = content["multipart/form-data"]
                    ref = pointer["schema"]
                    
                    print(ref)
                    request_body_schema = resolve_schema_ref(spec, ref)

                    # properties
                    properties = request_body_schema.get("properties", None)
                    if properties:
                        handle_properties(properties)

                    # nestings
                    nested_patterns = ["allOf", "oneOf"]
                    for pat in nested_patterns:
                        if pat in request_body_schema:
                            for prop in request_body_schema[pat]:
                                if prop.get("properties", None):
                                    handle_properties(prop["properties"])

                except Exception as e:
                    print(f"Error parsing request body {path}", e)

            # get ids from parameters
            params = details.get("parameters")

            if params:
                try:
                    for param in params:
                        if param.get("name", None):
                            name = param["name"]
                            handle_input(name)

                except Exception as e:
                    print(f"Error parsing parameters {path}", e)

            # get ids from response
            # responses = details.get("responses")
            #
            # if responses:
            #     try:
            #         success_ref = responses["200"]["content"]["application/json"]["schema"]
            #         success_resp_schema = resolve_schema_ref(spec, success_ref)
            #
            #         properties = success_resp_schema.get("properties", None)
            #         if properties:
            #             handle_properties(properties)
            #
            #     except Exception as e:
            #         print(f"Error parsing responses {path}", e)

            dependent_assets.update(dependents)

            _tag = tags[0] if tags and len(tags) > 0 else None
            all_tags.add(standardize_asset_name(_tag))

            curr_asset = standardize_asset_name(get_last_route_segment(path))
            new_node = GraphNode(
                asset_name=curr_asset,
                dependent_assets=list(dependents),
                inputs=list(inputs),
                route=path,
                method=method,
                tag=_tag,
            )

            if curr_asset not in node_registry:
                node_registry[curr_asset] = new_node
            elif len(node_registry[curr_asset].dependent_assets) < len(dependents):
                node_registry[curr_asset] = new_node

    # print(dependent_assets)
    # print(get_assets)
    # print(all_tags)
    _nodes = list(node_registry.values())
    real_nodes = []
    for node in _nodes:
        curr_asset = node.asset_name
        if curr_asset in get_assets or curr_asset in dependent_assets or curr_asset in all_tags:
            real_nodes.append(node)
        else:
            print("removing...", node.asset_name)

    return real_nodes

In [47]:
from networkx import DiGraph
from typing import List, Dict
import networkx as nx


def build_dependency_tree(_nodes: List[GraphNode]) -> DiGraph:
    _graph = nx.DiGraph()  # Directed graph allows a child to have multiple parents

    # Create all nodes
    for graph_node in _nodes:
        _graph.add_node(graph_node.asset_name)

    # Create edges based on dependencies
    for graph_node in _nodes:
        for dependent_asset in graph_node.dependent_assets:
            _graph.add_edge(dependent_asset, graph_node.asset_name)
        # print("-" * 50)
        # print(graph_node.asset_name)
        # print("children:", graph_node.dependent_assets)

    return _graph

In [48]:
spec = parse_spec('v4-sgp-spec-07-04-2024.json')
nodes = organize_resources(spec)

print(nodes)
graph = build_dependency_tree(nodes)

{'properties': {'knowledge_base_name': {'type': 'string', 'title': 'Knowledge Base Name', 'description': 'A unique name for the knowledge base'}, 'embedding_config': {'allOf': [{'anyOf': [{'properties': {'type': {'type': 'string', 'enum': ['models_api'], 'title': 'Type', 'description': 'The type of the embedding configuration.'}, 'model_deployment_id': {'type': 'string', 'title': 'Model Deployment Id', 'description': 'The ID of the deployment of the created model in the Models API V3.'}}, 'type': 'object', 'required': ['type', 'model_deployment_id'], 'title': 'EmbeddingConfigModelsAPI'}, {'properties': {'type': {'type': 'string', 'enum': ['base'], 'title': 'Type', 'description': 'The type of the embedding configuration.', 'default': 'base'}, 'embedding_model': {'allOf': [{'type': 'string', 'enum': ['sentence-transformers/all-MiniLM-L12-v2', 'sentence-transformers/multi-qa-distilbert-cos-v1', 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2', 'openai/text-embedding-ada-002',

In [49]:
# Dictionary to store anytree nodes
from anytree import Node, RenderTree

# Dictionary to store anytree nodes
nodes_dict = defaultdict(lambda: None)
sink_nodes = [node for node in graph.nodes() if graph.out_degree(node) == 0]

# Function to build the tree using anytree
def build_tree_for_node(_graph, _node):
    # Initialize the root node for the tree
    _root = Node(_node)
    nodes_dict[_node] = _root
    # Use a stack for DFS
    stack = [_node]
    visited = set()

    while stack:
        node = stack.pop()
        if node not in visited:
            visited.add(node)
            for pred in _graph.predecessors(node):
                if pred not in nodes_dict:
                    nodes_dict[pred] = Node(pred, parent=nodes_dict[node])
                else:
                    nodes_dict[pred].parent = nodes_dict[node]
                stack.append(pred)
    return _root


def print_tree(_root: Node):
    print(f"Tree for {sink_node}:")
    for pre, _, node in RenderTree(_root):
        print("%s%s" % (pre, node.name))

# Visualize trees for each sink node
for sink_node in sink_nodes:
    root = build_tree_for_node(graph, sink_node)
    print_tree(root)
    print("\n" + "="*40)  # Separator between trees

Tree for upload:
upload
└── knowledge-base

Tree for completion:
completion
└── model-deployment

Tree for chat-completion:
chat-completion
└── model-deployment

Tree for deployment:
deployment
└── model-instance

Tree for account:
account

Tree for evaluation-dataset-version:
evaluation-dataset-version
└── evaluation-dataset
    └── knowledge-base

Tree for test-case-result:
test-case-result
├── annotated-by-user
├── test-case
│   └── evaluation-dataset
│       └── knowledge-base
└── evaluation
    ├── application-variant
    │   └── application-spec
    │       └── theme
    └── evaluation-config
        ├── studio-project
        └── question-set
            └── question

Tree for upload-schedule:
upload-schedule
├── knowledge-base-datum-source
└── knowledge-base

Tree for autogenerated-draft-test-case:
autogenerated-draft-test-case
└── evaluation-dataset
    └── knowledge-base

Tree for generation-job:
generation-job
└── evaluation-dataset
    └── knowledge-base

Tree for model:
mo

In [50]:
node_assets = [str(node) for node in graph.nodes()]
node_assets

['knowledge-base',
 'upload',
 'completion',
 'chat-completion',
 'deployment',
 'account',
 'evaluation-config',
 'evaluation-dataset',
 'evaluation-dataset-version',
 'test-case',
 'studio-project',
 'application-spec',
 'evaluation',
 'test-case-result',
 'question',
 'question-set',
 'knowledge-base-datum-source',
 'upload-schedule',
 'autogenerated-draft-test-case',
 'generation-job',
 'model',
 'model-group',
 'model-template',
 'fine-tuning-job',
 'training-dataset',
 'application-variant',
 'application-deployment',
 'application-variant-report',
 'thread',
 'theme',
 'model-deployment',
 'model-instance',
 'annotated-by-user',
 'base-model',
 'fine-tuned-model',
 'validation-dataset']

In [51]:
asset_tree = build_tree_for_node(graph, "test-case")

print(print_tree(asset_tree))

Tree for thread:
test-case
└── evaluation-dataset
    └── knowledge-base
None


In [52]:
assert False

AssertionError: 

In [None]:
hello_world = 'hello world (placeholder)'

In [None]:
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

client = OpenAI()
ASST_ID = os.environ["OPENAI_PARROT_AGENT_ASST"]

In [None]:
from typing import List
# tools for assistant
import httpx

BASE_URL = "https://example.com/api"
HEADERS = {"Authorization": "Bearer your_token_here"}

def run_api_call(route: str, method: str, data: dict = None) -> dict:
    """
    Function to make an API call to a specified route using the specified method.
    Supports GET and POST methods, with data only being used for POST.

    Parameters:
        route (str): The API route.
        method (str): The HTTP method ('GET' or 'POST').
        data (dict, optional): Data to send with POST requests.

    Returns:
        dict: The response from the API as a dictionary.
    """
    url = f"{BASE_URL}/{route.strip('/')}"  # Ensure the route is correctly appended to the base URL
    with httpx.Client(headers=HEADERS) as client:
        if method.upper() == 'POST':
            response = client.post(url, json=data)
        elif method.upper() == 'GET':
            response = client.get(url)
        else:
            return {"error": f"Unsupported method: {method}"}

        # Return the JSON response if possible, otherwise return the status code
        return response.json() if response.is_success else {'status_code': response.status_code, 'detail': response.text}

def get_dependency_tree(asset_names: List[str]):
    # generate dependency tree
    pass

def get_routes_for_asset():
    pass
