In [204]:
!pip install pydantic anytree networkx matplotlib inflect openapi-core jsonref prance datamodel-code-generator


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [205]:
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

In [206]:
import json

def parse_spec(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)

In [207]:
# filepath = "../openapi_specs/resolved/stripe-08-10-24.json"
filepath = "../openapi_specs/resolved/sgp-09-21-24.json"

spec = parse_spec(filepath)

In [208]:
# !datamodel-codegen  --input "../openapi_specs/stripe-08-10-24.yaml" --input-file-type openapi --output model.py

In [209]:
resource_blacklist = ['delete', 'query', 'cancel', 'batch', 'verify', 'process', 'validate', 'approve', 'publish', 'history', 'approve-batch']

In [210]:
from pydantic import BaseModel
from collections import defaultdict


class RouteMethodDescription(BaseModel):
    method: str
    description: str


# fetch all resources
def extract_resources(openapi: dict):
    resource_to_routes = defaultdict()  # resource -> {paths} -> {methods} -> description

    for path, methods in openapi['paths'].items():
        # not a _real_ resource
        if 'get' not in methods.keys() and 'post' not in methods.keys():
            continue

        # find current resource  
        def find_last_unwrapped_element(path_list):
            for element in reversed(path_list):
                if not (element.startswith('{') and element.endswith('}')):
                    return element
            return None

        resource = find_last_unwrapped_element(path.split('/'))

        if not resource or resource in resource_blacklist:
            continue
        
        if resource not in resource_to_routes:
            resource_to_routes[resource] = {}

        resource_to_routes[resource][path] = {}

        for m, v in methods.items():
            resource_to_routes[resource][path][m] = v.get('description')

    return resource_to_routes

In [211]:
extraction = extract_resources(spec)
resources = extraction.keys()

resources

dict_keys(['knowledge-bases', 'async-jobs', 'chunks', 'upload_files', 'uploads', 'artifacts', 'batch-delete', 'rank', 'synthesis', 'execute', 'completions', 'chat-completions', 'embeddings', 'rerankings', 'deployments', 'usage-statistics', 'model-deployments', 'models', 'user-info', 'users', 'accounts', 'question-sets', 'claim-task', 'contributor-metrics', 'evaluation-metrics', 'hybrid-eval-metrics', 'evaluation-configs', 'evaluation-datasets', 'evaluation-dataset-versions', 'test-cases', 'studio-projects', 'application-specs', 'evaluations', 'test-case-results', 'questions', 'knowledge-base-data-sources', 'upload-schedules', 'autogenerated-draft-test-cases', 'generation-jobs', 'model-groups', 'model-templates', 'fine-tuning-jobs', 'events', 'training-datasets', 'contents', 'install', 'copy-to-control-plane-hook', 'install_async', 'application-variants', 'application-deployments', 'application-variant-reports', 'application-test-case-outputs', 'application-with-variant', 'application-s

In [212]:
# GOAL: create resource dependency tree
# Thesis: to create a resource you need to do POST, dependents are ids and other resource names

def organize_resources(openapi):
    edges = [] # list(parent, child)

    for path, methods in openapi['paths'].items():
        # not a _real_ resource
        if 'post' not in methods.keys():
            continue
        
        path_list = path.split('/')
        
        # edges from path structure
        flag = None
        for p in path_list:
            if p in resources and flag:
                edges.append((flag, p))
            if p in resources:
                flag = p    
        
        if flag is None:
            continue
            
        # edges from request shape
        def extract_param_names(obj):
            param_names = []
            
            def recursive_extract(current_obj):
                if isinstance(current_obj, dict):
                    if 'properties' in current_obj:
                        param_names.extend(current_obj['properties'].keys())
                    else:
                        for value in current_obj.values():
                            recursive_extract(value)
                elif isinstance(current_obj, list):
                    for item in current_obj:
                        recursive_extract(item)
            
            recursive_extract(obj)
            return list(set(param_names))
        
        def is_in_main_list(string, main_list):
            return string in main_list
        
        def is_singular_in_main_list(string, main_list):
            if string.endswith('s'):
                return string[:-1] in main_list
            elif string.endswith('es'):
                return string[:-2] in main_list
            return False
        
        def is_referenced_as_id(string, main_list):
            variations = [
                f"{string}_id",
                f"{string}_ids",
            ]
            if string.endswith('s'):
                variations.append(f"{string[:-1]}_id")
                variations.append(f"{string[:-1]}_ids")
            if string.endswith('es'):
                variations.append(f"{string[:-2]}_id")
                variations.append(f"{string[:-2]}_ids")
            
            variations = [var.replace('-', '_') for var in variations]
            return any(variation in main_list for variation in variations)
        
        def filter_strings(main_list, filter_list):
            conditions = [
                is_in_main_list,
                is_singular_in_main_list,
                is_referenced_as_id,
            ]
            
            return [
                string for string in filter_list 
                if any(condition(string, main_list) for condition in conditions)
            ]
        
        if 'requestBody' in methods['post']:
            params = extract_param_names(methods['post']['requestBody'])
        else:
            params = extract_param_names(methods['post'])
        
        for r in filter_strings(params, resources):
            edges.append((flag, r))
        
    return edges

In [213]:
edgs = organize_resources(spec)
edgs = list(set(edgs))

edgs

[('threads', 'application-variants'),
 ('evaluation-configs', 'question-sets'),
 ('knowledge-bases', 'accounts'),
 ('rerankings', 'chunks'),
 ('evaluation-configs', 'studio-projects'),
 ('autogenerated-draft-test-cases', 'autogenerated-draft-test-cases'),
 ('test-case-results', 'accounts'),
 ('knowledge-base-data-sources', 'accounts'),
 ('uploads', 'chunks'),
 ('models', 'chat-completions'),
 ('evaluations', 'test-case-results'),
 ('application-variants', 'application-specs'),
 ('application-variants', 'accounts'),
 ('evaluations', 'claim-task'),
 ('model-groups', 'models'),
 ('batch-delete', 'artifacts'),
 ('completions', 'models'),
 ('chunks', 'synthesis'),
 ('chunks', 'rank'),
 ('evaluation-datasets', 'accounts'),
 ('chat-completions', 'accounts'),
 ('evaluation-datasets', 'evaluation-dataset-versions'),
 ('artifacts', 'batch-delete'),
 ('threads', 'accounts'),
 ('uploads', 'uploads'),
 ('test-case-results', 'application-specs'),
 ('deployments', 'accounts'),
 ('evaluations', 'evalu

In [214]:
from networkx import DiGraph
import networkx as nx


def build_dependency_tree(edges: []) -> DiGraph:
    graph = nx.DiGraph()
    graph.add_edges_from(edges)

    return graph

In [215]:
g = build_dependency_tree(edgs)

In [216]:
import networkx as nx
from anytree import Node, RenderTree
from collections import deque

def print_tree_for_node(graph: nx.DiGraph, start_node_name: str):
    if start_node_name not in graph.nodes():
        print(f"Node '{start_node_name}' not found in the graph.")
        return

    # Create a dictionary to store anytree Nodes
    node_dict = {}
    visited = set()  # To keep track of visited nodes

    def create_tree(nx_node):
        queue = deque([(nx_node, None)])
        while queue:
            current_node, parent = queue.popleft()
            
            if current_node in visited:
                continue
            visited.add(current_node)
            
            # Create anytree Node if it doesn't exist
            if current_node not in node_dict:
                node_dict[current_node] = Node(str(current_node), parent=parent)
            elif parent:
                # If node exists but with different parent, create a new node
                node_dict[current_node] = Node(f"{current_node}_dup", parent=parent)
            
            # Add child nodes to the queue
            for child in graph.successors(current_node):
                if child not in visited:
                    queue.append((child, node_dict[current_node]))

    # Create the tree starting from the specified node
    create_tree(start_node_name)

    # Print the tree
    print(f"\nTree rooted at {start_node_name}:")
    print(RenderTree(node_dict[start_node_name]))

# Example usage
# edges = [
#     ('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E'),
#     ('C', 'F'), ('E', 'G'), ('F', 'H'),
#     ('I', 'J'), ('J', 'K'),
#     ('D', 'B')  # Adding a cycle to test
# ]
# 
# graph = nx.DiGraph(edges)
# 
# # Print tree for node 'A'
# print_tree_for_node(graph, 'A')
# 
# # Print tree for node 'B'
# print_tree_for_node(graph, 'B')
# # Try to print tree for a non-existent node
# print_tree_for_node(graph, 'Z')

In [217]:
print_tree_for_node(g, 'evaluation-datasets')


Tree rooted at evaluation-datasets:
Node('/evaluation-datasets')
├── Node('/evaluation-datasets/accounts')
├── Node('/evaluation-datasets/evaluation-dataset-versions')
├── Node('/evaluation-datasets/knowledge-bases')
│   ├── Node('/evaluation-datasets/knowledge-bases/uploads')
│   │   └── Node('/evaluation-datasets/knowledge-bases/uploads/chunks')
│   │       ├── Node('/evaluation-datasets/knowledge-bases/uploads/chunks/synthesis')
│   │       └── Node('/evaluation-datasets/knowledge-bases/uploads/chunks/rank')
│   ├── Node('/evaluation-datasets/knowledge-bases/artifacts')
│   │   └── Node('/evaluation-datasets/knowledge-bases/artifacts/batch-delete')
│   ├── Node('/evaluation-datasets/knowledge-bases/upload_files')
│   └── Node('/evaluation-datasets/knowledge-bases/upload-schedules')
│       └── Node('/evaluation-datasets/knowledge-bases/upload-schedules/knowledge-base-data-sources')
├── Node('/evaluation-datasets/autogenerated-draft-test-cases')
├── Node('/evaluation-datasets/test-c

In [218]:
def print_graph(graph: nx.DiGraph):
    print("Nodes:")
    for node in graph.nodes():
        print(f"  {node}")
    
    print("\nEdges:")
    for edge in graph.edges():
        print(f"  {edge[0]} -> {edge[1]}")

In [219]:
print_graph(g)

Nodes:
  threads
  application-variants
  evaluation-configs
  question-sets
  knowledge-bases
  accounts
  rerankings
  chunks
  studio-projects
  autogenerated-draft-test-cases
  test-case-results
  knowledge-base-data-sources
  uploads
  models
  chat-completions
  evaluations
  application-specs
  claim-task
  model-groups
  batch-delete
  artifacts
  completions
  synthesis
  rank
  evaluation-datasets
  evaluation-dataset-versions
  deployments
  fine-tuning-jobs
  training-datasets
  application-deployments
  test-cases
  questions
  application-test-case-outputs
  execute
  embeddings
  upload-schedules
  upload_files
  model-templates
  messages
  themes
  application-variant-reports
  generation-jobs

Edges:
  threads -> application-variants
  threads -> accounts
  application-variants -> application-specs
  application-variants -> accounts
  evaluation-configs -> question-sets
  evaluation-configs -> studio-projects
  evaluation-configs -> accounts
  question-sets -> account