In [5]:
!pip install pydantic anytree networkx matplotlib inflect openapi-core jsonref prance datamodel-code-generator

Collecting datamodel-code-generator
  Downloading datamodel_code_generator-0.26.0-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.9/110.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting argcomplete<4.0,>=1.10
  Downloading argcomplete-3.5.0-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting black>=19.10b0
  Downloading black-24.8.0-cp311-cp311-macosx_11_0_arm64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting genson<2.0,>=1.2.1
  Downloading genson-1.3.0-py3-none-any.whl (21 kB)
Collecting inflect
  Downloading inflect-5.6.2-py3-none-any.whl (33 kB)
Collecting isort<6.0,>=4.3.21
  Downloading isort-5.13.2-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m

In [1]:
import logging

# Configure the root logger: DEBUG and above, rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

In [2]:
import json

def parse_spec(file_path):
    """Load a JSON document (here: an OpenAPI specification) from disk.

    Args:
        file_path: Path of the JSON file; anything accepted by ``open``.

    Returns:
        The decoded JSON document as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON exchanged between systems is UTF-8 encoded; decode it explicitly
    # instead of relying on the platform's locale encoding, which is not
    # UTF-8 everywhere.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [3]:
# Relative path of the JSON spec to analyse; judging by the path name this is
# presumably a $ref-resolved snapshot of the Stripe OpenAPI spec — confirm.
filepath = "../openapi_specs/resolved/stripe-08-10-24.json"

# Parsed document; the extraction cells further down take it as their input.
spec = parse_spec(filepath)

In [50]:
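# Disabled: with a single-file `--output` this command fails with the error shown
# below; the spec needs an output directory (e.g. `--output models/`), not `model.py`.
# !datamodel-codegen  --input "../openapi_specs/stripe-08-10-24.yaml" --input-file-type openapi --output model.py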

Modular references require an output directory, not a file


In [69]:
from pydantic import BaseModel
from collections import defaultdict


# Pydantic model with two required string fields.
# NOTE(review): not referenced by any cell visible here; presumably meant to
# pair an HTTP method with the description of its operation — confirm or remove.
class RouteMethodDescription(BaseModel):
    method: str  # presumably the HTTP verb, e.g. 'get' or 'post' — TODO confirm
    description: str  # presumably the operation's description text — TODO confirm


# fetch all resources
def extract_resources(openapi: dict):
    """Group the paths of an OpenAPI document by the resource they address.

    The resource of a path is its last segment that is not a ``{parameter}``
    placeholder, e.g. ``/v1/customers/{customer}`` -> ``customers``. Paths
    that offer neither GET nor POST, and paths without such a segment, are
    skipped.

    Args:
        openapi: Parsed OpenAPI document; must contain a ``paths`` mapping.

    Returns:
        A dict shaped ``{resource: {path: {http_method: description}}}``.
        ``description`` is ``None`` when the operation does not define one.

    Raises:
        KeyError: If ``openapi`` has no ``paths`` entry.
    """
    # Operation keys of an OpenAPI Path Item Object. A path item may also
    # carry non-operation fields ('parameters', 'summary', 'servers', ...)
    # whose values are lists or strings and must not be treated as operations.
    http_methods = {'get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace'}

    def find_last_unwrapped_element(path_list):
        """Return the last segment not wrapped in braces, or None."""
        for element in reversed(path_list):
            if not (element.startswith('{') and element.endswith('}')):
                return element
        return None

    resource_to_routes = {}  # resource -> {path} -> {method} -> description

    for path, path_item in openapi['paths'].items():
        # not a _real_ resource
        if 'get' not in path_item and 'post' not in path_item:
            continue

        resource = find_last_unwrapped_element(path.split('/'))

        # '' (e.g. for '/' or '/{id}') and None both mean "no resource here".
        if not resource:
            continue

        resource_to_routes.setdefault(resource, {})[path] = {
            method: operation.get('description')
            for method, operation in path_item.items()
            if method in http_methods and isinstance(operation, dict)
        }

    return resource_to_routes

In [116]:
# Map every resource found in the spec to its paths, methods and descriptions.
extraction = extract_resources(spec)
# Resource names only; this is a dict-keys view of `extraction`, not a copy.
resources = extraction.keys()

resources



In [131]:
# GOAL: create resource dependency tree
# Thesis: to create a resource you need to do POST, dependents are ids and other resource names

def organize_resources(openapi, resources=None):
    """Derive ``(parent, child)`` dependency edges between resources.

    Only paths that define a POST operation are considered. Edges come from
    two sources:

    * Path structure: every path segment that is a known resource is linked
      from the closest preceding resource segment of the same path, e.g.
      ``/customers/{id}/sources`` yields ``('customers', 'sources')``.
    * Request shape: the last resource segment of the path is linked to every
      known resource whose name, or whose name without a trailing ``'s'``,
      is a property name found in the POST request body.

    NOTE(review): for request-shape edges the resource being posted to is the
    first element and the referenced resource the second; confirm this is the
    intended direction relative to the path-structure edges.

    Args:
        openapi: Parsed OpenAPI document; must contain a ``paths`` mapping.
        resources: Iterable of known resource names. When omitted, the keys
            of ``extract_resources(openapi)`` are used, so the function no
            longer depends on a notebook-level ``resources`` variable.

    Returns:
        A list of ``(parent, child)`` tuples in path order; duplicates are
        kept.

    Raises:
        KeyError: If ``openapi`` has no ``paths`` entry.
    """
    if resources is None:
        resources = extract_resources(openapi).keys()
    resource_names = list(resources)  # keeps the caller's order for the output
    known_resources = set(resource_names)

    def extract_param_names(obj):
        """Collect the keys of the first 'properties' mapping on each branch."""
        names = set()

        def recursive_extract(current_obj):
            if isinstance(current_obj, dict):
                if 'properties' in current_obj:
                    # Stop here: nested properties are deliberately not visited.
                    properties = current_obj['properties']
                    if isinstance(properties, dict):
                        names.update(properties)
                else:
                    for value in current_obj.values():
                        recursive_extract(value)
            elif isinstance(current_obj, list):
                for item in current_obj:
                    recursive_extract(item)

        recursive_extract(obj)
        return names

    def is_referenced(resource, param_names):
        """True if the resource name, or its naive singular, is a parameter."""
        return resource in param_names or (
            resource.endswith('s') and resource[:-1] in param_names
        )

    edges = []  # list(parent, child)

    for path, methods in openapi['paths'].items():
        # not a _real_ resource
        if 'post' not in methods:
            continue

        # edges from path structure
        current = None
        for segment in path.split('/'):
            if segment not in known_resources:
                continue
            if current:
                edges.append((current, segment))
            current = segment

        # Without a resource segment there is nothing to attach request-shape
        # edges to; emitting (None, resource) would add a bogus None node.
        if current is None:
            continue

        # edges from request shape; requestBody is optional on an operation
        post_operation = methods['post']
        request_body = (
            post_operation.get('requestBody')
            if isinstance(post_operation, dict)
            else None
        )
        if not request_body:
            continue

        params = extract_param_names(request_body)
        for name in resource_names:
            if is_referenced(name, params):
                edges.append((current, name))

    return edges

In [132]:
# Collect the (parent, child) dependency edges for the whole spec.
edgs = organize_resources(spec)
# Not the cell's last expression, so with IPython's default display settings
# this bare name renders nothing; only len(edgs) below is shown.
edgs

# Number of collected edges (the list is not de-duplicated).
len(edgs)

350

In [134]:
from networkx import DiGraph
import networkx as nx


def build_dependency_tree(edges: list) -> DiGraph:
    """Build a directed graph from ``(parent, child)`` edge pairs.

    Despite the name, the result is a general directed graph: a child may
    have several parents.

    Args:
        edges: Edge pairs; each pair becomes an edge pointing from its first
            element to its second.

    Returns:
        A new ``networkx.DiGraph`` holding the given edges.
    """
    graph = nx.DiGraph()  # Directed graph allows a child to have multiple parents

    # Nodes are created implicitly from the edge endpoints.
    graph.add_edges_from(edges)

    return graph

In [135]:
# Turn the collected edge list into a directed graph of resources.
g = build_dependency_tree(edgs)

In [136]:
# The default repr only identifies the DiGraph object; it lists no nodes or edges.
g

<networkx.classes.digraph.DiGraph at 0x1232095d0>