In [1]:
# %pip (not !pip) installs into the environment of the running kernel;
# a shell `pip` may belong to a different interpreter.
%pip install pydantic anytree networkx matplotlib inflect openapi-core jsonref prance datamodel-code-generator


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import logging

# Root logger: show DEBUG and above, prefixed with timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

In [3]:
import json

def parse_spec(file_path):
    """Load an OpenAPI specification stored as a JSON file.

    Args:
        file_path: Path of the JSON document to read.

    Returns:
        The decoded JSON value (a dict for an OpenAPI document).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON documents are UTF-8; decode explicitly instead of relying on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [4]:
# Path of the OpenAPI JSON document, relative to the notebook's working
# directory. Presumably the "resolved" variant has its $ref pointers already
# expanded (judging by the directory name only) -- TODO confirm.
filepath = "../openapi_specs/resolved/stripe-08-10-24.json"

# Parsed specification; later cells read it through the global name `spec`.
spec = parse_spec(filepath)

In [5]:
# !datamodel-codegen  --input "../openapi_specs/stripe-08-10-24.yaml" --input-file-type openapi --output model.py

In [6]:
from pydantic import BaseModel
from collections import defaultdict


# Pydantic model with two required string fields. It is not referenced by any
# of the cells visible here.
class RouteMethodDescription(BaseModel):
    # Presumably the HTTP method name of a route (e.g. "get") -- TODO confirm.
    method: str
    # Presumably the description text of that operation -- TODO confirm.
    description: str


# HTTP verbs that can appear as operation keys of an OpenAPI Path Item Object.
# Other Path Item members (`parameters`, `summary`, `servers`, ...) are not
# operations and must not be treated as methods.
_HTTP_METHODS = frozenset(
    {'get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace'}
)


def _last_static_segment(path):
    """Return the last '/'-separated segment that is not a ``{placeholder}``.

    Returns None when every segment is a placeholder. An empty string is
    returned for a path that ends with '/'.
    """
    for segment in reversed(path.split('/')):
        if not (segment.startswith('{') and segment.endswith('}')):
            return segment
    return None


# fetch all resources
def extract_resources(openapi: dict):
    """Group the routes of an OpenAPI document by resource name.

    The resource of a path is its last segment that is not a ``{placeholder}``
    (``/v1/customers/{customer}`` -> ``customers``). Paths that expose neither
    GET nor POST are ignored, and so are paths whose resource segment is empty
    (for example a path ending in '/').

    Args:
        openapi: Parsed OpenAPI document; only ``openapi['paths']`` is read.

    Returns:
        A dict shaped ``{resource: {path: {method: description}}}``.
        ``description`` is None when the operation has none. Only HTTP method
        keys of a path item are included.

    Raises:
        KeyError: If ``openapi`` has no ``'paths'`` entry.
    """
    resource_to_routes = {}  # resource -> {paths} -> {methods} -> description

    for path, path_item in openapi['paths'].items():
        # not a _real_ resource
        if 'get' not in path_item and 'post' not in path_item:
            continue

        resource = _last_static_segment(path)
        if not resource:
            continue

        # Restrict to real operations: path-level members such as
        # `parameters` (a list) or `summary` (a string) have no `.get`.
        resource_to_routes.setdefault(resource, {})[path] = {
            method: operation.get('description')
            for method, operation in path_item.items()
            if method in _HTTP_METHODS
        }

    return resource_to_routes

In [7]:
# resource name -> {path -> {method -> description}} for the loaded spec.
extraction = extract_resources(spec)
# Keys view over `extraction` (it reflects later changes to that dict).
# organize_resources() reads this global by name.
resources = extraction.keys()

In [8]:
# GOAL: build a resource dependency tree.
# Thesis: a resource is created with a POST; its dependencies are the ids and other resource names that the POST refers to.

def _extract_param_names(obj):
    """Collect the keys of every ``properties`` mapping reachable in ``obj``.

    Dicts and lists are walked recursively. On a dict that has a
    ``properties`` key the walk stops after taking those keys, so properties
    nested inside another property are not reported.
    """
    param_names = set()

    def visit(node):
        if isinstance(node, dict):
            if 'properties' in node:
                param_names.update(node['properties'].keys())
            else:
                for value in node.values():
                    visit(value)
        elif isinstance(node, list):
            for item in node:
                visit(item)

    visit(obj)
    return param_names


def _resources_named_by(param_names, resource_names):
    """Return the resources whose name, or the name minus a trailing 's'/'es',
    is one of ``param_names`` (order of ``resource_names`` is kept)."""
    def is_named(resource):
        return (
            resource in param_names
            or (resource.endswith('s') and resource[:-1] in param_names)
            or (resource.endswith('es') and resource[:-2] in param_names)
        )

    return [resource for resource in resource_names if is_named(resource)]


def organize_resources(openapi, known_resources=None):
    """Derive resource-to-resource edges from the POST routes of a spec.

    Two kinds of edges are produced for every path that has a POST operation:

    * path structure: for consecutive known resources in the path,
      ``(earlier, later)`` -- e.g. ``/v1/customers/{id}/sources`` gives
      ``('customers', 'sources')``;
    * request shape: ``(last_resource_in_path, r)`` for every known resource
      ``r`` whose name (or singular form) is a property name found in the
      POST request body.

    NOTE(review): the first kind points from the containing resource to the
    nested one, the second from the created resource to what its body refers
    to. Confirm that mixing both directions in one graph is intended.

    Args:
        openapi: Parsed OpenAPI document; only ``openapi['paths']`` is read.
        known_resources: Iterable of resource names. When None, the
            notebook-level global ``resources`` is used.

    Returns:
        A list of ``(parent, child)`` tuples; it may contain duplicates.
    """
    if known_resources is None:
        known_resources = resources  # notebook global
    ordered_resources = list(known_resources)
    resource_lookup = set(ordered_resources)

    edges = []  # list(parent, child)

    for path, path_item in openapi['paths'].items():
        # not a _real_ resource
        if 'post' not in path_item:
            continue

        # edges from path structure
        current = None
        for segment in path.split('/'):
            if segment in resource_lookup:
                if current:
                    edges.append((current, segment))
                current = segment

        # Without a known resource in the path there is nothing to attach the
        # body references to; skipping avoids edges whose parent is None.
        if current is None:
            continue

        # edges from request shape (requestBody is optional on an operation)
        params = _extract_param_names(path_item['post'].get('requestBody'))
        for referenced in _resources_named_by(params, ordered_resources):
            edges.append((current, referenced))

    return edges

In [9]:
# De-duplicate while keeping first-seen order. A set would order the string
# tuples by hash, which changes between kernel restarts and would reshuffle
# the graph built from this list.
edgs = list(dict.fromkeys(organize_resources(spec)))

In [10]:
from networkx import DiGraph
import networkx as nx


def build_dependency_tree(edges: list) -> DiGraph:
    """Build a directed graph from a collection of edges.

    Nothing here checks that the result is actually a tree; the edges are
    inserted as given.

    Args:
        edges: Edges to insert, passed unchanged to ``add_edges_from``.

    Returns:
        A new ``networkx.DiGraph`` containing the given edges.
    """
    graph = nx.DiGraph()
    graph.add_edges_from(edges)

    return graph

In [11]:
# `edgs` is already de-duplicated where it is built; passing it through
# another set() would only reshuffle the edge order again.
g = build_dependency_tree(edgs)

In [12]:
import networkx as nx
from anytree import Node, RenderTree
from collections import deque

def print_tree_for_node(graph: nx.DiGraph, start_node_name: str):
    """Print the breadth-first tree of everything reachable from a node.

    The graph is walked breadth-first along outgoing edges. Each reachable
    node appears exactly once, attached to the parent whose queue entry is
    processed first, so cycles and shared descendants are cut rather than
    repeated.

    Args:
        graph: Directed graph to walk.
        start_node_name: Node to use as the root of the printed tree.

    Returns:
        None. The tree (or a "not found" message when the node is missing
        from the graph) is written to stdout.
    """
    if start_node_name not in graph.nodes():
        print(f"Node '{start_node_name}' not found in the graph.")
        return

    # graph node -> anytree Node. A node is inserted the moment it is first
    # processed, so this mapping also serves as the visited set.
    tree_nodes = {}
    queue = deque([(start_node_name, None)])

    while queue:
        current_node, parent = queue.popleft()

        # A node can be queued by several parents before it is processed;
        # only its first queue entry creates a tree node.
        if current_node in tree_nodes:
            continue
        tree_nodes[current_node] = Node(str(current_node), parent=parent)

        for child in graph.successors(current_node):
            if child not in tree_nodes:
                queue.append((child, tree_nodes[current_node]))

    # Print the tree
    print(f"\nTree rooted at {start_node_name}:")
    print(RenderTree(tree_nodes[start_node_name]))

# Example usage on a small hand-made graph.
# The I -> J -> K chain is not reachable from 'A' or 'B', and ('D', 'B')
# closes a cycle B -> D -> B to check that the walk terminates.
edges = [
    ('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E'),
    ('C', 'F'), ('E', 'G'), ('F', 'H'),
    ('I', 'J'), ('J', 'K'),
    ('D', 'B')  # Adding a cycle to test
]

graph = nx.DiGraph(edges)

# Print tree for node 'A'
print_tree_for_node(graph, 'A')

# Print tree for node 'B'
print_tree_for_node(graph, 'B')
# Try to print tree for a non-existent node ('Z' is not in `edges`)
print_tree_for_node(graph, 'Z')


Tree rooted at A:
Node('/A')
├── Node('/A/B')
│   ├── Node('/A/B/D')
│   └── Node('/A/B/E')
│       └── Node('/A/B/E/G')
└── Node('/A/C')
    └── Node('/A/C/F')
        └── Node('/A/C/F/H')

Tree rooted at B:
Node('/B')
├── Node('/B/D')
└── Node('/B/E')
    └── Node('/B/E/G')
Node 'Z' not found in the graph.


In [13]:
# Show everything reachable from the 'tokens' node of the spec-derived graph.
print_tree_for_node(g, 'tokens')


Tree rooted at tokens:
Node('/tokens')
├── Node('/tokens/account')
├── Node('/tokens/accounts')
│   ├── Node('/tokens/accounts/people')
│   ├── Node('/tokens/accounts/subscribe')
│   │   └── Node('/tokens/accounts/subscribe/features')
│   │       ├── Node('/tokens/accounts/subscribe/features/outbound_transfers')
│   │       │   ├── Node('/tokens/accounts/subscribe/features/outbound_transfers/cancel')
│   │       │   └── Node('/tokens/accounts/subscribe/features/outbound_transfers/post')
│   │       ├── Node('/tokens/accounts/subscribe/features/outbound_payments')
│   │       └── Node('/tokens/accounts/subscribe/features/inbound_transfers')
│   │           └── Node('/tokens/accounts/subscribe/features/inbound_transfers/succeed')
│   ├── Node('/tokens/accounts/reject')
│   ├── Node('/tokens/accounts/settings')
│   ├── Node('/tokens/accounts/unsubscribe')
│   ├── Node('/tokens/accounts/refresh')
│   ├── Node('/tokens/accounts/login_links')
│   ├── Node('/tokens/accounts/external_accounts

In [14]:
def print_graph(graph: nx.DiGraph):
    """Write the graph's nodes, then its directed edges, to stdout.

    Output is a "Nodes:" header followed by one indented node per line, a
    blank line, an "Edges:" header, and one indented "source -> target" line
    per edge.
    """
    node_lines = [f"  {name}" for name in graph.nodes()]
    edge_lines = [f"  {pair[0]} -> {pair[1]}" for pair in graph.edges()]

    for line in ["Nodes:", *node_lines, "\nEdges:", *edge_lines]:
        print(line)

In [15]:
# List every node and edge of the spec-derived graph.
print_graph(g)

Nodes:
  outbound_payments
  customers
  funding_instructions
  authorizations
  expire
  create_reversal
  line_items
  application_fees
  refund
  quotes
  finalize
  disputes
  transactions
  subscription_items
  tax_rates
  topups
  sources
  payment_intents
  cancel
  outbound_transfers
  return
  payouts
  reverse
  personalization_designs
  activate
  transfers
  reversals
  subscription_schedules
  invoices
  mark_uncollectible
  readers
  locations
  plans
  products
  features
  tokens
  account
  account_sessions
  fail
  value_list_items
  value_lists
  alerts
  deactivate
  cards
  deliver
  bank_accounts
  verify
  physical_bundles
  sessions
  payment_methods
  accounts
  refunds
  inbound_transfers
  update_lines
  secrets
  delete
  test_clocks
  lines
  prices
  cancel_action
  confirm
  confirmation_tokens
  owners
  setup_intents
  charges
  tax_ids
  invoiceitems
  subscriptions
  accept
  financial_accounts
  release
  present_payment_method
  refresh
  add_lines
