In [19]:
import subprocess
import sys
from pyflowchart import *
from graphviz import Digraph
import os

# Directory that receives every rendered diagram in this notebook.
# The original absolute path remains the default so existing behaviour is
# unchanged; set the CATLLM_DIAGRAMS_DIR environment variable to render
# somewhere else without editing the notebook. An empty value falls back to
# the default as well.
diagrams_dir = (
    os.environ.get('CATLLM_DIAGRAMS_DIR')
    or '/Users/chrissoria/documents/research/empirical_investigation_llm/plots/diagrams'
)
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(diagrams_dir, exist_ok=True)

In [20]:
from graphviz import Digraph
import os

def create_catllm_flowchart_three_nodes(output_dir=None, view=True):
    """Build the CatLLM classification flowchart and render it to a PNG file.

    The diagram covers user input, the "auto" category check, prompt
    construction, input validation, the API call with its three outcomes
    (retryable failure, success, retries exhausted) and JSON clean-up/parsing.

    Args:
        output_dir: Directory that receives the rendered file. When ``None``
            (the default) the notebook-level ``diagrams_dir`` is used, which
            is what this function always did before the parameter existed.
        view: Forwarded to ``Digraph.render``. ``True`` (the default, matching
            the previous hard-coded value) asks graphviz to open the rendered
            file; pass ``False`` for headless or batch runs.

    Returns:
        The ``Digraph`` object that was rendered.

    Side effects:
        Renders ``catllm_flowchart_three_nodes.png`` inside ``output_dir`` and
        prints the saved path.
    """
    if output_dir is None:
        # Fall back to the directory configured in the notebook's setup cell.
        output_dir = diagrams_dir

    dot = Digraph(comment='Multiple-Classification with CatLLM', format='png')
    # NOTE(review): the Graphviz docs state that orthogonal routing has limited
    # support for edge labels in dot; if labels look misplaced, confirm against
    # the rendered image before changing anything.
    dot.attr(rankdir='TB', splines='ortho')
    dot.attr('node', shape='box', style='rounded')

    def filled_box(name, label, fill='white'):
        """Add a rounded, filled box (process steps and outcomes)."""
        dot.node(name, label, shape='box', fillcolor=fill, style='filled,rounded')

    def filled_shape(name, label, shape, fill='white'):
        """Add a filled node that is not rounded (ellipse or diamond)."""
        dot.node(name, label, shape=shape, fillcolor=fill, style='filled')

    def same_rank(*names):
        """Pin the named nodes to one rank via an anonymous subgraph."""
        with dot.subgraph() as group:
            group.attr(rank='same')
            for name in names:
                group.node(name)

    # Nodes are declared in a fixed order on purpose: Graphviz layout depends
    # on declaration order, so reordering these calls can change the picture.

    # Entry and early nodes
    filled_shape('start', 'User enters:\n1. Column of text to classify and API Key\n2. (Optional) Categories to classify into\n3. (Optional) Prompt Instructions\n4. (Optional) Example Categorizations',
                 'ellipse')
    filled_box('cat_auto_check', 'Categories input = "auto"?')
    filled_box('auto_gen', 'Auto-generate categories\nfrom corpus')
    filled_shape('package_handler', 'Package automatically handles:\n- Different Category Lengths\n- Identifies Model Provider\n- Constructs Prompt',
                 'ellipse')

    # Input validation check (side branch)
    filled_box('input_valid', 'Input valid?\n(not NaN or empty string)')
    filled_box('invalid_input', 'Invalid input detected\n(NaN or empty string)\nSkips to Next Entry',
               fill='lightcoral')

    filled_box('api_call', 'CatLLM calls model provider to classify\ntext into categories\nand return results in JSON format')

    # API call outcome check and its three outcome nodes
    filled_box('api_check', 'API call outcome?')
    filled_box('api_failure', 'API call fails:\n- Rate limit exceeded\n- Invalid JSON format\n- Transitory server error',
               fill='lightyellow')
    filled_box('api_success', 'API call succeeds and\nproduces valid JSON object',
               fill='lightgreen')
    # Terminal failure node (red)
    filled_box('terminal_failure', 'All retries exhausted:\n- 6+ consecutive failures\n- Persistent errors\nSkips to Next Entry',
               fill='lightcoral')

    # JSON processing nodes
    filled_box('json_trim', 'Trims all non-JSON text and fixes JSON structure if needed')
    filled_shape('json_parse_check', 'JSON parsing successful?', 'diamond')
    filled_box('json_success', 'Successfully sorts data\nfrom JSON into columns',
               fill='lightgreen')
    filled_box('json_error', 'JSON error persists:\nAdds placeholder for row\nindicating error',
               fill='lightyellow')

    # Keep the API outcomes, and separately the JSON outcomes, side by side.
    same_rank('api_failure', 'api_success', 'terminal_failure')
    same_rank('json_success', 'json_error')

    # (tail, head, edge attributes), in declaration order.
    flow = [
        # start -> category check -> (auto_gen) -> package_handler
        ('start', 'cat_auto_check', {}),
        ('cat_auto_check', 'auto_gen', {'label': 'yes'}),
        ('auto_gen', 'package_handler', {}),
        ('cat_auto_check', 'package_handler', {'label': 'no'}),
        # Input validation; constraint='false' keeps this edge from
        # influencing rank assignment, making it a side branch.
        ('package_handler', 'input_valid', {'constraint': 'false'}),
        ('input_valid', 'invalid_input', {'label': 'no'}),
        ('input_valid', 'api_call', {'label': 'yes'}),
        ('api_call', 'api_check', {}),
        # API check outcomes - three paths
        ('api_check', 'api_failure', {'label': 'fail (retry)'}),
        ('api_check', 'api_success', {'label': 'success'}),
        ('api_check', 'terminal_failure', {'label': '6+ failures'}),
        # Retry loop: failure -> back to api_call
        ('api_failure', 'api_call', {'label': 'retry'}),
        # Success continues into JSON processing
        ('api_success', 'json_trim', {}),
        ('json_trim', 'json_parse_check', {}),
        # JSON parsing fork
        ('json_parse_check', 'json_success', {'label': 'success'}),
        ('json_parse_check', 'json_error', {'label': 'error'}),
    ]
    for tail, head, edge_attrs in flow:
        dot.edge(tail, head, **edge_attrs)

    # Render. cleanup=True is forwarded to graphviz so the intermediate source
    # file is not left next to the image.
    # NOTE: the "version 2" cell renders to this same file name by default, so
    # running both cells leaves only the later image on disk.
    output_path = os.path.join(output_dir, 'catllm_flowchart_three_nodes')
    dot.render(output_path, view=view, cleanup=True)
    print(f"Three-node flowchart saved as: {output_path}.png")
    return dot

# Build the first version of the flowchart, render it to disk, and keep the
# returned Digraph in `dot`.
dot = create_catllm_flowchart_three_nodes()




Three-node flowchart saved as: /Users/chrissoria/documents/research/empirical_investigation_llm/plots/diagrams/catllm_flowchart_three_nodes.png


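Version 2: adds a title noting that the process repeats for each row, draws the category check as a diamond, and adds a "More rows to process?" loop that ends in "Return categorized dataset". It is saved under the same file name as the first version.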

In [21]:
from graphviz import Digraph
import os

def create_catllm_flowchart_three_nodes(output_dir=None, view=True):
    """Build the per-row CatLLM classification flowchart and render it to PNG.

    Compared with a plain single-pass diagram, this version carries a graph
    title stating that the process repeats for each row, and closes the flow
    with a "More rows to process?" decision that either loops back to prompt
    construction or ends at "Return categorized dataset".

    Args:
        output_dir: Directory that receives the rendered file. When ``None``
            (the default) the notebook-level ``diagrams_dir`` is used, which
            is what this function always did before the parameter existed.
        view: Forwarded to ``Digraph.render``. ``True`` (the default, matching
            the previous hard-coded value) asks graphviz to open the rendered
            file; pass ``False`` for headless or batch runs.

    Returns:
        The ``Digraph`` object that was rendered.

    Side effects:
        Renders ``catllm_flowchart_three_nodes.png`` inside ``output_dir`` and
        prints the saved path.
    """
    if output_dir is None:
        # Fall back to the directory configured in the notebook's setup cell.
        output_dir = diagrams_dir

    dot = Digraph(comment='Multiple-Classification with CatLLM', format='png')
    # NOTE(review): the Graphviz docs state that orthogonal routing has limited
    # support for edge labels in dot; if labels look misplaced, confirm against
    # the rendered image before changing anything.
    dot.attr(rankdir='TB', splines='ortho')
    dot.attr('node', shape='box', style='rounded')

    # Title/context note shown at the top of the graph.
    dot.attr(label='Process repeats for each row in the input column', labelloc='t', fontsize='14', fontname='bold')

    def filled_box(name, label, fill='white'):
        """Add a rounded, filled box (process steps and outcomes)."""
        dot.node(name, label, shape='box', fillcolor=fill, style='filled,rounded')

    def filled_shape(name, label, shape, fill='white'):
        """Add a filled node that is not rounded (ellipse or diamond)."""
        dot.node(name, label, shape=shape, fillcolor=fill, style='filled')

    def same_rank(*names):
        """Pin the named nodes to one rank via an anonymous subgraph."""
        with dot.subgraph() as group:
            group.attr(rank='same')
            for name in names:
                group.node(name)

    # Nodes are declared in a fixed order on purpose: Graphviz layout depends
    # on declaration order, so reordering these calls can change the picture.

    # Entry and early nodes
    filled_shape('start', 'User enters:\n1. Column of text to classify and API Key\n2. (Optional) Categories to classify into\n3. (Optional) Prompt Instructions\n4. (Optional) Example Categorizations',
                 'ellipse')
    filled_shape('cat_auto_check', 'Categories input = "auto"?', 'diamond')
    filled_box('auto_gen', 'Auto-generate categories\nfrom corpus')
    filled_shape('package_handler', 'Package automatically handles:\n- Different Category Lengths\n- Identifies Model Provider\n- Constructs Prompt',
                 'ellipse')

    # Input validation check (side branch)
    filled_box('input_valid', 'Input valid?\n(not NaN or empty string)')
    filled_box('invalid_input', 'Invalid input detected\n(NaN or empty string)\nSkips to Next Entry',
               fill='lightcoral')

    filled_box('api_call', 'CatLLM calls model provider to classify\ntext into categories\nand return results in JSON format')

    # API call outcome check and its three outcome nodes
    filled_box('api_check', 'API call outcome?')
    filled_box('api_failure', 'API call fails:\n- Rate limit exceeded\n- Invalid JSON format\n- Transitory server error',
               fill='lightyellow')
    filled_box('api_success', 'API call succeeds and\nproduces valid JSON object',
               fill='lightgreen')
    # Terminal failure node (red)
    filled_box('terminal_failure', 'All retries exhausted:\n- 6+ consecutive failures\n- Persistent errors\nSkips to Next Entry',
               fill='lightcoral')

    # JSON processing nodes
    filled_box('json_trim', 'Trims all non-JSON text and fixes JSON structure if needed')
    filled_shape('json_parse_check', 'JSON parsing successful?', 'diamond')
    filled_box('json_success', 'Successfully sorts data\nfrom JSON into columns',
               fill='lightgreen')
    filled_box('json_error', 'JSON error persists:\nAdds placeholder for row\nindicating error',
               fill='lightyellow')

    # Loop control
    filled_shape('more_rows', 'More rows\nto process?', 'diamond', fill='lightblue')
    filled_shape('complete', 'Return categorized dataset', 'ellipse', fill='lightgreen')

    # Keep the API outcomes, and separately the JSON outcomes, side by side.
    same_rank('api_failure', 'api_success', 'terminal_failure')
    same_rank('json_success', 'json_error')

    # (tail, head, edge attributes), in declaration order.
    flow = [
        # Setup phase (drawn once, before the per-row loop)
        ('start', 'cat_auto_check', {}),
        ('cat_auto_check', 'auto_gen', {'label': 'yes'}),
        ('auto_gen', 'package_handler', {}),
        ('cat_auto_check', 'package_handler', {'label': 'no'}),
        # Per-row processing. constraint='false' keeps this edge from
        # influencing rank assignment, making validation a side branch.
        ('package_handler', 'input_valid', {'constraint': 'false'}),
        ('input_valid', 'invalid_input', {'label': 'no'}),
        ('input_valid', 'api_call', {'label': 'yes'}),
        ('api_call', 'api_check', {}),
        # API check outcomes - three paths
        ('api_check', 'api_failure', {'label': 'fail (retry)'}),
        ('api_check', 'api_success', {'label': 'success'}),
        ('api_check', 'terminal_failure', {'label': '6+ failures'}),
        # Retry loop: failure -> back to api_call
        ('api_failure', 'api_call', {'label': 'retry'}),
        # Success continues into JSON processing
        ('api_success', 'json_trim', {}),
        ('json_trim', 'json_parse_check', {}),
        # JSON parsing fork
        ('json_parse_check', 'json_success', {'label': 'success'}),
        ('json_parse_check', 'json_error', {'label': 'error'}),
        # Every per-row outcome feeds the "more rows?" check
        ('invalid_input', 'more_rows', {}),
        ('terminal_failure', 'more_rows', {}),
        ('json_success', 'more_rows', {}),
        ('json_error', 'more_rows', {}),
        # Loop decision
        ('more_rows', 'package_handler', {'label': 'yes'}),
        ('more_rows', 'complete', {'label': 'no'}),
    ]
    for tail, head, edge_attrs in flow:
        dot.edge(tail, head, **edge_attrs)

    # Render. cleanup=True is forwarded to graphviz so the intermediate source
    # file is not left next to the image.
    output_path = os.path.join(output_dir, 'catllm_flowchart_three_nodes')
    dot.render(output_path, view=view, cleanup=True)
    print(f"Three-node flowchart saved as: {output_path}.png")
    return dot

# Build the version-2 flowchart (with the per-row loop), render it to disk,
# and keep the returned Digraph in `dot`.
dot = create_catllm_flowchart_three_nodes()




Three-node flowchart saved as: /Users/chrissoria/documents/research/empirical_investigation_llm/plots/diagrams/catllm_flowchart_three_nodes.png


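Version 2 for presentation: the same flow as version 2, with larger fonts and wider node spacing for slides. The final node reads "Return categorized column", and the image is saved under a separate file name (catllm_flowchart_three_nodes_presentation).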

In [22]:
from graphviz import Digraph
import os

def create_catllm_flowchart_three_nodes(output_dir=None, view=True):
    """Build the slide-sized CatLLM classification flowchart and render it to PNG.

    The flow is the per-row diagram (setup, input validation, API call with
    three outcomes, JSON parsing, "More rows to process?" loop) drawn with
    enlarged fonts and extra node/rank spacing so it stays legible on a slide.

    Args:
        output_dir: Directory that receives the rendered file. When ``None``
            (the default) the notebook-level ``diagrams_dir`` is used, which
            is what this function always did before the parameter existed.
        view: Forwarded to ``Digraph.render``. ``True`` (the default, matching
            the previous hard-coded value) asks graphviz to open the rendered
            file; pass ``False`` for headless or batch runs.

    Returns:
        The ``Digraph`` object that was rendered.

    Side effects:
        Renders ``catllm_flowchart_three_nodes_presentation.png`` inside
        ``output_dir`` and prints the saved path.
    """
    if output_dir is None:
        # Fall back to the directory configured in the notebook's setup cell.
        output_dir = diagrams_dir

    dot = Digraph(comment='Multiple-Classification with CatLLM', format='png')
    # NOTE(review): the Graphviz docs state that orthogonal routing has limited
    # support for edge labels in dot; if labels look misplaced, confirm against
    # the rendered image before changing anything.
    dot.attr(rankdir='TB', splines='ortho')
    dot.attr('node', shape='box', style='rounded')

    # Increase spacing for a wider diagram
    dot.attr(nodesep='1.5')  # Horizontal spacing between nodes (in inches)
    dot.attr(ranksep='0.75')  # Vertical spacing between ranks (in inches)

    # Much larger font sizes for PowerPoint
    dot.attr(fontsize='24')  # Graph-level font size
    dot.attr('node', fontsize='22')  # Default node font size
    dot.attr('edge', fontsize='20')  # Edge label font size

    # Title/context note shown at the top of the graph. This call sets the
    # graph-level fontsize again ('28'), after the '24' assigned just above.
    dot.attr(label='Process repeats for each row in the input column', labelloc='t', fontsize='28', fontname='bold')

    def filled_box(name, label, fill='white'):
        """Add a rounded, filled box (process steps and outcomes)."""
        dot.node(name, label, shape='box', fillcolor=fill, style='filled,rounded')

    def filled_shape(name, label, shape, fill='white'):
        """Add a filled node that is not rounded (ellipse or diamond)."""
        dot.node(name, label, shape=shape, fillcolor=fill, style='filled')

    def same_rank(*names):
        """Pin the named nodes to one rank via an anonymous subgraph."""
        with dot.subgraph() as group:
            group.attr(rank='same')
            for name in names:
                group.node(name)

    # Nodes are declared in a fixed order on purpose: Graphviz layout depends
    # on declaration order, so reordering these calls can change the picture.

    # Entry and early nodes
    filled_shape('start', 'User enters:\n1. Column of text to classify and API Key\n2. (Optional) Categories to classify into\n3. (Optional) Prompt Instructions\n4. (Optional) Example Categorizations',
                 'ellipse')
    filled_shape('cat_auto_check', 'Categories input = "auto"?', 'diamond')
    filled_box('auto_gen', 'Auto-generate categories\nfrom corpus')
    filled_shape('package_handler', 'Package automatically handles:\n- Different Category Lengths\n- Identifies Model Provider\n- Constructs Prompt',
                 'ellipse')

    # Input validation check (side branch)
    filled_box('input_valid', 'Input valid?\n(not NaN or empty string)')
    filled_box('invalid_input', 'Invalid input detected\n(NaN or empty string)\nSkips to Next Entry',
               fill='lightcoral')

    filled_box('api_call', 'CatLLM calls model provider to classify\ntext into categories\nand return results in JSON format')

    # API call outcome check and its three outcome nodes
    filled_box('api_check', 'API call outcome?')
    filled_box('api_failure', 'API call fails:\n- Rate limit exceeded\n- Invalid JSON format\n- Transitory server error',
               fill='lightyellow')
    filled_box('api_success', 'API call succeeds and\nproduces valid JSON object',
               fill='lightgreen')
    # Terminal failure node (red)
    filled_box('terminal_failure', 'All retries exhausted:\n- 6+ consecutive failures\n- Persistent errors\nSkips to Next Entry',
               fill='lightcoral')

    # JSON processing nodes
    filled_box('json_trim', 'Trims all non-JSON text and fixes JSON structure if needed')
    filled_shape('json_parse_check', 'JSON parsing successful?', 'diamond')
    filled_box('json_success', 'Successfully sorts data\nfrom JSON into columns',
               fill='lightgreen')
    filled_box('json_error', 'JSON error persists:\nAdds placeholder for row\nindicating error',
               fill='lightyellow')

    # Loop control
    filled_shape('more_rows', 'More rows\nto process?', 'diamond', fill='lightblue')
    filled_shape('complete', 'Return categorized column', 'ellipse', fill='lightgreen')

    # Keep the API outcomes, and separately the JSON outcomes, side by side.
    same_rank('api_failure', 'api_success', 'terminal_failure')
    same_rank('json_success', 'json_error')

    # (tail, head, edge attributes), in declaration order.
    flow = [
        # Setup phase (drawn once, before the per-row loop)
        ('start', 'cat_auto_check', {}),
        ('cat_auto_check', 'auto_gen', {'label': 'yes'}),
        ('auto_gen', 'package_handler', {}),
        ('cat_auto_check', 'package_handler', {'label': 'no'}),
        # Per-row processing. constraint='false' keeps this edge from
        # influencing rank assignment, making validation a side branch.
        ('package_handler', 'input_valid', {'constraint': 'false'}),
        ('input_valid', 'invalid_input', {'label': 'no'}),
        ('input_valid', 'api_call', {'label': 'yes'}),
        ('api_call', 'api_check', {}),
        # API check outcomes - three paths
        ('api_check', 'api_failure', {'label': 'fail (retry)'}),
        ('api_check', 'api_success', {'label': 'success'}),
        ('api_check', 'terminal_failure', {'label': '6+ failures'}),
        # Retry loop: failure -> back to api_call
        ('api_failure', 'api_call', {'label': 'retry'}),
        # Success continues into JSON processing
        ('api_success', 'json_trim', {}),
        ('json_trim', 'json_parse_check', {}),
        # JSON parsing fork
        ('json_parse_check', 'json_success', {'label': 'success'}),
        ('json_parse_check', 'json_error', {'label': 'error'}),
        # Every per-row outcome feeds the "more rows?" check
        ('invalid_input', 'more_rows', {}),
        ('terminal_failure', 'more_rows', {}),
        ('json_success', 'more_rows', {}),
        ('json_error', 'more_rows', {}),
        # Loop decision
        ('more_rows', 'package_handler', {'label': 'yes'}),
        ('more_rows', 'complete', {'label': 'no'}),
    ]
    for tail, head, edge_attrs in flow:
        dot.edge(tail, head, **edge_attrs)

    # Render. cleanup=True is forwarded to graphviz so the intermediate source
    # file is not left next to the image.
    output_path = os.path.join(output_dir, 'catllm_flowchart_three_nodes_presentation')
    dot.render(output_path, view=view, cleanup=True)
    print(f"Three-node flowchart saved as: {output_path}.png")
    return dot

# Build the presentation-sized flowchart, render it to disk, and keep the
# returned Digraph in `dot`.
dot = create_catllm_flowchart_three_nodes()




Three-node flowchart saved as: /Users/chrissoria/documents/research/empirical_investigation_llm/plots/diagrams/catllm_flowchart_three_nodes_presentation.png
