# Agentic AI Workflow System

### Run Agentic AI Workflows on Cloudera AI Inference

This notebook allows you to run an agentic AI workflow that scans a GitHub repository for OWASP Top 10 vulnerabilities.

Check out the Cloudera AI Inference docs for setup instructions, such as how to generate a JWT token and find your model endpoint URL.

https://docs.cloudera.com/machine-learning/cloud/ai-inference/topics/ml-caii-use-caii.html

In [None]:
# Install Dependencies
# Notebook shell escape: runs pip against the requirements file shipped in
# the 0_install-dependencies/ directory.
!pip install -r 0_install-dependencies/requirements.txt

In [None]:
# Cloudera AI Inference Client options
# Fill in the three empty values before running the workflow cells below.

# Repository that the workflow clones and processes.
repo_url = "https://github.com/OWASP/Vulnerable-Web-Application"

# JWT token passed to the LLM client (see the AI Inference docs linked above).
jwt = ""

# Identifier of the model to call.
model_id = ""

# Endpoint URL passed to the LLM client -- presumably the model endpoint
# from the AI Inference docs; confirm against your deployment.
url = ""

In [None]:
import agents
import chunks
import os
import yaml

In [None]:
# Example usage of the AgenticWorkflow system
def demo_workflow(client_type: str, jwt: str, url: str, repo_url: str, model_id: str,
                  config_path: str = "sec_agents.yaml") -> None:
    """Clone a Git repository, convert it to markdown, and run an agent workflow.

    The repository at ``repo_url`` is processed into
    ``data/inputs/codebase_files.md``; the agents described in ``config_path``
    are then built into an executor and run.

    Args:
        client_type: Type of LLM client to use ("bedrock" or "caii").
        jwt: Token forwarded to both the LLM client and the DAG constructor.
        url: Endpoint URL forwarded to the LLM client.
        repo_url: URL of the Git repository to clone and process.
        model_id: Model identifier forwarded to the LLM client.
        config_path: Path to the YAML file that configures the workflow.

    Returns:
        None. Progress and failures are reported with ``print``. The function
        returns early if the repository cannot be processed or if
        ``config_path`` does not exist.
    """

    # Initialize the workflow system
    workflow = agents.AgenticWorkflow()

    # Clone and process the GitHub repository. You can scan any public repository easily
    print(f"Cloning repository: {repo_url}")
    codebase_input_path = "data/inputs/codebase_files.md"

    try:
        # Initialize Git Repository Processor and process to markdown
        git_processor = workflow.GitRepoProcessor(repo_url)
        try:
            git_processor.process_repository(codebase_input_path)
            print(f"Repository processed and saved to: {codebase_input_path}")
        finally:
            # Remove the cloned repository directory even when processing
            # failed, so a failed run does not leave the clone behind.
            git_processor.cleanup()
    except Exception as e:
        # Notebook-level boundary: report the problem and stop instead of raising.
        print(f"Error processing repository: {str(e)}")
        print("Workflow cannot continue without repository data.")
        return

    # Load configuration and run workflow
    if not os.path.exists(config_path):
        print(f"Configuration file {config_path} not found.")
        # Name the file that was actually looked up rather than a fixed name.
        print(f"Please ensure {config_path} exists in the current directory.")
        return

    # Parse the config up front so a malformed file fails here, before any
    # client is created. The parsed value is not used below: DAGConstructor
    # is handed the path itself.
    with open(config_path, "r", encoding="utf-8") as f:
        yaml.safe_load(f)

    # Initialize LLM Client
    # Depending on the LLM context window size, you can adjust max_tokens to
    # speed up the chunking into the first agent
    llm_client = workflow.LLMClient(
        client_type=client_type,
        jwt=jwt,
        url=url,
        model_id=model_id,
        max_tokens=40000,
    )

    # Initialize the DAG constructor
    constructor = workflow.DAGConstructor(jwt=jwt, config_path=config_path, llm_client=llm_client)

    # Build and execute the workflow
    dag_executor_function = constructor.build_executor()
    dag_executor_function()


In [None]:
# Run Security Workflow on Cloudera AI Inference
# config_path is not passed, so the default "sec_agents.yaml" applies.
demo_workflow(
    client_type="caii",
    jwt=jwt,
    url=url,
    repo_url=repo_url,
    model_id=model_id,
)

In [None]:
# Run Documentation Workflow
# repo_url is a required argument of demo_workflow; omitting it raises TypeError.
demo_workflow(
    client_type="caii",
    jwt=jwt,
    url=url,
    repo_url=repo_url,
    model_id=model_id,
    config_path="docs_agents.yaml",
)


In [None]:
# Run Test Coverage Workflow
# repo_url is a required argument of demo_workflow; omitting it raises TypeError.
demo_workflow(
    client_type="caii",
    jwt=jwt,
    url=url,
    repo_url=repo_url,
    model_id=model_id,
    config_path="test_agents.yaml",
)


### Run Security Workflow on AWS Bedrock

In [None]:
# Must have AWS_PROFILE and AWS_REGION, and be able to call aws sts to authenticate
# NOTE: this cell rebinds model_id and jwt, replacing the Cloudera AI Inference
# values set in the options cell; re-run that cell before returning to the
# "caii" workflows.

model_id = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
jwt = ""
# repo_url is a required argument of demo_workflow; omitting it raises TypeError.
demo_workflow(client_type="bedrock", jwt=jwt, url="", repo_url=repo_url, model_id=model_id)