## Enrich KG Agent using Multi-Agent Workflow

In [None]:
# %pip install llama-index-llms-google-genai llama-index

In [1]:
import os
import aiofiles
import asyncio
from typing import Dict, List, Any, Optional
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from llama_index.core.workflow import Context
from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
from llama_index.core.agent.workflow import AgentInput, AgentOutput, ToolCall, ToolCallResult, AgentStream


In [4]:
# SECURITY: never commit API keys to source control. The key that used to be
# hard-coded in this cell must be treated as leaked: revoke and rotate it.
# The key is now taken from the GOOGLE_API_KEY environment variable; when it is
# missing we prompt for it interactively (input is not echoed).
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter GOOGLE_API_KEY: ")

# No api_key argument is passed, so the client relies on the environment
# variable set above (same mechanism the original cell used).
llm = GoogleGenAI(model="gemini-2.0-flash")

## Agent 

In [2]:


# ----- TOOL FUNCTIONS -----

import os
import aiofiles

async def extract_file_content(file_relative_path: str, repo_base_path: str) -> str:
    """Read a repository file asynchronously and return its text.

    The path is resolved by joining ``repo_base_path`` and
    ``file_relative_path``. The file is decoded as UTF-8 first; if that fails
    with a decode error, it is re-read as latin-1. Failures are never raised:
    they are reported as a human-readable message in the returned string.
    """
    absolute_path = os.path.join(repo_base_path, file_relative_path)

    async def _read_as(encoding: str) -> str:
        # Single place that opens, reads and closes the file.
        async with aiofiles.open(absolute_path, mode='r', encoding=encoding) as handle:
            return await handle.read()

    try:
        return await _read_as('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall through to the latin-1 attempt below.
        pass
    except FileNotFoundError:
        return f"File not found: {absolute_path}"
    except Exception as exc:
        return f"Error reading file: {str(exc)}"

    try:
        return await _read_as('latin-1')
    except Exception as exc:
        return f"Error reading file (tried multiple encodings): {str(exc)}"

def get_project_tree_string(root_path: str, prefix: str = "") -> str:
    """
    Render the contents of ``root_path`` as a tree drawing, one entry per line.

    Entries are listed in sorted order and sub-directories are expanded
    recursively, with ``prefix`` prepended to every line of this level.
    Example output:
        ├── folder1
        │   ├── file1.py
        │   └── file2.py
        └── folder2
            └── file3.py

    If the directory cannot be listed, an error message string is returned
    instead of a tree. An empty directory yields an empty string.
    """
    try:
        names = os.listdir(root_path)
    except Exception as err:
        return f"Error reading directory {root_path}: {err}"

    names = sorted(names)
    last_position = len(names) - 1
    rendered = []
    for position, name in enumerate(names):
        # The final entry closes the branch; the others leave it open so the
        # vertical guide continues down to the next sibling.
        if position == last_position:
            branch, child_indent = "└── ", "    "
        else:
            branch, child_indent = "├── ", "│   "
        rendered.append(prefix + branch + name)

        child_path = os.path.join(root_path, name)
        if not os.path.isdir(child_path):
            continue
        nested = get_project_tree_string(child_path, prefix + child_indent)
        if nested:
            rendered.append(nested)
    return "\n".join(rendered)

async def get_combined_file_content_with_tree(file_relative_path: str, repo_base_path: str) -> str:
    """Return the repository tree followed by one file's content as a single text.

    The result has two titled sections, "Project Tree:" and "File Content:",
    each underlined with dashes and separated by a blank line.
    """
    body = await extract_file_content(file_relative_path, repo_base_path)
    tree = get_project_tree_string(repo_base_path)
    underline = "-------------"
    return (
        f"Project Tree:\n{underline}\n{tree}\n\n"
        f"File Content:\n{underline}\n{body}"
    )


async def generate_file_description(ctx: Context, description: str) -> str:
    """Usefull to generate detailed description of a file based on its code"""
    # NOTE(review): the docstring is kept verbatim because the function is
    # registered as an agent tool and the framework presumably exposes it to
    # the model as the tool description — confirm before rewording.
    #
    # Store the description under "file_description" in the shared workflow
    # state and write the state back to the context.
    state = await ctx.get("state")
    state["file_description"] = description
    await ctx.set("state", state)
    return "Description recorded."

async def generate_code_summary(ctx: Context, summary: str, need_analysis: bool) -> str:
    """Generates a detailed summary of a file's code and determines if further analysis is necessary based on the file's content."""
    # NOTE(review): docstring kept verbatim; it is presumably surfaced to the
    # model as the tool description — confirm before rewording.
    state = await ctx.get("state")
    # Reuse the existing "code_summary" entry, creating it on first use.
    summary_entry = state.setdefault("code_summary", {})
    summary_entry["code_summary"] = summary
    summary_entry["need_analysis"] = need_analysis
    await ctx.set("state", state)
    return "Summary recorded."


async def analyze_complexity(ctx: Context, complexity_analysis: str) -> str:
    """Usefull to generate detailed analyze complexity of a file based on its code"""
    # NOTE(review): docstring kept verbatim; it is presumably surfaced to the
    # model as the tool description — confirm before rewording.
    #
    # Store the analysis text under "complexity_analysis" in the shared state.
    state = await ctx.get("state")
    state["complexity_analysis"] = complexity_analysis
    await ctx.set("state", state)
    return "complexity analysis detailed."

async def analyze_dependency(
    ctx: Context,
    source: str,
    target: str,
    full_path: str,
    type: str,
    external: bool,
    description: str
) -> str:
    """Analyzes code dependencies and records detailed dependency information as a structured list"""
    # NOTE(review): docstring kept verbatim; it is presumably surfaced to the
    # model as the tool description — confirm before rewording.
    # The parameter name `type` shadows the builtin, but it is part of the
    # tool's call signature and therefore left unchanged.
    state = await ctx.get("state")

    # One call records one dependency edge; edges accumulate in a list that is
    # created on first use.
    recorded = state.setdefault("dependency_analysis", [])
    recorded.append(
        {
            "source": source,
            "target": target,
            "full_path": full_path,
            "type": type,
            "external": external,
            "description": description,
        }
    )

    await ctx.set("state", state)
    return "Dependency analysis completed with structured data."

async def extract_class_block(
        ctx: Context,
        docstring: str,
        class_name: str,
        code: str,
        description: str
    ) -> str:
    """Useful to extract and register a class block from the provided code.

    Args:
        ctx: Workflow context holding the shared "state" dictionary.
        docstring: Docstring of the class as found in the source.
        class_name: Name of the class.
        code: Full source code of the class.
        description: Generated description of what the class does.

    Returns:
        A confirmation message once the block has been stored in the state.
    """
    current_state = await ctx.get("state")
    class_block = {
        "description": description,
        "docstring": docstring,
        "class": class_name,
        "code": code
    }
    # Ensure both the "code_analysis" container and its "classes" list exist.
    # Checking only for "code_analysis" is not enough: a sibling extraction tool
    # may have created it with a different key, which would make the append
    # below fail with KeyError.
    code_analysis = current_state.setdefault("code_analysis", {})
    code_analysis.setdefault("classes", []).append(class_block)
    await ctx.set("state", current_state)
    return "Class block extraction completed"


async def extract_method_block(
    ctx: Context,
    docstring: str,
    method_name: str,
    code: str,
    description: str
) -> str:
    """Useful to extract and register a method block from the provided code.

    Args:
        ctx: Workflow context holding the shared "state" dictionary.
        docstring: Docstring of the method as found in the source.
        method_name: Name of the method.
        code: Full source code of the method.
        description: Generated description of what the method does.

    Returns:
        A confirmation message once the block has been stored in the state.
    """
    current_state = await ctx.get("state")
    method_block = {
        "description": description,
        "docstring": docstring,
        "method": method_name,
        "code": code
    }
    # Ensure both the "code_analysis" container and its "methods" list exist.
    # Checking only for "code_analysis" is not enough: a sibling extraction tool
    # may have created it with a different key, which would make the append
    # below fail with KeyError.
    code_analysis = current_state.setdefault("code_analysis", {})
    code_analysis.setdefault("methods", []).append(method_block)
    await ctx.set("state", current_state)
    return "Method block extraction completed"


async def extract_script_block(
    ctx: Context,
    code: str,
    description: str,
    script_name: str,
) -> str:
    """Useful to extract and register a script block from the provided code.

    Args:
        ctx: Workflow context holding the shared "state" dictionary.
        code: Source code of the standalone block.
        description: Generated description of what the block does.
        script_name: Name given to the script block.

    Returns:
        A confirmation message once the block has been stored in the state.
    """
    current_state = await ctx.get("state")
    script_block = {
        "script_name": script_name,
        "description": description,
        "code": code,
    }
    # Ensure both the "code_analysis" container and its "scripts" list exist.
    # Checking only for "code_analysis" is not enough: a sibling extraction tool
    # may have created it with a different key, which would make the append
    # below fail with KeyError.
    code_analysis = current_state.setdefault("code_analysis", {})
    code_analysis.setdefault("scripts", []).append(script_block)
    await ctx.set("state", current_state)
    return "Script block extraction completed"


In [5]:


# First agent of the pipeline: writes the file description, stores it through
# its tool, then passes control to SummaryAgent.
description_agent = FunctionAgent(
    name="DescriptionAgent",
    description="Generates a detailed description of a file based on its code.",
    system_prompt=(
        # Each literal ends with a space: adjacent literals are concatenated
        # with no separator, which previously fused two sentences together.
        "Please provide a clear, professional, and comprehensive description of the file's purpose and functionality based solely on the provided code. "
        # The description only reaches the shared workflow state through the
        # tool, so the prompt has to ask for the tool call explicitly.
        "Record the description by calling the generate_file_description tool. "
        "After recording the description, hand off control to SummaryAgent to generate the code summary."
    ),
    llm=llm,
    tools=[generate_file_description],
    can_handoff_to=["SummaryAgent"],
)

# Second agent: summarises the file and decides whether deeper analysis is
# worthwhile. The decision is carried by the `need_analysis` argument of the
# generate_code_summary tool, so the prompt refers to that argument explicitly
# instead of asking the model to "return a boolean".
summary_agent = FunctionAgent(
    name="SummaryAgent",
    description="Generates a summary of the code including its functionality.",
    system_prompt="""
    Provide a high-level overview of what the code does, outlining its key functionality, main components,
    and purpose within the larger system. The summary should capture the essence of the code without delving into
    implementation details.

    Record the summary by calling the generate_code_summary tool. Set its `need_analysis` argument to a boolean
    indicating whether this file requires further analysis:
    - `True`: after recording the summary, hand off control to ComplexityAgent for detailed analysis.
    - `False`: the file should be skipped and no further analysis is needed (e.g., for Dockerfiles, README.md, .sh files).
    """,
    llm=llm,
    tools=[generate_code_summary],
    can_handoff_to=["ComplexityAgent"],
)
# Third agent: assesses code complexity, then passes control to
# ParserCodeAgent (the only agent it is allowed to hand off to).
complexity_agent = FunctionAgent(
    llm=llm,
    name="ComplexityAgent",
    description="Analyzes the complexity of the code.",
    tools=[analyze_complexity],
    can_handoff_to=["ParserCodeAgent"],
    # Prompt text is reproduced verbatim, including its original spacing.
    system_prompt=(
        "Analyze the code's complexity and provide a comprehensive assessment.\n\n"
        "After completing the complexity analysis, hand off control to the ParserCodeAgent "
        "to perform a  code  parsering and analysis"
    ),
)

# Last agent of the pipeline (no handoff target): records every dependency of
# the file through the analyze_dependency tool. The field names in the prompt
# mirror the tool's parameters (`full_path`, not `path`), and the prompt asks
# for one tool call per dependency because the tool stores a single
# relationship per call.
dependency_agent = FunctionAgent(
    name="DependencyAgent",
    description="Analyzes code dependencies and extracts internal and external dependencies into a simplified structure.",
    system_prompt="""
    Your task is to analyze the provided code and extract dependencies into a simple format that is easy to load into a Neo4j database.

    ANALYSIS PROCEDURE:
    1. Identify all imports (standard libraries, third-party packages, local modules).
    2. Detect function or class dependencies within the file.
    3. Find references to other project files and external libraries.
    4. For local dependencies, resolve import paths using the provided project tree, converting dotted paths into full relative paths.
    5. For each dependency, call the analyze_dependency tool once with:
        - 'source': The file where the dependency originates.
        - 'target': The file/module being referenced.
        - 'type': The type of dependency (e.g., 'import', 'usage', etc.).
        - 'full_path': The full relative path to the dependency (e.g., `app/db/wait_for_db.py`).
        - 'external': Whether the dependency is external (e.g., third-party packages like `requests`) or internal (use `true/false`).
        - 'description': A brief description of how the dependency is used.

    OUTPUT FORMAT:
    One analyze_dependency tool call per dependency relationship, with the following arguments:
        - 'source': Name of the current module/file (string).
        - 'target': The dependency module/file being referenced (string).
        - 'type': The dependency type ('import', 'usage', etc.) (string).
        - 'full_path': Full relative path to the target (string).
        - 'external': `true` if the dependency is external, otherwise `false` (boolean).
        - 'description': A short description of how the dependency is used (string).
    """,
    llm=llm,
    tools=[analyze_dependency],
)

# Fourth agent: splits the code into class, method and script blocks, records
# each one through the matching extract_* tool, then hands off to
# DependencyAgent. The handoff sentence in the prompt refers to the parsing
# step (it previously said "complexity analysis", copied from ComplexityAgent).
parser_code_agent = FunctionAgent(
    name="ParserCodeAgent",
    description=(
        "This agent is responsible for parsing code  and generate documentation to extract class definitions, "
        "methods, and script blocks. For each class, the agent aggregates its methods "
        "and associated script blocks, while generating descriptive documentation "
    ),
    system_prompt=("""
    You are a code analysis agent. Your task is to analyze the given code and accurately identify and register and generate the description:

    1. Class Blocks: Extract complete class definitions, including their docstrings and code. For each class found, use the extract_class_block function.
    2. Method Blocks: Within classes, identify methods along with their names, docstrings, and code. For each method, use the extract_method_block function.
    3. Script Blocks: Identify any standalone code blocks (i.e., not part of a class or method) and extract them as script blocks using the extract_script_block function.

    For each extracted entity, generate clear, concise documentation and ensure that methods are nested or associated with their corresponding class where applicable. Make sure to always use the correct extraction function based on whether the block of code is a class, method, or a standalone script.
    After completing the code parsing, hand off control to the DependencyAgent
    to perform a detailed code dependency analysis. The DependencyAgent will extract
    dependencies in a structured format suitable for Neo4j integration.

    """
    ),
    llm=llm,
    tools=[extract_class_block, extract_method_block, extract_script_block],
    can_handoff_to=["DependencyAgent"],
)


In [6]:

# Wire the five agents into one workflow. Execution starts at the description
# agent, and every result slot of the shared state is pre-created empty so the
# tools always find the keys they write to.
agent_workflow = AgentWorkflow(
    agents=[
        description_agent,
        summary_agent,
        complexity_agent,
        dependency_agent,
        parser_code_agent,
    ],
    root_agent=description_agent.name,
    initial_state=dict(
        file_description="",
        code_summary={},
        complexity_analysis="",
        dependency_analysis=[],
        code_analysis=dict(scripts=[], classes=[], methods=[]),
    ),
)

In [None]:

# Smoke test: run the description agent on its own (outside the workflow)
# against a single file of the checked-out repository.
repo_base = "./repos/sec-insights"
file_rel = "backend/app/main.py"  # example target file, relative to repo_base
# Build the agent input: the project tree followed by the file's content.
file_content = await get_combined_file_content_with_tree(file_rel, repo_base)

result = await description_agent.run(file_content)

result.response.content

'This file serves as the entry point for the backend application, configuring and launching the FastAPI application. It manages the application lifecycle, including database initialization and migrations, sets up middleware for CORS, configures logging and error tracking with Sentry, and includes routers for different API endpoints. The application is then run using Uvicorn.\n\nI will now hand off control to the SummaryAgent to generate a code summary.\n'

In [20]:


# ----- Main Asynchronous Function -----

repo_base = "./repos/sec-insights"
file_rel = "backend/app/main.py"  # for example
# Extract file content asynchronously.
file_content = await get_combined_file_content_with_tree(file_rel, repo_base)

# Start the agent workflow with the file content as input.
handler = agent_workflow.run(user_msg=file_content)

# Stream workflow events to monitor progress.
current_agent = None
async for event in handler.stream_events():
    if hasattr(event, "current_agent_name") and event.current_agent_name != current_agent:
        current_agent = event.current_agent_name
        print(f"\n{'='*50}")
        print(f"🤖 Agent: {current_agent}")
        print(f"{'='*50}\n")
    if isinstance(event, AgentStream):
        if event.delta:
            print(event.delta, end="", flush=True)
    # elif isinstance(event, AgentInput):
    #     print("📥 Input:", event.input)        
    if isinstance(event, AgentOutput):
        if event.response.content:
            print("📤 Output:", event.response.content)
        if event.tool_calls:
            print("🛠️  Planning to use tools:", [call.tool_name for call in event.tool_calls])
    elif isinstance(event, ToolCallResult):
        print(f"🔧 Tool Result ({event.tool_name}):")
        print(f"  Arguments: {event.tool_kwargs}")
        print(f"  Output: {event.tool_output}")
    elif isinstance(event, ToolCall):
        print(f"🔨 Calling Tool: {event.tool_name}")
        print(f"  With arguments: {event.tool_kwargs}")



🤖 Agent: DescriptionAgent

This file serves as the entry point for the FastAPI application. It handles the configuration and initialization of various components, including logging, error tracking with Sentry, CORS settings, database connection and migration checks using Alembic, and the lifespan of the application. The lifespan function ensures the database is up-to-date and initializes a vector store. It also sets up API routes and starts the Uvicorn server to run the application.

📤 Output: This file serves as the entry point for the FastAPI application. It handles the configuration and initialization of various components, including logging, error tracking with Sentry, CORS settings, database connection and migration checks using Alembic, and the lifespan of the application. The lifespan function ensures the database is up-to-date and initializes a vector store. It also sets up API routes and starts the Uvicorn server to run the application.


🛠️  Planning to use tools: ['handoff'