In [1]:
import os 
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.program import FunctionCallingProgram

from typing import List, Literal, Optional
from pydantic import BaseModel, Field

In [2]:
# SECURITY: the API key must be supplied through the environment, never written
# into the notebook source. The key that was previously hardcoded on this line
# has been exposed and should be revoked and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export it in the environment that launches "
        "the kernel instead of hardcoding it in the notebook."
    )

# Shared Gemini client used by every LLM call below.
llm = GoogleGenAI(model="gemini-2.0-flash")

In [3]:


# ----- TOOL FUNCTIONS -----

import os
import aiofiles

async def extract_file_content(file_relative_path: str, repo_base_path: str) -> str:
    """Read a repository file asynchronously and return its text.

    The file is located by joining ``repo_base_path`` with
    ``file_relative_path``. It is decoded as UTF-8 first; when that raises
    ``UnicodeDecodeError`` the read is retried once as Latin-1.

    Ordinary exceptions are not propagated: a human-readable message
    describing the failure is returned in place of the file content.
    """
    target = os.path.join(repo_base_path, file_relative_path)

    async def _slurp(encoding: str) -> str:
        # Open the target with the requested encoding and return everything in it.
        async with aiofiles.open(target, mode='r', encoding=encoding) as handle:
            return await handle.read()

    try:
        return await _slurp('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8; fall through to the Latin-1 retry below.
        pass
    except FileNotFoundError:
        return f"File not found: {target}"
    except Exception as err:
        return f"Error reading file: {str(err)}"

    try:
        return await _slurp('latin-1')
    except Exception as err:
        return f"Error reading file (tried multiple encodings): {str(err)}"

def get_project_tree_string(root_path: str, prefix: str = "") -> str:
    """
    Recursively generates a tree-like string for the given directory.
    Example output:
        ├── folder1
        │   ├── file1.py
        │   └── file2.py
        └── folder2
            └── file3.py

    Entries are listed in sorted order. Symbolic links that point at
    directories are listed like any other entry but are not descended into,
    so a link back to an ancestor cannot make the tree repeat itself.

    Args:
        root_path: Directory whose contents are rendered.
        prefix: Indentation carried down from parent levels; callers normally
            leave this at its default.

    Returns:
        The rendered tree with lines joined by newlines (an empty string for an
        empty directory). If ``root_path`` cannot be listed, an
        "Error reading directory ..." message is returned instead of raising.
    """
    try:
        entries = sorted(os.listdir(root_path))
    except Exception as e:
        return f"Error reading directory {root_path}: {e}"

    lines = []
    last_index = len(entries) - 1
    for index, entry in enumerate(entries):
        full_path = os.path.join(root_path, entry)
        is_last = index == last_index
        lines.append(prefix + ("└── " if is_last else "├── ") + entry)
        # os.path.isdir follows symlinks; skipping linked directories avoids
        # re-entering an ancestor through a cyclic link.
        if os.path.isdir(full_path) and not os.path.islink(full_path):
            # Under the last entry the vertical guide is dropped.
            child_prefix = prefix + ("    " if is_last else "│   ")
            subtree = get_project_tree_string(full_path, child_prefix)
            if subtree:
                lines.append(subtree)
    return "\n".join(lines)

async def get_combined_file_content_with_tree(file_relative_path: str, repo_base_path: str) -> str:
    """Return the repository tree followed by one file's content as a single text block.

    The result has two titled sections, "Project Tree:" and "File Content:",
    each underlined with dashes and separated by a blank line.
    """
    # The file is read first, then the tree is rendered for the whole repo.
    body = await extract_file_content(file_relative_path, repo_base_path)
    tree = get_project_tree_string(repo_base_path)
    sections = [
        "Project Tree:",
        "-------------",
        f"{tree}\n",  # trailing newline yields the blank separator line
        "File Content:",
        "-------------",
        f"{body}",
    ]
    return "\n".join(sections)

In [4]:

# Schema for one dependency edge. No field declares a default, so all seven
# must be populated, including `full_path` and both boolean flags even when
# the target is an external or standard-library module.
# NOTE(review): pydantic normally exposes the class docstring and each Field
# description in the generated JSON schema, so they presumably reach the model
# as part of the structured-output request. Treat them as prompt text when
# editing.
class Dependency(BaseModel):
    """Represents a single dependency relationship between modules."""
    source: str = Field(description="Name of the current module/file")
    target: str = Field(description="The dependency being referenced")
    full_path: str = Field(description="Full relative path based on project structure")
    # Closed vocabulary: any other value fails validation.
    type: Literal["import", "inheritance", "usage", "composition"] = Field(
        description="Type of dependency relationship"
    )
    relationship: str = Field(description="Detailed description of how the dependency is used")
    is_external: bool = Field(description="Whether the dependency is external to the project")
    is_standard_lib: bool = Field(description="Whether the dependency is from Python's standard library")


# Top-level output model: this is the class handed to `llm.as_structured_llm`
# and to `FunctionCallingProgram.from_defaults(output_cls=...)` further down,
# wrapping the list of Dependency records under a single `dependencies` key.
class Dependencies(BaseModel):
    """Container for a list of dependencies."""
    dependencies: List[Dependency] = Field(description="List of dependency relationships")


# Instruction template for the dependency-extraction call. `{file_content}` is
# the only placeholder; it is meant to receive the combined "project tree +
# file content" text. The template is passed as `prompt_template_str` to
# FunctionCallingProgram.from_defaults later in the notebook.
# NOTE(review): the closing sentence contains typos ("Heres", "stucture"). The
# text is left untouched here because it is sent to the model verbatim.
DEPENDENCY_ANALYSIS_PROMPT = """
Your task is to analyze the provided code and extract all dependency relationships into a structured format. 
In addition to the code, you are provided with the project's folder tree. Use this tree to convert dotted 
import paths into full relative paths.

ANALYSIS PROCEDURE:
1. Identify all imports (standard libraries, third-party packages, local modules).
2. Detect function/class dependencies within the current file.
3. Find references to other project files and their relationships.
4. For local dependencies, use the provided project tree to convert the dotted import path (e.g. "app.db.wait_for_db") 
   into a full relative path that reflects the underlying file system structure (e.g. "app/db/wait_for_db"). 
   The full relative path should be constructed based on the project's folder hierarchy, ensuring that the 
   unique file and folder nodes can be identified correctly.

OUTPUT FORMAT:
You MUST produce a list of dictionaries where each dictionary represents one dependency relationship. 
Each dictionary MUST contain the following keys:
    - 'source': Name of the current module/file.
    - 'target': The dependency being referenced.
    - 'full_path': The full relative path (constructed using the provided tree) to uniquely identify the dependency.
    - 'type': One of the following: import | inheritance | usage | composition.
    - 'relationship': A detailed description of how the dependency is used.
    - 'is_external': true/false (use lowercase boolean values).
    - 'is_standard_lib': true/false.

Ensure that your output is valid JSON and that all required fields are present.

Heres the code content along with tree project stucture 
{file_content}
"""



In [7]:
# Structured-output view of the shared LLM: completions are constrained to the
# Dependencies schema.
sllm = llm.as_structured_llm(Dependencies)

In [5]:

# Function-calling program that pairs the analysis prompt with the Dependencies
# output schema. It is built here but not invoked in the cells visible below.
program = FunctionCallingProgram.from_defaults(
    llm=llm,
    output_cls=Dependencies,
    prompt_template_str=DEPENDENCY_ANALYSIS_PROMPT,
    tool_choice={"type": "auto"},
    verbose=True,
)

In [8]:
repo_base = "./repos/sec-insights"
file_rel = "backend/app/main.py"  # for example
# Extract file content asynchronously.
file_content = await get_combined_file_content_with_tree(file_rel, repo_base)
output = sllm.complete(file_content)
output.text

'{"dependencies":[{"source":"app/main.py","target":"typing","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":true},{"source":"app/main.py","target":"uvicorn","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":false},{"source":"app/main.py","target":"logging","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":true},{"source":"app/main.py","target":"sys","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":true},{"source":"app/main.py","target":"sentry_sdk","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":false},{"source":"app/main.py","target":"FastAPI","full_path":"app/main.py","type":"import","relationship":"import","is_external":true,"is_standard_lib":false},{"source":"app/main.py","target":"CORSMiddleware","full_path":"app/ma

In [11]:
import json 
# Parse the structured completion's JSON text into plain Python dicts/lists
# for inspection; the bare name on the last line displays the parsed value.
data = json.loads(output.text)
data

{'dependencies': [{'source': 'app/main.py',
   'target': 'typing',
   'full_path': 'app/main.py',
   'type': 'import',
   'relationship': 'import',
   'is_external': True,
   'is_standard_lib': True},
  {'source': 'app/main.py',
   'target': 'uvicorn',
   'full_path': 'app/main.py',
   'type': 'import',
   'relationship': 'import',
   'is_external': True,
   'is_standard_lib': False},
  {'source': 'app/main.py',
   'target': 'logging',
   'full_path': 'app/main.py',
   'type': 'import',
   'relationship': 'import',
   'is_external': True,
   'is_standard_lib': True},
  {'source': 'app/main.py',
   'target': 'sys',
   'full_path': 'app/main.py',
   'type': 'import',
   'relationship': 'import',
   'is_external': True,
   'is_standard_lib': True},
  {'source': 'app/main.py',
   'target': 'sentry_sdk',
   'full_path': 'app/main.py',
   'type': 'import',
   'relationship': 'import',
   'is_external': True,
   'is_standard_lib': False},
  {'source': 'app/main.py',
   'target': 'FastAPI',
  