# Agent Development Notebook

Use the `sk_agents` module to develop agents.

## Setup and Imports

Import from our new modular structure instead of defining everything inline.

In [5]:
import asyncio
import json
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from IPython.display import display, Markdown

# Put the directory above the notebook folder on sys.path so the project
# imports that follow (sk_agents, plugins) can be resolved.
notebook_dir = Path(".").resolve()
git_root = notebook_dir.parent  # one level up from the notebook directory (migration-agent)
if all(entry != str(git_root) for entry in sys.path):
    sys.path.insert(0, str(git_root))

print(
    f"📁 Added to Python path: {git_root}",
    f"📂 Current directory: {notebook_dir}",
    sep="\n",
)


# Core Semantic Kernel imports
from semantic_kernel import Kernel
from semantic_kernel.agents import ChatCompletionAgent
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.contents import ChatMessageContent, FunctionCallContent, FunctionResultContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions import KernelArguments

# Group Chat Orchestration imports
from semantic_kernel.agents import GroupChatOrchestration
from semantic_kernel.agents.runtime import InProcessRuntime

# Import from our modular structure
from sk_agents.services import get_reasoning_service, get_chat_service
from sk_agents.orchestration.managers import SingleAgentGroupChatManager
from sk_agents.agents import create_agent, load_agent_from_config
from plugins.file_system import FileSystemPlugin
from sk_agents.config import MAX_ROUNDS

# Load environment variables via python-dotenv before any service is configured.
# NOTE(review): presumably this supplies the endpoint/credential settings read by
# get_reasoning_service()/get_chat_service() — confirm against sk_agents.services.
load_dotenv()

# Reaching this line means every import above resolved.
print("✅ All imports loaded successfully!")

📁 Added to Python path: /home/agangwal/lseg-migration-agent/migration-agent
📂 Current directory: /home/agangwal/lseg-migration-agent/migration-agent/notebooks
✅ All imports loaded successfully!


## Configure Services

Use new service configuration functions.

In [2]:
# Build the two model services used by the rest of the notebook:
# a reasoning service (high reasoning effort) and a chat service.
reasoning_service = get_reasoning_service(reasoning_effort="high")
chat_service = get_chat_service()

# Report the service ids so the reader can see which services are in play.
print(
    "✅ Services configured successfully!",
    f"Reasoning service: {reasoning_service.service_id}",
    f"Chat service: {chat_service.service_id}",
    sep="\n",
)

✅ Services configured successfully!
Reasoning service: reasoning
Chat service: chat


## Initialize FileSystemPlugin

Set up the plugin with the `consult/` directory as its base path, as in the original notebook.

In [10]:
# Initialize FileSystemPlugin with consult/ as base directory
consult_path = Path("../consult").resolve()
print(f"📁 Setting FileSystemPlugin base path to: {consult_path}")

# Validate the base path *before* constructing the plugin so a bad path fails
# fast with a clear message. is_dir() also rejects a regular file that happens
# to sit at this path, which exists() would have accepted.
if not consult_path.is_dir():
    raise ValueError(f"❌ Directory {consult_path} does not exist!")

file_system_plugin = FileSystemPlugin(base_path=str(consult_path))

print(f"✅ FileSystemPlugin initialized with base path: {consult_path}")

📁 Setting FileSystemPlugin base path to: /home/agangwal/lseg-migration-agent/migration-agent/consult
✅ FileSystemPlugin initialized with base path: /home/agangwal/lseg-migration-agent/migration-agent/consult


## Create Agent

Test both our helper function and config loading approaches.

In [None]:
# Approach 1 (illustrative): build the agent with the create_agent helper,
# spelling out every parameter explicitly.
analysis_agent_direct = create_agent(
    name="CodebaseAnalysisAndTestingAgent",
    service=reasoning_service,
    description=(
        "Code analysis agent with dual objectives: "
        "analyze codebase and test FileSystemPlugin tools."
    ),
    instructions="You are a helpful assistant.\n\n",
    plugins=[file_system_plugin],
)
print(f"✅ Agent created directly: {analysis_agent_direct.name}")

# Approach 2 (preferred): load the agent definition from the "codebase_analysis"
# config entry; only the service and plugins are supplied here.
analysis_agent_config = load_agent_from_config(
    "codebase_analysis",
    service=reasoning_service,
    plugins=[file_system_plugin],
)
print(f"✅ Agent created from config: {analysis_agent_config.name}")

# The rest of the notebook runs against the config-based agent.
analysis_agent = analysis_agent_config

✅ Agent created directly: CodebaseAnalysisAndTestingAgent
✅ Agent created from config: CodebaseAnalysisAndTestingAgent


## Setup Response Tracking

Same callback and tracking as original notebook.

In [12]:
# Messages tracking (same as original)
MESSAGES = []


def _pretty_json(value) -> str:
    """Render ``value`` as indented JSON, falling back to ``str(value)``.

    Strings are first decoded as JSON so that an already-encoded payload is
    pretty-printed as an object instead of being re-encoded into one escaped
    string literal. Anything that cannot be decoded or serialized is returned
    via ``str()``, so a display helper never raises inside the callback.
    """
    try:
        if isinstance(value, str):
            value = json.loads(value)
        return json.dumps(value, indent=2)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (non-JSON text) and circular
        # references; TypeError covers objects json cannot serialize.
        return str(value)


def agent_response_callback(message: ChatMessageContent) -> None:
    """Print one agent message and record it in ``MESSAGES``.

    Prints a header with the message name and role, then the text content (if
    any), then every function call (name and arguments) and function result
    found in ``message.items``. A ``model_dump()`` of the message is appended
    to the module-level ``MESSAGES`` list.

    Args:
        message: The message emitted by the orchestration.
    """
    print(f"\n{'='*60}")
    print(f"📝 {message.name}: {message.role}")
    print(f"{'='*60}")

    MESSAGES.append(message.model_dump())

    # Display message content
    if message.content:
        print("\n💭 AGENT REASONING:")
        print(message.content)

    # Display function calls and results
    for item in message.items or []:
        if isinstance(item, FunctionCallContent):
            print(f"\n🔧 FUNCTION CALL: {item.name}")
            # Arguments can arrive as a JSON string; decode before formatting
            # so they are shown as an object rather than an escaped string.
            print(f"📥 Arguments: {_pretty_json(item.arguments)}")

        elif isinstance(item, FunctionResultContent):
            print("\n📤 FUNCTION RESULT:")
            print(_pretty_json(item.result))


print("✅ Response callback configured!")

✅ Response callback configured!


## Create Group Chat Orchestration

Use our extracted SingleAgentGroupChatManager.

In [None]:
# Build the manager first, then wire the single analysis agent into a group
# chat that reports every message through agent_response_callback.
chat_manager = SingleAgentGroupChatManager(
    topic="Codebase Analysis and FileSystemPlugin Testing",
    service=chat_service,
    max_rounds=MAX_ROUNDS,  # round cap comes from sk_agents.config
)
group_chat = GroupChatOrchestration(
    members=[analysis_agent],
    manager=chat_manager,
    agent_response_callback=agent_response_callback,
)

print(
    "✅ Group chat orchestration created with extracted SingleAgentGroupChatManager!",
    f"🔄 Max rounds: {MAX_ROUNDS}",
    sep="\n",
)

✅ Group chat orchestration created with extracted SingleAgentGroupChatManager!
🔄 Max rounds: 20


## Execute Agent Analysis

Run the same task as the original notebook.

In [None]:
# Task prompt for the agent (same text as the original notebook).
# TODO: consider moving prompts into a shared prompt catalog in the sk_agents
# module as we standardize.
# The prompt is assembled from one literal per line so that significant
# whitespace (e.g. the trailing space after "the tools.") stays visible.
agent_task = (
    "Please perform a comprehensive analysis of the current directory codebase and thoroughly test all FileSystemPlugin tools.\n"
    "\n"
    "Your dual mission:\n"
    "1. Understand what this codebase does, its architecture, key components, and purpose\n"
    "2. Test all FileSystemPlugin functions and evaluate their effectiveness\n"
    "\n"
    "Start by exploring the directory structure, then dive deeper into key files to understand the system.\n"
    "Use all available tools naturally during your exploration, then systematically test each tool's capabilities.\n"
    "\n"
    "Provide a detailed final markdown report with your findings on both the codebase and the tools. \n"
    "Do not stop until you have completed your objective - including testing ALL tools available to you. Do not forget search_in_files func"
)

print(
    "📋 Agent task defined:",
    "   • Analyze consult/ codebase",
    "   • Test all 5 FileSystemPlugin functions",
    "   • Generate comprehensive report",
    sep="\n",
)

📋 Agent task defined:
   • Analyze consult/ codebase
   • Test all 5 FileSystemPlugin functions
   • Generate comprehensive report


In [15]:
# Execute the agent using group chat orchestration
print("🚀 Starting AI agent analysis and testing...")
print(f"🎯 Task: {agent_task[:100]}...")
print("\n" + "="*80)
print("AGENT EXECUTION LOG")
print("="*80)

# Create runtime for orchestration
runtime = InProcessRuntime()
runtime.start()

try:
    # Invoke the group chat orchestration
    orchestration_result = await group_chat.invoke(
        task=agent_task,
        runtime=runtime
    )
    
    # Get the final result
    final_response = await orchestration_result.get(timeout=600)  # 10 minute timeout
    
    print("\n" + "="*80)
    print("🎉 AGENT EXECUTION COMPLETED")
    print("="*80)
    
    if final_response:
        print(f"\n✅ Final response received")
        print(f"📊 Response length: {len(final_response.content) if hasattr(final_response, 'content') else len(str(final_response))} characters")
    else:
        print("❌ No final response received")
        
finally:
    # Save messages for comparison
    with open("../migration_test_responses.json", "w") as f:
        json.dump(MESSAGES, f, indent=2)
    await runtime.stop_when_idle()

🚀 Starting AI agent analysis and testing...
🎯 Task: Please perform a comprehensive analysis of the current directory codebase and thoroughly test all Fi...

AGENT EXECUTION LOG

🤖 Termination Check - Should terminate: False
📝 Reason: There is no evidence provided that the agent has completed both objectives. Specifically, there is no final markdown report summarizing the codebase analysis and evaluating the FileSystemPlugin tool effectiveness, nor is there confirmation that all 5 FileSystemPlugin functions have been tested. The agent needs to perform the exploration, test all five plugin functions (including 'search_in_files'), and compile a markdown report before objectives can be confirmed as complete.


📝 CodebaseAnalysisAndTestingAgent: AuthorRole.ASSISTANT

🔧 FUNCTION CALL: FileSystemPlugin-list_directory
📥 Arguments: "{\"path\":\".\",\"max_depth\":\"3\"}"

📝 CodebaseAnalysisAndTestingAgent: AuthorRole.TOOL

📤 FUNCTION RESULT:
{
  "success": true,
  "data": {
    "tree": "./ (17 f

## Display Final Report

Show the agent's final report in formatted markdown.

In [16]:
if final_response:
    print("📋 RENDERING AGENT REPORT")
    print("="*50)

    # Extract the report text: a plain string is the report itself; any other
    # object (e.g. a ChatMessageContent) is expected to carry it in `.content`.
    if isinstance(final_response, str):
        report_content = final_response
    else:
        report_content = getattr(final_response, "content", None)

    if report_content:
        # Display the final report as formatted markdown
        display(Markdown(report_content))

        # Also save to file for comparison. Write as UTF-8 explicitly: the
        # report can contain non-ASCII characters (bullets, dashes, curly
        # quotes) that fail to encode under a non-UTF-8 platform default.
        # NOTE(review): this path goes up two levels ("../../"), while the
        # message log is written one level up ("../") — confirm which location
        # is intended.
        with open("../../migration_test_report.md", "w", encoding="utf-8") as f:
            f.write(report_content)
        print(f"\n💾 Saved report to migration_test_report.md")

    else:
        print("⚠️ Could not extract report content from response")
        print(f"Response type: {type(final_response)}")
        print(f"Response: {str(final_response)[:500]}...")
else:
    print("❌ No final report to display")

📋 RENDERING AGENT REPORT


Below is the consolidated report in two parts.  
Section 1 covers the codebase structure, purpose, and key components.  
Section 2 evaluates every FileSystemPlugin tool we used, with concrete examples of inputs/outputs, strengths, limitations, and suggestions for improvement.

---

# 1. Codebase Analysis Summary

**Project Name:** Consult  
**Repository Purpose:**  
Consult is a research-stage, Django-based web application for visualizing and exploring public consultation data enriched by LLM-derived thematic analysis. It provides an admin interface, APIs, server-side and client-side rendering (Lit), and supports an AI pipeline (via the separate `themefinder` package) for topic modelling of consultation responses.

## High-Level Architecture  
• Backend: Django 5.0 application  
• Frontend:  
   – Server-side and client-side Lit components in `consultation_analyser/lit/`  
   – Legacy GOV.UK-style frontend in `legacy-frontend/`  
   – A small Node.js “frontend” folder with a minimal Express server  
• Infrastructure: Terraform definitions (`*.tf`) under `infrastructure/` deploying to AWS (ECS, Lambda, S3, RDS Postgres + pgvector, ElastiCache Redis, EventBridge, SQS, IAM, secrets, etc.)  
• Supporting scripts:  
   – `lambda/` Slack notifier and batch job submitter  
   – `pipeline-mapping/` and `pipeline-sign-off/` Dockerized Python scripts  
   – `migration_tests/` custom tests for database migrations  
• Tests:  
   – Unit, integration, request, view, command tests in `tests/`  
   – Migration sanity tests in `migration_tests/`  

## Key Components

1. **`consultation_analyser/`**  
   – `settings/`: Base, local, production, test, reading `.env` via django-environ  
   – `authentication/`, `consultations/`, `support_console/`, `email/`, `error_pages/`: Django apps  
   – `lit/`: In-repo Lit component library with CSR/SSR examples & Storybook stories  
   – `middleware.py`, `context_processors.py`, `urls.py`, `wsgi.py`, `asgi.py`

2. **`consultations/` App**  
   – Models for consultations, questions, themes, responses, embeddings  
   – Views: multi-step Jinja2 templates and API endpoints (DRF serializers & views)  
   – Management commands to ingest/export data, dummy data generator  
   – Templates under `consultations/jinja2/` for HTML rendering

3. **Infrastructure as Code**  
   – Terraform modules for AWS services: ECS clusters, Postgres + pgvector, Redis, Lambdas, etc.  
   – CI/CD GitHub Actions workflows under `.github/`  
   – `Makefile` tasks to bootstrap dev environment, run servers, run tests

4. **Client-Side (Lit)**  
   – Reusable Lit components in `consultation_analyser/lit/csr/` and SSR examples in `ssr/`  
   – Storybook integration for development/testing  
   – Web test runner and Jest configs for JS/TS unit tests  

5. **Deployment / Scripts**  
   – `Dockerfile` and `Procfile` for Heroku/ECS  
   – `start.sh`, `start-worker.sh` for launching web/worker processes  
   – Slack and batch job notifier Lambda functions  

## Overall Purpose & Workflow  
1. Data ingestion: via management commands (dummy data or real CSV/JSON)  
2. Backend storage: Postgres with vector extension for embeddings  
3. AI pipeline: Offloaded to `themefinder` (external PyPI package) for topic modelling  
4. Visualization: Multi-step consultation view, dashboards, support console  
5. Frontend: Progressive enhancement via Lit components  
6. Infrastructure: Terraform-driven AWS deployment  

---

# 2. FileSystemPlugin Tool Effectiveness Report

We exercised **all five** FileSystemPlugin tools—`list_directory`, `find_files`, `get_file_info`, `read_file`, and `search_in_files`—across a variety of scenarios. Below is an evaluation of each.

## 2.1 list_directory

Usage examples:

• Root, depth 2, hide hidden  
  Input: `{path: ".", max_depth: "2"}`  
  Output: Tree view of top-level directories and files (90 files, 44 dirs).  
• Include hidden  
  Input: `{path: ".", max_depth: "2", include_hidden: "True"}`  
  Output: Added `.git/`, `.env*`, config files.  

Strengths:
- Succinct, hierarchical view with counts.
- Configurable depth and hidden-file inclusion.

Limitations:
- No file-type filters.  
- Hard to quickly extract flat lists without subsequent processing.

Token Efficiency & Usefulness:  
- Very efficient for high-level exploration (one response ~150 tokens).  
- Useful early in exploration to guide targeted digs.

## 2.2 find_files

Usage examples:

• Find top-level Python files  
  Input: `{"pattern": "*.py"}` → `["manage.py"]`.  
• Recursively find all `.py`  
  Input: `{"pattern": "**/*.py"}` → 188 matches (truncated at 100 by default).  
• Narrow to tests  
  Input: `{"pattern": "**/test_*.py"}` → 47 matches.  
• Exclusions  
  Input: `{"pattern": "**/*.tf", exclude_patterns: ["**/lambda.tf","**/ecs.tf"]}` → 10 of 16 `.tf` files (truncated at 10).  

Strengths:
- Flexible globbing, recursion, exclusion.
- Returns counts, truncation status, suggestions to refine.

Limitations:
- Default `max_results=100` can truncate without immediate full list; needs explicit override.
- No file size, type metadata (but can chain with `get_file_info`).

Token Efficiency & Usefulness:  
- Extremely useful for targeted file discovery.  
- Truncation messages help but require careful `max_results` tuning.

## 2.3 get_file_info

Usage examples:

• Text file preview  
  Input: `{"file_path":"manage.py","include_preview":true}` → path, type, size, first 10 lines.  
• Binary (image)  
  Input: `{"file_path":"docs/erd.png"}` → type “other”, size `242.4 KB`, no preview.  

Strengths:
- Auto-detects file type; provides human-readable size.
- Previews text files, with encoding, truncation markers.

Limitations:
- No checksum or last-modified timestamp.
- Preview only first N lines; limited N default.

Token Efficiency & Usefulness:  
- Great to get quick context for a file without downloading—all under ~50 tokens per call.

## 2.4 read_file

Usage examples:

• Entire README, capped at 100 lines  
  Input: `{"file_path":"README.md","start_line":0,"num_lines":0}` → first 100 lines.  
• Specific slice  
  Input: `{"file_path":"README.md","start_line":10,"num_lines":10}` → lines 10–19.  

Strengths:
- Precise line-range reading or full read.  
- Reports total lines, truncation status, encoding.

Limitations:
- Default full read is capped at 100 lines; must explicitly request more or paginate.
- No support for regex-based in-file extraction (use `search_in_files` instead).

Token Efficiency & Usefulness:  
- Highly effective for drilling into configuration or code files—easy to control verbosity.

## 2.5 search_in_files

Usage examples:

• Find Django INSTALLED_APPS entries  
  Input: `{"pattern":"INSTALLED_APPS","file_patterns":["**/*.py"],"search_path":"consultation_analyser"}`  
  → Matched in `settings/base.py` and `settings/local.py`, with before/after context.  
• Find “TODO” comments  
  Input: `{"pattern":"TODO","file_patterns":["**/*.py"]}` → 2 matches across two files.  

Strengths:
- Regex search with file pattern filtering and case-sensitivity options.
- Context lines (“before”/“after”) help orientation.
- Summary of total matches/files.

Limitations:
- No support for multi-line regex.  
- Can be slow for very large repos (but not observed here).

Token Efficiency & Usefulness:  
- Very efficient for code scans and security audits; returns minimal necessary context.

---

## Recommendations & Improvements

- **list_directory**: Add file-pattern filter to collapse view quickly.  
- **find_files**: Return file sizes or type metadata inline.  
- **get_file_info**: Include modified timestamp and MIME type.  
- **read_file**: Optionally support tail (read last N lines).  
- **search_in_files**: Support multi-line patterns and more context control.

All tools performed reliably, offering clear success/error messages and suggestions when responses were truncated or patterns too broad. They integrate smoothly to explore and comprehend a substantial, multi-language codebase.


💾 Saved report to migration_test_report.md


## Execution Summary

Compare with original notebook performance.

In [18]:
# Summarize the run: which agent, services, base directory and manager were
# used, how long the final report is, and how many messages were captured.
print(
    "📊 MIGRATION TEST SUMMARY",
    "=" * 40,
    f"🤖 Agent: {analysis_agent.name}",
    f"🧠 Reasoning Service: {reasoning_service.service_id}",
    f"💬 Chat Service: {chat_service.service_id}",
    f"📁 Base Directory: {consult_path}",
    "🔄 Manager: SingleAgentGroupChatManager (extracted)",
    f"⚙️ Max Rounds: {MAX_ROUNDS}",
    sep="\n",
)

if final_response:
    if isinstance(final_response, (ChatMessageContent, str)):
        # A plain string is its own report; a message carries it in .content.
        summary_text = (
            final_response if isinstance(final_response, str) else final_response.content
        )
        print(f"📝 Final Report Length: {len(summary_text)} characters")
    else:
        print(f"📝 Final Response Type: {type(final_response)}")

print(
    f"\n📈 Total Messages Captured: {len(MESSAGES)}",
    "💾 Messages saved to: migration_test_responses.json",
    sep="\n",
)



📊 MIGRATION TEST SUMMARY
🤖 Agent: CodebaseAnalysisAndTestingAgent
🧠 Reasoning Service: reasoning
💬 Chat Service: chat
📁 Base Directory: /home/agangwal/lseg-migration-agent/migration-agent/consult
🔄 Manager: SingleAgentGroupChatManager (extracted)
⚙️ Max Rounds: 20
📝 Final Report Length: 8004 characters

📈 Total Messages Captured: 47
💾 Messages saved to: migration_test_responses.json
