### Test GitHub MCP Server

In [1]:
from agent_framework.openai import OpenAIChatClient
from agent_framework import MCPStdioTool, ChatAgent, AgentExecutorResponse, Executor
import os
from dotenv import load_dotenv
load_dotenv()

True

#### Connect to the local GitHub MCP server running in Docker Desktop

In [2]:
chat_client = OpenAIChatClient(base_url=os.getenv("BASE_URL"), api_key=os.getenv("OPENAI_API_KEY"), model_id=os.getenv("MODEL_ID"))
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN_FULL_PERMISIONS")
GITHUB_HOST = ""
toolsets = "repos,issues,pull_requests,actions,code_security,experiments"
print(f"GITHUB_TOKEN={GITHUB_TOKEN}")
github_mcp = MCPStdioTool(
    name="GitHubMCP",
    command="docker",
    args=[
        "run", "-i", "--rm",
        "-e", f"GITHUB_PERSONAL_ACCESS_TOKEN={GITHUB_TOKEN}", f"-e GITHUB_TOOLSETS={toolsets}",
        "ghcr.io/github/github-mcp-server"
    ],
    chat_client=chat_client,
)
await github_mcp.connect()



GITHUB_TOKEN=<redacted>


### Write a comment on a pull request, identified by repository name and owner, using GitHub's MCP server

In [3]:
all_tools = [*github_mcp.functions]
instructions = """
You are a helpful assistant that helps me write comments to the 'project-detect-secrets-in-repo' github repository with OWNER 'iuf26'. As a response only return 'SUCCESS' if operation succeeded and otherwise return 'FAILURE' and reason for failure.
"""
agent = ChatAgent(
    chat_client=chat_client,
    instructions=instructions,
    name="ResearchAgent",
    tools=all_tools
 )
result = await agent.run("I would like to add the 'please remove this secret number 6 1 November' comment to the pull request number 1 to the 'leaky_sample.py' file at line number 6. If there is no review started yet please do create it.")
print(result)


SUCCESS


In [5]:
all_tools = [*github_mcp.functions]
instructions = """
You are a helpful assistant. Retrieve all files included in an open pull request from the GitHub repository 'iuf26/workshop-detect-secrets-in-repo'.
Respond only with a list of direct links (URLs) to the files changed or added in the pull request — no explanations or additional text.
"""
github_pr_agent = ChatAgent(
    chat_client=chat_client,
    instructions=instructions,
    name="PullRequestAgent",
    tools=all_tools
 )
result = await github_pr_agent.run("Get me all the files involved in the PR with number 1.")
print(result)

1. [leaky_sample_file.py](https://github.com/iuf26/workshop-detect-secrets-in-repo/blob/feat/iulia-add-secrets/leaky_sample_file.py)
2. [added_text_file.txt](https://github.com/iuf26/workshop-detect-secrets-in-repo/blob/feat/iulia-add-secrets/added_text_file.txt)


In [None]:
# Print the name of each function the connected GitHub MCP tool exposes, one per line.
for tool in github_mcp.functions:
    print(tool.name)

add_comment_to_pending_review
add_issue_comment
add_sub_issue
assign_copilot_to_issue
create_branch
create_issue
create_or_update_file
create_pull_request
create_repository
delete_file
fork_repository
get_commit
get_file_contents
get_issue
get_issue_comments
get_label
get_latest_release
get_me
get_release_by_tag
get_tag
get_team_members
get_teams
list_branches
list_commits
list_issue_types
list_issues
list_label
list_pull_requests
list_releases
list_sub_issues
list_tags
merge_pull_request
pull_request_read
pull_request_review_write
push_files
remove_sub_issue
reprioritize_sub_issue
request_copilot_review
search_code
search_issues
search_pull_requests
search_repositories
search_users
update_issue
update_pull_request
update_pull_request_branch
AssignCodingAgent
IssueToFixWorkflow


### Parallel processing

In [None]:
import asyncio
import time
from agent_framework import Executor, WorkflowBuilder, handler, WorkflowContext, ConcurrentBuilder, ExecutorCompletedEvent, ExecutorInvokedEvent, WorkflowOutputEvent

class Producer(Executor):
    """Executor that emits ten integer job ids to its downstream edges."""

    @handler
    async def start(self, _: str, ctx: WorkflowContext[int]):
        """Send the job ids 0..9 one by one, logging each, then log completion.

        The incoming string message is only a trigger; its value is ignored.
        """
        for job_id in range(10):
            print(f"Send {job_id}")
            await ctx.send_message(job_id)
        print("Producer done")

class Worker(Executor):
    """Executor that turns each integer job id into a ``"Result <id>"`` message."""

    @handler
    async def handle_job(self, job_id: int, ctx: WorkflowContext[str]):
        """Log that the job started and forward its result string downstream."""
        print(f"Worker worker started job {job_id}")
        # No artificial delay here: the job is answered immediately.
        result = f"Result {job_id}"
        await ctx.send_message(result)

builder = WorkflowBuilder()
producer = Producer("producer")
worker = Worker("worker")
builder.set_start_executor(producer)
builder.add_edge(producer, worker)
workflow = builder.build()

#workflow = ConcurrentBuilder().participants([worker, producer]).build()

completion = None
async for event in workflow.run_stream("start"):
    match event:
        case ExecutorInvokedEvent() as invoke:
            print(f"Starting {invoke.executor_id}")
        case ExecutorCompletedEvent() as complete:
            print(f"Completed {complete.executor_id}: {complete.data}")
        case WorkflowOutputEvent() as output:
            print(f"Workflow produced output: {output.data}")
            break



Send 0
Send 1
Send 2
Send 3
Send 4
Send 5
Send 6
Send 7
Send 8
Send 9
Producer done
Starting producer
Completed producer: None
Worker worker started job 0
Starting worker
Worker worker started job 1
Completed worker: None
Worker worker started job 2
Starting worker
Worker worker started job 3
Completed worker: None
Worker worker started job 4
Starting worker
Worker worker started job 5
Completed worker: None
Worker worker started job 6
Starting worker
Worker worker started job 7
Completed worker: None
Worker worker started job 8
Starting worker
Worker worker started job 9
Completed worker: None
Starting worker
Completed worker: None
Starting worker
Completed worker: None
Starting worker
Completed worker: None
Starting worker
Completed worker: None
Starting worker
Completed worker: None


### Define Chunk processing research agent

In [6]:
# Sample input for the secrets-detection workflow: ten text chunks, one per file type.
# Each entry holds:
#   "chunk"                   - the chunk text, lines separated by "\n"
#   "original_lines_interval" - [start, end] line numbers of the chunk in its source file
#   "original_file"           - path of the file the chunk was taken from
# NOTE(review): the credentials and personal data below look like fabricated examples
# (many contain "fake"/"EXAMPLE"); confirm none of them is real before sharing the notebook.
input_test_chunks = [
  {
    "chunk": "import os\nDEBUG=True\nDATABASE_URL=\"postgresql://appuser:Sup3rS3cret!@db:5432/app\"\nJWT_SECRET=\"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.fake.payload\"\nADMIN_EMAIL=\"maria.popescu@example.ro\"\nADMIN_PHONE=\"+40-721-555-321\"\nSMTP_PASSWORD=\"p@55w0rd!\"\nLOG_LEVEL=\"INFO\"\nprint(\"config loaded\")",
    "original_lines_interval": [120, 128],
    "original_file": "src/app/config.py"
  },
  {
    "chunk": "# .env\nAWS_ACCESS_KEY_ID=AKIAZZZZEXAMPLE1234\nAWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYZZZexample\nS3_BUCKET=media-prod\nSTRIPE_WEBHOOK_SECRET=whsec_1e2b3c4d5e6f7g8h9i0j\nREDIS_URL=redis://:s3cr3tP@redis:6379/0\n# service creds\nSERVICE_EMAIL=ops.team@example.com\nSERVICE_PHONE=+40-745-000-111\nNODE_ENV=production",
    "original_lines_interval": [45, 52],
    "original_file": "services/api/.env"
  },
  {
    "chunk": "// config.js\nmodule.exports = {\n  stripeKey: \"sk_live_51NvEXAMPLEaBcD1234567890\",\n  headers: {\n    \"Authorization\": \"Bearer eyJraWQiOiJrZXkiLCJhbGciOiJIUzI1NiJ9.fake.token\",\n    \"X-Api-Key\": \"b7b2f0c6-1b24-4e1a-9c1a-12e34abcd567\"\n  },\n  supportEmail: \"support@example.com\",\n  logLevel: \"debug\"\n};",
    "original_lines_interval": [200, 209],
    "original_file": "web/src/config.js"
  },
  {
    "chunk": "apiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: app-config\ndata:\n  DB_DSN: \"mysql://admin:hunter2@mysql:3306/app\"\n  MAIL_USER: \"mailer@example.ro\"\n  MAIL_PASS: \"Tr1cky-P@ss!\"\n  OAUTH_CLIENT_SECRET: \"c0a89e5f-7d1f-4d0c-9b77-1a2b3c4d5e6f\"",
    "original_lines_interval": [31, 41],
    "original_file": "deploy/k8s/app-config.yaml"
  },
  {
    "chunk": "{\n  \"type\": \"service_account\",\n  \"project_id\": \"demo-proj-123\",\n  \"private_key_id\": \"f4f3c2e1d0b9a8f7e6d5c4b3a2918171\",\n  \"private_key\": \"-----BEGIN PRIVATE KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBK...FAKE...\\n-----END PRIVATE KEY-----\\n\",\n  \"client_email\": \"svc-acc@demo-proj-123.iam.gserviceaccount.com\",\n  \"client_id\": \"109876543210987654321\",\n  \"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\"\n}",
    "original_lines_interval": [402, 410],
    "original_file": "infra/gcp/service-account.json"
  },
  {
    "chunk": "#!/usr/bin/env bash\nexport GITHUB_TOKEN=ghp_FAKE1234567890abcdef1234567890abcd\nexport SLACK_BOT_TOKEN=xoxb-123456789012-1234567890123-ABCdefGHIjklMNOpqr\ncurl -s -H \"Authorization: Bearer $GITHUB_TOKEN\" https://api.github.com/user\ncurl -u \"deployer:Sup3r-Secret!\" https://registry.example.com/v2/_catalog\nEMAIL_NOTIFY=\"andrei.ionescu@example.com\"\necho \"Done\"",
    "original_lines_interval": [88, 96],
    "original_file": "scripts/release.sh"
  },
  {
    "chunk": "-- seed.sql\nINSERT INTO users (full_name, email, phone, password_plain)\nVALUES ('Ioana Radu','ioana.radu@example.ro','+40 722 111 222','summer2025');\nINSERT INTO api_credentials (name, token)\nVALUES ('internal-bot','eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.fake');\n-- DO NOT COMMIT REAL DATA",
    "original_lines_interval": [12, 16],
    "original_file": "db/seed.sql"
  },
  {
    "chunk": "variable \"cloud_api_token\" {\n  type = string\n  default = \"tkn_live_3b2f1c4d5e6f7a8b9c0d\"\n}\nprovider \"example\" {\n  token = var.cloud_api_token\n}\noutput \"endpoint\" { value = example_service.url }",
    "original_lines_interval": [77, 84],
    "original_file": "infra/terraform/vars.tf"
  },
  {
    "chunk": "spring.datasource.url=jdbc:postgresql://db:5432/app?user=app&password=Str0ngP@ss\nspring.mail.username=mailer@example.ro\nspring.mail.password=Qwerty!234\njwt.signing.key=5a6b7c8d9e0f11223344556677889900\nmanagement.endpoints.web.exposure.include=health,info,metrics\nsupport.phone=+40-733-000-222",
    "original_lines_interval": [15, 20],
    "original_file": "service/src/main/resources/application-prod.properties"
  },
  {
    "chunk": "# notebook cell\nOPENAI_API_KEY=\"sk-live-abc123XYZ987fakefakefake\"\nTWILIO_ACCOUNT_SID=\"AC1234567890abcdef1234567890abcd\"\nTWILIO_AUTH_TOKEN=\"9d8c7b6a5e4f3d2c1b0a\"\nuser_name = \"Ciprian Istrate\"\nuser_email = \"ciprian.istrate@example.com\"\nprint(\"init done\")",
    "original_lines_interval": [260, 266],
    "original_file": "notebooks/exp01.ipynb"
  }
]


In [None]:
import hashlib
import uuid

from domain.models import TextChunk

# Fixed UUIDv5 namespace: changing it changes every derived id, so pick once and keep.
NAMESPACE = uuid.UUID("876de125-4386-442b-9e88-d40dfbbb301d")


def stable_uuid(s: str) -> str:
    """Return a deterministic UUIDv5 string for *s*.

    The input is stripped of surrounding whitespace and lower-cased first, so
    values that differ only in case or padding map to the same id.
    """
    normalized = s.strip().lower()
    derived = uuid.uuid5(NAMESPACE, normalized)
    return str(derived)

def shard_for_chunk(chunk: TextChunk, total_agents: int) -> int:
    """
    Pick which worker (0..total_agents-1) should handle this chunk.

    How it works (in plain words):
    - Build a key from the file name and the line span, joined by "|". The span
      is interpolated as-is, so a tuple span gives e.g. "app.py|(120, 180)".
    - Hash that key with SHA-256 and read the first 4 bytes as a big-endian int.
    - Take that number modulo total_agents to get a shard index.

    Inputs:
    - chunk: must expose `source_file` and `line_span`.
    - total_agents: number of workers; must be >= 1.

    Guarantees:
    - Same chunk and same total_agents → same shard index (deterministic).
    - Result r is an int with 0 <= r < total_agents.

    Raises:
    - ValueError: if total_agents < 1 (the range guarantee could not hold).
    """
    # Enforce the documented precondition: 0 would divide by zero and a negative
    # count would silently yield indices outside 0..total_agents-1.
    if total_agents < 1:
        raise ValueError(f"total_agents must be >= 1, got {total_agents}")
    key = f"{chunk.source_file}|{chunk.line_span}"
    digest = hashlib.sha256(key.encode()).digest()
    return int.from_bytes(digest[:4], "big") % total_agents

In [16]:
from agent_framework import Executor, WorkflowBuilder, handler, WorkflowContext, ConcurrentBuilder, ChatAgent
from agent_framework.openai import OpenAIChatClient
from datetime import datetime
import json
from pydantic import BaseModel
from domain.models import TextChunk, LineComment, SecretsDetectorExecutorResponse, EmptySecretsDetectorExecutorResponseFactory
import os
from dotenv import load_dotenv
# Load .env, then build the chat client from the environment.
load_dotenv()
chat_client = OpenAIChatClient(
    base_url=os.getenv("BASE_URL"),
    api_key=os.getenv("OPENAI_API_KEY"),
    model_id=os.getenv("MODEL_ID"),
)
# GitHub personal access token, read from GITHUB_PAT.
GITHUB_TOKEN = os.getenv("GITHUB_PAT")

class SecretsDetectorExec(Executor):
    """Workflow executor that asks an LLM agent to flag secrets/PII in a text chunk.

    An instance only processes chunks whose shard index (``shard_for_chunk``)
    equals its own ``my_shard``; all other chunks are dropped without a reply.
    A per-chunk flag in the workflow shared state prevents a chunk from being
    analysed twice.
    """

    agent: ChatAgent
    # NOTE(review): this is not a raw string, so each "\n" below reaches the model
    # as a real line break rather than the two characters backslash + n — confirm
    # that this is what the prompt intends.
    agent_instruction = """
        <instruction role="system">
        You are a code-secrets detector. Given a text CHUNK (with "\n" newlines) and its original line interval [START, END], return only a JSON array of findings. Flag lines that contain likely secrets (API keys/tokens, private keys, passwords, connection strings with creds, service-account JSON fields, auth headers) or PII (names paired with email/phone/IDs). Be precise; if unsure, don't flag. Ignore obvious placeholders.
        </instruction>
        <schema>
        Output exactly:
        [
        { "line_number": <int original line>, "comment": "<types comma-separated>. Please remove." }
        ]
        Return [] if nothing is found. No extra text.
        </schema>
        <procedure>
        1) Split CHUNK by "\n".
        2) For each line i (1-based), assess for secrets/PII using field names and context (e.g., "api_key", "token", "password", "private_key", DSN with user:pass, "Authorization: Bearer ...", service-account fields like private_key_id/private_key).
        3) If flagged, compute original line_number = START + i - 1.
        4) Emit JSON as per <schema>, comments short, no code excerpts.
        </procedure>
        <example>
        INPUT:
        START=4, END=7
        CHUNK:
        print("ok")
        "private_key_id": "f4f3c2e1d0b9a8f7e6d5c4b3a2918171",
        print("done")

        OUTPUT:
        [
        { "line_number": 5, "comment": "Private key identifier. Please remove." }
        ]
        </example>
    """

    def __init__(self, chat_client: OpenAIChatClient,  my_shard: int, total_agents: int, id: str = "secrets detector"):
        """Create the detector and its backing agent.

        Args:
            chat_client: client used to create the detection agent.
            my_shard: shard index this instance is responsible for.
            total_agents: number of detector instances sharing the work.
            id: executor id; also used as a suffix of the agent name.
        """
        agent = chat_client.create_agent(
            instructions=self.agent_instruction,
            name=f"SecretsDetectorAgent_{id}",
        )
        self.id = id
        self.agent = agent
        self.my_shard = my_shard
        self.total_agents = total_agents
        super().__init__(agent=agent, id=id)

    def create_prompt_from_chunk(self, chunk: TextChunk):
        """Build the user prompt for *chunk*: file name, line interval and chunk text."""
        prompt = f"""
            Please investigate and detect secrets existent in the chunk taken from the line intervals of the file {chunk.source_file}.
            INPUT
            START={chunk.line_span[0]}, END={chunk.line_span[1]}
            CHUNK:
            {chunk.text}
        """
        return prompt

    @staticmethod
    def _parse_findings(raw: str) -> list:
        """Parse the agent reply into the list of finding dicts.

        Models often wrap JSON in a markdown code fence (```json ... ```) even
        when told not to; such a fence is removed before parsing. Anything that
        is still not valid JSON raises ``json.JSONDecodeError`` as before.
        """
        text = raw.strip()
        if text.startswith("```"):
            # Drop the opening fence line (with optional language tag) ...
            _, _, text = text.partition("\n")
            # ... and everything from the closing fence onwards.
            text = text.rsplit("```", 1)[0]
        return json.loads(text)

    @handler
    async def run(self, chunk: TextChunk, ctx: WorkflowContext[SecretsDetectorExecutorResponse]) -> None:
        """Analyse *chunk* if it belongs to this shard and has not been handled yet.

        Sends an empty response when the chunk is already marked as processed,
        otherwise a ``SecretsDetectorExecutorResponse`` with the agent's findings.
        Sends nothing when the chunk belongs to another shard.
        """
        # Hash once and reuse the result for both the trace and the ownership check.
        shard = shard_for_chunk(chunk, self.total_agents)
        print(shard)
        if shard != self.my_shard:
            return
        key = stable_uuid(repr((chunk.source_file, chunk.line_span)))

        # Check-and-set under the shared-state lock so only one executor claims the chunk.
        async with ctx.shared_state.hold():
            chunk_processed = await ctx.shared_state.get_within_hold(key)
            if chunk_processed:
                await ctx.send_message(EmptySecretsDetectorExecutorResponseFactory.get_empty_secrets_detector())
                return
            await ctx.shared_state.set_within_hold(key, True)

        # The flag is already True from here on, so it is not written a second time.
        response = await self.agent.run(self.create_prompt_from_chunk(chunk))
        identified_problematic_lines = [
            LineComment(line_number=elem["line_number"], comment=elem["comment"])
            for elem in self._parse_findings(response.text)
        ]
        await ctx.send_message(SecretsDetectorExecutorResponse(comments=identified_problematic_lines, original_file=chunk.source_file, executor_agent=self.id))


class ChunksExporterExec(Executor):
    """Start executor that turns ``input_test_chunks`` into ``TextChunk`` messages."""

    def __init__(self, id):
        """Store the executor id and register it with the base ``Executor``."""
        self.id = id
        super().__init__(id=id)

    @handler
    async def run(self, _: str, ctx: WorkflowContext[TextChunk]) -> None:
        """Send every sample chunk downstream; the trigger message is ignored.

        Before a chunk is sent, a ``False`` flag is stored in the shared state
        under a key derived from its file name and line span.
        """
        for raw in input_test_chunks:
            first_line, last_line = raw["original_lines_interval"]
            text_chunk = TextChunk(
                chunk=str(raw["chunk"]),
                line_span=(int(first_line), int(last_line)),
                source_file=str(raw["original_file"]),
            )
            identity = (text_chunk.source_file, text_chunk.line_span)
            state_key = stable_uuid(repr(identity))
            await ctx.set_shared_state(state_key, False)
            await ctx.send_message(text_chunk)

class ChunksAgregatorExec(Executor):
    """Executor that collects detector responses and prints the non-empty ones."""

    @handler
    async def run(self, detected_secrets: list[SecretsDetectorExecutorResponse], ctx: WorkflowContext[None]) -> None:
        """Print the current time followed by every response whose ``is_empty()`` is false."""
        with_findings = []
        for response in detected_secrets:
            if response.is_empty():
                continue
            with_findings.append(response)
        print(datetime.now(), with_findings)

        

In [17]:
openai_chat_client = chat_client
total_agents = 3
# One detector per shard index; together they cover shards 0..total_agents-1.
secrets_detector_1 = SecretsDetectorExec(openai_chat_client, my_shard=0, total_agents=total_agents, id="SecretsDetector1")
secrets_detector_2 = SecretsDetectorExec(openai_chat_client, my_shard=1, total_agents=total_agents, id="SecretsDetector2")
secrets_detector_3 = SecretsDetectorExec(openai_chat_client, my_shard=2, total_agents=total_agents, id="SecretsDetector3")
detectors = (secrets_detector_1, secrets_detector_2, secrets_detector_3)
exporter = ChunksExporterExec(id="ChunkExporterAgent")
aggregator = ChunksAgregatorExec(id="ChunksAgregatorAgent")
# Topology: exporter fans out to all detectors, which fan in to the aggregator.
builder = WorkflowBuilder()
builder.set_start_executor(exporter)
builder.add_fan_out_edges(exporter, list(detectors))
builder.add_fan_in_edges(list(detectors), aggregator)
workflow = builder.build()


In [18]:
# Run the workflow to completion. The empty string is only the start message:
# the exporter's handler ignores its string argument. The awaited result is the
# cell's display value.
await workflow.run("")

2
2
2
2
2
2
2
2
2
2
2
2
0
0
0
1
1
1
2
2
2
1
1
1
2
2
2
0
0
0
2025-11-04 20:41:54.161173 [SecretsDetectorExecutorResponse(comments=[LineComment(line_number=404, comment='Private key identifier. Please remove.'), LineComment(line_number=405, comment='Private key. Please remove.'), LineComment(line_number=406, comment='Client email. Please remove.')], original_file='infra/gcp/service-account.json', executor_agent='SecretsDetector1'), SecretsDetectorExecutorResponse(comments=[LineComment(line_number=89, comment='GitHub token. Please remove.'), LineComment(line_number=90, comment='Slack bot token. Please remove.'), LineComment(line_number=91, comment='Authorization header with GitHub token. Please remove.'), LineComment(line_number=92, comment='Username and password in connection string. Please remove.'), LineComment(line_number=93, comment='Email address. Please remove.')], original_file='scripts/release.sh', executor_agent='SecretsDetector2'), SecretsDetectorExecutorResponse(comments=[Line

[ExecutorInvokedEvent(executor_id=ChunkExporterAgent, data=None),
 ExecutorCompletedEvent(executor_id=ChunkExporterAgent, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector1, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector1, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector2, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector2, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector3, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector3, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector1, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector1, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector2, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector2, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector3, data=None),
 ExecutorCompletedEvent(executor_id=SecretsDetector3, data=None),
 ExecutorInvokedEvent(executor_id=SecretsDetector1, data=None),
 ExecutorCompletedEven