In [1]:
# Llama Stack agent building blocks: the streaming event logger, the ReAct agent
# and the ReAct structured-output schema.
# NOTE(review): `Agent` is not referenced in the cells visible here — confirm it is still needed.
from llama_stack_client import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.lib.agents.react.agent import ReActAgent
from llama_stack_client.lib.agents.react.tool_parser import ReActOutput
# make the local `src` package importable by adding the current directory to the module search path
import sys
sys.path.append('.')
# NOTE(review): `get_location` is not referenced in the cells visible here — confirm it is still needed.
from src.client_tools import get_location
# for accessing the environment variables
import os
from dotenv import load_dotenv
# called at import time so the os.getenv() lookups in later cells can see values loaded by dotenv
load_dotenv()

# for communication with Llama Stack
from llama_stack_client import LlamaStackClient

# pretty print of the results returned from the model/agent
# NOTE(review): `import sys` and `sys.path.append('.')` repeat the lines above,
# so '.' is appended to sys.path a second time.
import sys
sys.path.append('.')
from src.utils import step_printer
from termcolor import cprint

# from here on, `print` refers to rich's print instead of the builtin
from rich import print

In [2]:
# Connect to the Llama Stack server whose address is read from REMOTE_BASE_URL.
base_url = os.getenv("REMOTE_BASE_URL")
client = LlamaStackClient(base_url=base_url)

# Name of the inference model handed to the Llama Stack Agents/Inference APIs in later cells.
model_id = "granite-3.2-2b-instruct"

# Sampling strategy: top_p sampling when TEMPERATURE is positive, greedy decoding otherwise.
temperature = float(os.getenv("TEMPERATURE", 0.0))
strategy = {"type": "greedy"}
if temperature > 0.0:
    top_p = float(os.getenv("TOP_P", 0.95))
    strategy = dict(type="top_p", temperature=temperature, top_p=top_p)

# Upper bound on generated tokens, overridable through MAX_TOKENS.
max_tokens = int(os.getenv("MAX_TOKENS", 4096))

# Bundle handed to the Llama Stack Agents/Inference APIs in later cells.
sampling_params = dict(strategy=strategy, max_tokens=max_tokens)

# Streaming flag: every STREAM value other than the exact string 'False' enables streaming.
stream_env = os.getenv("STREAM", "False")
stream = not (stream_env == "False")

print(f"Inference Parameters:\n\tModel: {model_id}\n\tSampling Parameters: {sampling_params}\n\tstream: {stream}")

In [3]:
# Optional: Enter your MCP server URL here
# URL of the GitHub MCP server, read from REMOTE_GITHUB_MCP_URL. It is only needed
# when the "mcp::github" toolgroup has not been registered on the server yet.
github_mcp_url = os.getenv("REMOTE_GITHUB_MCP_URL")

# Get list of registered tools and extract their toolgroup IDs
registered_tools = client.tools.list()
registered_toolgroups = [tool.toolgroup_id for tool in registered_tools]

# The agent cell below requires the "mcp::github" toolgroup; register it if it is missing.
if "mcp::github" not in registered_toolgroups:
    if not github_mcp_url:
        # Fail with an actionable message instead of sending an empty endpoint to the server.
        raise ValueError(
            "The 'mcp::github' toolgroup is not registered and REMOTE_GITHUB_MCP_URL is not set; "
            "set REMOTE_GITHUB_MCP_URL to the GitHub MCP server URL."
        )
    client.toolgroups.register(
        toolgroup_id="mcp::github",
        provider_id="model-context-protocol",
        # Fixed: the original passed the undefined name `ocp_github_url` (NameError).
        mcp_endpoint={"uri": github_mcp_url},
    )
    # Refresh the listing so the output below reflects the registration just performed.
    registered_tools = client.tools.list()
    registered_toolgroups = [tool.toolgroup_id for tool in registered_tools]

print(registered_tools)

In [4]:
# Ask the model to answer in the JSON shape described by the ReAct output schema.
react_response_format = {
    "type": "json_schema",
    "json_schema": ReActOutput.model_json_schema(),
}

# ReAct agent restricted to the GitHub MCP toolgroup.
agent = ReActAgent(
    client=client,
    model=model_id,
    tools=["mcp::github"],
    response_format=react_response_format,
    sampling_params=sampling_params,
)

# Prompts sent to the agent, one turn each. The literal (including its indentation) is sent verbatim.
user_prompts = [
    """Get file content using GitHub MCP owner=eformat, repo=welcome, path=README.md, ref=master
                Use the following principles to generate the code summary from the provided file content
                - Concisely summarize what the code is doing.
                - Include small code snippets to create a comprehensive summary
                Return the generated summary as follows:
                [Start Summary]
                <insert summary here>
                [End Summary]
                """,
]

# All prompts share a single session.
session_id = agent.create_session("web-session")
separator = "=" * 50
for prompt in user_prompts:
    print("\n" + separator)
    cprint(f"Processing user query: {prompt}", "blue")
    print(separator)

    user_message = {"role": "user", "content": prompt}
    response = agent.create_turn(
        messages=[user_message],
        session_id=session_id,
        stream=stream,
    )

    if not stream:
        # Non-streaming turn: print the steps of the agent's response in a formatted way.
        step_printer(response.steps)
        continue

    # Streaming turn: print each logged event as it arrives.
    for event in EventLogger().log(response):
        event.print()

[34mProcessing user query: Get file content using GitHub MCP owner=eformat, repo=welcome, path=README.md, ref=master
                Use the following principles to generate the code summary from the provided file content
                - Concisely summarize what the code is doing.
                - Include small code snippets to create a comprehensive summary
                Return the generated summary as follows:
                [Start Summary]
                <insert summary here>
                [End Summary]
                [0m


[33minference> [0m[33m{[0m[33m
 [0m[33m "[0m[33mthought[0m[33m":[0m[33m "[0m[33mTo[0m[33m get[0m[33m the[0m[33m file[0m[33m content[0m[33m,[0m[33m I[0m[33m'll[0m[33m use[0m[33m the[0m[33m '[0m[33mget[0m[33m_[0m[33mfile[0m[33m_[0m[33mcontents[0m[33m'[0m[33m tool[0m[33m.[0m[33m After[0m[33m obtaining[0m[33m the[0m[33m content[0m[33m,[0m[33m I[0m[33m'll[0m[33m summarize[0m[33m the[0m[33m purpose[0m[33m of[0m[33m the[0m[33m README[0m[33m.[0m[33mmd[0m[33m file[0m[33m,[0m[33m including[0m[33m any[0m[33m relevant[0m[33m code[0m[33m snippets[0m[33m,[0m[33m and[0m[33m present[0m[33m it[0m[33m in[0m[33m the[0m[33m requested[0m[33m format[0m[33m.",[0m[33m
 [0m[33m "[0m[33maction[0m[33m":[0m[33m {[0m[33m
   [0m[33m "[0m[33mtool[0m[33m_[0m[33mname[0m[33m":[0m[33m "[0m[33mget[0m[33m_[0m[33mfile[0m[33m_[0m[33mcontents[0m[33m",[0m[33m
   [0m[33m "[0m