In [1]:
import asyncio

from semantic_kernel import Kernel
from semantic_kernel.utils.logging import setup_logging
from semantic_kernel.functions import kernel_function
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.contents import ChatMessageContent, FunctionCallContent, FunctionResultContent

from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
    AzureChatPromptExecutionSettings,
)

from semantic_kernel.connectors.mcp import MCPStreamableHttpPlugin

import logging




In [2]:

async def main():
    # Initialize the kernel
    kernel = Kernel()

    # Add Azure OpenAI chat completion
    chat_completion = OllamaChatCompletion(
        ai_model_id="gpt-oss:20b",
        host="http://ollama.home",
    )
    kernel.add_service(chat_completion)

    # Set up logging to see detailed information
    setup_logging()
    
    # Configure logging levels for different components
    logging.getLogger("semantic_kernel").setLevel(logging.INFO)
    logging.getLogger("semantic_kernel.kernel").setLevel(logging.INFO)
    logging.getLogger("semantic_kernel.connectors").setLevel(logging.INFO)
    
    # Set up a basic console handler if not already configured
    if not logging.getLogger().handlers:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

    
    ff_server =  MCPStreamableHttpPlugin(
        name="ff_tools",
        url="http://192.168.86.103:8000/mcp",
    )
    await ff_server.connect()  

    kernel.add_plugin(ff_server)
    print("✅ MCP plugin added to kernel")

    # Enable planning
    execution_settings = AzureChatPromptExecutionSettings()
    execution_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()

    # Create a history of the conversation
    history = ChatHistory()

    print("🤖 Assistant ready! You can ask me to use any of the MCP tools listed above.")
    print("💡 Example: 'Can you help me with fantasy football data?'")
    print("Type 'exit' to quit.\n")

    # Initiate a back-and-forth chat
    userInput = None
    while True:
        # Collect user input
        userInput = "what teams are there"

        # Terminate the loop if the user says "exit"
        if userInput == "exit":
            break

        # Add user input to the history
        history.add_user_message(userInput)

        # Try streaming responses if the client supports it; otherwise fall back
        thread = None

                # This callback function will be called for each intermediate message,
        # which will allow one to handle FunctionCallContent and FunctionResultContent.
        # If the callback is not provided, the agent will return the final response
        # with no intermediate tool call steps.
        async def handle_streaming_intermediate_steps(message: ChatMessageContent) -> None:
            for item in message.items or []:
                if isinstance(item, FunctionResultContent):
                    print(f"Function Result:> {item.result} for function: {item.name}")
                elif isinstance(item, FunctionCallContent):
                    print(f"Function Call:> {item.name} with arguments: {item.arguments}")
                else:
                    print(f"{item}")

        # Accumulate content so we can add a single message to history at the end
        full_response = ""

        # Enforce streaming-only mode: require invoke_stream on the client
        if not hasattr(chat_completion, "get_streaming_chat_message_contents"):
            raise RuntimeError(
                "The configured chat_completion client does not support streaming (invoke_stream).\n"
                "This script is running in streaming-only mode. Use a streaming-capable client."
            )

        thread = None
        i = 0
        chunks = []
        try:
            response = chat_completion.get_streaming_chat_message_content(
                messages=userInput,
                thread=thread,
                on_intermediate_message=handle_streaming_intermediate_steps,
                chat_history=history,
                settings=execution_settings,
                kernel=kernel,
            )
                
            async for chunk in response:
                chunks.append(chunk)
                i+=1
                print(chunk, end="")

                if i > 90:
                    return chunks

                    break
                
                print(response)
                # thread = response.thread
                # if first_chunk:
                #     print(f"# {response.name}: ", end="", flush=True)
                #     first_chunk = False
                # print(response.content, end="", flush=True)
            print()
            # Newline after stream finishes
            print()

            if full_response:
                history.add_message(full_response)
        finally:
            # Clean up the thread on the remote service if provided
            if thread:
                try:
                    await thread.delete()
                except Exception:
                    # Best-effort cleanup; ignore errors
                    pass
    await ff_server.close()


test = await main()

✅ MCP plugin added to kernel
🤖 Assistant ready! You can ask me to use any of the MCP tools listed above.
💡 Example: 'Can you help me with fantasy football data?'
Type 'exit' to quit.

<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat

[2025-08-20 09:35:25 - semantic_kernel.connectors.ai.chat_completion_client_base:284 - INFO] processing 1 tool calls in parallel.
[2025-08-20 09:35:25 - semantic_kernel.kernel:412 - INFO] Calling ff_tools-get_teams function with args: {}
[2025-08-20 09:35:25 - semantic_kernel.functions.kernel_function:19 - INFO] Function ff_tools-get_teams invoking.
[2025-08-20 09:35:25 - semantic_kernel.functions.kernel_function:29 - INFO] Function ff_tools-get_teams succeeded.
[2025-08-20 09:35:25 - semantic_kernel.functions.kernel_function:53 - INFO] Function completed. Duration: 0.021738s


<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x000001DCBE2058A0>
<async_generator object ChatCompletio

[2025-08-20 09:35:27 - opentelemetry.context:157 - ERROR] Failed to detach context
Traceback (most recent call last):
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\trace\__init__.py", line 589, in use_span
    yield span
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\semantic_kernel\connectors\ai\chat_completion_client_base.py", line 271, in get_streaming_chat_message_contents
    yield messages
GeneratorExit

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\context\__init__.py", line 155, in detach
    _RUNTIME_CONTEXT.detach(token)
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\context\contextvars_context.py", line 53, in detach
    self._current_context.reset(token)
ValueError: <Token var=<Conte

In [3]:
# Display the value returned by `await main()` (the collected streamed chunks).
test

[StreamingChatMessageContent(choice_index=0, inner_content=ChatResponse(model='gpt-oss:20b', created_at='2025-08-20T14:35:26.145893247Z', done=False, done_reason=None, total_duration=None, load_duration=None, prompt_eval_count=None, prompt_eval_duration=None, eval_count=None, eval_duration=None, message=Message(role='assistant', content='', thinking='User', images=None, tool_name=None, tool_calls=None)), ai_model_id='gpt-oss:20b', metadata={'model': 'gpt-oss:20b'}, content_type='message', role=<AuthorRole.ASSISTANT: 'assistant'>, name=None, items=[], encoding=None, finish_reason=None, status=None, function_invoke_attempt=0),
 StreamingChatMessageContent(choice_index=0, inner_content=ChatResponse(model='gpt-oss:20b', created_at='2025-08-20T14:35:26.154899647Z', done=False, done_reason=None, total_duration=None, load_duration=None, prompt_eval_count=None, prompt_eval_duration=None, eval_count=None, eval_duration=None, message=Message(role='assistant', content='', thinking=' asks', images

In [24]:
# Peek at the 'message' entry of the raw payload carried by the first chunk.
test[0].inner_content['message']


Message(role='assistant', content='', thinking='User', images=None, tool_name=None, tool_calls=None)

In [None]:
# Walk the collected chunks and print the most informative part of each one:
# the model's "thinking" text if present, otherwise its tool calls, otherwise
# the chunk itself (which prints as its text content).
for message_num, message in enumerate(test):
    print(f"Message {message_num}:")

    # Look the raw payload's message up once instead of once per branch.
    raw = message.inner_content
    payload = raw.get('message') if raw is not None else None

    if payload is not None and payload.thinking is not None:
        print(payload.thinking)
    elif payload is not None and payload.tool_calls is not None:
        print(payload.tool_calls)
    else:
        print(message)


Message 0:
User
Message 1:
 asks
Message 2:
:
Message 3:
 "
Message 4:
what
Message 5:
 teams
Message 6:
 are
Message 7:
 there
Message 8:
".
Message 9:
 Lik
Message 10:
ely
Message 11:
 wants
Message 12:
 list
Message 13:
 of
Message 14:
 teams
Message 15:
 in
Message 16:
 the
Message 17:
 league
Message 18:
.
Message 19:
 Use
Message 20:
 ff
Message 21:
_tools
Message 22:
_get
Message 23:
_
Message 24:
teams
Message 25:
.
Message 26:
[ToolCall(function=Function(name='ff_tools-get_teams', arguments={}))]
Message 27:

Message 28:
We
Message 29:
 need
Message 30:
 to
Message 31:
 answer
Message 32:
:
Message 33:
 "
Message 34:
what
Message 35:
 teams
Message 36:
 are
Message 37:
 there
Message 38:
"
Message 39:
 We
Message 40:
 have
Message 41:
 the
Message 42:
 list
Message 43:
 from
Message 44:
 the
Message 45:
 tool
Message 46:
.
Message 47:
 Should
Message 48:
 format
Message 49:
 nicely
Message 50:
.
Message 51:
Here
Message 52:
 are
Message 53:
 all
Message 54:
 the
Message 55:
 t