In [14]:
import asyncio

from semantic_kernel import Kernel
from semantic_kernel.utils.logging import setup_logging
from semantic_kernel.functions import kernel_function
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.functions.kernel_arguments import KernelArguments
from semantic_kernel.contents import ChatMessageContent, FunctionCallContent, FunctionResultContent

from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
    AzureChatPromptExecutionSettings,
)

from semantic_kernel.connectors.mcp import MCPStreamableHttpPlugin

import logging




In [15]:

async def main():
    # Initialize the kernel
    kernel = Kernel()

    # Add Azure OpenAI chat completion
    chat_completion = OllamaChatCompletion(
        ai_model_id="gpt-oss:20b",
        host="http://ollama.home",
    )
    kernel.add_service(chat_completion)

    # Set up logging to see detailed information
    setup_logging()
    
    # Configure logging levels for different components
    logging.getLogger("semantic_kernel").setLevel(logging.INFO)
    logging.getLogger("semantic_kernel.kernel").setLevel(logging.INFO)
    logging.getLogger("semantic_kernel.connectors").setLevel(logging.INFO)
    
    # Set up a basic console handler if not already configured
    if not logging.getLogger().handlers:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

    
    ff_server =  MCPStreamableHttpPlugin(
        name="ff_tools",
        url="http://192.168.86.103:8000/mcp",
    )
    await ff_server.connect()  

    kernel.add_plugin(ff_server)
    print("✅ MCP plugin added to kernel")

    # Enable planning
    execution_settings = AzureChatPromptExecutionSettings()
    execution_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()

    # Create a history of the conversation
    history = ChatHistory()

    print("🤖 Assistant ready! You can ask me to use any of the MCP tools listed above.")
    print("💡 Example: 'Can you help me with fantasy football data?'")
    print("Type 'exit' to quit.\n")

    # Initiate a back-and-forth chat
    userInput = None
    while True:
        # Collect user input
        userInput = "what teams are there"

        # Terminate the loop if the user says "exit"
        if userInput == "exit":
            break

        # Add user input to the history
        history.add_user_message(userInput)

                # This callback function will be called for each intermediate message,
        # which will allow one to handle FunctionCallContent and FunctionResultContent.
        # If the callback is not provided, the agent will return the final response
        # with no intermediate tool call steps.
        async def handle_streaming_intermediate_steps(message: ChatMessageContent) -> None:
            for item in message.items or []:
                if isinstance(item, FunctionResultContent):
                    print(f"Function Result:> {item.result} for function: {item.name}")
                elif isinstance(item, FunctionCallContent):
                    print(f"Function Call:> {item.name} with arguments: {item.arguments}")
                else:
                    print(f"{item}")

        # Accumulate content so we can add a single message to history at the end
        full_response = ""

        # Enforce streaming-only mode: require invoke_stream on the client
        if not hasattr(chat_completion, "get_streaming_chat_message_contents"):
            raise RuntimeError(
                "The configured chat_completion client does not support streaming (invoke_stream).\n"
                "This script is running in streaming-only mode. Use a streaming-capable client."
            )

        thread = None
        i = 0
        chunks = []
        try:
            response = chat_completion.get_streaming_chat_message_content(
                messages=userInput,
                thread=thread,
                on_intermediate_message=handle_streaming_intermediate_steps,
                chat_history=history,
                settings=execution_settings,
                kernel=kernel,
            )
                
            async for chunk in response:
                chunks.append(chunk)
                i+=1
                print(chunk, end="")

                if i > 90:
                    return chunks, ff_server

                    break
                
                print(response)
                # thread = response.thread
                # if first_chunk:
                #     print(f"# {response.name}: ", end="", flush=True)
                #     first_chunk = False
                # print(response.content, end="", flush=True)
            print()
            # Newline after stream finishes
            print()

            if full_response:
                history.add_message(full_response)
        finally:
            # Clean up the thread on the remote service if provided
            if thread:
                try:
                    await thread.delete()
                except Exception:
                    # Best-effort cleanup; ignore errors
                    pass
    await ff_server.close()


response, ff_server = await main()

✅ MCP plugin added to kernel
🤖 Assistant ready! You can ask me to use any of the MCP tools listed above.
💡 Example: 'Can you help me with fantasy football data?'
Type 'exit' to quit.

<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat

[2025-08-20 16:02:40 - semantic_kernel.connectors.ai.chat_completion_client_base:284 - INFO] processing 1 tool calls in parallel.


<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>


[2025-08-20 16:02:40 - semantic_kernel.kernel:412 - INFO] Calling ff_tools-get_teams function with args: {}
[2025-08-20 16:02:40 - semantic_kernel.functions.kernel_function:19 - INFO] Function ff_tools-get_teams invoking.
[2025-08-20 16:02:40 - semantic_kernel.functions.kernel_function:29 - INFO] Function ff_tools-get_teams succeeded.
[2025-08-20 16:02:40 - semantic_kernel.functions.kernel_function:53 - INFO] Function completed. Duration: 0.011755s


<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletionClientBase.get_streaming_chat_message_content at 0x0000015BF43DE5C0>
<async_generator object ChatCompletio

[2025-08-20 16:02:42 - opentelemetry.context:157 - ERROR] Failed to detach context
Traceback (most recent call last):
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\trace\__init__.py", line 589, in use_span
    yield span
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\semantic_kernel\connectors\ai\chat_completion_client_base.py", line 271, in get_streaming_chat_message_contents
    yield messages
GeneratorExit

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\context\__init__.py", line 155, in detach
    _RUNTIME_CONTEXT.detach(token)
  File "c:\Users\blaineperry\git\semantic_kernel_mcp_client\.venv\Lib\site-packages\opentelemetry\context\contextvars_context.py", line 53, in detach
    self._current_context.reset(token)
ValueError: <Token var=<Conte

In [16]:
response[0].inner_content['message']


Message(role='assistant', content='', thinking='User', images=None, tool_name=None, tool_calls=None)

In [None]:
chunk_num = 0
for chunk in response:
    print(f"chunk {chunk_num}:")
    if chunk.inner_content is not None and chunk.inner_content.get('chunk') is not None and chunk.inner_content['chunk'].thinking is not None:
        print(chunk.inner_content["chunk"].thinking, end="")

    elif chunk.inner_content is not None and chunk.inner_content.get('chunk') is not None and chunk.inner_content['chunk'].tool_calls is not None:
        print(chunk.inner_content["chunk"].tool_calls)
        tool_calls = chunk.inner_content["chunk"].tool_calls
        break

    else:
        print(chunk, end="")
    chunk_num += 1


chunk 0:
chunk 1:
chunk 2:
chunk 3:
chunk 4:
chunk 5:
chunk 6:
chunk 7:
chunk 8:
chunk 9:
chunk 10:
chunk 11:
chunk 12:
chunk 13:
chunk 14:
chunk 15:
chunk 16:
chunk 17:
chunk 18:
chunk 19:
chunk 20:
chunk 21:
chunk 22:
chunk 23:
chunk 24:
chunk 25:
chunk 26:
chunk 27:
chunk 28:
chunk 29:
chunk 30:
chunk 31:
chunk 32:
chunk 33:
chunk 34:
chunk 35:
chunk 36:
chunk 37:
chunk 38:
chunk 39:
chunk 40:
chunk 41:
chunk 42:
chunk 43:
chunk 44:
chunk 45:
chunk 46:
chunk 47:
chunk 48:
Herechunk 49:
’schunk 50:
 achunk 51:
 quickchunk 52:
 rundownchunk 53:
 ofchunk 54:
 allchunk 55:
 thechunk 56:
 teamschunk 57:
 inchunk 58:
 thechunk 59:
 leaguechunk 60:
 rightchunk 61:
 nowchunk 62:
:

chunk 63:
|chunk 64:
 Teamchunk 65:
 Namechunk 66:
 |chunk 67:
 Shortchunk 68:
 /chunk 69:
 Nickchunk 70:
namechunk 71:
 |chunk 72:
 Teamchunk 73:
 IDchunk 74:
 |
chunk 75:
|chunk 76:
-----------chunk 77:
|chunk 78:
----------------chunk 79:
--chunk 80:
|chunk 81:
---------chunk 82:
|
chunk 83:
|chunk 84:
 Americ

In [18]:
ff_server.list_tools()

AttributeError: 'MCPStreamableHttpPlugin' object has no attribute 'list_tools'