# Streaming Tool use

### Setup

Make sure you have `ipykernel` and `pip` pre-installed

In [2]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import json

from groq import Groq
from dotenv import load_dotenv

load_dotenv()
"Groq API key configured: " + os.environ["GROQ_API_KEY"][:10] + "..."

'Groq API key configured: gsk_7FdrzM...'

We will use the ```llama3-70b-8192``` model in this demo. Note that you will need a Groq API Key to proceed and can create an account [here](https://console.groq.com/) to generate one for free. Only Llama 3 models support parallel tool use at this time (05/07/2024).

We recommend using the 70B Llama 3 model, 8B has subpar consistency.

Please note that in this notebook we show how to deal with chunks that are provided by the client SDK directly. For many frameworks that support streaming tool calling like LangChain, this low-level handling is not required, you can simply enable streaming support.

This tutorial is useful if you want more control and understand lower level details of the implementation.

In [4]:
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
model = "llama3-70b-8192"

Let's define a dummy function we can invoke in our tool use loop

In [5]:
def get_weather(city: str):
    if city == "Madrid":
        return 35
    elif city == "San Francisco":
        return 18
    elif city == "Paris":
        return 20
    else:
        return 15

Now we define our messages and tools and run the completion request.

In [6]:
messages = [
    {"role": "system", "content": """You are a helpful assistant."""},
    {
        "role": "user",
        "content": "What is the weather in Paris, Tokyo and Madrid?",
    },
]
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Returns the weather in the given city in degrees Celsius",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The name of the city",
                    }
                },
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model=model, messages=messages, tools=tools, tool_choice="auto", max_tokens=4096, stream=True
)

In [7]:
def resp_to_result(response):
    tool_calls = []
    tool_call_buffers = {}
    content_response = ""

    for chunk in response:
        print(chunk)
        for choice in chunk.choices:
            if choice.delta.tool_calls:
                for tool_call in choice.delta.tool_calls:
                    index = tool_call.index
                    if index not in tool_call_buffers:
                        tool_call_buffers[index] = {
                            "id": tool_call.id,
                            "type": tool_call.type,
                            "function": {
                                "arguments": "",
                                "name": ""
                            }
                        }
                    if tool_call.function.arguments is not None:
                        tool_call_buffers[index]["function"]["arguments"] += tool_call.function.arguments
                    if tool_call.function.name is not None:
                        tool_call_buffers[index]["function"]["name"] += tool_call.function.name
            elif choice.delta.content:
                content_response += choice.delta.content

    tool_calls = list(tool_call_buffers.values())
    
    if tool_calls:
        return tool_calls
    else:
        return content_response

In [8]:
tool_calls = resp_to_result(response)

ChatCompletionChunk(id='chatcmpl-14f9b8e2-0b43-4587-b3c5-9274cbd9c658', choices=[Choice(delta=ChoiceDelta(content='', role='assistant', function_call=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1716471247, model='llama3-70b-8192', object='chat.completion.chunk', system_fingerprint=None, x_groq=XGroq(usage=None, id='req_01hyjthc9rfz2t31hma8xwj8kj'))
ChatCompletionChunk(id='chatcmpl-14f9b8e2-0b43-4587-b3c5-9274cbd9c658', choices=[Choice(delta=ChoiceDelta(content=None, role=None, function_call=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_h53x', function=ChoiceDeltaToolCallFunction(arguments='{"city":"Paris"}', name='get_weather'), type='function')]), finish_reason=None, index=0, logprobs=None)], created=1716471247, model='llama3-70b-8192', object='chat.completion.chunk', system_fingerprint='fp_753a4aecf6', x_groq=None)
ChatCompletionChunk(id='chatcmpl-14f9b8e2-0b43-4587-b3c5-9274cbd9c658', choices=[Choice(delta=ChoiceDelta(content=None, role=No

In [10]:
tool_calls

[{'id': 'call_h53x',
  'type': 'function',
  'function': {'arguments': '{"city":"Paris"}', 'name': 'get_weather'}},
 {'id': 'call_2nr3',
  'type': 'function',
  'function': {'arguments': '{"city":"Tokyo"}', 'name': 'get_weather'}},
 {'id': 'call_4py5',
  'type': 'function',
  'function': {'arguments': '{"city":"Madrid"}', 'name': 'get_weather'}}]

# Processing the tool calls

Now we process the assistant message and construct the required messages to continue the conversation. 

*Including* invoking each tool_call against our actual function.

In [13]:
messages.append(
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": tool_call["id"],
                "function": {
                    "name": tool_call["function"]["name"],
                    "arguments": tool_call["function"]["arguments"],
                },
                "type": tool_call["type"],
            }
            for tool_call in tool_calls
        ],
    }
)

available_functions = {
    "get_weather": get_weather,
}
for tool_call in tool_calls:
    function_name = tool_call["function"]["name"]
    function_to_call = available_functions[function_name]
    function_args = json.loads(tool_call["function"]["arguments"])
    function_response = function_to_call(**function_args)

    # Note how we create a separate tool call message for each tool call
    # the model is able to discern the tool call result through the tool_call_id
    messages.append(
        {
            "role": "tool",
            "content": json.dumps(function_response),
            "tool_call_id": tool_call["id"],
        }
    )

print(json.dumps(messages, indent=2))

[
  {
    "role": "system",
    "content": "You are a helpful assistant."
  },
  {
    "role": "user",
    "content": "What is the weather in Paris, Tokyo and Madrid?"
  },
  {
    "role": "assistant",
    "tool_calls": [
      {
        "id": "call_h53x",
        "function": {
          "name": "get_weather",
          "arguments": "{\"city\":\"Paris\"}"
        },
        "type": "function"
      },
      {
        "id": "call_2nr3",
        "function": {
          "name": "get_weather",
          "arguments": "{\"city\":\"Tokyo\"}"
        },
        "type": "function"
      },
      {
        "id": "call_4py5",
        "function": {
          "name": "get_weather",
          "arguments": "{\"city\":\"Madrid\"}"
        },
        "type": "function"
      }
    ]
  },
  {
    "role": "assistant",
    "tool_calls": [
      {
        "id": "call_h53x",
        "function": {
          "name": "get_weather",
          "arguments": "{\"city\":\"Paris\"}"
        },
        "type": "funct

Now we run our final completion with multiple tool call results included in the messages array.

**Note**

We pass the tool definitions again to help the model understand:

1. The assistant message with the tool call
2. Interpret the tool results.

In [15]:
response = client.chat.completions.create(
    model=model, messages=messages, tools=tools, tool_choice="auto", max_tokens=4096, stream=True
)

text = resp_to_result(response)
text

ChatCompletionChunk(id='chatcmpl-1f4da120-722a-4889-bc3a-c8bc6d2bde39', choices=[Choice(delta=ChoiceDelta(content='', role='assistant', function_call=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1716471352, model='llama3-70b-8192', object='chat.completion.chunk', system_fingerprint=None, x_groq=XGroq(usage=None, id='req_01hyjtmk3qfrkt3wqe8ndvf8n1'))
ChatCompletionChunk(id='chatcmpl-1f4da120-722a-4889-bc3a-c8bc6d2bde39', choices=[Choice(delta=ChoiceDelta(content='The', role=None, function_call=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1716471352, model='llama3-70b-8192', object='chat.completion.chunk', system_fingerprint='fp_c1a4bcec29', x_groq=None)
ChatCompletionChunk(id='chatcmpl-1f4da120-722a-4889-bc3a-c8bc6d2bde39', choices=[Choice(delta=ChoiceDelta(content=' weather', role=None, function_call=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], created=1716471352, model='llama3-70b-8192', object=

'The weather in Paris is 20°C, in Tokyo is 15°C, and in Madrid is 35°C.'