In [None]:
!pip install datasets
!pip install openai

In [1]:
from datasets import load_dataset

# Load only the "train" split of the function-calling dataset.
dataset = load_dataset(
    path="burtenshaw/function_calling_dataset",
    split="train",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use the record at index 1 as the single sample for the manual request.
example = dataset[1]

# Functionary on a vLLM server

In [3]:
import json
from openai import OpenAI

# Client for the server listening on localhost:8000.
client = OpenAI(
    api_key="functionary",
    base_url="http://localhost:8000/v1",
)

# The dataset stores the function schema as a fenced ```json block: drop the
# fences, decode the JSON, and tag the result as a "function" tool.
function = {
    **json.loads(example["function"].replace("```json", "").replace("```", "")),
    "type": "function",
}

# Single-turn request; tool_choice="auto" leaves the decision to call the
# tool to the model.
output = client.chat.completions.create(
    model="meetkai/functionary-small-v2.2",
    messages=[{"role": "user", "content": example["instruction"]}],
    tools=[function],
    tool_choice="auto",
)

In [4]:
# Show the function name and arguments of the first tool call in the first choice.
output.choices[0].message.tool_calls[0].function

Function(arguments='{"query": "sedentary lifestyle effects", "indexName": "HealthToday"}', name='performSearch')

# Distilabel

In [5]:
from distilabel.llm import OpenAILLM
from distilabel.tasks import TextGenerationTask
from openai import OpenAI
import json


# Client for the server at localhost:8000; it is handed to the distilabel LLM wrapper later on.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")

2024-03-01 09:26:08,280	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [21]:
# function = json.loads(example["function"].replace("```json","").replace("```",""))
# function["type"] = "function"


# output = client.chat.completions.create(
#     model="meetkai/functionary-small-v2.2",
#     messages = [
#     {"role": "user", "content": example["instruction"]}
#     ],
#     tools = [function],
#     tool_choice="auto"
# )

class FunctionResponseGeneratorTask(TextGenerationTask):
    """Task that consumes an instruction plus a function schema and extracts the tool call.

    The inputs are the ``instruction`` and ``function`` columns of a row; the
    parsed output is the first tool call of a response, converted to a dict.
    """

    @property
    def input_args_names(self):
        """Names of the input columns this task reads."""
        return ["instruction", "function"]

    def parse_output(self, output: list) -> dict:
        """Convert the first tool call of a response into a plain dict.

        Args:
            output: The ``tool_calls`` sequence of a chat-completion message
                (despite the base-class naming, this is not a string). Each
                element must expose ``model_dump()``.

        Returns:
            ``{"function_call": <first tool call as a dict>}``.

        Raises:
            ValueError: If ``output`` is ``None`` or empty, i.e. the response
                contains no tool call.
        """
        if not output:
            # Fail with a clear message instead of an opaque TypeError/IndexError.
            raise ValueError("Model response contains no tool calls to parse.")
        return {"function_call": output[0].model_dump()}

    def generate_prompt(self, instruction: str, function: str, **_):
        """Return the instruction and the raw function schema unchanged, as a tuple.

        Extra keyword arguments are accepted and ignored.
        """
        return instruction, function

In [22]:
from distilabel.llm.utils import LLMOutput

class FunctionUseOpenAILLM(OpenAILLM):
    """``OpenAILLM`` variant that sends each row's function schema as a tool.

    For every input row it builds a single user message from ``instruction``,
    decodes the ``function`` column into a tool definition, and asks the chat
    completions endpoint with ``tool_choice="auto"``. The tool calls of each
    returned choice are handed to ``self.task.parse_output``.
    """

    def _make_messages(self, inputs):
        """Build the single-turn chat history for one input row."""
        return [{"role": "user", "content": inputs["instruction"]}]

    def _load_function(self, inputs):
        """Decode the row's fenced JSON function schema into a tool definition.

        The ``function`` column wraps the JSON in a ```json fence; the fence
        markers are stripped before decoding. A decoded object without a
        ``type`` key is tagged ``"function"``, matching what the manual
        request in this notebook sends; an existing ``type`` is kept.

        Raises:
            json.JSONDecodeError: If the remaining text is not valid JSON.
        """
        schema_text = inputs["function"].replace("```json", "").replace("```", "")
        tool = json.loads(schema_text)
        if isinstance(tool, dict):
            tool.setdefault("type", "function")
        return tool

    def _generate(
        self,
        inputs,
        num_generations,
    ):
        """Request tool-calling completions for every input row.

        Args:
            inputs: Iterable of rows, each with ``instruction`` and
                ``function`` keys.
            num_generations: Number of choices requested per row (``n``).

        Returns:
            One list per input row, each holding one ``LLMOutput`` per
            returned choice. ``parsed_output`` is ``None`` for a choice whose
            tool calls could not be parsed.
        """
        outputs = []
        for row in inputs:
            function = self._load_function(row)
            messages = self._make_messages(row)
            chat_completions = self.client.chat.completions.create(
                messages=messages,
                model=self.model,
                n=num_generations,
                max_tokens=self.max_tokens,
                frequency_penalty=self.frequency_penalty,
                presence_penalty=self.presence_penalty,
                temperature=self.temperature,
                top_p=self.top_p,
                timeout=50,
                tools=[function],
                tool_choice="auto",
            )

            row_outputs = []
            for chat_completion in chat_completions.choices:
                try:
                    parsed_response = self.task.parse_output(
                        chat_completion.message.tool_calls
                    )
                except Exception:
                    # Deliberate best effort: a choice without a parsable tool
                    # call is still recorded, with parsed_output=None.
                    parsed_response = None
                row_outputs.append(
                    LLMOutput(
                        model_name=self.model_name,
                        prompt_used=messages,
                        raw_output=chat_completion.message.content,
                        parsed_output=parsed_response,
                    )
                )
            outputs.append(row_outputs)
        return outputs

# Wire the tool-calling LLM to the local client and the tool-call extraction task.
llm = FunctionUseOpenAILLM(
    task=FunctionResponseGeneratorTask(),
    model="meetkai/functionary-small-v2.2",
    client=client,
)

In [23]:
# Run the LLM on the first three rows only. Selecting the rows before
# to_list() avoids converting the whole dataset just to slice it; min() keeps
# the slice-like tolerance for datasets shorter than three rows.
llm.generate(dataset.select(range(min(3, len(dataset)))).to_list())

[[{'model_name': 'meetkai/functionary-small-v2.2',
   'prompt_used': [{'role': 'user',
     'content': "Show me documents on 'nanotechnology advancements' from the 'ScienceJournal' index sorted by most recent first."}],
   'raw_output': None,
   'parsed_output': {'function_call': {'id': 'call_kSRn9XdYRk2kvHqhM6CQlaER',
     'function': {'arguments': '{"query": "nanotechnology advancements", "indexName": "ScienceJournal", "sortOrder": "desc"}',
      'name': 'performSearch'},
     'type': 'function',
     'index': None}}}],
 [{'model_name': 'meetkai/functionary-small-v2.2',
   'prompt_used': [{'role': 'user',
     'content': "Are there any recent publications from 'HealthToday' index about the effects of a sedentary lifestyle?"}],
   'raw_output': None,
   'parsed_output': {'function_call': {'id': 'call_A5K3G6tPwy1QqjJSaDVfvVcI',
     'function': {'arguments': '{"query": "sedentary lifestyle effects", "indexName": "HealthToday"}',
      'name': 'performSearch'},
     'type': 'function',

In [18]:
# Display the first three raw records. Selecting the rows before to_list()
# avoids converting the whole dataset just to slice it; min() keeps the
# slice-like tolerance for datasets shorter than three rows.
dataset.select(range(min(3, len(dataset)))).to_list()

[{'instruction': "Show me documents on 'nanotechnology advancements' from the 'ScienceJournal' index sorted by most recent first.",
  'function_call': '```json\n{\n    "name": "performSearch",\n    "arguments": {\n        "query": "nanotechnology advancements",\n        "indexName": "ScienceJournal",\n        "sortOrder": "most recent first"\n    }\n}\n```',
  'function': '```json\n{\n    "function": {\n        "name": "performSearch",\n        "description": "Executes a search query against a specified index or dataset and returns matching documents.",\n        "parameters": {\n            "properties": {\n                "query": {\n                    "type": "string",\n                    "description": "The search query input by the user to find relevant documents in the dataset.",\n                    "items": null\n                },\n                "indexName": {\n                    "type": "string",\n                    "description": "The name of the index to run the search