# Imports

In [2]:
%load_ext autoreload
%autoreload 2

from enum import StrEnum
import json
import os
from typing import Annotated

from huggingface_hub import AsyncInferenceClient
from huggingface_hub.inference._generated.types import ChatCompletionOutputToolCall
import lancedb
from openai import AsyncOpenAI
from openai.types.chat.chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall,
)
import pandas as pd
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

from utils import create_tool_schema_for_function
from tool_types import ToolCallResult

load_dotenv()

True

# Retrieval Augmented Generation (RAG)


In [8]:
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    device="cpu",
)
db = lancedb.connect("./data/lance_db")
print(db.table_names())
tbl = db.open_table("movies")

['movies']


In [None]:
def query_movie_db(
    text: str,
    limit: int = 10,
) -> ToolCallResult:
    """
    Query the LanceDB movie database for movies with similar overviews to the input text.

    Args:
        text (str): The input text to query the database.
        limit (int, optional): The number of results to return. Defaults to 10.

    Returns:
        ToolCallResult: The result of the tool call.
    """
    q_emb = embedder.encode(text)
    df = (
        tbl.search(q_emb).limit(limit).to_pandas().drop(columns=["vector", "_distance"])
    )
    return {
        "llm_consumable": df.to_json(lines=True, orient="records"),
        "ui_displayable": df,
        "return_type": "dataframe",
    }


In [14]:
res = query_movie_db("air bud")
print(res["llm_consumable"])

{"budget":3500000,"genres":["Comedy"],"keywords":["chicago","alcohol","cataclysm","guitar","medicine","taxi driver","passenger","saxophone","stewardess","pilot","airplane","fear of flying","air controller","landing","autopilot","kiss","spoof","los angeles","alcohol abuse","aftercreditsstinger","anarchic comedy"],"original_language":"en","overview":"Alcoholic pilot, Ted Striker has developed a fear of flying due to wartime trauma, but nevertheless boards a passenger jet in an attempt to woo back his stewardess girlfriend. Food poisoning decimates the passengers and crew, leaving it up to Striker to land the plane with the help of a glue-sniffing air traffic controller and Striker's vengeful former Air Force captain, who must both talk him down.","popularity":46.116885,"release_date":331344000000,"revenue":83453539,"runtime":88.0,"spoken_languages":["English"],"title":"Airplane!","cast":[{"character":"Ted Striker","name":"Robert Hays"},{"character":"Elaine","name":"Julie Hagerty"},{"char

In [15]:
res["ui_displayable"]

Unnamed: 0,budget,genres,keywords,original_language,overview,popularity,release_date,revenue,runtime,spoken_languages,title,cast,release_year
0,3500000,[Comedy],"[chicago, alcohol, cataclysm, guitar, medicine...",en,"Alcoholic pilot, Ted Striker has developed a f...",46.116885,1980-07-02,83453539,88.0,[English],Airplane!,"[{'character': 'Ted Striker', 'name': 'Robert ...",1980
1,209000000,"[Thriller, Action, Adventure, Science Fiction]","[fight, u.s. navy, mind reading, hong kong, so...",en,"When mankind beams a radio signal into space, ...",64.928382,2012-04-11,303025485,131.0,"[English, ภาษาไทย]",Battleship,"[{'character': 'Lieutenant Alex Hopper', 'name...",2012
2,0,[Comedy],[independent film],en,“The Living Wake” is a dark comedy set in a ti...,0.383442,2007-01-01,0,91.0,[],The Living Wake,"[{'character': 'Mills', 'name': 'Jesse Eisenbe...",2007
3,2627000,"[Drama, Romance, War]","[pilot, airplane, ghost]",en,"Pete Sandidge (Tracy), a daredevil bomber pilo...",0.531444,1944-03-01,5363000,120.0,[English],A Guy Named Joe,"[{'character': 'Pete Sandidge', 'name': 'Spenc...",1944
4,6500000,"[Comedy, Drama]","[father son relationship, capitalism, based on...",en,The chief spokesperson and lobbyist Nick Naylo...,29.01153,2005-09-05,24793509,92.0,[English],Thank You for Smoking,"[{'character': 'Nick Naylor', 'name': 'Aaron E...",2005
5,3000000,"[Comedy, Drama, Romance]","[new york, new year's eve, lovesickness, age d...",en,Bud Baxter is a minor clerk in a huge New York...,22.889294,1960-06-15,25000000,125.0,[English],The Apartment,"[{'character': 'C.C. Baxter', 'name': 'Jack Le...",1960
6,25000000,[Drama],"[baseball, sport, duringcreditsstinger]",en,"In a last-ditch effort to save his career, spo...",17.312433,2014-05-09,38307627,124.0,"[हिन्दी, English]",Million Dollar Arm,"[{'character': 'J. B. Bernstein', 'name': 'Jon...",2014
7,0,"[Action, Comedy, Science Fiction]",[],en,,0.0206,2005-01-01,0,97.0,[English],The Helix... Loaded,[],2005
8,31000000,[Drama],"[confession, airplane, f word, hangover, airpl...",en,Commercial airline pilot Whip Whitaker has a p...,42.213765,2012-11-02,161772375,138.0,[English],Flight,"[{'character': 'Whip Whitaker', 'name': 'Denze...",2012
9,35000000,"[Animation, Family, Adventure]","[animation, animal, 3d]",en,The animated comedy tells the story of a lowly...,14.051852,2005-03-25,19478106,76.0,[English],Valiant,"[{'character': 'Valiant (voice)', 'name': 'Ewa...",2005


## Creating a JSON schema

We need to create a JSON schema for our tools so that the LLM knows what each tool does and how to use it.

In [16]:
# We create a BaseModel for the arguments to our function to easily create a JSON schema
class QueryMovieDB(BaseModel):
    text: str = Field(
        description="Query overviews of movies",
    )
    limit: int = Field(
        default=10,
        description="Number of results to return",
    )

In [17]:
schema = create_tool_schema_for_function(query_movie_db, QueryMovieDB)
schema

{'type': 'function',
 'function': {'name': 'query_movie_db',
  'description': 'Query the LanceDB movie database for movies with similar overviews to the input text.',
  'parameters': {'properties': {'text': {'description': 'Query overviews of movies',
     'title': 'Text',
     'type': 'string'},
    'limit': {'default': 10,
     'description': 'Number of results to return',
     'title': 'Limit',
     'type': 'integer'}},
   'required': ['text'],
   'title': 'QueryMovieDB',
   'type': 'object'}}}

## Hugging Face InferenceClient

* [Hugging Face InferenceClient Function Calling](https://huggingface.co/docs/hugs/en/guides/function-calling)

In [8]:
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise ValueError("HF_TOKEN environment variable not set")

hf_client = AsyncInferenceClient(
    provider="sambanova",
    api_key=hf_token,
)
# hf_client = AsyncOpenAI(
#     base_url="https://router.huggingface.co/sambanova",
#     api_key=hf_token,
# )

In [None]:
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about values. Ask for clarification if needed.",
    },
    {
        "role": "user",
        "content": "I'd like to watch a movie about a retired assassin who is forced back into the game.",
    },
]

response = await hf_client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=messages,
    tools=[schema],
    tool_choice="auto",  # allow the model to choose to call tool, if any; others options: "required": call one or more tools
)  # type: ignore
print(response.choices[0].message.tool_calls)

[ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments='{"limit":10,"text":"a retired assassin who is forced back into the game"}', name='query_movie_db', description=None), id='call_10af3fbbd04c4fa0b8', type='function')]


In [23]:
response.choices[0].message

ChatCompletionOutputMessage(role='assistant', content=None, tool_call_id=None, tool_calls=[ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments='{"limit":10,"text":"a retired assassin who is forced back into the game"}', name='query_movie_db', description=None), id='call_10af3fbbd04c4fa0b8', type='function')])

In [15]:
func = response.choices[0].message.tool_calls[0].function
func

ChatCompletionOutputFunctionDefinition(arguments='{"limit":10,"text":"a retired assassin who is forced back into the game"}', name='query_movie_db', description=None)

In [19]:
response.choices[0].message.tool_calls[0].id

'call_10af3fbbd04c4fa0b8'

## OpenAI

* [OpenAI Function Calling](https://platform.openai.com/docs/guides/function-calling?api-mode=chat)
* [Generous free tier](https://platform.openai.com/docs/models/gpt-4.1-nano)

![](https://i.ibb.co/JwZtC9px/Screenshot-2025-05-20-235653.png "GPT-4.1-nano")

In [26]:
oai_api_key = os.getenv("OPENAI_API_KEY")
if oai_api_key is None:
    raise ValueError("OPENAI_API_KEY environment variable not set")
oai_client = AsyncOpenAI(api_key=oai_api_key)

In [25]:
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about values. Ask for clarification if needed.",
    },
    {
        "role": "user",
        "content": "I'd like to watch a movie about a retired assassin who is forced back into the game.",
    },
]

response = await oai_client.chat.completions.create(
    model="gpt-4.1-nano",
    messages=messages,
    tools=[schema],
    tool_choice="auto",
)
print(response.choices[0].message.tool_calls)

[ChatCompletionMessageToolCall(id='call_x0a8WGeqTzBEyvgoCfLQHf6F', function=Function(arguments='{"text":"retired assassin forced back into the game"}', name='query_movie_db'), type='function')]


In [26]:
tool_call = response.choices[0].message.tool_calls[0]
tool_call

ChatCompletionMessageToolCall(id='call_x0a8WGeqTzBEyvgoCfLQHf6F', function=Function(arguments='{"text":"retired assassin forced back into the game"}', name='query_movie_db'), type='function')

In [29]:
tool_call.function.name

'query_movie_db'

In [None]:
AVAILABLE_FUNCTIONS = {
    "query_movie_db": query_movie_db,
}


def call_function(name, args) -> ToolCallResult:
    func = AVAILABLE_FUNCTIONS.get(name)
    if not func:
        raise ValueError(f"Unknown function: {name}")
    try:
        # Call the function with the provided arguments
        return func(**args)
    except TypeError as e:  # Catches errors like missing/extra arguments
        error_msg = f"Error: Argument mismatch when calling tool '{name}' with arguments {args}. Details: {e}"
        print(error_msg)
        return {
            "llm_consumable": error_msg,
            "ui_displayable": error_msg,
            "return_type": "error_message",
        }
    except Exception as e:  # Catches other errors during tool execution
        error_msg = f"Error during execution of tool '{name}' with arguments {args}. Details: {e}"
        print(error_msg)
        return {
            "llm_consumable": error_msg,
            "ui_displayable": error_msg,
            "return_type": "error_message",
        }


def handle_tool_call(
    tool_call: ChatCompletionMessageToolCall | ChatCompletionOutputToolCall,
) -> ToolCallResult:
    tool_call_id = tool_call.id
    func = tool_call.function
    function_name = func.name
    arguments = json.loads(func.arguments)
    result = call_function(function_name, arguments)
    return

# Check for function calls in LLM response

In [None]:
tool_calls = response.choices[0].message.tool_calls
for tool_call in tool_calls:
    func = tool_call.function
    func_name = func.name
    tool_call_id = tool_call.id
    func_args = json.loads(func.arguments)
    result = call_function(func_name, func_args)
    messages.append(
        {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": json.dumps(result),
        }
    )

In [46]:
response.choices[0].message.tool_calls[0].function.arguments

'{"limit":10,"text":"a retired assassin who is forced back into the game"}'

# Sentiment Analysis

[Structured Output](https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat)

In [5]:
class Polarity(StrEnum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"


class SentimentAnalysisOutput(BaseModel):
    polarity: Annotated[Polarity, "The sentiment polarity of the text"]
    confidence: Annotated[
        float,
        Field(
            description="The confidence score of the sentiment polarity between 0 and 1",
            ge=0.0,
            le=1.0,
        ),
    ]


print(json.dumps(SentimentAnalysisOutput.model_json_schema(), indent=2))

{
  "$defs": {
    "Polarity": {
      "enum": [
        "positive",
        "negative",
        "neutral"
      ],
      "title": "Polarity",
      "type": "string"
    }
  },
  "properties": {
    "polarity": {
      "$ref": "#/$defs/Polarity"
    },
    "confidence": {
      "description": "The confidence score of the sentiment polarity between 0 and 1",
      "maximum": 1.0,
      "minimum": 0.0,
      "title": "Confidence",
      "type": "number"
    }
  },
  "required": [
    "polarity",
    "confidence"
  ],
  "title": "SentimentAnalysisOutput",
  "type": "object"
}


In [None]:
SentimentAnalysisOutput(polarity="positive", confidence=1.1)

ValidationError: 1 validation error for SentimentAnalysisOutput
confidence
  Input should be less than or equal to 1 [type=less_than_equal, input_value=1.1, input_type=float]
    For further information visit https://errors.pydantic.dev/2.11/v/less_than_equal

## Prepare prompt

In [None]:
base_prompt = """\
Please analyze the sentiment (positive, negative, or neutral) of the following text and return the result in JSON format. \
The JSON should contain the following fields:
- polarity: The sentiment polarity of the text (positive, negative, or neutral)
- confidence: The confidence score of the sentiment polarity between 0 (not confident) and 1 (very confident)
The JSON should be formatted as follows:
{{
    "polarity": "positive",
    "confidence": 0.95
}}
Text: {text}
"""

text_to_analyze = "Cilantro is amazing on everything!"

messages = [
    {
        "role": "user",
        "content": base_prompt.format(text=text_to_analyze),
    }
]

## Hugging Face InferenceClient

We must create a JSON schema manually for the LLM to know how to format the output.

In [16]:
response = await hf_client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=messages,
    response_format={
        "type": "json_object",
        "value": SentimentAnalysisOutput.model_json_schema(),  # type: ignore
    }
)  # type: ignore
response

ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='stop', index=0, message=ChatCompletionOutputMessage(role='assistant', content='{\n    "polarity": "positive",\n    "confidence": 0.95\n}', tool_call_id=None, tool_calls=None), logprobs=None)], created=1747796113.2638903, id='000a2db1-d55a-46ef-be65-4360a3b536f0', model='Meta-Llama-3.3-70B-Instruct', system_fingerprint='fastcoe', usage=ChatCompletionOutputUsage(completion_tokens=104, prompt_tokens=145, total_tokens=249, completion_tokens_after_first_per_sec=370.2208590744592, completion_tokens_after_first_per_sec_first_ten=374.14636985304816, completion_tokens_per_sec=316.8629934596221, end_time=1747796113.2638445, is_last_response=True, prompt_tokens_details={'cached_tokens': 0}, start_time=1747796112.9353807, stop_reason='stop', time_to_first_token=0.05025148391723633, total_latency=0.32821756452052436, total_tokens_per_sec=758.6431285715952), object='chat.completion')

In [22]:
# raw response
response.choices[0].message.content

'{\n    "polarity": "positive",\n    "confidence": 0.95\n}'

In [19]:
# parse into dictionary
response_dict = json.loads(response.choices[0].message.content)
response_dict

{'polarity': 'positive', 'confidence': 0.95}

In [20]:
# parse into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput(**response_dict)
sentiment_result

SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.95)

In [None]:
# parse from string directly into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput.model_validate_json(
    response.choices[0].message.content
)
sentiment_result

SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.95)

In [24]:
str(sentiment_result.polarity)

'positive'

## OpenAI

We can directly pass in the Pydantic model to the function call. The client will do the work for us.

<div class="alert alert-block alert-warning">
⚠️ we use `beta.chat.completions.parse` instead of `.chat.completions.create`
</div>

In [28]:
response = await oai_client.beta.chat.completions.parse(
    messages=messages,
    model="gpt-4.1-nano",
    response_format=SentimentAnalysisOutput
)
response

ParsedChatCompletion[SentimentAnalysisOutput](id='chatcmpl-BZU3P7ColjzsCANXqjNkGqh6nEdd7', choices=[ParsedChoice[SentimentAnalysisOutput](finish_reason='stop', index=0, logprobs=None, message=ParsedChatCompletionMessage[SentimentAnalysisOutput](content='{"polarity":"positive","confidence":0.98}', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None, parsed=SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.98)))], created=1747796707, model='gpt-4.1-nano-2025-04-14', object='chat.completion', service_tier='default', system_fingerprint='fp_8fd43718b3', usage=CompletionUsage(completion_tokens=12, prompt_tokens=216, total_tokens=228, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [31]:
# raw response
response.choices[0].message.content

'{"polarity":"positive","confidence":0.98}'

In [32]:
# parse into dictionary
response_dict = json.loads(response.choices[0].message.content)
response_dict

{'polarity': 'positive', 'confidence': 0.98}

In [33]:
# parse into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput(**response_dict)
sentiment_result

SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.98)

In [34]:
# parse from string directly into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput.model_validate_json(
    response.choices[0].message.content
)
sentiment_result

SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.98)

In [None]:
# access the parsed response directly
response.choices[0].message.parsed

SentimentAnalysisOutput(polarity=<Polarity.POSITIVE: 'positive'>, confidence=0.98)