# Imports

In [4]:
%load_ext autoreload
%autoreload 2

import asyncio
import datetime
import json
import os
from enum import StrEnum
from typing import Annotated

import lancedb
import pandas as pd
import tiktoken
import torch
import torch.nn.functional as F
from datasets import load_dataset, load_from_disk
from dotenv import load_dotenv
from huggingface_hub import AsyncInferenceClient
from huggingface_hub.inference._generated.types import ChatCompletionOutputToolCall
from loguru import logger
from pydantic import BaseModel, Field
from openai import OpenAI, AsyncOpenAI
from openai.types.chat.chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall,
)
from sentence_transformers import SentenceTransformer
from tenacity import (
    RetryCallState,
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

from utils import create_tool_schema_for_function
from tool_types import ToolCallResult

load_dotenv()

try:
    from google.colab import userdata  # type: ignore
except ImportError:
    userdata = None

load_dotenv(override=True)
pd.set_option("display.max_colwidth", 0)

# Preparing dataset

In [4]:
# https://huggingface.co/datasets/AiresPucrs/tmdb-5000-movies
ds = load_dataset("AiresPucrs/tmdb-5000-movies", split="train")
ds

README.md:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

(…)-00000-of-00001-6db04ab1c75d6817.parquet:   0%|          | 0.00/13.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4803 [00:00<?, ? examples/s]

Dataset({
    features: ['id', 'budget', 'genres', 'homepage', 'keywords', 'original_language', 'original_title', 'overview', 'popularity', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'vote_average', 'vote_count', 'cast', 'crew'],
    num_rows: 4803
})

In [5]:
# Remove missing overview
ds = ds.filter(lambda x: bool(x["overview"]))
ds

Filter:   0%|          | 0/4803 [00:00<?, ? examples/s]

Dataset({
    features: ['id', 'budget', 'genres', 'homepage', 'keywords', 'original_language', 'original_title', 'overview', 'popularity', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title', 'vote_average', 'vote_count', 'cast', 'crew'],
    num_rows: 4800
})

In [6]:
def preprocess(example: dict) -> dict:
    example["genres"] = [g["name"] for g in json.loads(example["genres"])]
    example["keywords"] = [k["name"] for k in json.loads(example["keywords"])]
    example["release_date"] = datetime.datetime.strptime(
        d if (d := example["release_date"]) else "1970-01-01", "%Y-%m-%d"
    ).date()
    example["release_year"] = example["release_date"].year
    example["spoken_languages"] = [
        sl["name"] for sl in json.loads(example["spoken_languages"])
    ]
    example["cast"] = [
        {
            "name": c["name"],
            "character": c["character"],
        }
        for c in json.loads(example["cast"])
    ]
    # example["production_companies"] = [
    #     pc["name"] for pc in json.loads(example["production_companies"])
    # ]
    # example["production_countries"] = [
    #     pc["name"] for pc in json.loads(example["production_countries"])
    # ]
    # example["crew"] = [
    #     {
    #         "name": c["name"],
    #         "job": c["job"],
    #     }
    #     for c in json.loads(example["crew"])
    # ]
    return example


In [7]:
ds = ds.map(
    preprocess,
    remove_columns=[
        "id",
        "homepage",
        "production_companies",
        "production_countries",
        "status",
        "tagline",
        "vote_count",
        "vote_average",
        "crew",
        "original_title",
    ],
    num_proc=4,
)
ds[0]



Map (num_proc=4):   0%|          | 0/4800 [00:00<?, ? examples/s]



{'budget': 4000000,
 'genres': ['Crime', 'Comedy'],
 'keywords': ['hotel',
  "new year's eve",
  'witch',
  'bet',
  'hotel room',
  'sperm',
  'los angeles',
  'hoodlum',
  'woman director',
  'episode film'],
 'original_language': 'en',
 'overview': "It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.",
 'popularity': 22.87623,
 'release_date': datetime.date(1995, 12, 9),
 'revenue': 4300000,
 'runtime': 98.0,
 'spoken_languages': ['English'],
 'title': 'Four Rooms',
 'cast': [{'character': 'Ted the Bellhop', 'name': 'Tim Roth'},
  {'character': 'Man', 'name': 'Antonio Banderas'},
  {'character': 'Angela', 'name': 'Jennifer Beals'},
  {'character': 'Elspeth', 'name': 'Madonna'},
  {'character': 'Margaret', 'name': 'Marisa Tomei'},
  {'character': 'Leo', 'name': 'Bruce Willis'},
  {'character': 'Cheste

# Embeddings

In [6]:
queries = [
    "I want to watch an exciting superhero movie",
    "我想看一部超級英雄電影",
]

movies = [
    "A movie about a group of friends who go on a road trip",
    "A romantic comedy about a couple who meet at a wedding",
    "An autobiography of George Washington, the first president of the United States",
    "Spider-Man is fighting against the Green Goblin in another universe",
]

## sentence-transformers

* [Official Documentation](https://sbert.net/)
* Models supporting `sentence-transformers`: https://huggingface.co/models?library=sentence-transformers
* Models for `sentence-similarity`: https://huggingface.co/models?pipeline_tag=sentence-similarity


In [8]:
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    device="cpu",
)

### Compute embeddings

In [None]:
query_embeddings = embedder.encode(queries)
movie_embeddings = embedder.encode(movies)
print(query_embeddings.shape, movie_embeddings.shape)
print(f"Query: {query_embeddings[0][:5]}")
print(f"Movie: {movie_embeddings[0][:5]}")

(2, 384) (4, 384)
Query: [-0.4084644  -0.24543129 -0.600559   -0.11258549  0.19218642]
Movie: [ 0.30984214 -0.12824544 -0.33090717 -0.2385715   0.33415732]


### Computing similarity + retrieving top k

In [None]:
# Compute cosine similarities
similarities = embedder.similarity(query_embeddings, movie_embeddings)
similarities

tensor([[ 0.2611,  0.1753, -0.0321,  0.3556],
        [ 0.2784,  0.1435,  0.0242,  0.3834]])

In [12]:
YELLOW = "\033[33m"
END = "\033[0m"
# Output the pairs with their score
for idx_i, sentence1 in enumerate(queries):
    print(sentence1)
    for idx_j, sentence2 in enumerate(movies):
        print(f" - {sentence2: <30}: {YELLOW}{similarities[idx_i][idx_j]:.4f}{END}")

I want to watch an exciting superhero movie
 - A movie about a group of friends who go on a road trip: [33m0.2611[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.1753[0m
 - An autobiography of George Washington, the first president of the United States: [33m-0.0321[0m
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.3556[0m
我想看一部超級英雄電影
 - A movie about a group of friends who go on a road trip: [33m0.2784[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.1435[0m
 - An autobiography of George Washington, the first president of the United States: [33m0.0242[0m
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.3834[0m


In [141]:
torch.topk(similarities[0], k=4)

torch.return_types.topk(
values=tensor([ 0.3556,  0.2611,  0.1753, -0.0321]),
indices=tensor([3, 0, 1, 2]))

In [13]:
for idx_i, sentence1 in enumerate(queries):
    print(sentence1)
    for idx_j in torch.topk(similarities[idx_i], k=4).indices:
        print(f" - {movies[idx_j]: <30}: {YELLOW}{similarities[idx_i][idx_j]:.4f}{END}")

I want to watch an exciting superhero movie
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.3556[0m
 - A movie about a group of friends who go on a road trip: [33m0.2611[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.1753[0m
 - An autobiography of George Washington, the first president of the United States: [33m-0.0321[0m
我想看一部超級英雄電影
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.3834[0m
 - A movie about a group of friends who go on a road trip: [33m0.2784[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.1435[0m
 - An autobiography of George Washington, the first president of the United States: [33m0.0242[0m


## OpenAI

[Documentation](https://platform.openai.com/docs/guides/embeddings?lang=python)

![](https://i.redd.it/lpf0u9nbj7w41.jpg)

In [None]:
api_key = os.getenv("OPENAI_API_KEY")
if not api_key and userdata:
    # If running in Google Colab, try to get the API key from userdata
    api_key = userdata.get("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

client = AsyncOpenAI(api_key=api_key, max_retries=5)  # async instead of sync

### Compute embeddings

In [15]:
res = await client.embeddings.create(input=queries[0], model="text-embedding-3-small")
embedding = res.data[0].embedding
print(len(embedding))
print(embedding[:5])

1536
[-0.010025657713413239, 0.007190898526459932, -0.08766952902078629, 0.010150458663702011, -0.025221630930900574]


In [16]:
print(f"Total tokens: {res.usage.total_tokens}")

Total tokens: 8


In [17]:
# Convert to torch.Tensor for cosine similarity
_query_embeddings = await asyncio.gather(
    *[
        client.embeddings.create(input=query, model="text-embedding-3-small")
        for query in queries
    ]
)
query_embeddings = torch.Tensor(
    [embedding.data[0].embedding for embedding in _query_embeddings]
)
_movie_embeddings = await asyncio.gather(
    *[
        client.embeddings.create(input=movie, model="text-embedding-3-small")
        for movie in movies
    ]
)
movie_embeddings = torch.Tensor(
    [embedding.data[0].embedding for embedding in _movie_embeddings]
)

In [18]:
torch.Tensor(movie_embeddings)

tensor([[ 0.0125,  0.0218, -0.0479,  ...,  0.0026, -0.0386,  0.0079],
        [-0.0326,  0.0203, -0.0673,  ...,  0.0057, -0.0081, -0.0111],
        [ 0.0083, -0.0117,  0.0179,  ...,  0.0175, -0.0041, -0.0108],
        [-0.0362, -0.0462,  0.0150,  ..., -0.0224,  0.0127,  0.0131]])

### Computing similarity + retrieving top k

In [None]:
similarities = F.cosine_similarity(
    query_embeddings.unsqueeze(1), movie_embeddings.unsqueeze(0), dim=2
)
similarities

tensor([[0.2881, 0.2119, 0.0501, 0.3344],
        [0.2241, 0.1764, 0.0754, 0.2568]])

In [21]:
YELLOW = "\033[33m"
END = "\033[0m"
for idx_i, sentence1 in enumerate(queries):
    print(sentence1)
    for idx_j in torch.topk(similarities[idx_i], k=4).indices:
        print(f" - {movies[idx_j]: <30}: {YELLOW}{similarities[idx_i][idx_j]:.4f}{END}")

I want to watch an exciting superhero movie
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.3344[0m
 - A movie about a group of friends who go on a road trip: [33m0.2881[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.2119[0m
 - An autobiography of George Washington, the first president of the United States: [33m0.0501[0m
我想看一部超級英雄電影
 - Spider-Man is fighting against the Green Goblin in another universe: [33m0.2568[0m
 - A movie about a group of friends who go on a road trip: [33m0.2241[0m
 - A romantic comedy about a couple who meet at a wedding: [33m0.1764[0m
 - An autobiography of George Washington, the first president of the United States: [33m0.0754[0m


### Calculate tokens and price

[OpenAI pricing](https://platform.openai.com/docs/pricing)

In [24]:
enc = tiktoken.encoding_for_model("text-embedding-3-small")

In [27]:
encoded = enc.encode(queries[0])
print(f"Total tokens: {len(encoded)}")
print(encoded)

Total tokens: 8
[40, 1390, 311, 3821, 459, 13548, 46244, 5818]


In [28]:
# price per 1M tokens
model_to_price = {
    "text-embedding-3-small": 0.02,
    "text-embedding-3-large": 0.13,
}


def get_token_count_and_price(
    texts: list[str], model: str = "text-embedding-3-small"
) -> tuple[int, float]:
    if model not in model_to_price:
        raise ValueError(f"Model {model} not supported")
    enc = tiktoken.encoding_for_model(model)
    token_count = sum(len(e) for e in enc.encode_batch(texts))
    price_per_1m_tokens = model_to_price[model]
    price = (token_count / 1_000_000) * price_per_1m_tokens
    return token_count, price

In [29]:
get_token_count_and_price(movies * 10, model="text-embedding-3-small")

(480, 9.600000000000001e-06)

### Handling rate limits

In [None]:
def log_backoff_attempt(retry_state: RetryCallState) -> None:
    """
    Logs a message before a retry attempt, detailing the attempt number,
    the exception, and the wait time.
    """
    attempt_num = retry_state.attempt_number
    exception = retry_state.outcome.exception() if retry_state.outcome else "N/A"
    wait_time = retry_state.next_action.sleep if retry_state.next_action else 0.0
    func_name = retry_state.fn.__name__ if retry_state.fn else "N/A"

    logger.info(
        f"Backing off for function '{func_name}': "
        f"Attempt {attempt_num} failed due to '{exception.__class__.__name__}: {exception}'. "
        f"Waiting {wait_time:.2f} seconds before next attempt."
    )


@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    before_sleep=log_backoff_attempt,
)
async def embedding_with_backoff(**kwargs):
    return await client.embeddings.create(**kwargs)

# Setting up a vector database

[LanceDB documentation](https://lancedb.github.io/lancedb/basic/)

## Creating a LanceDB table

In [9]:
# We'll use the "overview" column as the text to embed
ds[0]

{'budget': 4000000,
 'genres': ['Crime', 'Comedy'],
 'keywords': ['hotel',
  "new year's eve",
  'witch',
  'bet',
  'hotel room',
  'sperm',
  'los angeles',
  'hoodlum',
  'woman director',
  'episode film'],
 'original_language': 'en',
 'overview': "It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.",
 'popularity': 22.87623,
 'release_date': datetime.date(1995, 12, 9),
 'revenue': 4300000,
 'runtime': 98.0,
 'spoken_languages': ['English'],
 'title': 'Four Rooms',
 'cast': [{'character': 'Ted the Bellhop', 'name': 'Tim Roth'},
  {'character': 'Man', 'name': 'Antonio Banderas'},
  {'character': 'Angela', 'name': 'Jennifer Beals'},
  {'character': 'Elspeth', 'name': 'Madonna'},
  {'character': 'Margaret', 'name': 'Marisa Tomei'},
  {'character': 'Leo', 'name': 'Bruce Willis'},
  {'character': 'Cheste

In [10]:
overviews = ds["overview"]
print(len(overviews))
overviews[:5]

4800


["It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.",
 'Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.',
 "Nemo, an adventurous young clownfish, is unexpectedly taken from his Great Barrier Reef home to a dentist's office aquarium. It's up to his worrisome father Marlin and a friendly but forgetful fish Dory to bring Nemo home -- meeting vegetarian sharks, surfer dude turtles, hypnotic jellyfish, hungry seagulls, and more along the way.",
 "A man with a low IQ has accomplished great things in his life and been present during significant his

In [11]:
ds.features

{'budget': Value(dtype='int64', id=None),
 'genres': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'keywords': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'original_language': Value(dtype='string', id=None),
 'overview': Value(dtype='string', id=None),
 'popularity': Value(dtype='float64', id=None),
 'release_date': Value(dtype='date32', id=None),
 'revenue': Value(dtype='int64', id=None),
 'runtime': Value(dtype='float64', id=None),
 'spoken_languages': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
 'title': Value(dtype='string', id=None),
 'cast': [{'character': Value(dtype='string', id=None),
   'name': Value(dtype='string', id=None)}],
 'release_year': Value(dtype='int64', id=None)}

## sentence-transformers

In [12]:
overview_embeddings = embedder.encode(overviews)
overview_embeddings[0][:5]

array([ 0.07971882, -0.22588535, -0.08258647, -0.06734925,  0.0794584 ],
      dtype=float32)

In [14]:
overview_embeddings.shape

(4800, 384)

In [36]:
ds = ds.add_column(name="vector", column=overview_embeddings.tolist())

### Save results to disk

In [38]:
ds.save_to_disk("./data/dataset_processed_1")

Saving the dataset (0/1 shards):   0%|          | 0/4800 [00:00<?, ? examples/s]

#### Save results to Google Drive

In [None]:
from google.colab import drive

drive.mount("/content/drive")
!mkdir -p "/content/drive/My Drive/genai4h-wk14"
!cp -r "./data/dataset_processed" "/content/drive/My Drive/genai4h-wk14/"

### Load results from disk

In [39]:
ds = load_from_disk("./data/dataset_processed")
ds[0]

{'budget': 4000000,
 'genres': ['Crime', 'Comedy'],
 'keywords': ['hotel',
  "new year's eve",
  'witch',
  'bet',
  'hotel room',
  'sperm',
  'los angeles',
  'hoodlum',
  'woman director',
  'episode film'],
 'original_language': 'en',
 'overview': "It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.",
 'popularity': 22.87623,
 'release_date': datetime.date(1995, 12, 9),
 'revenue': 4300000,
 'runtime': 98.0,
 'spoken_languages': ['English'],
 'title': 'Four Rooms',
 'cast': [{'character': 'Ted the Bellhop', 'name': 'Tim Roth'},
  {'character': 'Man', 'name': 'Antonio Banderas'},
  {'character': 'Angela', 'name': 'Jennifer Beals'},
  {'character': 'Elspeth', 'name': 'Madonna'},
  {'character': 'Margaret', 'name': 'Marisa Tomei'},
  {'character': 'Leo', 'name': 'Bruce Willis'},
  {'character': 'Cheste

In [40]:
df = ds.to_pandas()
df.iloc[0]

budget               4000000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            

In [41]:
db = lancedb.connect("./data/lance_db")

In [42]:
tbl = db.create_table("movies", data=df, mode="overwrite")
tbl.create_fts_index("overview", replace=True)

### OpenAI

[Slow embeddings?](https://community.openai.com/t/embeddings-api-extremely-slow/1135044)

In [30]:
overviews = ds["overview"]
get_token_count_and_price(overviews, model="text-embedding-3-small")

(311562, 0.00623124)

In [None]:
_overview_embeddings = await asyncio.gather(
    *[
        embedding_with_backoff(input=overview, model="text-embedding-3-small")
        # client.embeddings.create(input=overview, model="text-embedding-3-small")
        for overview in overviews
    ]
)
overview_embeddings = [
    embedding.data[0].embedding for embedding in _overview_embeddings
]
overview_embeddings[0][:5]

In [None]:
df = ds.to_pandas()
df["vector"] = overview_embeddings

# Vector search

* [Vector search](https://lancedb.github.io/lancedb/search/)
* [Hybrid search](https://lancedb.github.io/lancedb/hybrid_search/hybrid_search/)
* [Keyword search](https://lancedb.github.io/lancedb/fts/)  (needs tokenization)

Hybrid and keyword search may not work as well for cross lingual search.

## Load embedding model
<div class="alert alert-block alert-warning">
⚠️ Must use the same model as the one used to create the embeddings
</div>

In [25]:
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    device="cpu",
)

In [46]:
db = lancedb.connect("./data/lance_db/")
tbl = db.open_table("movies")
tbl.create_fts_index("overview", replace=True)

## Embed the query

In [47]:
q_en = "I want to watch a romantic comedy"
q_zh = "我想看一部浪漫喜劇"

q_en_embedding = embedder.encode(q_en)
q_zh_embedding = embedder.encode(q_zh)
print(len(q_en_embedding), len(q_zh_embedding))
print(q_en_embedding[:5], q_zh_embedding[:5])

384 384
[-0.3266221  -0.6309779   0.14315045 -0.11667657  0.0302001 ] [-0.23116109 -0.4500395   0.09482071  0.01837625 -0.08190719]


## Querying the database

In [16]:
random_vector = torch.randn(10).numpy()
random_vector

array([ 0.3624173 , -0.39165464,  0.6183053 ,  0.5983386 ,  1.8258102 ,
        0.33074635,  1.4305156 , -0.64978415, -1.469294  ,  1.6759685 ],
      dtype=float32)

<div class="alert alert-block alert-warning">
⚠️ This will fail because the embedding dimension does not match 👇

In [18]:
tbl.search(random_vector).limit(5).to_pandas()

RuntimeError: lance error: Invalid user input: query dim(10) doesn't match the column vector vector dim(384), /root/.cargo/registry/src/index.crates.io-6f17d22bba15001f/lance-0.26.0/src/dataset/scanner.rs:756:25

In [48]:
tbl.search(q_en_embedding).select(["overview"]).limit(5).to_pandas()

Unnamed: 0,overview,_distance
0,"A modern reimagining of the classic romantic comedy, this contemporary version closely follows new love for two couples as they journey from the bar to the bedroom and are eventually put to the test in the real world.",20.804733
1,"Spoof of romantic comedies which focuses on a man (Campbell), his crush (Hannigan), his parents (Coolidge, Willard), and her father (Griffin).",21.273169
2,Romantic comedy. A small town teenager's angst about sexual inexperience drives a comic quest for love and understanding on a birthday to end all birthdays.,22.505938
3,"Dramatic comedy about two unlikely people who find each other while looking for love. Judith Nelson (Holly Hunter) is suddenly single after discovering her husband of fifteen years, a successful doctor (Martin Donovan), has been having an affair with a younger woman. Judith stews, plans, plots and fantasizes, but she can't decide what to do with her life until she goes out to a night club to see singer Liz Bailey (Queen Latifah), who is full of advice on life and love. While out on the town, Judith is suddenly kissed by a total stranger, which opens her eyes to new possibilities ... which is when she notices Pat (Danny De Vito), the elevator operator in her building.",24.331314
4,"A romantic comedy centered on Dexter and Emma, who first meet during their graduation in 1988 and proceed to keep in touch regularly. The film follows what they do on July 15 annually, usually doing something together.",24.688421


In [23]:
tbl.search(q_zh_embedding).select(["overview", "original_title"]).limit(5).to_pandas()

Unnamed: 0,overview,original_title,_distance
0,"Spoof of romantic comedies which focuses on a man (Campbell), his crush (Hannigan), his parents (Coolidge, Willard), and her father (Griffin).",Date Movie,13.745854
1,"A modern reimagining of the classic romantic comedy, this contemporary version closely follows new love for two couples as they journey from the bar to the bedroom and are eventually put to the test in the real world.",About Last Night,13.997502
2,Romantic comedy. A small town teenager's angst about sexual inexperience drives a comic quest for love and understanding on a birthday to end all birthdays.,16 to Life,15.334779
3,"Dramatic comedy about two unlikely people who find each other while looking for love. Judith Nelson (Holly Hunter) is suddenly single after discovering her husband of fifteen years, a successful doctor (Martin Donovan), has been having an affair with a younger woman. Judith stews, plans, plots and fantasizes, but she can't decide what to do with her life until she goes out to a night club to see singer Liz Bailey (Queen Latifah), who is full of advice on life and love. While out on the town, Judith is suddenly kissed by a total stranger, which opens her eyes to new possibilities ... which is when she notices Pat (Danny De Vito), the elevator operator in her building.",Living Out Loud,16.062124
4,"A sparkling comedic chronicle of a middle-class young man’s romantic misadventures among New York City’s debutante society. Stillman’s deft, literate dialogue and hilariously highbrow observations earned this debut film an Academy Award nomination for Best Original Screenplay. Alongside the wit and sophistication, though, lies a tender tale of adolescent anxiety.",Metropolitan,16.759302


# Function calling

We'll use RAG to demonstrate LLM function calling.

In [5]:
embedder = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    device="cpu",
)
db = lancedb.connect("./data/lance_db")
print(db.table_names())
tbl = db.open_table("movies")

['movies']


In [6]:
def query_movie_db(
    text: str,
    limit: int = 10,
) -> ToolCallResult:
    """
    Query the LanceDB movie database for movies with similar overviews to the input text.

    Args:
        text (str): The input text to query the database.
        limit (int, optional): The number of results to return. Defaults to 10.

    Returns:
        ToolCallResult: The result of the tool call.
    """
    q_emb = embedder.encode(text)
    df = (
        tbl.search(q_emb).limit(limit).to_pandas().drop(columns=["vector", "_distance"])
    )
    return {
        "llm_consumable": df.to_json(lines=True, orient="records"),
        "ui_displayable": df,
        "return_type": "dataframe",
    }


In [7]:
res = query_movie_db("air bud")
print(res["llm_consumable"])

{"budget":3500000,"genres":["Comedy"],"keywords":["chicago","alcohol","cataclysm","guitar","medicine","taxi driver","passenger","saxophone","stewardess","pilot","airplane","fear of flying","air controller","landing","autopilot","kiss","spoof","los angeles","alcohol abuse","aftercreditsstinger","anarchic comedy"],"original_language":"en","overview":"Alcoholic pilot, Ted Striker has developed a fear of flying due to wartime trauma, but nevertheless boards a passenger jet in an attempt to woo back his stewardess girlfriend. Food poisoning decimates the passengers and crew, leaving it up to Striker to land the plane with the help of a glue-sniffing air traffic controller and Striker's vengeful former Air Force captain, who must both talk him down.","popularity":46.116885,"release_date":331344000000,"revenue":83453539,"runtime":88.0,"spoken_languages":["English"],"title":"Airplane!","cast":[{"character":"Ted Striker","name":"Robert Hays"},{"character":"Elaine","name":"Julie Hagerty"},{"char

In [8]:
res["ui_displayable"].iloc[0]

budget               3500000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            

## Creating a JSON schema

We need to create a JSON schema for our tools so that the LLM knows what each tool does and how to use it.

In [11]:
# We create a BaseModel for the arguments to our function to easily create a JSON schema
class QueryMovieDB(BaseModel):
    text: str = Field(
        description="Query overviews of movies",
    )
    limit: int = Field(
        default=10,
        description="Number of results to return",
    )

In [12]:
schema = create_tool_schema_for_function(query_movie_db, QueryMovieDB)
schema

{'type': 'function',
 'function': {'name': 'query_movie_db',
  'description': 'Query the LanceDB movie database for movies with similar overviews to the input text.',
  'parameters': {'properties': {'text': {'description': 'Query overviews of movies',
     'title': 'Text',
     'type': 'string'},
    'limit': {'default': 10,
     'description': 'Number of results to return',
     'title': 'Limit',
     'type': 'integer'}},
   'required': ['text'],
   'title': 'QueryMovieDB',
   'type': 'object'}}}

## Hugging Face InferenceClient

* [Hugging Face InferenceClient Function Calling](https://huggingface.co/docs/hugs/en/guides/function-calling)

In [13]:
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise ValueError("HF_TOKEN environment variable not set")

hf_client = AsyncInferenceClient(
    provider="fireworks-ai",
    api_key=hf_token,
)

In [14]:
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about values. Ask for clarification if needed.",
    },
    {
        "role": "user",
        "content": "I'd like to watch a movie about a retired assassin who is forced back into the game. /no_think",
    },
]

response = await hf_client.chat_completion(
    model="Qwen/Qwen3-235B-A22B",
    messages=messages,
    tools=[schema],
    tool_choice="auto",  # allow the model to choose to call tool, if any; others options: "required": call one or more tools
)  # type: ignore
print(response.choices[0].message.tool_calls)


[ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments='{"text": "A retired assassin who is forced back into the game", "limit": 10}', name='query_movie_db', description=None), id='call_GtVrMe6Uc8tzbV4wP4JM3wsQ', type='function', index=0)]


In [None]:
response.choices[0].message

ChatCompletionOutputMessage(role='assistant', content='<think>\n\n</think>\n\n', tool_call_id=None, tool_calls=[ChatCompletionOutputToolCall(function=ChatCompletionOutputFunctionDefinition(arguments='{"text": "a retired assassin who is forced back into the game", "limit": 10}', name='query_movie_db', description=None), id='call_s2FqSYpNMohv6sJluEiafAeO', type='function', index=0)])

In [60]:
str(response.choices[0].message.tool_calls[0].function)

'ChatCompletionOutputFunctionDefinition(arguments=\'{"text": "a retired assassin who is forced back into the game", "limit": 10}\', name=\'query_movie_db\', description=None)'

In [56]:
func = response.choices[0].message.tool_calls[0].function
func


ChatCompletionOutputFunctionDefinition(arguments='{"text": "a retired assassin who is forced back into the game", "limit": 10}', name='query_movie_db', description=None)

In [58]:
print(response.choices[0].message.tool_calls[0].id)
print(func.name)
print(json.loads(func.arguments))

call_s2FqSYpNMohv6sJluEiafAeO
query_movie_db
{'text': 'a retired assassin who is forced back into the game', 'limit': 10}


In [None]:
func.call

### Streaming kind of a hassle

In [None]:
response = await hf_client.chat_completion(
    model="Qwen/Qwen3-235B-A22B",
    messages=messages,
    tools=[schema],
    tool_choice="auto",  # allow the model to choose to call tool, if any; others options: "required": call one or more tools
    stream=True,
)  # type: ignore
chunks = []
async for chunk in response:
    chunks.append(chunk)
    print(chunk)

ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(role='assistant', content=None, tool_call_id=None, tool_calls=None), index=0, finish_reason=None, logprobs=None)], created=1747809409.1515565, id='b03f4b31-5eb4-42f4-8376-6fa1715113a5', model='Meta-Llama-3.3-70B-Instruct', system_fingerprint='fastcoe', usage=None, object='chat.completion.chunk')
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(role='assistant', content=None, tool_call_id=None, tool_calls=[ChatCompletionStreamOutputDeltaToolCall(function=ChatCompletionStreamOutputFunction(arguments='{"limit":10,"text":"a retired assassin who is forced back into the game"}', name='query_movie_db'), id='call_130dc102c3264ccba4', index=None, type='function')]), index=0, finish_reason='tool_calls', logprobs=None)], created=1747809409.1515565, id='b03f4b31-5eb4-42f4-8376-6fa1715113a5', model='Meta-Llama-3.3-70B-Instruct', system_

In [15]:
messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant. Only query the database if you are sure it is needed.",
    },
    {
        "role": "user",
        "content": "I want to watch a movie about superheroes. /no_think",
    },
]
response = await hf_client.chat.completions.create(
    model="Qwen/Qwen3-235B-A22B",
    messages=messages,
    tools=[schema],
    tool_choice="auto",  # allow the model to choose to call tool, if any; others options: "required": call one or more tools
    stream=True,
)  # type: ignore
chunks = []
async for chunk in response:
    chunks.append(chunk)
    print(chunk)

ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(role='assistant', content=None, tool_call_id=None, tool_calls=None), index=0, finish_reason=None, logprobs=None)], created=1747836758, id='bf6552b7-d531-48ca-b440-43ce7d408827', model='accounts/fireworks/models/qwen3-235b-a22b', system_fingerprint=None, usage=None, object='chat.completion.chunk')
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(role=None, content='<think>\n\n', tool_call_id=None, tool_calls=None), index=0, finish_reason=None, logprobs=None)], created=1747836758, id='bf6552b7-d531-48ca-b440-43ce7d408827', model='accounts/fireworks/models/qwen3-235b-a22b', system_fingerprint=None, usage=None, object='chat.completion.chunk')
ChatCompletionStreamOutput(choices=[ChatCompletionStreamOutputChoice(delta=ChatCompletionStreamOutputDelta(role=None, content='</think>\n\n<tool_call>\n{"', tool_call_id=None, tool_calls=N

## OpenAI

* [OpenAI Function Calling](https://platform.openai.com/docs/guides/function-calling?api-mode=chat)
* [Generous free tier](https://platform.openai.com/docs/models/gpt-4.1-nano)

![](https://i.ibb.co/JwZtC9px/Screenshot-2025-05-20-235653.png "GPT-4.1-nano")

In [None]:
oai_api_key = os.getenv("OPENAI_API_KEY")
if oai_api_key is None:
    raise ValueError("OPENAI_API_KEY environment variable not set")
oai_client = AsyncOpenAI(api_key=oai_api_key)


In [None]:
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about values. Ask for clarification if needed.",
    },
    {
        "role": "user",
        "content": "I'd like to watch a movie about a retired assassin who is forced back into the game.",
    },
]

response = await oai_client.chat.completions.create(
    model="gpt-4.1-nano",
    messages=messages,
    tools=[schema],
    tool_choice="auto",
)
print(response.choices[0].message.tool_calls)


In [None]:
tool_call = response.choices[0].message.tool_calls[0]
tool_call


In [None]:
AVAILABLE_FUNCTIONS = {
    "query_movie_db": query_movie_db,
}


def call_function(name, args) -> ToolCallResult:
    func = AVAILABLE_FUNCTIONS.get(name)
    if not func:
        raise ValueError(f"Unknown function: {name}")
    try:
        # Call the function with the provided arguments
        return func(**args)
    except TypeError as e:  # Catches errors like missing/extra arguments
        error_msg = f"Error: Argument mismatch when calling tool '{name}' with arguments {args}. Details: {e}"
        print(error_msg)
        return {
            "llm_consumable": error_msg,
            "ui_displayable": error_msg,
            "return_type": "error_message",
        }
    except Exception as e:  # Catches other errors during tool execution
        error_msg = f"Error during execution of tool '{name}' with arguments {args}. Details: {e}"
        print(error_msg)
        return {
            "llm_consumable": error_msg,
            "ui_displayable": error_msg,
            "return_type": "error_message",
        }


def handle_tool_call(
    tool_call: ChatCompletionMessageToolCall | ChatCompletionOutputToolCall,
) -> ToolCallResult:
    tool_call_id = tool_call.id
    func = tool_call.function
    function_name = func.name
    arguments = json.loads(func.arguments)
    result = call_function(function_name, arguments)
    return


## Check for function calls in LLM response

In [None]:
tool_calls = response.choices[0].message.tool_calls
for tool_call in tool_calls:
    func = tool_call.function
    func_name = func.name
    tool_call_id = tool_call.id
    func_args = json.loads(func.arguments)
    result = call_function(func_name, func_args)
    messages.append(
        {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": json.dumps(result),
        }
    )


# Structured outputs

We can ensure that the LLM output in a specific format using `structured outputs`

In [None]:
class Polarity(StrEnum):
    POSITIVE = "positive"
    NEGATIVE = "negative"
    NEUTRAL = "neutral"


class SentimentAnalysisOutput(BaseModel):
    polarity: Annotated[Polarity, "The sentiment polarity of the text"]
    confidence: Annotated[
        float,
        Field(
            description="The confidence score of the sentiment polarity between 0 and 1",
            ge=0.0,
            le=1.0,
        ),
    ]


print(json.dumps(SentimentAnalysisOutput.model_json_schema(), indent=2))

In [None]:
# This will throw an error because the confidence is greater than 1
SentimentAnalysisOutput(polarity="positive", confidence=1.1)

## Prepare prompt

In [None]:
base_prompt = """\
Please analyze the sentiment (positive, negative, or neutral) of the following text and return the result in JSON format. \
The JSON should contain the following fields:
- polarity: The sentiment polarity of the text (positive, negative, or neutral)
- confidence: The confidence score of the sentiment polarity between 0 (not confident) and 1 (very confident)
The JSON should be formatted as follows:
{{
    "polarity": "positive",
    "confidence": 0.95
}}
Text: {text}
"""

text_to_analyze = "Cilantro is amazing on everything!"

messages = [
    {
        "role": "user",
        "content": base_prompt.format(text=text_to_analyze),
    }
]

## Hugging Face InferenceClient

We must create a JSON schema manually for the LLM to know how to format the output.

In [None]:
response = await hf_client.chat_completion(
    model="Qwen/Qwen3-235B-A22B",
    messages=messages,
    response_format={
        "type": "json_object",
        "value": SentimentAnalysisOutput.model_json_schema(),  # type: ignore
    },
)  # type: ignore
response

In [None]:
# raw response
response.choices[0].message.content


In [None]:
# parse into dictionary
response_dict = json.loads(response.choices[0].message.content)
response_dict


In [None]:
# parse into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput(**response_dict)
sentiment_result


In [None]:
# parse from string directly into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput.model_validate_json(
    response.choices[0].message.content
)
sentiment_result


In [None]:
str(sentiment_result.polarity)


## OpenAI


[Structured Output Documentation](https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat)

We can directly pass in the Pydantic model to the function call. The client will do the work for us.

<div class="alert alert-block alert-warning">
⚠️ we use `beta.chat.completions.parse` instead of `.chat.completions.create`
</div>

In [None]:
response = await oai_client.beta.chat.completions.parse(
    messages=messages, model="gpt-4.1-nano", response_format=SentimentAnalysisOutput
)
response

In [None]:
# raw response
response.choices[0].message.content


In [None]:
# parse into dictionary
response_dict = json.loads(response.choices[0].message.content)
response_dict


In [None]:
# parse into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput(**response_dict)
sentiment_result


In [None]:
# parse from string directly into SentimentAnalysisOutput
sentiment_result = SentimentAnalysisOutput.model_validate_json(
    response.choices[0].message.content
)
sentiment_result


In [None]:
# access the parsed response directly
response.choices[0].message.parsed


In [2]:
def test():
    """this returns 1"""
    return 1

In [3]:
type(test)

function