In [1]:
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import tool

In [2]:
import pandas as pd
from qdrant_client import QdrantClient,models
from sentence_transformers import SentenceTransformer
from geopy.distance import geodesic


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Client for the Qdrant vector database served on localhost, port 6333.
client = QdrantClient(url="http://localhost:6333")
# Name of the Qdrant collection that every search in this notebook queries.
collection_name = "no_coordinates_collection"
# Sentence-embedding model used to turn address text into query vectors
# (the debugging cell below prints a vector length of 384).
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    



In [4]:
# Debugging aid: embed a sample address and run a filtered similarity search
# to confirm the collection answers queries and to inspect the payload shape.
# NOTE: `embeddings` and `output` become notebook-level globals; check that no
# later cell relies on them before deleting this cell.
embeddings = model.encode('900 Embarcadero Del Mar')
print(len(embeddings))
output = client.search(
        collection_name=f"{collection_name}",
        # Only points whose "categories" payload contains 'Hotels' are considered.
        query_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="categories",
                    match=models.MatchValue(
                        value='Hotels',
                    ),
                )
            ]
        ),
        query_vector=embeddings,
        limit=3,
    )
print(output)

384
[ScoredPoint(id='5e744e0c-cee7-5cf6-b25f-bf4bbf6a6dd2', version=61, score=0.5553415, payload={'categories': ['Hotels', 'Hotels & Travel', 'Event Planning & Services'], 'full_address': '424 Por La Mar Santa Barbara 93103', 'hours': None, 'latitude': 34.4189383557, 'longitude': -119.6712183952, 'name': 'Parkside Inn', 'review_count': 42, 'stars': 3.0}, vector=None, shard_key=None), ScoredPoint(id='c557b509-606f-5a9e-9435-1bdc0d65cc07', version=98, score=0.53008175, payload={'categories': ['Venues & Event Spaces', 'Hotels & Travel', 'Event Planning & Services', 'Hotels', 'Wedding Planning'], 'full_address': '900 San Ysidro Ln Santa Barbara 93108', 'hours': {'Friday': '0:0-0:0', 'Monday': '0:0-0:0', 'Saturday': '0:0-0:0', 'Sunday': '0:0-0:0', 'Thursday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'Wednesday': '0:0-0:0'}, 'latitude': 34.4468773902, 'longitude': -119.623055, 'name': 'San Ysidro Ranch', 'review_count': 129, 'stars': 4.5}, vector=None, shard_key=None), ScoredPoint(id='5faa0e46-12ef-

# Define your tools!

For this we create a LangChain tool that retrieves a business recommendation from the Qdrant collection, and use `convert_to_openai_function` to create the function definition that we will feed into the model.

In [5]:
# Argument schema for the `get_category` tool. The Field descriptions below are
# copied into the generated function definition that is shown to the model.
class SearchInput(BaseModel):
    # Address text of the user's position; it is embedded and used as the query vector.
    location: str = Field(description="Current location of user.")
    # Capitalised on purpose: the key must match the `Category` parameter of get_category.
    Category: str = Field(description="Type of business that the user wants to go.")
@tool("get_category", args_schema=SearchInput)
def get_category(location: str, Category: str) -> str:
    """Get a business recommendation given location and category of user's choice.\
        e.g. "Name of restaurant": Helena Avenue Bakery, "Location": 131 Anacapa St, Ste C Santa Barbara 93101 ,"Category": Food, Restaurants, Salad, Coffee & Tea """
    # NOTE: the docstring and the signature above are forwarded to the LLM as
    # the tool description, so they are deliberately left untouched. Despite
    # the `-> str` annotation, the function returns a dict with the keys
    # "name", "location", "latitude", "longitude" and "categories".
    #
    # Raises ValueError when no point in the collection matches `Category`.

    # Embed the user's address so it can be compared with the stored vectors.
    address_embedding = model.encode(location)

    # Exact-match filter on the "categories" payload field.
    category_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="categories",
                match=models.MatchValue(value=Category),
            )
        ]
    )

    candidates = client.search(
        collection_name=collection_name,
        query_filter=category_filter,
        query_vector=address_embedding,
        limit=3,
    )

    # An unknown or misspelled category yields no hits; fail with a clear
    # message instead of the cryptic "max() arg is an empty sequence".
    if not candidates:
        raise ValueError(
            f"No business with category '{Category}' found in collection '{collection_name}'."
        )

    # Keep the candidate with the highest similarity score.
    best_point = max(candidates, key=lambda point: point.score)
    payload = best_point.payload

    return {
        "name": payload['name'],
        "location": payload['full_address'],
        "latitude": payload['latitude'],
        "longitude": payload['longitude'],
        "categories": payload['categories'],
    }

# Register the tool and convert it to an OpenAI-style function definition;
# `functions` is later serialised into the system prompt.
tools = [get_category]
functions = [convert_to_openai_function(t) for t in tools]
functions

[{'name': 'get_category',
  'description': 'get_category(location: str, Category: str) -> str - Get a business recommendation given location and category of user\'s choice.        e.g. "Name of restaurant": Helena Avenue Bakery, "Location": 131 Anacapa St, Ste C Santa Barbara 93101 ,"Category": Food, Restaurants, Salad, Coffee & Tea',
  'parameters': {'type': 'object',
   'properties': {'location': {'description': 'Current location of user.',
     'type': 'string'},
    'Category': {'description': 'Type of business that the user wants to go.',
     'type': 'string'}},
   'required': ['location', 'Category']}}]

# Invoke the Model

Now we can prompt the model and pass the functions 🥳

In [6]:
import ollama
import json

# System prompt handed to the model: a short role statement followed by the
# JSON-serialised function definitions it is allowed to call.
SYSTEM_PROMPT = f"""
You are an navigation assistant with access to these functions -
{json.dumps(functions, indent=4)}
"""

print (SYSTEM_PROMPT)


You are an navigation assistant with access to these functions -
[
    {
        "name": "get_category",
        "description": "get_category(location: str, Category: str) -> str - Get a business recommendation given location and category of user's choice.        e.g. \"Name of restaurant\": Helena Avenue Bakery, \"Location\": 131 Anacapa St, Ste C Santa Barbara 93101 ,\"Category\": Food, Restaurants, Salad, Coffee & Tea",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "description": "Current location of user.",
                    "type": "string"
                },
                "Category": {
                    "description": "Type of business that the user wants to go.",
                    "type": "string"
                }
            },
            "required": [
                "location",
                "Category"
            ]
        }
    }
]



In [7]:
import json
import re
from typing import Any, Dict, Optional

def parse_function_call(input_str: str) -> Optional[Dict[str, Any]]:
    """
    Parses a text string to find and extract a function call.
    The function call is expected to be in the format:
    <functioncall> {"name": "<function_name>", "arguments": '<arguments_json_string>'}

    The arguments may be written in any of these forms:
      * a single-quoted JSON string (the format the model normally emits),
      * a plain JSON value, e.g. {"location": "..."},
      * a double-quoted, escaped JSON string.
    Apostrophes and line breaks inside the arguments are supported.

    Args:
        input_str (str): The text containing the function call.

    Returns:
        Optional[Dict[str, Any]]: A dictionary with 'name' and 'arguments' if a function call is found
                                  and its arguments are valid JSON, otherwise None.
    """
    if not input_str:
        return None

    # Locate the function name and the position where the arguments start.
    header = re.search(r'"name":\s*"([^"]+)",\s*"arguments":\s*', input_str)
    if header is None:
        return None

    name = header.group(1)
    payload = input_str[header.end():]
    decoder = json.JSONDecoder()

    try:
        if payload.startswith("'"):
            # Single-quoted JSON: let the JSON decoder find the end of the
            # value instead of stopping at the next apostrophe, which may be
            # part of the data (e.g. "Joe's Pier").
            arguments, _ = decoder.raw_decode(payload[1:].lstrip())
        else:
            arguments, _ = decoder.raw_decode(payload)
            # A double-quoted arguments string still holds JSON text inside.
            if isinstance(arguments, str):
                arguments = json.loads(arguments)
    except json.JSONDecodeError:
        return None

    return {"name": name, "arguments": arguments}


In [34]:
# Conversation so far: the system prompt listing the available functions,
# followed by the user's question.
messages = [
     {'role': 'system','content': SYSTEM_PROMPT}, 
     {'role': 'user','content': 'Im at 900 Embarcadero Del Mar, any cafes nearby?'},
]

# Ask the model; its reply text is expected to contain a <functioncall> payload.
response = ollama.chat(model='calebfahlgren/natural-functions', messages=messages)
message = (response['message']['content'])

In [35]:
message

'<functioncall> {"name": "get_category", "arguments": \'{"location": "900 Embarcadero Del Mar", "Category": "Cafes"}\'}'

# Call the Function

Here we parse the function call out of the model's reply and call our tool with the extracted arguments.

In [36]:
messages.append({'role': 'assistant', 'content': message}) # add ai response to history

function_call = parse_function_call(message) # parse out function call name and args into json
print(function_call)

# Default to None so the cell still runs (and displays nothing) when the model
# reply contains no parsable get_category call, instead of raising NameError.
restaurant = None
if function_call and function_call.get("name") == "get_category":
    args = function_call.get("arguments")
    restaurant = get_category.run(args)

restaurant

{'name': 'get_category', 'arguments': {'location': '900 Embarcadero Del Mar', 'Category': 'Cafes'}}


{'name': 'The Patio Cafe Santa Barbara',
 'location': '3007 De La Vina St Santa Barbara 93105',
 'latitude': 34.4386928,
 'longitude': -119.7285101,
 'categories': ['Cafes', 'Restaurants', 'Breakfast & Brunch']}

In [37]:
# Feed the tool result back to the model as a user turn so it can phrase the answer.
messages.append({'role': 'user', 'content': 'Function Response: ' + str(restaurant)})
messages

[{'role': 'system',
  'content': '\nYou are an navigation assistant with access to these functions -\n[\n    {\n        "name": "get_category",\n        "description": "get_category(location: str, Category: str) -> str - Get a business recommendation given location and category of user\'s choice.        e.g. \\"Name of restaurant\\": Helena Avenue Bakery, \\"Location\\": 131 Anacapa St, Ste C Santa Barbara 93101 ,\\"Category\\": Food, Restaurants, Salad, Coffee & Tea",\n        "parameters": {\n            "type": "object",\n            "properties": {\n                "location": {\n                    "description": "Current location of user.",\n                    "type": "string"\n                },\n                "Category": {\n                    "description": "Type of business that the user wants to go.",\n                    "type": "string"\n                }\n            },\n            "required": [\n                "location",\n                "Category"\n            ]\n

In [38]:
# Second model call: with the function response in the history, the model
# replies in natural language.
response = ollama.chat(model='calebfahlgren/natural-functions', messages=messages)
response['message']['content']

"I found a cafe for you! It's called The Patio Cafe Santa Barbara. You can find it at 3007 De La Vina St Santa Barbara 93105"

# Evaluate the model

In [8]:
def isTruthful(name, address):
    """Check that a recommended point of interest really exists in the collection.

    Args:
        name: Business name returned by the tool.
        address: Full address returned by the tool.

    Returns:
        True when at least one stored point has exactly this name and
        full address, otherwise False. The verdict is also printed.
    """
    poi_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="name",
                match=models.MatchValue(value=name),
            ),
            models.FieldCondition(
                key="full_address",
                match=models.MatchValue(value=address),
            ),
        ]
    )

    # An existence check needs no similarity ranking, so use a filter-only
    # scroll. This also removes the dependency on the `embeddings` global
    # left behind by the debugging cell.
    matching_points, _next_offset = client.scroll(
        collection_name=collection_name,
        scroll_filter=poi_filter,
        limit=1,
        with_payload=False,
        with_vectors=False,
    )

    if matching_points:
        print("Is Truthful: YES")
        return True

    print("Is Truthful: NO")
    return False

def isNear(location1, location2, minDist):
    """Report whether two coordinates lie within `minDist` kilometres of each other.

    Args:
        location1: First point, passed unchanged to `geodesic`.
        location2: Second point, passed unchanged to `geodesic`.
        minDist: Threshold in kilometres; despite the name, it is the largest
            distance that still counts as near.

    Returns:
        False when the geodesic distance exceeds `minDist`, otherwise True.
        The verdict and the measured distance are also printed.
    """
    km_apart = geodesic(location1, location2).kilometers
    too_far = km_apart > minDist

    if not too_far:
        print(f"Is Near: YES, Min Dist is {minDist}km, POI is {km_apart}km away")
    else:
        print(f"Is Near: NO, Min Dist is {minDist}km, POI is {km_apart}km away")

    return not too_far

def isSemanticlyRelated(userChosenCategory, outputCategories):
    """Check whether the requested category is among the categories of the result.

    The comparison ignores letter case and surrounding whitespace on both
    sides, so 'Cafes', 'cafes' and ' cafes ' are treated as the same category.

    Args:
        userChosenCategory: Category the user asked for.
        outputCategories: Categories attached to the recommended business;
            may be empty or None.

    Returns:
        True when the requested category matches one of the output
        categories, otherwise False. The verdict is also printed.
    """
    related = False
    if isinstance(userChosenCategory, str) and outputCategories:
        wanted = userChosenCategory.strip().lower()
        related = any(
            isinstance(cat, str) and cat.strip().lower() == wanted
            for cat in outputCategories
        )

    if related:
        print("Is Semantically Related: YES")
    else:
        print("Is Semantically Related: NO")
    return related


def generateLLMOutput(message):
    """Run the tool call contained in a model reply.

    Args:
        message: Raw text returned by the model.

    Returns:
        The result of `get_category.run` when the reply holds a
        `get_category` function call, otherwise None. The parsed call is
        printed either way.
    """
    parsed_call = parse_function_call(message)
    print("Input: ", parsed_call)

    # Guard clause: nothing to run unless the reply is a get_category call.
    is_category_call = bool(parsed_call) and parsed_call.get("name") == "get_category"
    if not is_category_call:
        return None

    return get_category.run(parsed_call.get("arguments"))


In [9]:
# Distance threshold in kilometres that evaluateModel passes to isNear.
MIN_DIST_KM = 10

def evaluateModel():
    """Evaluate the function-calling model on a grid of locations and categories.

    For every (location, category) pair the model is asked for a nearby
    business. The tool result is then scored on three criteria:
      * truthfulness - the recommended POI exists in the collection,
      * spatial awareness - the POI lies within MIN_DIST_KM of the user,
      * semantic relatedness - the POI carries the requested category.

    A test case that raises is reported and excluded from all counters.
    Scores are printed as percentages of the completed test cases; nothing
    is returned.
    """
    numTruthful = 0
    numNear = 0
    numSemanticallyRelated = 0
    numTested = 0

    # Each entry is [address, latitude, longitude].
    locations = [
        ["333 Post Street San Francisco, CA 94108", 37.78807981191498, -122.40752040310318],
        ["200 Santa Monica Pier, Santa Monica, CA 90401", 34.010177163525434, -118.49582054557831],
        ["600 S Grand Ave, Los Angeles, CA 90017", 34.04882675762783, -118.25532803208344],
        ["100 E Colorado Blvd, Pasadena, CA 91105", 34.143345946367596, -118.1203236897512],
        ["614 5th Ave, San Diego, CA 92101", 32.71203830031313, -117.16029381678986],
        ["1100 1st St, Napa, CA 94559", 38.29918652974695, -122.28630404541121],
        ["1209 L St, Sacramento, CA 95814", 38.577946595218485, -121.49069350307073],
        ["150 W San Carlos St, San Jose, CA 95113", 37.32924839598231, -121.88897327428607],
        ["101 E Chapman Ave, Orange, CA 92866", 33.78827300739163, -117.8524819455865],
        ["1000 State St, Santa Barbara, CA 93101", 34.421996126601776, -119.70201530323409]
    ]
    categories = ['restaurants', 'hotels', 'churches', 'bars', 'cafes']

    testcases = [(location, category) for location in locations for category in categories]

    for count, ((address, latitude, longitude), category) in enumerate(testcases, start=1):
        print(f"Running testcase {count}/{len(testcases)} --------------------------------------------------------")

        try:
            response = ollama.chat(
                model='calebfahlgren/natural-functions',
                messages=[
                    {'role': 'system', 'content': SYSTEM_PROMPT},
                    {'role': 'user', 'content': f"Im at {address}, any {category} nearby?"},
                ],
            )
            message = response['message']['content']
            output = generateLLMOutput(message)

            print("Query output:", output)

            # Evaluate all three criteria before touching any counter, so a
            # failure half-way through cannot leave the counters inconsistent.
            truthful = near = related = False
            if output:
                truthful = isTruthful(output['name'], output['location'])
                near = isNear((latitude, longitude), (output["latitude"], output['longitude']), MIN_DIST_KM)
                related = isSemanticlyRelated(category, output['categories'])
        except Exception as e:
            print("Error ", e)
        else:
            numTested += 1
            numTruthful += truthful
            numNear += near
            numSemanticallyRelated += related

    # Avoid ZeroDivisionError when every single test case failed.
    if numTested == 0:
        print("No testcase completed successfully; scores cannot be computed.")
        return

    truthfullnessScore = numTruthful / numTested * 100
    print("Truthfulness Score: ", truthfullnessScore, "%")

    spatialAwarenessScore = numNear / numTested * 100
    print("Spatial Awareness Score: ", spatialAwarenessScore, "%")

    semanticRelatedScore = numSemanticallyRelated / numTested * 100
    print("Semantic Relatedness Score: ", semanticRelatedScore, "%")

evaluateModel()

Running testcase 1/50 --------------------------------------------------------
Input:  {'name': 'get_category', 'arguments': {'location': '333 Post Street San Francisco, CA 94108', 'Category': 'Restaurants'}}
Query output: {'name': 'Burger Express', 'location': '1435 San Andres St Santa Barbara 93101', 'latitude': 34.4185644, 'longitude': -119.7161005, 'categories': ['Burgers', 'Restaurants', 'Mexican']}
Is Truthful: YES
Is Near: NO, Min Dist is 10km, POI is 445.50684700345283km away
Is Semantically Related: YES
Running testcase 2/50 --------------------------------------------------------
Input:  {'name': 'get_category', 'arguments': {'location': 'San Francisco, CA 94108', 'Category': 'Hotels'}}
Query output: {'name': 'San Ysidro Ranch', 'location': '900 San Ysidro Ln Santa Barbara 93108', 'latitude': 34.4468773902, 'longitude': -119.623055, 'categories': ['Venues & Event Spaces', 'Hotels & Travel', 'Event Planning & Services', 'Hotels', 'Wedding Planning']}
Is Truthful: YES
Is Near: 