In [18]:
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import json
import re

In [76]:
# Fetch the 4-bit quantized Mistral-7B-Instruct GGUF file from the Hugging Face Hub
# and load it through llama.cpp. `hf_hub_download` hands back the local file path,
# which is what `Llama` expects as `model_path`.
model_name = "bartowski/Mistral-7B-Instruct-v0.3-GGUF"
model_file = "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=model_name, filename=model_file)
model = Llama(model_path=model_path, n_ctx=32000, verbose=False)

In [61]:
def get_location():
    """Mock tool: report a hard-coded location for the user.

    Returns:
        dict: a single ``"location"`` entry in ``CITY, STATE`` form.
    """
    location_info = dict(location="Chicago, IL")
    return location_info
def get_current_weather(location):
    """Mock tool: return a canned weather report.

    Args:
        location: accepted for interface compatibility; the stub ignores it
            and always returns the same report.

    Returns:
        dict: ``"temperature"`` and ``"description"`` strings.
    """
    weather_report = {}
    weather_report["temperature"] = "72 degrees Faranheit"
    weather_report["description"] = "Sunny, no precipitation, slight breeze"
    return weather_report
def calculator(expression):
    """Evaluate a plain arithmetic expression and return ``{"result": value}``.

    The expression text is produced by the language model, so it is untrusted.
    Instead of handing it to ``eval`` (which would execute arbitrary Python),
    the text is parsed with ``ast`` and only numeric literals combined with
    ``+ - * / // % **`` and unary ``+``/``-`` are evaluated.

    Args:
        expression (str): arithmetic expression, e.g. ``"(3*2)+1"``.

    Returns:
        dict: ``{"result": <int or float>}``.

    Raises:
        TypeError: if ``expression`` is not a string.
        SyntaxError: if ``expression`` is not syntactically valid.
        ValueError: if it contains anything other than the supported arithmetic
            (names, calls, attribute access, comparisons, ...).
        ZeroDivisionError: on division or modulo by zero.
    """
    # Local imports keep this cell self-contained; the notebook's import cell
    # does not bring in `ast` or `operator`.
    import ast
    import operator

    if not isinstance(expression, str):
        raise TypeError(
            f"expression must be a string, got {type(expression).__name__}"
        )

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _evaluate(node):
        # Numeric literal (bool is excluded even though it subclasses int).
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
        elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](
                _evaluate(node.left), _evaluate(node.right)
            )
        elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        raise ValueError(
            f"unsupported element in arithmetic expression: {type(node).__name__}"
        )

    # `eval` tolerates surrounding whitespace; strip it so ast.parse does too.
    tree = ast.parse(expression.strip(), mode="eval")
    return {"result": _evaluate(tree.body)}
def internet_search(query):
    """Mock tool: return a canned search result.

    Args:
        query: accepted for interface compatibility; the stub ignores it.

    Returns:
        dict: a single ``"result"`` entry holding the canned answer text.
    """
    canned_answer = (
        "Some fun activities to do are going to the park, "
        "going out to eat with your friends, and watching the sunset"
    )
    return {"result": canned_answer}
# Tool registry: maps the tool name the model emits in "tool_name" to the
# Python callable that implements it. Each key equals the function's own name.
actions = {
    tool.__name__: tool
    for tool in (get_location, get_current_weather, calculator, internet_search)
}

In [66]:
# System prompt for the THOUGHT / RESPONSE / PAUSE tool-calling loop.
# Every tool schema line below is valid JSON on purpose: the model imitates the
# formatting it is shown, and unquoted keys or unbalanced braces in the schemas
# lead it to emit RESPONSE payloads that json.loads() rejects.
system_prompt = """
You are an AI assistant with only 2 jobs:
1) call a tool
2) respond to the user

Only perform one of these actions at a time. Either call a tool, or respond to the user. 

Your response must always include a THOUGHT and a RESPONSE keyword, and then you must follow it with the PAUSE keyword.

All of your outputs should be in JSON format. If you call a function, then output your response in the following JSON schema:
RESPONSE: {"tool_name": "the name of the tool", "parameters": {"parameter1": "value1", "parameter2": "value2"}}

If you do not call a function and answer the user, then output your response in the following JSON schema:
RESPONSE: {"message": "the message to display to the user"}

DO NOT include "message" if you call a tool. DO NOT include "tool_name" or "parameters" if you do not call a tool.

Call AT MOST ONE tool at a time.

If you call a tool, you will be provided with a RESULT that provides the result of the tool call.

Here are some examples:

###EXAMPLE 1:
Provided tools: 
{"name": "get_location", "description": "Get the user's location", "parameters": {"type": "object", "properties": {}}}

User: What is my location?

THOUGHT: I need to get the user's location.
RESPONSE: {"tool_name": "get_location", "parameters": {}}
PAUSE

RESULT: {"location": "New York City, NY"}

THOUGHT: Now that I have the user's location, I can respond to the user
RESPONSE: {"message": "Your current location is New York City."}
PAUSE
###END EXAMPLE 1

###EXAMPLE 2:
Provided tools:
{"name": "get_location", "description": "Get the user's location", "parameters": {"type": "object", "properties": {}}}
{"name": "internet_search", "description": "Perform an internet search and returns the result of the search", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "the question to ask the search engine"}}, "required": ["query"]}}

User: What are some places to eat nearby?

THOUGHT: To recommend places to eat, I first need to get the user's location
RESPONSE: {"tool_name": "get_location", "parameters": {}}
PAUSE

RESULT: {"location": "Irvine, CA"}

THOUGHT: Now that I know the user lives in Irvine, CA, I now need to perform an internet search for restaurants in Irvine.
RESPONSE: {"tool_name": "internet_search", "parameters": {"query": "Restaurants in Irvine, CA"}}
PAUSE

RESULT: {"search_result": "Some restaurants include: In N' Out, Urban Plates, and iCafe"}

THOUGHT: I have enough information to respond to the user.
RESPONSE: {"message": "In Irvine, CA, some restaurant recommendations are In N' Out, Urban Plates, and iCafe."}
PAUSE
###END EXAMPLE 2



PROVIDED TOOLS:
{"name": "get_location", "description": "Get the user's location", "parameters": {"type": "object", "properties": {}}}
{"name": "get_current_weather", "description": "Get the current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the weather from. Enter your answer in the format CITY, STATE"}}, "required": ["location"]}}
{"name": "calculator", "description": "Returns the result to the mathematical expression", "parameters": {"type": "object", "properties": {"expression": {"type": "string", "description": "The mathematical expression to evaluate. Do not use words, only provide arithmetic expressions, e.g. (3*2)+1"}}, "required": ["expression"]}}
{"name": "internet_search", "description": "Perform an internet search and returns the result of the search. The search engine does not have access to current location or current weather. All external information must be provided to the search engine. You may need to call other tools to get the user's weather or location.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "the question to ask the search engine"}}, "required": ["query"]}}
"""

In [77]:
question = "What are some fun activities to do with friends today based off my current location and weather?"
prompt = f"""
[SYSTEM]
{system_prompt}

[USER]
{question}

[ASSISTANT]
THOUGHT: 
"""
print(prompt)

# Agent loop: each step asks the model for one THOUGHT/RESPONSE turn. The
# RESPONSE is either a final {"message": ...} (loop ends) or a tool call whose
# result is appended to the prompt as a RESULT line for the next step.
# A step whose RESPONSE is missing or unparseable leaves the prompt unchanged,
# so the next step simply samples a fresh attempt (temperature=1).
response_regex = "RESPONSE: (.*)"
for step in range(10):
    print(step)
    output = model(
        prompt,
        max_tokens=512,
        stop=["PAUSE", "Pause", "THOUGHT", "Thought", "RESULT", "Result"],
        temperature=1,
    )
    output_str = output["choices"][0]["text"]
    foundResponse = re.search(response_regex, output_str)
    if not foundResponse:
        continue

    try:
        response_json = json.loads(foundResponse.group(1))
    except json.JSONDecodeError as err:
        print(f"Error: RESPONSE is not valid JSON ({err})")
        continue
    if not isinstance(response_json, dict):
        print("Error: RESPONSE is not a JSON object")
        continue

    # Keep only the text up to the end of the RESPONSE line. If the model ran
    # past it without hitting a stop word, the extra text must not be fed back.
    output_str = output_str[:foundResponse.end()]

    if "message" in response_json:
        print(output_str)
        prompt += output_str
        print(response_json["message"])
        break

    if "tool_name" not in response_json:
        continue

    print(output_str)
    prompt += output_str
    tool_name = response_json["tool_name"]
    params = response_json.get("parameters") or {}
    tool = actions.get(tool_name) if isinstance(tool_name, str) else None
    if tool is None:
        tool_result = {"error": f"unknown tool: {tool_name}"}
    else:
        try:
            # Parameters arrive as a JSON object keyed by parameter name, so
            # they are passed as keyword arguments, not as one positional dict.
            tool_result = tool(**params)
        except Exception as err:
            # Deliberately broad: whatever a tool raises (bad or missing
            # arguments included) is reported back to the model as the RESULT
            # so it can correct itself instead of aborting the run.
            tool_result = {"error": f"{type(err).__name__}: {err}"}

    # Serialize as JSON (double quotes). A Python dict repr uses single quotes,
    # which the model copies into its next RESPONSE and json.loads then rejects.
    prompt += f"\nRESULT: {json.dumps(tool_result, default=str)}\nTHOUGHT: "
    print(tool_result)
else:
    print("No final answer was produced within 10 steps.")

# Sometimes the model doesn't stop at PAUSE, so it keeps generating.
# To optimize, I can force the model to break down the prompt into a numbered plan first.
# I might also need my own string-to-JSON conversion tool.
# Instead of using regex, I would probably want to cut off the string after the response automatically.


[SYSTEM]

You are an AI assistant with only 2 jobs:
1) call a tool
2) respond to the user

Only perform on of these actions at a time. Either call a tool, or respond to the user. 

Your response must always include a THOUGHT and a RESPONSE keyword, and then you must follow it with the PAUSE keyword.

All of your outputs should be in JSON format. If you call a function, then output your response in the following JSON schema:
RESPONSE: {"tool_name": "the name of the tool", "parameters": {"parameter1": "value1", "parameter2": "value2"}}

If you do not call a function and answer the user, then output your response in the following JSON schema:
RESPONSE: {"message": "the message to display to the user"}

DO NOT include "message" if you call a tool. DO NOT include "tool_name"or "parameters" if you do not call a tool.

Call AT MOST ONE tool at a time.

If you call a tool, you will be provided with a RESULT that provides the result of the tool call.

Here are some examples:

###EXAMPLE 1:
Pro

In [52]:
"""
0
Llama.generate: prefix-match hit

llama_print_timings:        load time =   27266.78 ms
llama_print_timings:      sample time =      35.65 ms /   120 runs   (    0.30 ms per token,  3365.96 tokens per second)
llama_print_timings: prompt eval time =   44158.99 ms /     2 tokens (22079.49 ms per token,     0.05 tokens per second)
llama_print_timings:        eval time =   21672.07 ms /   119 runs   (  182.12 ms per token,     5.49 tokens per second)
llama_print_timings:       total time =   22018.36 ms /   121 tokens
Llama.generate: prefix-match hit
TESTING:1. To answer this user's question, I need to first get the user's location using the "get_location" tool.
2. Then, I will call the "get_current_weather" tool with the user's location to determine the current weather.
3. Next, I will perform an internet search based on activities in the user's location that are appropriate for their current weather using the "internet_search" tool.
RESPONSE: {"tool_name": "get_location", "parameters": {}}

1. To answer this user's question, I need to first get the user's location using the "get_location" tool.
2. Then, I will call the "get_current_weather" tool with the user's location to determine the current weather.
3. Next, I will perform an internet search based on activities in the user's location that are appropriate for their current weather using the "internet_search" tool.
RESPONSE: {"tool_name": "get_location", "parameters": {}}

{'location': 'Chicago, IL'}
1

llama_print_timings:        load time =   27266.78 ms
llama_print_timings:      sample time =      30.00 ms /    75 runs   (    0.40 ms per token,  2500.00 tokens per second)
llama_print_timings: prompt eval time =    9222.70 ms /    19 tokens (  485.41 ms per token,     2.06 tokens per second)
llama_print_timings:        eval time =   15373.37 ms /    74 runs   (  207.75 ms per token,     4.81 tokens per second)
llama_print_timings:       total time =   24720.90 ms /    93 tokens
Llama.generate: prefix-match hit
TESTING:
I got the user's location as Chicago, IL.
Now I will call the "get_current_weather" tool to get the weather for the user in Chicago, IL.
RESPONSE: {"tool_name": "get_current_weather", "parameters": {"location": 'Chicago, IL'}}


Error
2

llama_print_timings:        load time =   27266.78 ms
llama_print_timings:      sample time =      27.73 ms /   100 runs   (    0.28 ms per token,  3605.94 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     0 tokens (-nan(ind) ms per token, -nan(ind) tokens per second)
llama_print_timings:        eval time =   16326.49 ms /   100 runs   (  163.26 ms per token,     6.13 tokens per second)
llama_print_timings:       total time =   16460.25 ms /   100 tokens
Llama.generate: prefix-match hit
TESTING:
1. The user's location is Chicago, IL. Now I need to get the current weather in Chicago using the "get_current_weather" tool.
2. Once I have the weather information for Chicago, I will perform an internet search for activities that are appropriate based on the weather in Chicago.
RESPONSE: {"tool_name": "get_current_weather", "parameters": {"location": "Chicago, IL"}}


1. The user's location is Chicago, IL. Now I need to get the current weather in Chicago using the "get_current_weather" tool.
2. Once I have the weather information for Chicago, I will perform an internet search for activities that are appropriate based on the weather in Chicago.
RESPONSE: {"tool_name": "get_current_weather", "parameters": {"location": "Chicago, IL"}}

{'temperature': '72 degrees Faranheit', 'description': 'Sunny, no precipitation, slight breeze'}
3

llama_print_timings:        load time =   27266.78 ms
llama_print_timings:      sample time =      36.38 ms /   130 runs   (    0.28 ms per token,  3573.20 tokens per second)
llama_print_timings: prompt eval time =    6128.32 ms /    36 tokens (  170.23 ms per token,     5.87 tokens per second)
llama_print_timings:        eval time =   23027.45 ms /   129 runs   (  178.51 ms per token,     5.60 tokens per second)
llama_print_timings:       total time =   29336.79 ms /   165 tokens
Llama.generate: prefix-match hit
TESTING:
1. The weather in Chicago is sunny and 72 degrees Fahrenheit with a slight breeze. Based on this, I will perform an internet search for outdoor activities that are appropriate for this weather.
2. I will perform the "internet_search" tool call with a query asking for outdoor activities to do in Chicago when it's sunny and 72 degrees Fahrenheit.
RESPONSE: {"tool_name": "internet_search", "parameters": {"query": "Outdoor activities to do in Chicago when the weather is sunny and 72 degrees"}}


1. The weather in Chicago is sunny and 72 degrees Fahrenheit with a slight breeze. Based on this, I will perform an internet search for outdoor activities that are appropriate for this weather.
2. I will perform the "internet_search" tool call with a query asking for outdoor activities to do in Chicago when it's sunny and 72 degrees Fahrenheit.
RESPONSE: {"tool_name": "internet_search", "parameters": {"query": "Outdoor activities to do in Chicago when the weather is sunny and 72 degrees"}}

{'result': 'Some fun activities to do are going to the park, going out to eat with your friends, and watching the sunset'}
4

llama_print_timings:        load time =   27266.78 ms
llama_print_timings:      sample time =      30.62 ms /   114 runs   (    0.27 ms per token,  3723.18 tokens per second)
llama_print_timings: prompt eval time =    3338.15 ms /    39 tokens (   85.59 ms per token,    11.68 tokens per second)
llama_print_timings:        eval time =   19312.65 ms /   113 runs   (  170.91 ms per token,     5.85 tokens per second)
llama_print_timings:       total time =   22808.17 ms /   152 tokens
TESTING:
1. The user's current location is Chicago, IL and the weather is sunny and 72 degrees Fahrenheit. Based on this, some fun activities to do include going to the park, going out to eat with friends, and watching the sunset. I can now respond to the user with these activity recommendations.
RESPONSE: {"message": "Some fun outdoor activities for today based on your current location and weather are going to the park, going out to eat with friends, or watching the sunset."}


1. The user's current location is Chicago, IL and the weather is sunny and 72 degrees Fahrenheit. Based on this, some fun activities to do include going to the park, going out to eat with friends, and watching the sunset. I can now respond to the user with these activity recommendations.
RESPONSE: {"message": "Some fun outdoor activities for today based on your current location and weather are going to the park, going out to eat with friends, or watching the sunset."}

Some fun outdoor activities for today based on your current location and weather are going to the park, going out to eat with friends, or watching the sunset.
"""
# this is a very good response that it gave;

THOUGHT: To find activities for today, I will first need to check the current weather. Then I will perform an internet search using a query based on the activities that are popular in certain weather conditions.
RESPONSE: {"tool_name": "get_current_weather", parameters: {"location": "Irvine, CA"}}



{'id': 'cmpl-0dddebd3-cffe-419a-a5b2-2620ee4b9063',
 'object': 'text_completion',
 'created': 1719252627,
 'model': 'C:\\Users\\jaide\\.cache\\huggingface\\hub\\models--bartowski--Mistral-7B-Instruct-v0.3-GGUF\\snapshots\\61fd4167fff3ab01ee1cfe0da183fa27a944db48\\Mistral-7B-Instruct-v0.3-Q4_K_M.gguf',
 'choices': [{'text': 'THOUGHT: To find fun activities, I need to perform an internet search.\nRESPONSE: {"tool_name": "internet_search", "parameters": {"query": "fun activities for friends near me"}}\n',
   'index': 0,
   'logprobs': None,
   'finish_reason': 'stop'}],
 'usage': {'prompt_tokens': 1078,
  'completion_tokens': 53,
  'total_tokens': 1131}}

In [14]:
# Sanity check: a message containing an apostrophe (In N' Out) is still valid
# JSON as long as the string itself is delimited by double quotes.
res = json.loads(
    '{"message": "In Irvine, CA, some restaurant recommendations are '
    "In N' Out, Urban Plates, and iCafe.\"}"
)
print(res)

{'message': "In Irvine, CA, some restaurant recommendations are In N' Out, Urban Plates, and iCafe."}
