Download the precompiled `llama-server` binary from the following GitHub repository:

In [None]:
!git clone https://github.com/jean-rl/llama-cpp-colab.git
%cd llama-cpp-colab
!chmod +x llama-server

Then, download model weights in GGUF format

In [None]:
!wget https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf

Then open a terminal and run the following command.

```
./llama-server -m Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf --port 9090 -v --verbose-prompt --jinja -ngl 9999
```

In [None]:
import openai

In [None]:
# Point the OpenAI SDK at the local llama-server (port 9090, as in the command
# above). The key is a placeholder, as its text says.
client = openai.OpenAI(api_key="sk-no-key-required", base_url="http://127.0.0.1:9090/v1")

# Send a short multi-turn conversation that ends on a user turn.
completion = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "Hi, how are you?"},
        {"role": "assistant", "content": "I am doing well, thank you! How can I assist you today?"},
        {"role": "user", "content": "Can you tell me a joke?"},
    ],
    model="gpt-3.5-turbo",
)

In [None]:
completion

In [None]:
print(completion.__verbose['prompt'])

In [None]:
print(completion.choices[0].message.content)

#### Completions

In [None]:
import openai

In [None]:
# Client for the local llama-server endpoint; the key is a placeholder.
client = openai.OpenAI(api_key="sk-no-key-required", base_url="http://127.0.0.1:9090/v1")

# Single-turn request: no tools are offered, so the model can only answer in text.
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "What is the weather today in Guanajuato?"}],
    model="gpt-3.5-turbo",
)

In [None]:
completion

ChatCompletion(id='chatcmpl-3vaysjro2tmqduo5xvr28d', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='I\'m not able to access real time information, but I can suggest a few options to help you find out the current weather in Guanajuato:\n\n1. Check online weather websites: You can check websites such as accuweather.com, wunderground.com, or weather.com for the current weather conditions in Guanajuato.\n2. Use a search engine: Simply type "current weather in Guanajuato" in your preferred search engine and you should get the latest information on temperature, humidity, wind speed, etc.\n3. Check social media or local news sources: Many cities have local news sources or social media accounts that provide up-to-date information on the current weather.\n\nKeep in mind that the weather can change quickly, so it\'s always a good idea to check multiple sources for the most accurate and up-to-date information.\n\nAs of my knowledge cutoff in 2

In [None]:
print(completion.choices[0].message.content)

In [None]:
# Multi-turn request: earlier user/assistant turns are passed back as history.
completion = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "Hi, how are you?"},
        {"role": "assistant", "content": "I am doing well, thank you! How can I assist you today?"},
        {"role": "user", "content": "Can you tell me a joke?"},
    ],
    model="gpt-3.5-turbo",
)

In [None]:
print(completion.choices[0].message.content)

#### Function calling

In [None]:
# Tool description passed to the model through the `tools` request parameter.
# It declares one function, `get_weather`, taking a single required string
# argument named `location`.
get_weather_schema = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current temperature for a given location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "City and country e.g. Bogotá, Colombia",
                },
            },
            "required": ["location"],
        },
    },
}

This is the actual function that will be called when the model requests the tool.

In [None]:
def get_weather(location):
    """Return a canned temperature report for ``location``.

    Stand-in for a real weather API call: it prints a progress message and
    always reports 25°C, whatever the location is.
    """
    print(f"Getting weather for {location}...")
    report = f"The current temperature in {location} is 25°C."
    return report

In [None]:
# Conversation so far: only the user's question. No system prompt is sent with
# this first request; a later cell prepends one before the follow-up call.
chat_history = [{"role": "user", "content": "Hi, what is the weather today in Bogota?"}]

In [None]:
# First request: offer the get_weather tool so the model can ask for it.
completion = client.chat.completions.create(
    messages=chat_history,
    tools=[get_weather_schema],
    model="gpt-3.5-turbo",
)

In [None]:
print(completion.choices[0].message.content)

In [None]:
print(completion.choices[0].message.tool_calls[0].function)

In [None]:
import ast
import json

# Run the tool the model asked for, if any. Per the OpenAI chat-completions
# format, `function.arguments` is a JSON-encoded string, so it is parsed with
# json.loads: ast.literal_eval only understands Python literals and fails on
# JSON-only tokens such as true/false/null.
if completion.choices[0].message.tool_calls:
    if completion.choices[0].message.tool_calls[0].function.name == 'get_weather':
        response = get_weather(json.loads(completion.choices[0].message.tool_calls[0].function.arguments)['location'])
        print(response)

In [None]:
# Wrap the tool result as {"output": ...} before sending it back to the model.
# Without this wrapper the model did not respond correctly. The Llama 3.1
# prompt-format documentation uses the same shape:
# https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_1/
response = str({"output": response})

In [None]:
response

In [None]:
# Record what the model needs to see next: its own tool request, followed by
# the wrapped result of running that tool.
chat_history.extend([
    {"role": "assistant", "tool_calls": completion.choices[0].message.tool_calls},
    {"role": "tool", "content": response},
])

In [None]:
chat_history

In [None]:
# Put a system prompt in front of the existing history (a new list is built;
# the previous list object is not modified).
chat_history = [
    {
        "role": "system",
        "content": "You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the orginal user question.",
    },
    *chat_history,
]

In [None]:
chat_history

In [None]:
# Follow-up request with the tool result in the history. `tools` is left out
# on purpose: the model also answers when it is included, but the schema then
# contaminates the prompt and could harm performance.
completion = client.chat.completions.create(
    messages=chat_history,
    model="gpt-3.5-turbo",
)

In [None]:
print(completion.choices[0].message.content)

In [None]:
# The preceding request was sent without `tools`, so `tool_calls` may be None
# here; guard the lookup instead of failing with a TypeError on `[0]`.
if completion.choices[0].message.tool_calls:
    print(completion.choices[0].message.tool_calls[0].function)
else:
    print("No tool calls in this response.")

---

#### Agent Architecture

In [None]:
class Agent:
    """Minimal chat agent that keeps the whole conversation in memory.

    Each call to ``listen`` records the user message, asks the model for a
    reply through the module-level ``client``, prints the reply and stores it.
    """

    def __init__(self):
        # Message dicts with "role" and "content" keys, oldest first.
        self.chat_history = []

    def listen(self, message):
        """Record ``message`` as a user turn, then generate the reply."""
        user_turn = {'role': 'user', 'content': message}
        self.chat_history.append(user_turn)
        self._think()

    def _think(self):
        """Request a completion for the current history; print and store it."""
        reply = client.chat.completions.create(
            messages=self.chat_history,
            model='gpt-3.5-turbo',
        ).choices[0].message.content
        self._say(reply)
        self.chat_history.append({"role": "assistant", "content": reply})

    def _say(self, message):
        """Print the agent's answer."""
        print(f"[INFO] The agent answered: {message}")

In [None]:
marvin = Agent()

In [None]:
marvin.listen("Hello, how are you?")

In [None]:
marvin.chat_history

In [None]:
marvin.listen("Can you tell me a joke?")

In [None]:
marvin.chat_history

In [None]:
class Agent:
    """Chat agent with an optional persona.

    When a persona is given it becomes the first message of the history, with
    the ``system`` role. Replies are requested through the module-level
    ``client``, printed and stored.
    """

    def __init__(self, persona=None):
        # A falsy persona (None or "") means no system message at all.
        self.chat_history = [{"role": "system", "content": persona}] if persona else []

    def listen(self, message):
        """Record ``message`` as a user turn, then generate the reply."""
        user_turn = {'role': 'user', 'content': message}
        self.chat_history.append(user_turn)
        self._think()

    def _think(self):
        """Request a completion for the current history; print and store it."""
        reply = client.chat.completions.create(
            messages=self.chat_history,
            model='gpt-3.5-turbo',
        ).choices[0].message.content
        self._say(reply)
        self.chat_history.append({"role": "assistant", "content": reply})

    def _say(self, message):
        """Print the agent's answer."""
        print(f"[INFO] The agent answered: {message}")

In [None]:
warren = Agent(persona="You are Warren Buffet, the famous investor. You are wise and give financial advice.")

In [None]:
warren.listen("Hello, what advice in a short sentence can you give me about investing?")

In [None]:
warren.chat_history

In [None]:
warren.listen("What is the current stock price of Apple?")

In [None]:
import yfinance as yf
def get_stock_price(ticker: str):
    """Look up a quote snapshot for ``ticker`` through yfinance.

    Returns a dict holding the upper-cased ticker plus the ``currentPrice``,
    ``currency`` and ``marketCap`` entries of ``yf.Ticker(ticker).info``. An
    entry that is missing from ``info`` comes back as ``None``.
    """
    quote = yf.Ticker(ticker).info
    snapshot = {"ticker": ticker.upper()}
    # Output key -> key to read from the yfinance info mapping.
    wanted = (
        ("price", "currentPrice"),
        ("currency", "currency"),
        ("marketCap", "marketCap"),
    )
    for out_key, info_key in wanted:
        snapshot[out_key] = quote.get(info_key)
    return snapshot

In [None]:
get_stock_price("AAPL")

In [None]:
# Tool description passed to the model through the `tools` request parameter.
# It declares one function, `get_stock_price`, taking a single required string
# argument named `ticker`.
get_stock_price_schema = {
    "type": "function",
    "function": {
        "name": "get_stock_price",
        "description": "Get the current stock price and related financial information for a given ticker symbol.",
        "parameters": {
            "type": "object",
            "properties": {
                "ticker": {
                    "type": "string",
                    "description": "Stock ticker symbol (e.g., AAPL, TSLA, MSFT).",
                },
            },
            "required": ["ticker"],
        },
    },
}

Let it call tools

In [None]:
class Agent:
    """Chat agent skeleton that can offer tools to the model.

    This version sends the tool schemas with each request and hands the
    completion to ``_decide``, whose four possible outcomes are still
    placeholders that do nothing.
    """

    def __init__(self, persona=None, tools=None):
        """Start a conversation, optionally with a system prompt and tools.

        Args:
            persona: System prompt placed first in the history when truthy.
            tools: List of tool schemas sent with every request, or ``None``
                to send no tools.
        """
        self.chat_history = []
        if persona:
            self.chat_history.append({"role": "system", "content": persona})
        # Always define the attribute: _think reads it, so leaving it unset
        # when no tools are given would raise AttributeError there.
        self.tools = tools if tools else None

    def listen(self, message):
        """Record ``message`` as a user turn, then query the model."""
        self.chat_history.append({'role': 'user', 'content': message})
        self._think()

    def _think(self):
        """Request a completion for the current history and route it."""
        request = {"model": 'gpt-3.5-turbo', "messages": self.chat_history}
        # Only send `tools` when there are some, so an agent without tools
        # behaves like a plain chat agent.
        if self.tools:
            request["tools"] = self.tools
        completion = client.chat.completions.create(**request)
        self._decide(completion)

    def _decide(self, completion):
        """Classify the completion; every outcome is still a no-op here."""
        message = completion.choices[0].message
        # Normalise empty values ("" or []) to None so each case is one test.
        response = message.content if message.content else None
        tool_call = message.tool_calls if message.tool_calls else None
        if response is None and tool_call is None:
            pass  # Neither text nor a tool call.
        elif response is not None and tool_call is not None:
            pass  # Text and a tool call at the same time.
        elif response is not None:
            pass  # Plain text answer.
        else:
            pass  # Tool call only.

    def _say(self, message):
        """Print the agent's answer."""
        print(f"[INFO] The agent answered: {message}")

In [None]:
class Agent:
    """Chat agent that offers tools and reports what the model decided.

    A plain-text reply is printed and stored in ``chat_history``. A tool call
    is only printed for inspection in this version: it is neither executed
    nor stored.
    """

    def __init__(self, persona=None, tools=None):
        """Start a conversation, optionally with a system prompt and tools.

        Args:
            persona: System prompt placed first in the history when truthy.
            tools: List of tool schemas sent with every request, or ``None``
                to send no tools.
        """
        self.chat_history = []
        if persona:
            self.chat_history.append({"role": "system", "content": persona})
        # Always define the attribute: _think reads it, so leaving it unset
        # when no tools are given would raise AttributeError there.
        self.tools = tools if tools else None

    def listen(self, message):
        """Record ``message`` as a user turn, then query the model."""
        self.chat_history.append({'role': 'user', 'content': message})
        self._think()

    def _think(self):
        """Request a completion for the current history and route it."""
        request = {"model": 'gpt-3.5-turbo', "messages": self.chat_history}
        # Only send `tools` when there are some, so an agent without tools
        # behaves like a plain chat agent.
        if self.tools:
            request["tools"] = self.tools
        completion = client.chat.completions.create(**request)
        self._decide(completion)

    def _decide(self, completion):
        """Act on the completion: answer in text, or report the tool call."""
        message = completion.choices[0].message
        # Normalise empty values ("" or []) to None so each case is one test.
        response = message.content if message.content else None
        tool_call = message.tool_calls if message.tool_calls else None
        # The four cases are mutually exclusive, hence a single if/elif chain.
        if response is not None and tool_call is None:
            self._say(response)
            self.chat_history.append({"role": "assistant", "content": response})
        elif response is None and tool_call is not None:
            print("[DEBUG] Tool call detected:")
            print(tool_call)
        elif response is not None and tool_call is not None:
            print("[DEBUG] Both response and tool call detected, which is unexpected.")
            print("[DEBUG] Response:", response)
            print("[DEBUG] Tool call:", tool_call)
        else:
            print("[DEBUG] No response or tool call detected, which is unexpected.")

    def _say(self, message):
        """Print the agent's answer."""
        print(f"[INFO] The agent answered: {message}")

In [None]:
warren = Agent(persona="You are Warren Buffet, the famous investor. You are wise and give financial advice.", tools=[get_stock_price_schema])

In [None]:
warren.listen("Hello, how are you?")

In [None]:
warren.chat_history

In [None]:
warren.listen("What is the current stock price of Tesla?")

Use the tool call response

In [None]:
import ast

import json


class Agent:
    """Chat agent that can call tools and feed their results back to the model.

    A plain-text reply ends the turn: it is printed and stored. A tool call is
    stored, every requested tool is run, each result is appended as a ``tool``
    message, and the model is queried again so it can use the results.
    """

    def __init__(self, persona=None, tools=None):
        """Start a conversation, optionally with a system prompt and tools.

        Args:
            persona: System prompt placed first in the history when truthy.
            tools: List of tool schemas sent with every request, or ``None``
                to send no tools.
        """
        self.chat_history = []
        if persona:
            self.chat_history.append({"role": "system", "content": persona})
        # Always define the attribute: _think reads it, so leaving it unset
        # when no tools are given would raise AttributeError there.
        self.tools = tools if tools else None

    def listen(self, message):
        """Record ``message`` as a user turn, then query the model."""
        self.chat_history.append({'role': 'user', 'content': message})
        self._think()

    def _think(self):
        """Request a completion for the current history and route it."""
        request = {"model": 'gpt-3.5-turbo', "messages": self.chat_history}
        # Only send `tools` when there are some, so an agent without tools
        # behaves like a plain chat agent.
        if self.tools:
            request["tools"] = self.tools
        completion = client.chat.completions.create(**request)
        self._decide(completion)

    def _use_tool(self, tool_call):
        """Run the tool named in ``tool_call`` and return its wrapped result.

        The result is returned as ``str({"output": ...})``. An unknown tool
        name or unusable arguments produce an error message in the same
        wrapper instead of ``None`` or an exception, so the history always
        receives a string the model can read.
        """
        name = tool_call.function.name
        if name != 'get_stock_price':
            return str({"output": f"Unknown tool: {name}"})
        try:
            # Arguments are a JSON-encoded string; ast.literal_eval would fail
            # on JSON-only tokens such as true/false/null.
            ticker = json.loads(tool_call.function.arguments)['ticker']
        except (json.JSONDecodeError, KeyError, TypeError) as err:
            return str({"output": f"Invalid arguments for {name}: {err}"})
        return str({"output": get_stock_price(ticker)})

    def _decide(self, completion):
        """Act on the completion: answer in text, or run the requested tools."""
        message = completion.choices[0].message
        # Normalise empty values ("" or []) to None so each case is one test.
        response = message.content if message.content else None
        tool_calls = message.tool_calls if message.tool_calls else None
        if response is not None and tool_calls is None:
            # Final answer: the turn ends here. Querying the model again
            # without new input would make it answer its own answer, with no
            # bound on the recursion.
            self._say(response)
            self.chat_history.append({"role": "assistant", "content": response})
        elif response is None and tool_calls is not None:
            print("[DEBUG] Tool call detected:")
            print(tool_calls)
            self.chat_history.append({"role": "assistant", "tool_calls": tool_calls})
            # Answer every requested call, not only the first, so no call in
            # the stored assistant message is left without a result.
            for call in tool_calls:
                print("[DEBUG] Using tool...")
                tool_response = self._use_tool(call)
                tool_message = {"role": "tool", "content": tool_response}
                # Tie the result to its call when the call carries an id.
                call_id = getattr(call, "id", None)
                if call_id:
                    tool_message["tool_call_id"] = call_id
                self.chat_history.append(tool_message)
                print("[DEBUG] Tool response:", tool_response)
            # Let the model turn the tool results into an answer.
            self._think()
        elif response is not None and tool_calls is not None:
            print("[DEBUG] Both response and tool call detected, which is unexpected.")
            print("[DEBUG] Response:", response)
            print("[DEBUG] Tool call:", tool_calls)
        else:
            print("[DEBUG] No response or tool call detected, which is unexpected.")

    def _say(self, message):
        """Print the agent's answer."""
        print(f"[INFO] The agent answered: {message}")

In [None]:
warren = Agent(persona="You are Warren Buffet, the famous investor. You are wise and give financial advice.", tools=[get_stock_price_schema])

In [None]:
warren.listen("What is the current stock price of Tesla?")

In [None]:
warren.chat_history

In [None]:
warren.listen("And what about Microsoft?")