# Multiple Agents Interacting

In [3]:
import llama_cpp

In [17]:
class LLM: 

    def __init__(self, model_type: str, info: dict):
        """Sets two class variables for the history and the llm to call.

        Parameters
        ----------
        info: dict - any information that might be needed for the model
        model_type: str - self-explanatory
        """
        self.call_llm = None
        self.history = self._get_key('history', info)
        match model_type:
            case 'local':
                path = self._get_key('path', info)
                llm = llama_cpp.Llama(model_path=path, max_tokens=10, temperature=0.6)
                self.update_history = lambda prompt: self.history + f'\nUser: {prompt}\n'
                self.call_llm = lambda history: llm(history)

            case 'OpenAPI':
                raise NotImplementedError("OpenAPI TODO")
    
    def _get_key(self, key: str, info: dict):
        """Gets the key from the dictionary or raises an exception.
        """
        if key not in info:
            raise Exception(f'You need to specify {key} for this model type.')
        return info[key]
            
    def __call__(self, prompt: str):
        """Overriden operator to aide in ease of calling the model. For example,
        if llm = LLM(), then you can call llm(<some string here>) and this method will be called.
        This method will get the next model response (modeling a chat bot), add it to the history,
        and then returns the whole history.
        """
        self.history = self.update_history(prompt)
        model_response = self.call_llm(self.history)['choices'][0]['text']
        model_response = self._parse_response(model_response)
        self.history += model_response
        return self.history

    def _parse_response(self, response: str):
        """Very specific for local model - not sure if this will be necessary in the future.
        """
        answer = ""
        for token in response:
            if token == '\n': break
            answer += token
        return answer

If the cell below raises an error, check that the model path is correct. The `local-llm` branch of the GitHub repository has a notebook that walks you through getting the model running on your machine.

In [13]:
# Build a chat wrapper around a local model file and seed the transcript
# with a short ground-truth preamble plus one example exchange.
llm = LLM(
    'local',
    {
        'path': "../open_llama_3b/ggml-model-f16.gguf",
        'history': """Ground Truth: The bot is a helpful assistant.\n\nUser: I might ask you for help later.\nBot:Okay, I will help you.""",
    },
)


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [14]:
# Ask a first question; the call returns the full transcript so far.
llm("Will you be helpful?")

'Ground Truth: The bot is a helpful assistant.\n\nUser: I might ask you for help later.\nBot:Okay, I will help you.\nUser: Will you be helpful?\nBot: Yes'

In [15]:
# Follow-up question on the same instance: the earlier exchange is still in the transcript.
llm("Are you sure?")

Llama.generate: prefix-match hit


'Ground Truth: The bot is a helpful assistant.\n\nUser: I might ask you for help later.\nBot:Okay, I will help you.\nUser: Will you be helpful?\nBot: Yes\nUser: Are you sure?\nBot: I am sure.'