In [1]:
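# Optional one-time setup: (re)build llama-cpp-python with cuBLAS enabled.
# Uncomment the three lines below and run this cell if the package is not installed yet.
# %env CMAKE_ARGS=-DLLAMA_CUBLAS=on
# %env FORCE_CMAKE=1
# %pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --no-clean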

In [1]:
import llama_cpp
import json
from textwrap import dedent
from inspect import signature

# Create the empty model slot only if this kernel does not have one yet, so that
# re-running this cell does not throw away an already loaded model.
if 'GLOBAL_LLM_INSTANCE' not in globals():
    print('Initializing Global LLM Instance')
    GLOBAL_LLM_INSTANCE = None

Initializing Global LLM Instance


In [71]:
class LLM:
    '''
    Chat wrapper with its own history on top of one shared llama.cpp model.

    The model is stored in the module-level `GLOBAL_LLM_INSTANCE` and is created the
    first time an `LLM` object is constructed; every `LLM` object reuses it. Each
    object keeps its own chat history, temperature and repeat penalty.

    TODO

    * Adapt this to more types of underlying models (mistral, openai, etc)
    * Higher forms of memory
      * observation
      * reflection
      * etc
    '''

    # Settings used when the shared model is created for the first time.
    MODEL_PATH = '/data/ai_club/llms/llama-2-7b-chat.Q5_K_M.gguf'
    N_CTX = 4000

    def __init__(self, system_prompt:str='', temperature=0.1, repeat_penalty=1.05):
        """
        Create or use a global LLM instance, and initialize history

        Parameters
        ----------
        system_prompt:str - <see LLM.reset>
        temperature:float - <see LLM.reset>
        repeat_penalty:float - <see LLM.reset>
        """
        global GLOBAL_LLM_INSTANCE
        # `globals().get` also covers the case where the slot was never defined.
        if globals().get('GLOBAL_LLM_INSTANCE') is None:
            GLOBAL_LLM_INSTANCE = llama_cpp.Llama(model_path=self.MODEL_PATH, n_gpu_layers=-1, verbose=0, n_ctx=self.N_CTX)
        self.reset(system_prompt, temperature, repeat_penalty)

    def __call__(self, prompt:str='', role:str='user', response_format:dict=None, max_retries:int=None):
        """
        Elicit a response from the LLM

        Parameters
        ----------
        prompt:str - text to append to the history before eliciting a response, or an empty string to use the existing history without adding to it
        role:str - the role associated with the prompt text: 'user', 'system', or 'assistant'. Ignored if prompt is empty.
        response_format:dict - a dict format to force the response to be in -- e.g., `{'to': '<who you are talking to>', 'response': '<your actual response>'}` -- or `None` for the response to be a regular string
        max_retries:int - how many times to re-ask the model after an unparsable reply, or `None` (default) to keep asking until a reply parses

        Returns
        -------
        The reply text (str) when `response_format` is not given, otherwise the parsed reply (dict).
        In the second case the format instruction and all correction messages are removed from the
        history again; only the prompt (if any) and the final reply are kept.

        Raises
        ------
        ValueError - if `max_retries` is given and no reply could be parsed in time. The history
        then still contains the format instruction and the failed exchange.
        """
        format_idx = None
        if response_format:
            format_idx = len(self._history)
            self._history.append({
                'role':'user',
                'content': 'Your next output should be formatted as this json with nothing extra: ' + json.dumps(response_format)
            })

        if prompt:
            self._history.append({'role':role, 'content':prompt})

        # Everything from this index on is model output or correction messages.
        request_end = len(self._history)

        resp = self._force_chat_completion()
        if not response_format:
            return resp

        retries = 0
        while True:
            try:
                resp_dict = self._parse_json_object(resp)
                break
            except ValueError as err:
                print('bad json:', resp)
                if max_retries is not None and retries >= max_retries:
                    raise ValueError(f'model did not produce valid json after {retries} retries') from err
                retries += 1
                self._history.append({
                    'role':'user',
                    'content': 'Your previous output WAS NOT correctly formatted. Make sure it has necessary curly brackets and quotes. It shold be formatted as this json with nothing extra: ' + json.dumps(response_format)
                })
                resp = self._force_chat_completion()

        # Remove the format instruction and the correction messages. Slicing by the recorded
        # indices (instead of assuming a prompt was appended) keeps earlier messages intact
        # when `prompt` is empty.
        self._history = (
            self._history[:format_idx] +              # everything before the format instruction
            self._history[format_idx+1:request_end] + # the prompt, if one was given
            self._history[-1:]                        # final response
        )

        return resp_dict

    @staticmethod
    def _parse_json_object(text:str) -> dict:
        """
        Parse the first JSON object found in `text`.

        Text before the first '{' and after the end of the object is ignored, so replies such as
        'Here you go: {...} Let me know if this is correct.' are accepted.

        Raises
        ------
        ValueError - if `text` has no '{' or the object starting there is not valid JSON
        (json.JSONDecodeError is a ValueError).
        """
        if '}' not in text:
            text += '}' # tolerate a reply whose only defect is the missing closing bracket
        start = text.index('{') # the json might be surrounded by other text
        parsed, _ = json.JSONDecoder().raw_decode(text, start)
        return parsed

    def _force_chat_completion(self):
        '''
        To fix bug where model response is blank.
        IMPORTANT: response is added to the history

        Asks the shared model for a completion of the current history, repeating the request
        for as long as the returned message content is empty, then returns that content.
        '''
        while True:
            msg = GLOBAL_LLM_INSTANCE.create_chat_completion(self._history, temperature=self._temperature, repeat_penalty=self._repeat_penalty)['choices'][0]['message']
            if msg['content']:
                break

        self._history.append(msg)

        return msg['content']

    def get_hist(self) -> str:
        """
        Get a nicely-formatted string of the current history.
        """
        return ''.join(
            f'{msg["role"]} --- {msg["content"]}\n__________\n\n'
            for msg in self._history
        )

    def reset(self, system_prompt:str=None, temperature=None, repeat_penalty=None):
        """
        Reset the LLM's chat history with a new system prompt.
        
        Parameters
        ----------
        system_prompt:str - instructions for the LLM, or an empty string to start without a system prompt. `None` keeps the first message of the current history (normally the system prompt) and drops the rest.
        temperature:float - higher = more random. `None` keeps the current value.
        repeat_penalty:float - higher = less repeating of output. `None` keeps the current value.
        """
        if system_prompt is not None:
            self._history = [{'role':'system', 'content':system_prompt}]
        else:
            # getattr: there may be no history yet if reset is reached before one was created
            self._history = getattr(self, '_history', [])[0:1]
        if temperature is not None: self._temperature = temperature
        if repeat_penalty is not None: self._repeat_penalty = repeat_penalty

In [104]:
# Agent for the water pouring puzzle; the system prompt states the goal and the only useful move.
A1 = LLM(system_prompt='''
You are an agent in a world trying to solve the water pouring puzzle.
There are three cups with various amounts and capacities (8/8, 0/5, 0/3), and you are trying to get the cup amounts (amt) to be 4/8, 4/5, 0/3 with the fewest actions possible.
The only useful action you can take is moving as much water from one cup to another as is possible (i.e., until the source cup runs out or until the destination cup is full). The cups don't have markings, so you cannot directly move exact amounts of liquid.
The world will be described to you, and you will have to say how you want to interact with it.
''')

In [110]:
# https://en.wikipedia.org/wiki/Water_pouring_puzzle

# class LLMException(Exception): pass

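# NOTE: action parameters are named `*_idx` and annotated `int` because that signature is what
# gets shown to the model (see describe_action). When an action is actually called, the main
# loop has already replaced each index with the corresponding world-object dict.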

def transfer(cup_from_idx:int, cup_to_idx:int):
    '''
    This will transfer everything in `cup_from_idx` to `cup_to_idx` until either `cup_from_idx` is empty or `cup_to_idx` is full.
    If `cup_to_idx` is full (amt = amt_max), then nothing will happen.
    If `cup_from_idx` is empty (amt = 0), then nothing will happen.
    '''
    # The docstring and the `int` annotations above are the text describe_action shows to the
    # model, so they stay as they are. At call time both arguments are cup dicts with the
    # keys 'amt' and 'amt_max', and both dicts are modified in place.
    free_space = cup_to_idx['amt_max'] - cup_to_idx['amt']
    poured = min(free_space, cup_from_idx['amt'])
    cup_to_idx['amt'] += poured
    cup_from_idx['amt'] -= poured

def smash_cup(cup_idx: int):
    '''
    smash
    '''
    # The docstring is the description shown to the model, so it stays as it is.
    # At call time `cup_idx` is the cup dict: empty it, remove its capacity and relabel it.
    cup_idx.update({'amt_max': 0, 'amt': 0, 'type': 'smashed cup'})
    
def obj(typ, **kwargs):
    """
    Build a world object: a dict whose first key is 'type' (set to `typ`), followed by the
    given keyword arguments as additional properties.
    """
    properties = {'type': typ}
    properties.update(kwargs)
    return properties

# Starting state of the puzzle as (amt, amt_max) pairs: one full 8-unit cup, two empty ones.
world = [
    obj('cup', amt=amount, amt_max=capacity)
    for amount, capacity in ((8, 8), (0, 5), (0, 3))
]

def describe_world(world):
    """
    Render the world as text: one 'Object idx=<i>:' header line per object, followed by one
    tab-indented '<key>: <value>' line for each of its properties.
    """
    lines = []
    for idx, item in enumerate(world):
        lines.append(f'Object idx={idx}:\n')
        lines.extend(f'\t{key}: {value}\n' for key, value in item.items())
    return ''.join(lines)

# Registry of the actions the model may choose from, keyed by function name.
actions = {fn.__name__: fn for fn in (transfer, smash_cup)}

def describe_action(action_name):
    """
    Build the one-line description of an action that is shown to the model:
    `<name>(<param>: <type>, ...) -- <docstring>`, with the docstring's line breaks
    replaced by ' - '.

    Parameters
    ----------
    action_name:str - key of the action in the module-level `actions` dict

    Raises
    ------
    KeyError - if `action_name` is not a registered action
    """
    action = actions[action_name]

    def _format_param(param):
        annotation = param.annotation
        if annotation is param.empty:
            # unannotated parameter: show only its name instead of '_empty'
            return param.name
        # not every annotation has __name__ (e.g. string annotations); fall back to its text
        type_name = getattr(annotation, '__name__', None) or str(annotation)
        return f'{param.name}: {type_name}'

    args = ', '.join(_format_param(p) for p in signature(action).parameters.values())
    # `or ''`: an action without a docstring gets an empty description instead of a TypeError
    doc = dedent(action.__doc__ or '').strip().replace('\n', ' - ')
    return f'{action_name}({args}) -- {doc}'

# transfer(world[0], world[1])
# transfer(world[1], world[2])
# world

In [111]:
# NOTE: the actions modify `world` in place, so re-running this cell continues from the current
# state. Re-run the cell that defines `world` first to start the puzzle over.
A1.reset()  # no arguments: keep the system prompt, drop the rest of the conversation
# print(describe_world(world))
for _ in range(10):
    world_prompt = (
        'This is the state of the world:\n' +
        describe_world(world) +
        '\nWhat do you want to do? These are your options:\n' +
        '\n'.join([describe_action(k) for k in actions.keys()])
    )
    # one bar per cup: '#' for each unit of water, '-' for each unit of free capacity
    for c in world:
        print('#'*c['amt'] + '-'*(c['amt_max']-c['amt']))
    resp = A1(world_prompt, response_format={
        'rationale': 'Two things: 1) did your last action have the expected effect? 2) A step-by-step reasoning for everything you\'ve done so far',
        'action': 'Name of known action',
        'parameters': 'Parameter values as a python list (NOT a dict)'
    })

    # The reply is model output: check it before using it to index `actions` and `world`.
    action_name = resp.get('action')
    action = actions.get(action_name) if isinstance(action_name, str) else None
    params = resp.get('parameters')
    params_ok = (
        isinstance(params, list)
        and all(type(i) is int and 0 <= i < len(world) for i in params)
    )
    if action is None or not params_ok or len(params) != len(signature(action).parameters):
        # e.g. unknown action name, or parameters given as a dict / list of dicts
        print('invalid action, skipping:', resp)
        continue

    action(*[world[i] for i in params])
    print(resp.get('rationale'))
#     print(describe_world(world))

########
-----
---
bad json:   My apologies, I will make sure to format my responses correctly. Here is my next action:
Action: transfer(0, 1)
Rationale: As before, I want to transfer the 8 units of water from cup 0 to cup 1. This will make the amount in cup 0 0 and the amount in cup 1 8.
Please let me know if this action was successful or not, and if there are any further actions I can take.
bad json:   My apologies, here is the correct format:
{
"rationale": "Two things: 1) did your last action have the expected effect? 2) A step-by-step reasoning for everything you've done so far",
"action": "transfer",
"parameters": [
	{
		"cup_from_idx": 0,
		"cup_to_idx": 1
	}
]

Please let me know if this is correct.
bad json:   My apologies, here is the correct format:
{
"rationale": "Two things: 1) did your last action have the expected effect? 2) A step-by-step reasoning for everything you've done so far",
"action": "transfer",
"parameters": [
	{
		"cup_from_idx": 0,
		"cup_to_idx": 1
	}
]

P

KeyboardInterrupt: 

In [84]:
# Show the agent's whole conversation so far, one '<role> --- <content>' entry per message.
print(A1.get_hist())

system --- 
You are an agent in a world trying to solve the water pouring puzzle.
There are three cups with various amounts, and you are trying to get the cup amounts to be 4, 4, 0.
The world will be described to you, and you will have to say how you want to interact with it.

__________

user --- This is the state of the world:
Object idx=0:
	type: cup
	amt: 3
	amt_max: 8
Object idx=1:
	type: cup
	amt: 5
	amt_max: 5
Object idx=2:
	type: cup
	amt: 0
	amt_max: 3

What do you want to do? These are your options:
transfer(cup_from_idx: int, cup_to_idx: int) -- Transfer as much as possible from `cup_from` to `cup_to` WITHOUT overflowing. - If `cup_to` is full, nothing will happen.
smash_cup(cup_idx: int) -- smash
__________

assistant ---   I want to transfer as much as possible from cup 1 to cup 2 without overflowing. So, my action is:
{
"rationale": "Transfer as much as possible from cup 1 to cup 2 without overflowing",
"action": "transfer",
"parameters": [
{
"cup_from_idx": 0,
"cup_to_id