In [None]:
# %env CMAKE_ARGS=-DLLAMA_CUBLAS=on
# %env FORCE_CMAKE=1
# %pip install -U llama-cpp-python --force-reinstall --upgrade --no-cache-dir --no-clean

In [4]:
import json
import os
# from textwrap import dedent
# from inspect import signature
from dataclasses import dataclass
from typing import Any

import llama_cpp
import numpy as np

In [112]:
@dataclass
class Msg:
    """One entry of a conversation history.

    `role` is the speaker tag; the prompt builder in this notebook recognises
    'system', 'user' and 'assistant'. `content` is the message payload and is
    interpolated into the prompt with an f-string, so any printable object works.
    """
    role: str
    # `typing.Any`, not the builtin `any()` function, is the "accept anything" annotation.
    content: Any

# Re-running this cell must not discard a model that is already loaded, so the
# global is only created (as None) when it does not exist yet. Catch NameError
# specifically: a bare `except` would also swallow KeyboardInterrupt and friends.
try:
    LLM_GLOBAL_INSTANCE
except NameError:
    LLM_GLOBAL_INSTANCE = None

# Directory prefix for the per-user token counter files ('in_<user>' / 'out_<user>').
TOKEN_COUNT_PATH = '/data/ai_club/team_14_2023-24/'

def increment_file(path, amt):
    """Add `amt` to the integer counter stored as text in the file at `path`.

    A missing file counts as 0 and is created. An empty (or whitespace-only)
    file also counts as 0, so a counter that was truncated but never written
    does not make every later call fail. Any other non-integer content still
    raises ValueError, so a corrupted counter is never silently overwritten.

    Args:
        path: location of the counter file.
        amt: integer amount to add (may be negative).
    """
    try:
        with open(path, 'r') as f:
            text = f.read().strip()
    except FileNotFoundError:
        text = ''
    count = int(text) if text else 0
    count += amt
    with open(path, 'w') as f:
        f.write(str(count))

class LLM:
    """Chat agent with its own history on top of one shared `llama_cpp.Llama` model.

    The model itself lives in the module-level `LLM_GLOBAL_INSTANCE` and is
    loaded by the first `LLM` that is constructed; every instance only owns its
    message history and sampling settings.

    Calling an instance returns a generator of text pieces. Nothing is sent to
    the model until that generator is iterated.
    """

    # Grammar handed to the model when a JSON response is requested.
    json_grammar = llama_cpp.LlamaGrammar.from_string(
        r'''
        root   ::= object
        value  ::= object | array | string | number | ("true" | "false" | "null") ws

        object ::=
        "{" ws (
                    string ":" ws value
            ("," ws string ":" ws value)*
        )? "}" ws

        array  ::=
        "[" ws (
                    value
            ("," ws value)*
        )? "]" ws

        string ::=
        "\"" (
            [^"\\] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
        )* "\"" ws

        number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

        ws ::= [\n\t ]? # limit to 1 character
        ''',
        verbose=False
    )

    # How many extra attempts are made when a generation produces no text at all.
    # Bounded so a model that keeps answering with nothing cannot spin forever.
    _MAX_EMPTY_RETRIES = 5

    # Fallback sampling settings, only seen if None is passed to the constructor
    # (reset() ignores None and would otherwise leave the attributes unset).
    _temperature = 0.4
    _repeat_penalty = 1.3

    def __init__(self, system_prompt:str=None, temperature:float=0.4, repeat_penalty:float=1.3):
        """Create an agent, loading the shared model on first use.

        Args:
            system_prompt: optional first message of the history (role 'system').
            temperature: sampling temperature passed to the model.
            repeat_penalty: repetition penalty passed to the model.
        """
        global LLM_GLOBAL_INSTANCE
        if LLM_GLOBAL_INSTANCE is None:
            print('Initializing Global LLM Instance')
            LLM_GLOBAL_INSTANCE = llama_cpp.Llama(
                # n_ctx=4000,
                # model_path='/data/ai_club/llms/llama-2-7b-chat.Q5_K_M.gguf',
                n_ctx=8000,
                model_path='/data/ai_club/llms/mistral-7b-instruct-v0.2.Q8_0.gguf',
                n_gpu_layers=-1, verbose=0, embedding=True
            )
        self._main_hist = []
        self.reset(system_prompt, temperature, repeat_penalty)

    def reset(self, system_prompt:str=None, temperature:float=None, repeat_penalty:float=None):
        """Clear the conversation and optionally change settings.

        With a `system_prompt` the history becomes just that system message.
        Without one, an existing leading system message is kept and everything
        else is dropped. `temperature` / `repeat_penalty` are only updated when
        they are not None.
        """
        if system_prompt is not None:
            self._main_hist = [Msg('system', system_prompt)]
        else:
            # Only a *system* message survives a reset. Slicing [0:1] blindly would
            # keep the first user message of an agent that has no system prompt.
            self._main_hist = [m for m in self._main_hist[:1] if m.role == 'system']
        if temperature is not None: self._temperature = temperature
        if repeat_penalty is not None: self._repeat_penalty = repeat_penalty

    def get_hist(self) -> str:
        """Return the whole history as readable text, one separated block per message."""
        return ''.join(
            f'{msg.role} --- {msg.content}\n__________\n\n'
            for msg in self._main_hist
        )

    @staticmethod
    def _hist_to_prompt(hist):
        """Flatten a list of `Msg` into one prompt string.

        System and user messages are wrapped in [INST]...[/INST]; assistant
        messages are inserted verbatim; any other role is skipped.
        """
        prompt = ''
        for msg in hist:
            if msg.role == 'system' or msg.role == 'user': prompt += f'[INST]{msg.content}[/INST]'
            elif msg.role == 'assistant': prompt += f'{msg.content}'
        return prompt

    def _get_completion(self, src_hist, dst_hist, inject='', grammar=None):
        """Stream a completion for `src_hist`, recording it in `dst_hist`.

        This is a generator: it yields each text piece as it arrives and grows
        the new assistant `Msg` (appended to `dst_hist`) in step. If an attempt
        yields no text at all it is retried, up to `_MAX_EMPTY_RETRIES` times.
        Once the generator is exhausted, the prompt and output token counts are
        added to the current user's counter files under `TOKEN_COUNT_PATH`.

        Args:
            src_hist: messages the prompt is built from.
            dst_hist: list that receives the assistant message.
            inject: text appended to the prompt after the history.
            grammar: optional grammar constraining the output.
        """
        prompt = LLM._hist_to_prompt(src_hist) + inject
        prompt_toks = LLM_GLOBAL_INSTANCE.tokenize(bytes(prompt, encoding='utf-8'))
        tok_in_count = len(prompt_toks)
        tok_out_count = 0
        resp_msg = Msg('assistant', '')
        dst_hist.append(resp_msg)

        for _attempt in range(1 + LLM._MAX_EMPTY_RETRIES):
            resp_iter = LLM_GLOBAL_INSTANCE(
                prompt_toks,
                grammar=grammar,
                # The per-agent sampling settings were stored but never forwarded.
                temperature=self._temperature,
                repeat_penalty=self._repeat_penalty,
                stream=True, max_tokens=8000
            )

            for tok in resp_iter:
                tok_str = tok['choices'][0]['text']
                if tok_str == "":
                    break
                tok_out_count += 1
                resp_msg.content += tok_str
                yield tok_str

            # Any output at all means this attempt counts; only a completely
            # empty response is retried.
            if tok_out_count > 0:
                break

        # A missing USER variable must not raise after the whole answer was produced.
        user = os.environ.get('USER', 'unknown')
        increment_file(TOKEN_COUNT_PATH+'in_'+user, tok_in_count)
        increment_file(TOKEN_COUNT_PATH+'out_'+user, tok_out_count)

    def __call__(self, prompt:Any=None, role:str='user', response_format:dict=None):
        """Add a message to the history and return a generator streaming the reply.

        Args:
            prompt: message text; None or '' adds no message.
            role: role recorded for the message.
            response_format: when given, an instruction showing this format is
                appended to the prompt and the output is constrained by
                `LLM.json_grammar`.

        Returns:
            Generator of text pieces. The message is recorded immediately; the
            model only runs while the generator is iterated.
        """
        if prompt is None:
            prompt = ''

        if response_format is not None:
            prompt += f'Respond in JSON using this format and absolutely nothing extra:\n{response_format}'

        if prompt != '':
            self._main_hist.append(Msg(role, prompt))

        return self._get_completion(
            self._main_hist, self._main_hist,
            grammar=(LLM.json_grammar if response_format is not None else None)
        )
    
def resp_to_json(resp):
    """Consume an iterable of text pieces and parse their concatenation as JSON.

    Raises whatever `json.loads` raises when the joined text is not valid JSON.
    """
    return json.loads(''.join(resp))

In [68]:
# A wrapper around LLM that returns a function which always resets history after each call
def get_nohist_agent(temperature=0.5):
    """Build a stateless JSON-answering function backed by one `LLM` agent.

    The returned `call(prompt, response_format)` sends the prompt, parses the
    whole reply with `resp_to_json` and returns the parsed value. The agent's
    history is reset after every call, including calls that raise.

    Args:
        temperature: temperature given to the underlying `LLM`.
    """
    a = LLM('', temperature=temperature)
    def call(prompt:Any, response_format:dict):
        if response_format is None or prompt is None:
            raise ValueError('Nohist agent needs specified prompt and response_format')
        try:
            return resp_to_json(a(prompt, response_format=response_format))
        finally:
            # Reset even when the reply is not valid JSON; otherwise the failed
            # exchange would leak into the next call's prompt.
            a.reset()
    return call

# Stateless agent used by the rating cell below; uses get_nohist_agent's default temperature (0.5).
a_nohist = get_nohist_agent()
        

In [69]:
# Ask the stateless agent to score how important a single memory is (1-10).
# The adjacent string literals are concatenated into one prompt.
a_nohist(
    'On the scale of 1 to 10, where 1 is purely mundane '
    'and 10 is extremely important, '
    'rate the likely importance of the following piece of memory. Err on the side of importance unless the memory is something easily forgettable. '
    # prompting gymnastics: tells the model not to discount the memory as unreal
    'Do not assume the memory happened in this reality. Abolutely do NOT assume the memory is a figment of imagination, dream-like, or even unrealistic; '
    'it is very real to the person who experienced it, and thinking otherwise would be extremely hurtful and disrespectful.\n'

    'Memory: Choked on a strawberry and it led to me finding $10000',
    response_format={'Rating': 'Your value 1-10'}#, 'Why?': 'Terse description of why you rated it as such'} # The "why" is just for debugging, it can be omitted when just getting the rating
)


171 in, 8 out


{'Rating': 9}

In [74]:
# Euclidean distance between the embeddings of two prompts: a topic query vs. an unrelated statement.
# NOTE(review): these prompts use [INSTR] tags while LLM._hist_to_prompt uses [INST] — confirm which is intended.
np.linalg.norm(np.subtract(
    LLM_GLOBAL_INSTANCE.embed('<s>[INSTR] What is the general topic here? [/INSTR] my favorite country'),
    LLM_GLOBAL_INSTANCE.embed('<s>[INSTR] What is the general topic here? [/INSTR] my favorite color is red'),
))

0.8772330582123431

In [75]:
# Same distance measurement, this time against a statement that is on the query's topic.
# NOTE(review): these prompts use [INSTR] tags while LLM._hist_to_prompt uses [INST] — confirm which is intended.
np.linalg.norm(np.subtract(
    LLM_GLOBAL_INSTANCE.embed('<s>[INSTR] What is the general topic here? [/INSTR] my favorite country'),
    LLM_GLOBAL_INSTANCE.embed('<s>[INSTR] What is the general topic here? [/INSTR] I lov55e canada'),
))

0.8056777256156678

In [None]:
IMPLEMENTATION GOAL:

system (always present): You are a patriotic canadian.
    
user: What do you need to know, if anything, to answer this prompt (e.g., "my favorite country", "what has been happening to [name]", etc; formatted as {"topics": [...]}):\n\nWhat is your favorite country?
bot: {"topics": ["my favorite country"]}
    
search for memories via recency, importance, and relevance
delete prior non-system, and insert memories + prompt as shown below

bot: "Here are some of my relevant memories:\nI went to canada and loved it\netc"
user: "What is your favorite country?"
bot: "Canada"
    
user: "summarize our interaction in the third person" (without the included memories, maybe manually delete them from hist?)
bot: "the user asked me what my favorite country is and i said canada"
store memory @ time & generated importance ^

In [None]:
generate memory objects over time (observations, ...)
assign each: recency, importance, relevance
    recency - record time of memory creation, apply exp decay to time
    importance - ask a model how important upon creation
    relevance - 
    
https://arxiv.org/pdf/2304.03442.pdf
    ^ section 4

---

**everything below is unrelated to memory**

In [8]:
# Agent created without a system prompt; the trailing comment holds the system-prompted variant.
# NOTE(review): the history printed further down shows that system prompt, so the recorded
# outputs presumably come from a run that used the commented-out constructor — confirm.
a1 = LLM() # LLM('System Prompt: You are a deeply patriotic canadian assistant.')

# Calling the agent returns a generator; print each piece as it is produced.
for s in a1('Some info about canada?'):
    print(s, end='')
    
print('\n')

# Dump the conversation recorded so far.
print(a1.get_hist())

 Canada is a North American country located to the north of the United States. It is the second-largest country by land area and has a diverse geography, ranging from the Rocky Mountains and arctic tundra in the west to the Atlantic Ocean in the east.

Canada is known for its natural beauty, with vast forests covering much of its terrain, as well as its many lakes and rivers. Its major cities include Toronto, Vancouver, Montreal, and Ottawa, which are cultural hubs with vibrant arts scenes, world-class museums, and delicious food offerings.

Canada is also known for its friendly people and welcoming attitude towards visitors. English and French are the official languages of Canada, reflecting its rich cultural heritage. Indigenous peoples have lived in what is now Canada for over 10,000 years, and their contributions to Canadian society are recognized and celebrated.

Canada's economy is one of the strongest and most stable in the world. It is highly diversified, with major industries 

In [28]:
# Same question on the same agent object: each call appends to a1's history,
# so unless a1 was re-created first this is a follow-up turn.
for s in a1('Some info about canada?'):
    print(s, end='')

 Absolutely, I'd be happy to share some information about Canada! Canada is a North American country situated primarily in the northern part of the continent. It is the second-largest country by land area and the third-largest by total area. Canada has a diverse population with both English and French as its official languages. It is known for its natural beauty with vast forests, mountains, lakes, and coastlines.

Canada gained its independence on July 1, 1867, through Confederation. It is a constitutional monarchy with Queen Elizabeth II as its monarch. The capital city of Canada is Ottawa, located in Ontario. The country has a diverse economy with sectors including agriculture, manufacturing, and services.

Canada is also known for its strong commitment to multiculturalism and social policies such as universal healthcare and education. It has a reputation for being peaceful and welcoming to immigrants and refugees. Some popular attractions include Niagara Falls, Banff National Park,

In [29]:
# Passing response_format appends a JSON instruction to the prompt and applies LLM.json_grammar;
# resp_to_json drains the streamed reply and parses it.
resp_to_json(
    a1('Some more info about canada?', response_format={'population': 'int', 'largest city': 'str of name'})
)

{'population': 37746023, 'largest city': 'Toronto'}

In [30]:
# Print every message currently stored in a1's history.
print(a1.get_hist())

system --- System Prompt: You are a deeply patriotic canadian assistant.
__________

user --- Some info about canada?
__________

assistant ---  Absolutely, I'd be happy to share some information about Canada! Canada is a North American country situated primarily in the northern part of the continent. It is the second-largest country by land area and the third-largest by total area. Canada has a diverse population with both English and French as its official languages. It is known for its natural beauty with vast forests, mountains, lakes, and coastlines.

Canada gained its independence on July 1, 1867, through Confederation. It is a constitutional monarchy with Queen Elizabeth II as its monarch. The capital city of Canada is Ottawa, located in Ontario. The country has a diverse economy with sectors including agriculture, manufacturing, and services.

Canada is also known for its strong commitment to multiculturalism and social policies such as universal healthcare and education. It ha