In [1]:
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate

import os
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager

# Handle data input
import ipywidgets as widgets
from IPython.display import display

LLM_MODELS = os.environ["LLM_MODELS"]

In [2]:
model_path = LLM_MODELS + "mistral-7b-instruct-v0.1.Q3_K_M.gguf"
n_gpu_layers = 12
n_batch = 128
callback_manager = CallbackManager([])

In [3]:
def myLlamaCpp(model: str):
    """
    Create an instance of LlamaCpp with the given model path.

    Args:
        model (str): The path to the LlamaCpp model.

    Returns:
        LlamaCpp: An instance of the LlamaCpp class.
    """
    llm = LlamaCpp(model_path=model,
        n_gpu_layers=n_gpu_layers,
        n_ctx=4096,
        stop=[],
        # stop=["</s>"],  # If used, the message will stop early
        max_tokens=1000,
        n_threads=8,
        temperature=2.0,  # also works with 0.0 (0.01 is safer)
        f16_kv=True,
        n_batch=n_batch,
        callback_manager=callback_manager,
        verbose=False,
        grammar_path="json_only_reply.gbnf",
    )
    return llm

In [6]:
def initialize_chain(instructions, memory=None):
    if memory is None:
        memory = ConversationBufferWindowMemory()
        print("memory: ", memory)
        memory.ai_prefix = "Assistant"

    template = f"""
    Instructions: {instructions}
    {{{memory.memory_key}}}
    Human: {{human_input}}
    Assistant:"""

    prompt = PromptTemplate(
        input_variables=["history", "human_input"], template=template
    )

    
    llm1 = myLlamaCpp(model_path)

    chain = LLMChain(
        # llm=OpenAI(temperature=0),
        llm = llm1,
        prompt=prompt,
        verbose=False,
        memory=ConversationBufferWindowMemory(),
    )
    return chain


def initialize_meta_chain():
    meta_template = """
    Assistant has just had the below interactions with a User. Assistant followed their "Instructions" closely. Your job is to critique the Assistant's performance and then revise the Instructions so that Assistant would quickly and correctly respond in the future.

    ####

    {chat_history}

    ####

    Please reflect on these interactions.

    You should first critique Assistant's performance. What could Assistant have done better? What should the Assistant remember about this user? Are there things this user always wants? Indicate this with "Critique: ...".

    You should next revise the Instructions so that Assistant would quickly and correctly respond in the future. Assistant's goal is to satisfy the user in as few interactions as possible. Assistant will only see the new Instructions, not the interaction history, so anything important must be summarized in the Instructions. Don't forget any important details in the current Instructions! Indicate the new Instructions by "Instructions: ...".
    """

    meta_prompt = PromptTemplate(
        input_variables=["chat_history"], template=meta_template
    )

    llm2 = myLlamaCpp(model_path)

    meta_chain = LLMChain(
        llm=llm2,
        prompt=meta_prompt,
        verbose=True,
    )
    return meta_chain


def get_chat_history(chain_memory):
    memory_key = chain_memory.memory_key
    chat_history = chain_memory.load_memory_variables(memory_key)[memory_key]
    return chat_history


def get_new_instructions(meta_output):
    delimiter = "Instructions: "
    new_instructions = meta_output[meta_output.find(delimiter) + len(delimiter) :]
    return new_instructions

In [8]:
def main(task, max_iters=3, max_meta_iters=5):
    failed_phrase = "task failed"
    success_phrase = "task succeeded"
    key_phrases = [success_phrase, failed_phrase]

    instructions = "None"
    for i in range(max_meta_iters):
        print(f"[Episode {i+1}/{max_meta_iters}]")
        chain = initialize_chain(instructions, memory=None)
        output = chain.predict(human_input=task)
        for j in range(max_iters):
            print(f"(Step {j+1}/{max_iters})")
            print(f"Assistant: {output}")
            print("Human: ")
            # human_input = input()
            text_box = widgets.Text("Human: ")
            human_input = text_box.value
            if any(phrase in human_input.lower() for phrase in key_phrases):
                break
            output = chain.predict(human_input=human_input)
        if success_phrase in human_input.lower():
            print("You succeeded! Thanks for playing!")
            return
        meta_chain = initialize_meta_chain()
        meta_output = meta_chain.predict(chat_history=get_chat_history(chain.memory))
        print(f"Feedback: {meta_output}")
        instructions = get_new_instructions(meta_output)
        print(f"New Instructions: {instructions}")
        print("\n" + "#" * 80 + "\n")
    print("You failed! Thanks for playing!")

In [9]:
task = "Provide a systematic argument for why we should always eat pasta with olives."
main(task)

[Episode 1/5]
memory:  
root ::= restricted-json 
restricted-json ::= object-list 
object-list ::= object 
object ::= [{] ws reply-key [:] ws string ws [}] 
ws ::= ws_10 
reply-key ::= ["] [R] [e] [p] [l] [y] ["] 
string ::= ["] string_9 ["] 
string_7 ::= [^"\] | [\] string_8 
string_8 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_9 ::= string_7 string_9 | 
ws_10 ::= [ <U+0009><U+000A><U+000D>] ws_10 | 


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/erlebach/data/llm_models/mistral-7b-instruct-v0.1.Q3_K_M.gguf (version GGUF V2)
llama_model_loader: - tensor    0:                token_embd.weight q3_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q3_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q3_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q3_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q3_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight q5_K   

(Step 1/3)
Assistant: {
        
        "Reply": "Here are three reasons to always eat pasta with olives:"


        }
Human: 
(Step 2/3)
Assistant: {"Reply": "One reason to always eat pasta with olives is because they add a depth of flavor that elevates the dish. The briny, slightly sweet taste of the olives pairs well with the savory notes of pasta and tomatoes, creating a rich and balanced meal."}
Human: 
(Step 3/3)
Assistant: {"Reply": "Another reason to always eat pasta with olives is their nutritional benefits. Olives are high in healthy fats, particularly monounsaturated fatty acids, which have been shown to lower cholesterol levels and reduce the risk of heart disease."}
Human: 


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/erlebach/data/llm_models/mistral-7b-instruct-v0.1.Q3_K_M.gguf (version GGUF V2)
llama_model_loader: - tensor    0:                token_embd.weight q3_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q3_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q3_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q5_K     [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q3_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q3_K     [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight q5_K   

root ::= restricted-json 
restricted-json ::= object-list 
object-list ::= object 
object ::= [{] ws reply-key [:] ws string ws [}] 
ws ::= ws_10 
reply-key ::= ["] [R] [e] [p] [l] [y] ["] 
string ::= ["] string_9 ["] 
string_7 ::= [^"\] | [\] string_8 
string_8 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_9 ::= string_7 string_9 | 
ws_10 ::= [ <U+0009><U+000A><U+000D>] ws_10 | 


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Assistant has just had the below interactions with a User. Assistant followed their "Instructions" closely. Your job is to critique the Assistant's performance and then revise the Instructions so that Assistant would quickly and correctly respond in the future.

    ####

    Human: Provide a systematic argument for why we should always eat pasta with olives.
AI: {
        
        "Reply": "Here are three reasons to always eat pasta with olives:"


        }
Human: Human: 
AI: {"Reply": "One re

ValidationError: 1 validation error for PromptTemplate
__root__
  Single '}' encountered in format string (type=value_error)