In [4]:
import os

from langchain import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

In [5]:
# Speaker labels passed to the summarisation prompt as {user_name} / {bot_name}.
USER_NAME = "Anon" # The name you want to use when interviewing the agent.
BOT_NAME = "Miku" # The name of the agent; appears as the other speaker in the example dialogue.

In [6]:
# Locations of the quantised base model and the LoRA adapter.
# The defaults are the original author's local paths; set LLAMA_MODEL_PATH and
# LLAMA_LORA_PATH in the environment to run this notebook on another machine
# without editing the cell.
model_bin = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/llama/converted/llama_13b_ggml_q4_3.bin",
)
lora_bin = os.environ.get(
    "LLAMA_LORA_PATH",
    "/media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/SuperCOT-LoRA/13b/ggml/cutoff-2048/ggml-adapter-model.bin",
)

# Load the base model through llama.cpp with the LoRA adapter applied.
# NOTE(review): use_mmap=False is presumably required so the adapter can be
# applied to the loaded weights -- confirm against the llama.cpp version in use.
LLM = LlamaCpp(
    model_path=model_bin,
    lora_path=lora_bin,
    n_ctx=2048,
    n_threads=6,
    use_mmap=False,
    n_batch=512,
    temperature=0.65,
)

llama.cpp: loading model from /media/captdishwasher/Samshmung/horenbergerb/llama/llama.cpp/models/llama/converted/llama_13b_ggml_q4_3.bin
llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 6 (mostly Q4_3)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 9534493.73 KB
llama_model_load_internal: mem required  = 11359.03 MB (+ 3216.00 MB per state)
....................................................................................................
llama_init_from_file: kv self size  = 3200.00 MB
llama_apply_lora_fro

In [7]:
# Sample chat transcript used as the {chat_history} input of the summarisation
# prompt. The speaker names are written literally in the text and match the
# current values of USER_NAME ("Anon") and BOT_NAME ("Miku"); changing those
# constants does not rename the speakers here.
example_dialogue = """Anon: Hey Miku!
Miku: Howdy, Anon! What can I do for you today?
Anon: I just wanted to chat, actually. It's a slow day today.
Miku: Slow day, huh? You don't seem like the type to be idle. What are your plans for this downtime?
Anon: I dunno. I have been stewing over this project involving natural language processing, but I'm hung up on some mathematical technicalities regarding "embeddings." It's quite frustrating, so today I'm just stepping back and breathing for a bit.
Miku: Oh dear, sounds like a tough nut to crack. But hey, that's what I'm here for! Do you want to workshop it together?
Anon: Sure, we can try! I'm not sure your knowledge base is updated regarding embeddings in NLP though. Can you tell me what an embedding is?
Miku: Of course! Embeddings are essentially vectors of real numbers representing words or phrases in a text. They're used to represent contextual information about the word or phrase in a given text. Embeddings are useful because they allow us to measure similarity between words or phrases based on their distance in the vector space.
Anon: Nice, that's a pretty good summary, Miku. Maybe your knowledge base is more updated than I thought. I'm actually trying to create an artificial "memory" for a chatbot. Each memory will be stored as an embedding, and the bot will recall memories whose embeddings are sufficiently similar to the current discussion.
Miku: That sounds really cool, Anon. I wish I had a memory like that.
Anon: You might some day! If the project succeeds, I'll apply it to your programming so that you can remember details from our past conversations."""

In [8]:
# The prompt ends with this primer so the model starts writing the first
# observation straight away. The completion therefore does NOT contain it,
# and it has to be put back in front of the model output below.
RESPONSE_PREFIX = "1)"

# Instruction / Input / Response prompt asking for three numbered observations
# about the two speakers of the supplied chat history.
summarize_template = (
    "### Instruction:\n"
    "Create a bulleted list of the three most salient observations about {user_name} and/or {bot_name} based on the following dialogue. Use the following example format:\n"
    "1) *First statement here*\n"
    "2) *Second statement here*\n"
    "3) *Third statement here*\n"
    "Include only the most salient observations about {user_name} and/or {bot_name}, focusing on their personalities and experiences. Be very succinct.\n\n"
    "### Input:\n"
    "{chat_history}\n\n"
    "### Response:\n"
) + RESPONSE_PREFIX
summarize_prompt = PromptTemplate.from_template(summarize_template)

# verbose=True makes the chain echo the fully formatted prompt before running.
chain = LLMChain(llm=LLM, prompt=summarize_prompt, verbose=True)
completion = chain.run(bot_name=BOT_NAME, user_name=USER_NAME, chat_history=example_dialogue)

# Re-attach the primer so the first list item is numbered like the others.
result = RESPONSE_PREFIX + completion





[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m### Instruction:
Create a bulleted list of the three most salient observations about Anon and/or Miku based on the following dialogue. Use the following example format:
1) *First statement here*
2) *Second statement here*
3) *Third statement here*
Include only the most salient observations about Anon and/or Miku, focusing on their personalities and experiences. Be very succinct.

### Input:
Anon: Hey Miku!
Miku: Howdy, Anon! What can I do for you today?
Anon: I just wanted to chat, actually. It's a slow day today.
Miku: Slow day, huh? You don't seem like the type to be idle. What are your plans for this downtime?
Anon: I dunno. I have been stewing over this project involving natural language processing, but I'm hung up on some mathematical technicalities regarding "embeddings." It's quite frustrating, so today I'm just stepping back and breathing for a bit.
Miku: Oh dear, sounds like a tough nut to crack. 


llama_print_timings:        load time = 10851.86 ms
llama_print_timings:      sample time =    24.87 ms /    65 runs   (    0.38 ms per run)
llama_print_timings: prompt eval time = 13449.05 ms /   530 tokens (   25.38 ms per token)
llama_print_timings:        eval time = 30561.56 ms /    64 runs   (  477.52 ms per run)
llama_print_timings:       total time = 44042.04 ms


In [16]:
# Use print() rather than a bare expression so the newlines in the summary
# are rendered as line breaks instead of being shown escaped in the repr.
print(result)

 Anon and Miku are having a casual conversation about their respective projects and technical challenges. 
2) The chatbot is being used as an artificial memory for Anon. 
3) The chatbot will be able to recall memories based on similarity of the current discussion with past discussions.


: 