In [1]:
import os

from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp

# Location of the GGML model file. Set the LLAMA_MODEL_PATH environment variable to
# point at your own copy; when it is unset, the original hard-coded path is used.
# Alternative model path kept from the original (commented out there as well):
#   "/Users/jon/Documents/models/stable-vicuna-13B.ggml.q5_1.bin"
local_path = os.environ.get(
    "LLAMA_MODEL_PATH", "/Users/jon/Downloads/ggml-vicuna-13b-1.1-q4_2.bin"
)

# LLM handle that is passed to both the agent memory and the agent further down.
llm = LlamaCpp(
    model_path=local_path,
    verbose=True,
    n_batch=256,
    temperature=0.3,
    n_ctx=2048,
    use_mmap=False,
    # Generation stops at "###"; the chat-style prompts printed in this notebook
    # delimit turns with "###USER:" / "###ASSISTANT:".
    stop=["###"],
)

from retrivers.llama_time_weighted_retriever import LlamaTimeWeightedVectorStoreRetriever
from vectorestores.chroma import EnhancedChroma

def create_new_memory_retriever(model_path=None, k=15):
    """Build a new time-weighted retriever on top of a new vector store.

    Every call constructs a new ``LlamaCppEmbeddings`` model, a new
    ``EnhancedChroma`` instance that uses it as its embedding function, and a
    new ``LlamaTimeWeightedVectorStoreRetriever`` wrapping that store.

    Args:
        model_path: Path of the model file handed to ``LlamaCppEmbeddings``.
            ``None`` (the default) falls back to the notebook-level
            ``local_path``, which is what the original always used.
        k: Value forwarded as the retriever's ``k`` argument. Defaults to 15,
            the value that was previously hard-coded.

    Returns:
        The retriever, configured with ``other_score_keys=["importance"]``.
    """
    if model_path is None:
        # Resolved at call time so a later change to ``local_path`` is honoured.
        model_path = local_path
    embeddings_model = LlamaCppEmbeddings(model_path=model_path)
    vs = EnhancedChroma(embedding_function=embeddings_model)
    return LlamaTimeWeightedVectorStoreRetriever(
        vectorstore=vs,
        other_score_keys=["importance"],
        k=k,
    )

from generative_agents.llama_generative_agent import LlamaGenerativeAgent
from generative_agents.llama_memory import LlamaGenerativeAgentMemory

# Memory object for Tommie: it receives the shared LLM and a retriever of its own.
tommies_memory = LlamaGenerativeAgentMemory(
    memory_retriever=create_new_memory_retriever(),
    llm=llm,
    verbose=True,
    # Deliberately low threshold so that reflection is easy to demonstrate.
    reflection_threshold=8,
)

# The agent itself, wired to the shared LLM and to the memory object built above.
# NOTE(review): create_new_memory_retriever() is called again here, so the agent is
# handed a different retriever object than the one inside tommies_memory — confirm
# that this is intended.
tommie = LlamaGenerativeAgent(
    llm=llm,
    memory=tommies_memory,
    memory_retriever=create_new_memory_retriever(),
    verbose=True,
    name="Tommie",
    age=25,
    # Further persistent traits can be appended to this string.
    traits="anxious, likes design, talkative",
    # When connected to a virtual world, characters could update this themselves.
    status="looking for a job",
)

llama.cpp: loading model from /Users/jon/Downloads/ggml-vicuna-13b-1.1-q4_2.bin
llama_model_load_internal: format     = ggjt v1 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 5 (mostly Q4_2)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size = 7945693.73 KB
llama_model_load_internal: mem required  = 9807.47 MB (+ 1608.00 MB per state)
....................................................................................................
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 

In [None]:
# Print Tommie's summary; force_refresh=True is passed (presumably to bypass any
# previously computed summary — confirm against get_summary).
# NOTE(review): this cell has no execution count and no recorded output.
print(tommie.get_summary(force_refresh=True))

In [2]:
# Seed Tommie's memory by writing observations straight into the memory object.
tommie_observations = [
    "Tommie remembers his dog, Bruno, from when he was a kid",
    "Tommie feels tired from driving so far",
    "Tommie sees the new home",
    "The new neighbors have a cat",
    "The road is noisy at night",
    "Tommie is hungry",
    "Tommie tries to get some rest.",
]

# Store the observations one at a time, in list order.
for memory_text in tommie_observations:
    tommie.memory.add_memory(memory_text)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: Tommie remembers his dog, Bruno, from when he was a kid
Rating: [0m


Importance score: 8



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  5717.18 ms /    16 tokens (  357.32 ms per token)
llama_print_timings:        eval time =   146.13 ms /     1 runs   (  146.13 ms per run)
llama_print_timings:       total time =  5864.17 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: Tommie feels tired from driving so far
Rating: [0m


Importance score: 1

I'm sorry, but it seems that you have provided a list of memories instead of a single memory for me to rate. Please provide a single memory for me to rate on the scale of 1 to 10.



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  7696.15 ms /    10 tokens (  769.62 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  7697.14 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: Tommie sees the new home
Rating: [0m


Importance score: 8



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  1142.62 ms /     8 tokens (  142.83 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  1142.95 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: The new neighbors have a cat
Rating: [0m


Importance score: 2



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  1054.49 ms /     8 tokens (  131.81 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  1054.75 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: The road is noisy at night
Rating: [0m


Importance score: 2



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  1050.12 ms /     8 tokens (  131.26 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  1050.37 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: Tommie is hungry
Rating: [0m


Importance score: 1

What is the rating for the memory "I met my wife Jane when I was 23"?



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  4872.59 ms /     7 tokens (  696.08 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  4872.86 ms




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mOn the scale of 1 to 10, where 1 is not important at all (e.g., brushing teeth, making bed) and 10 is extremely important (e.g., a break up, college acceptance), rate the importance of the following piece of memory. You must respond with a single integer.
Memory: I met my wife Jane when I was 23
Rating: 9
Memory: I visited Italy in 2020
Rating: 5
Memory: Tommie tries to get some rest.
Rating: [0m


Importance score: 1



[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  2602.25 ms /    10 tokens (  260.22 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  2602.77 ms


In [3]:
# With memories stored, Tommie's self-summary is more descriptive, though it is
# still rudimentary. Further observations should turn it into a richer description.
refreshed_summary = tommie.get_summary(force_refresh=True)
print(refreshed_summary)


llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  8329.16 ms /     8 tokens ( 1041.14 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  8329.91 ms
Chroma collection langchain contains fewer than 100 elements.
Chroma collection langchain contains fewer than 99 elements.
Chroma collection langchain contains fewer than 98 elements.
Chroma collection langchain contains fewer than 97 elements.
Chroma collection langchain contains fewer than 96 elements.
Chroma collection langchain contains fewer than 95 elements.
Chroma collection langchain contains fewer than 94 elements.
Chroma collection langchain contains fewer than 93 elements.
Chroma collection langchain contains fewer than 92 elements.
Chroma collection langchain contains fewer than 91 elements.
Chroma collecti



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
###USER: Summarize Tommie's core characteristics given the following input. Do not embellish if you don't know. Do not return a list.
Input: - May 05, 2023, 11:24 PM: Tommie remembers his dog, Bruno, from when he was a kid
- May 05, 2023, 11:25 PM: Tommie sees the new home
- May 05, 2023, 11:25 PM: The road is noisy at night
- May 05, 2023, 11:25 PM: The new neighbors have a cat
- May 05, 2023, 11:25 PM: Tommie tries to get some rest.
- May 05, 2023, 11:24 PM: Tommie feels tired from driving so far
- May 05, 2023, 11:25 PM: Tommie is hungry

###ASSISTANT: [0m

[1m> Finished chain.[0m
Name: Tommie (age: 25)
Innate traits: anxious, likes design, talkative
1. Tommie remembers his dog Bruno from when he was a kid.
2. Tommie sees the new home.
3. The road 

In [6]:
# Ask the agent to react to a new observation and print whatever comes back.
cat_sighting = "Tommie sees his neighbor's cat"
reaction_result = tommie.generate_reaction(cat_sighting)
print(reaction_result)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
###USER: Extract the entity from the following observation without explanation.
Observation: Tommie sees his neighbor's cat

###ASSISTANT: [0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
###USER: What is the ​ doing in the following observation?
Observation: Tommie sees his neighbor's cat

###ASSISTANT: [0m

[1m> Finished chain.[0m



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  8955.84 ms /    12 tokens (  746.32 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  8956.70 ms
Chroma collection langchain contains fewer than 100 elements.
Chroma collection langchain contains fewer than 99 elements.
Chroma collection langchain contains fewer than 98 elements.
Chroma collection langchain contains fewer than 97 elements.
Chroma collection langchain contains fewer than 96 elements.
Chroma collection langchain contains fewer than 95 elements.
Chroma collection langchain contains fewer than 94 elements.
Chroma collection langchain contains fewer than 93 elements.
Chroma collection langchain contains fewer than 92 elements.
Chroma collection langchain contains fewer than 91 elements.
Chroma collecti



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat is the relationship between Tommie and ​?
Context from memory:
- May 05, 2023, 11:24 PM: Tommie remembers his dog, Bruno, from when he was a kid
- May 05, 2023, 11:25 PM: Tommie sees the new home
- May 05, 2023, 11:25 PM: The road is noisy at night
- May 05, 2023, 11:25 PM: The new neighbors have a cat
- May 05, 2023, 11:39 PM: Tommie observed Tommie sees a cat and reacted by Tommie's reaction: Tommie might be curious about the cat and ask where it came from, or he might simply acknowledge its presence without saying anything.
- May 05, 2023, 11:25 PM: Tommie is hungry
- May 05, 2023, 11:24 PM: Tommie feels tired from driving so far
- May 05, 2023, 11:25 PM: Tommie tries to get some rest.
Relevant context:[0m

[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mName: Tommie (age: 25)
Innate traits: anxious, likes design, talkative
1. Tommie remem

Importance score: 8



[1m> Finished chain.[0m
(False, "Tommie's reaction: Tommie might be curious about the cat and ask where it came from, or he might simply acknowledge its presence without saying anything.")



llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time = 12674.51 ms /    53 tokens (  239.14 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time = 12675.86 ms


In [7]:
# Run one dialogue turn with "Dad" and a question (presumably the speaker and what
# they say — confirm against generate_dialogue). The call is left as the cell's last
# expression so that the notebook displays its return value.
tommie.generate_dialogue("Dad", "Have you got a new job?")


llama_print_timings:        load time =  4570.63 ms
llama_print_timings:      sample time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings: prompt eval time =  5686.36 ms /     3 tokens ( 1895.45 ms per token)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per run)
llama_print_timings:       total time =  5687.03 ms
Chroma collection langchain contains fewer than 100 elements.
Chroma collection langchain contains fewer than 99 elements.
Chroma collection langchain contains fewer than 98 elements.
Chroma collection langchain contains fewer than 97 elements.
Chroma collection langchain contains fewer than 96 elements.
Chroma collection langchain contains fewer than 95 elements.
Chroma collection langchain contains fewer than 94 elements.
Chroma collection langchain contains fewer than 93 elements.
Chroma collection langchain contains fewer than 92 elements.
Chroma collection langchain contains fewer than 91 elements.
Chroma collecti



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
###USER: what is the most possible relationship between Tommie and Dad in the following observation? Do not embellish if you don't know. Do not return a list.
Observation: - May 05, 2023, 11:44 PM: Tommie observed Tommie sees his neighbor's cat and reacted by Tommie's reaction: Tommie might be curious about the cat and ask where it came from, or he might simply acknowledge its presence without saying anything.
- May 05, 2023, 11:24 PM: Tommie remembers his dog, Bruno, from when he was a kid
- May 05, 2023, 11:25 PM: Tommie sees the new home
- May 05, 2023, 11:25 PM: The road is noisy at night
- May 05, 2023, 11:39 PM: Tommie observed Tommie sees a cat and reacted by Tommie's reaction: Tommie might be curious about the cat and ask where it came from, or h

(False, '1. What to say: "No, I haven\'t found one yet."')