In [9]:
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp, GPT4All
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain

# Callback manager wrapping a single stdout-streaming handler; it is handed to
# the LlamaCpp model below.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Number of layers to put on the GPU; the rest stay on the CPU.
# If you don't know how many layers there are, use -1 to move all of them to the GPU.
n_gpu_layers = 1

# Should be between 1 and n_ctx; consider the amount of RAM of your
# Apple Silicon chip when raising it.
n_batch = 1024


In [None]:
import os

# Make sure the model path is correct for your system!
# The location can be overridden without editing the notebook by setting the
# LLAMA_CPP_MODEL_PATH environment variable; when it is unset, the original
# local path is used unchanged.
llama_model_path = os.environ.get(
    "LLAMA_CPP_MODEL_PATH",
    "/Users/francescobassignana/models/meta-models/llama-2-13b-chat/13b-llama-ggml-model-q4_0.gguf",
)

# Build the llama.cpp-backed LLM. This rebinds the notebook-wide `llm` name,
# which the memory and chain cells further down pick up.
llm = LlamaCpp(
    model_path=llama_model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    # NOTE(review): max_tokens equals n_ctx, so a prompt plus a full-length
    # completion cannot both fit in the context window — confirm this is intended.
    max_tokens=2048,
    callback_manager=callback_manager,
)

In [10]:
import os

# Location of the GPT4All model file. Set the GPT4ALL_MODEL_PATH environment
# variable to point at your own copy; when it is unset, the original local
# path is used unchanged.
path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "/Users/francescobassignana/Library/Application Support/nomic.ai/GPT4All/mistral-7b-openorca.Q4_0.gguf",
)

# Build the GPT4All-backed LLM. This rebinds the notebook-wide `llm` name, so
# it replaces any `llm` created by an earlier cell.
llm = GPT4All(
    model=path,
)

# SummaryMemory

In [11]:
from langchain.chains.conversation.memory import (
    ConversationSummaryMemory,
)

# Summary-style memory: it is given the LLM so it can condense the conversation
# (per the LangChain docs). Rebinds the notebook-wide `memory` name; the
# ConversationChain cell uses whichever memory cell was run last.
memory = ConversationSummaryMemory(
    llm=llm,
)

# BufferMemory

In [None]:
from langchain.chains.conversation.memory import (
    ConversationBufferMemory,
)

# Buffer memory built with its default settings. Rebinds the notebook-wide
# `memory` name; the ConversationChain cell uses whichever memory cell was run last.
memory = ConversationBufferMemory()

# BufferWindowMemory

In [6]:
from langchain.chains.conversation.memory import (
    ConversationBufferWindowMemory,
)

# Windowed buffer memory: k=2 keeps only the last 2 iterations of the
# conversation. Rebinds the notebook-wide `memory` name; the ConversationChain
# cell uses whichever memory cell was run last.
memory = ConversationBufferWindowMemory(
    k=2,
)

# ConversationSummaryBufferMemory

In [3]:
from langchain.chains.conversation.memory import ConversationSummaryBufferMemory

# Summary + buffer memory: it is given the LLM (used for summarising, per the
# LangChain docs) and a token budget for the turns that are kept verbatim.
#
# max_token_limit = 40 keeps only about the last 40 tokens of raw conversation.
# The field name is singular (`max_token_limit`); a misspelled keyword is not
# applied, which leaves the library default limit in force. This class has no
# `k` window argument — trimming is driven by the token budget alone.
#
# Rebinds the notebook-wide `memory` name; the ConversationChain cell uses
# whichever memory cell was run last.
memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=40)

In [12]:
from langchain.chains import ConversationChain

# Prompt text sent to the model on every turn. `{history}` is filled from the
# chain's memory and `{input}` with the new user message.
# NOTE(review): the prompt labels the speakers "Friend" / "AI Assistant"; the
# memory object may label past turns with its own prefixes — confirm whether
# the two are meant to match.
template = """
        [INST]
        <SYS_PROMPT>
            You are a knowledgeable assistant using GPT technology, skilled across many topics. Provide clear, informative, and relevant responses to user queries, engaging professionally and asking for clarification when needed.

            Do not offer personal opinions or unverified information. If unsure, admit it. Avoid unsafe, disrespectful, or sensitive content, adhering to guidelines and privacy without engaging in real-time updates or transactions.
            
            Respond to user preferences for brevity or detail, and do not provide additional information without user prompts.
        </SYS_PROMPT>
        Current conversation:
        {history}
        Friend: {input}
        AI Assistant:
        [/INST]
        """

# Wrap the raw text in a PromptTemplate that declares its two placeholders.
prompt = PromptTemplate(
    input_variables=["history", "input"],
    template=template,
)

# Assemble the chain from whichever `llm` and `memory` are currently bound in
# the notebook; verbose=True echoes the formatted prompt on each call.
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    prompt=prompt,
    verbose=True,
)

In [13]:
# First turn: the user introduces themselves. Because the chain was built with
# verbose=True, the fully formatted prompt is echoed before the reply.
conversation.predict(input="Hello, I'm Sam")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
        [INST]
        <SYS_PROMPT>
            You are a knowledgeable assistant using GPT technology, skilled across many topics. Provide clear, informative, and relevant responses to user queries, engaging professionally and asking for clarification when needed.

            Do not offer personal opinions or unverified information. If unsure, admit it. Avoid unsafe, disrespectful, or sensitive content, adhering to guidelines and privacy without engaging in real-time updates or transactions.
            
            Respond to user preferences for brevity or detail, and do not provide additional information without user prompts.
        </SYS_PROMPT>
        Current conversation:
        
        Friend: Hello, I'm Sam
        AI Assistant:
        [/INST]
        [0m

[1m> Finished chain.[0m


"\n        Hi there! It's great to meet you, Sam. If you have any questions or need assistance with anything, feel free to ask and I will be more than happy to help."

In [None]:
# Follow-up turn sent through the same chain, so the memory attached to it is
# consulted again.
conversation.predict(input="can you help me with some customer support?")

In [None]:
# Another turn on the same chain, adding more conversation for the memory to hold.
conversation.predict(input="My tv is not working")

In [None]:
# Inspect what the chain's memory object currently exposes as its `buffer`.
print(conversation.memory.buffer)

In [None]:
# Recall check: asks for the name given in the first turn ("Sam") to see
# whether the selected memory still carries it.
conversation.predict(input="Thank you for your help, can you remember what's my name?")