In [1]:
# | default_exp llm/qna

In [2]:
# | exporti
from langchain_openai import ChatOpenAI

from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from langchain.chains import ConversationChain

from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
)
from langchain.memory import ChatMessageHistory, ConversationBufferMemory

from IPython.display import clear_output
from langchain_core.output_parsers import StrOutputParser

from langchain.memory import ConversationBufferWindowMemory

In [3]:
# |hide
import os

IP_ADDRESS = os.getenv("IP_ADDRESS")
assert IP_ADDRESS

# LLM Declaration

In [4]:
# | export
import os

# Resolve the server address inside this exported cell: the earlier lookup of
# IP_ADDRESS lives in a `# |hide` cell without an export directive, so it is
# not written to the generated module and the name would be undefined there.
IP_ADDRESS = os.getenv("IP_ADDRESS")
if not IP_ADDRESS:
    raise RuntimeError(
        "Set the IP_ADDRESS environment variable to the host of the "
        "OpenAI-compatible server before importing this module."
    )

# Chat model shared by the helpers in this notebook. It targets the
# OpenAI-compatible endpoint served on port 1234 of IP_ADDRESS.
llm = ChatOpenAI(
    openai_api_base=f"http://{IP_ADDRESS}:1234/v1/",
    # Placeholder value -- presumably the local server ignores the key; confirm.
    openai_api_key="not-needed",
    model="local_model",
    temperature=0.7,
)

# Basic Prompt Template with memory

In [5]:
# Prompt text for the basic conversation chain. It exposes two variables:
# {history} (the transcript so far) and {input} (the latest human message).
conversation_template = """The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.  Do not generate tokens for the human.

Current conversation:
{history}

H: {input}
AI:"""

prompt = PromptTemplate.from_template(conversation_template)

In [6]:
# Conversation chain that answers with `llm` using `prompt` and keeps the
# whole exchange in a buffer memory.
# The memory's ai_prefix labels the stored AI turns inside {history}; it has to
# match the "AI:" cue the prompt ends with, otherwise the transcript shows
# "AI Assistant:" turns while the model is asked to continue after "AI:".
conversation = ConversationChain(
    prompt=prompt,
    llm=llm,
    verbose=True,
    memory=ConversationBufferMemory(ai_prefix="AI"),
)

In [7]:
# Smoke test: send a greeting through the chain under its `input` key.
conversation.invoke({"input": "Hi!"})



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. 
If the AI does not know the answer to a question, it truthfully says it does not know.  Do not generate tokens for the human.

Current conversation:


Human: Hi!
AI:[0m


KeyboardInterrupt: 

In [None]:
def generate_prompt(
    system_prompt="You are a nice chatbot having a conversation with a human.",
):
    """Build the conversation prompt template, opening with ``system_prompt``.

    Args:
        system_prompt: Instruction text placed on the first line of the
            template. It is inserted as literal text: any braces it contains
            are escaped so they are not parsed as template variables.

    Returns:
        The ``PromptTemplate`` built from the text below; its variables are
        ``chat_history`` and ``input``.
    """
    # Double the braces so the variable set stays exactly {chat_history, input}
    # no matter what the caller's system prompt contains.
    escaped_system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")

    return PromptTemplate.from_template(
        f"""{escaped_system_prompt}
        The is the interaction between a machine and a human.
        The machine (AI)  provides a lot of specific details using its context
        the machine says I don't know if it does not know the answer

        Current conversation:
        {{chat_history}}
        Friend: {{input}}
        AI:"""
    )

In [None]:
# The system prompt is an argument of generate_prompt, not a variable of the
# template it returns, so it is passed there instead of to format().
prompt = generate_prompt(
    system_prompt="You are a nice chatbot having a conversation with a human."
)

# The template's variables are chat_history and input only (it has no
# {context} slot). An empty history is the empty string; a list would be
# rendered into the prompt as the literal text "[]".
print(
    prompt.format(
        chat_history="",
        input="what is today's date?",
    )
)

In [None]:
# |export
# Module-level buffer memory used by the chain factory below. The transcript
# is stored under the "chat_history" key and the user's turn is read from the
# "input" key.
conv_memory = ConversationBufferMemory(
    input_key="input",
    memory_key="chat_history",
)

In [None]:
# |export


def create_chain_with_memory_and_no_context(
    llm=llm,
    system_prompt="You are a succinct, but helpful QandA Bot",
    memory=None,
    verbose=True,
) -> ConversationChain:
    """Create a ``ConversationChain`` that remembers the dialogue.

    Args:
        llm: Chat model that produces the replies. Defaults to the
            module-level ``llm``.
        system_prompt: Passed to ``generate_prompt`` to build the prompt.
        memory: Conversation memory for this chain. When omitted, the shared
            module-level ``conv_memory`` is used, which means every chain
            created without an explicit memory sees the same transcript.
            Pass a separate memory to keep conversations isolated; it
            presumably needs the same ``chat_history`` / ``input`` keys as
            ``conv_memory`` to fit the prompt -- confirm against the chain's
            validation.
        verbose: Forwarded to ``ConversationChain``.

    Returns:
        The configured ``ConversationChain``.
    """
    if memory is None:
        memory = conv_memory

    return ConversationChain(
        prompt=generate_prompt(system_prompt),
        llm=llm,
        verbose=verbose,
        memory=memory,
    )

In [None]:
chain = create_chain_with_memory_and_no_context(llm=llm)

# stream() hands back a lazy generator: nothing is sent to the model until it
# is iterated, so consume it here instead of leaving the generator object as
# the cell's output.
for chunk in chain.stream("hi!"):
    print(chunk)

In [None]:
# Second turn on the same chain, passed under its `input` key.
chain.invoke({"input": "how many cards in a deck"})

In [None]:
# Prompt text with four variables: {system_prompt}, {context},
# {chat_history} and {input}.
context_template = """{system_prompt}

The following story snippets describe events in your life.
----
{context}
----

Current conversation:
----
{chat_history}
----

Friend: {input}

Response:"""

prompt = PromptTemplate.from_template(context_template)

In [None]:
# conv_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input")

# retriever = FAISS(
#     OpenAIEmbeddings().embed_query,
#     faiss.IndexFlatL2(1536),  # Dimensions of the OpenAIEmbeddings
#     InMemoryDocstore({}),
#     {},
# )

# context_memory = ConversationVectorStoreRetrieverMemory(
#     retriever=retriever.as_retriever(search_kwargs=dict(k=10)),
#     memory_key="context",
#     output_prefix="AI",
#     blacklist=["chat_history"],
# )

In [None]:
# |export


def create_chain(
    llm, user_input, system_prompt="You are a succinct, but helpful QandA Bot"
):
    """Compose a chat prompt and pipe it into ``llm``.

    The prompt is made of three parts, in order: the system message, the
    earlier turns supplied at call time under ``messages_history``, and
    ``user_input`` as the closing human message.

    Args:
        llm: Chat model placed after the prompt in the pipeline.
        user_input: Text of the human message that ends the prompt.
        system_prompt: Text of the system message that opens the prompt.

    Returns:
        The runnable produced by ``prompt | llm``. Invoke or stream it with
        ``{"messages_history": [...]}``.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            # Concrete messages rather than templates, so any braces in the
            # caller's text are kept literally.
            SystemMessage(content=system_prompt),
            MessagesPlaceholder(variable_name="messages_history"),
            HumanMessage(content=user_input),
        ]
    )

    return prompt | llm

In [None]:
# `generate_chain_for_string` is not defined anywhere in this notebook, so the
# original call failed with a NameError on a fresh kernel. `create_chain` is
# the prompt-and-model builder defined in the cell above.
create_chain(llm=llm, user_input="hello world")

In [None]:
# | export

from typing import Any
from queue import Queue, Empty
from langchain.llms import LlamaCpp
from langchain.callbacks.base import BaseCallbackHandler
from langchain.prompts import PromptTemplate
from threading import Thread


def generate_chain(
    llm,
    user_content,
    chat_history=None,
    system_prompt=None,
) -> "ChatMessageHistory":
    """Record ``user_content`` as the next human turn and return the history.

    Args:
        llm: Accepted for interface compatibility; not used by this function.
        user_content: Text of the user's message.
        chat_history: History to append to (mutated in place). A new
            ``ChatMessageHistory`` is created when this is ``None``.
        system_prompt: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The history object, now ending with the new user message. Despite the
        function's name, no chain is built and no model is called.
    """
    # Compare against None explicitly: `chat_history or ...` would discard a
    # caller-supplied history that happens to be falsy, and the caller would
    # never see the message that was added.
    if chat_history is None:
        chat_history = ChatMessageHistory()

    chat_history.add_user_message(user_content)

    return chat_history

    # chain = generate_chain_for_string(system_prompt=system_prompt, llm=llm)

    # return llm(
    #     chain.stream({"messages": chat_history.messages} )
    # )

In [None]:
# Quick check: with no history supplied, the result holds just this message.
generate_chain(llm=llm, user_content="hello world")

In [None]:
# | export
class MemoryManager:
    """Keeps a running chat transcript and asks the model for each next reply.

    Every call to ``add_user_message`` appends the user's turn, streams a
    reply from ``llm`` given the system prompt plus the whole transcript, and
    appends that reply to the transcript as well.
    """

    system_prompt: str  # text of the system message sent on every turn
    chat_history = None  # replaced by a ChatMessageHistory in __init__
    llm = None  # chat model piped after the prompt; set in __init__

    def __init__(self, system_prompt, llm):
        """Store the model and system prompt and start an empty transcript."""
        self.llm = llm
        self.system_prompt = system_prompt

        self.chat_history = ChatMessageHistory()

    def add_user_message(self, content, is_print_stream: bool = False):
        """Add a user turn, generate the AI reply, record it and return it.

        Args:
            content: Text of the user's message.
            is_print_stream: When true, each chunk is printed as it arrives.

        Returns:
            The complete reply text with surrounding whitespace stripped.
        """
        self.chat_history.add_user_message(content)
        stream = self._get_ai_streaming_response()

        # Both branches consume the stream, so the reply is always defined.
        if is_print_stream:
            response = self._print_stream(stream)
        else:
            response = self._join_chunks(stream)

        self.chat_history.add_ai_message(response)
        return response

    def _generate_promptchain(self):
        """Build the ``prompt | llm`` pipeline: system message, then the transcript."""
        prompt = ChatPromptTemplate.from_messages(
            [
                # A concrete message, so braces in the system prompt stay literal.
                SystemMessage(content=self.system_prompt),
                MessagesPlaceholder(variable_name="messages"),
            ]
        )
        return prompt | self.llm

    def _get_ai_streaming_response(self):
        """Start streaming a reply to the current transcript."""
        chain = self._generate_promptchain()

        return chain.stream({"messages": self.chat_history.messages})

    @staticmethod
    def _join_chunks(stream):
        """Consume ``stream`` silently and return the joined, stripped text."""
        return "".join(chunk.content for chunk in stream).strip()

    @staticmethod
    def _print_stream(stream):
        """Print each chunk's text as it arrives and return the full reply."""
        pieces = []
        for chunk in stream:
            print(chunk.content, end="|", flush=True)
            pieces.append(chunk.content)

        # Only after the whole stream has been consumed: drop the incremental
        # output and hand back the assembled text.
        clear_output(wait=True)
        return "".join(pieces).strip()

In [None]:
# Start a conversation whose system prompt turns the model into a translator.
mm = MemoryManager(llm=llm, system_prompt="You are a translator.  Be very succinct")

translation_request = (
    "Translate this sentence from English to French: I love programming."
)
print(mm.add_user_message(translation_request))

In [None]:
# Follow-up turn on the same manager, i.e. on top of the transcript so far.
follow_up_result = mm.add_user_message("repeat what you just said?")
print(follow_up_result)

In [None]:
# | hide
# Run nbdev's export step to regenerate the Python modules from the notebooks.
from nbdev import nbdev_export

nbdev_export()

In [None]:
# !jupyter nbconvert --to python llm_qna.ipynb --output ./_test/llm.py