# Read OpenAI API key

In [None]:
import os
from dotenv import load_dotenv

# Load the variables defined in the local secrets file into the process
# environment, then read the key (None when the variable is not defined).
load_dotenv(dotenv_path="secrets.env")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Call to OpenAI API

**Advantages and disadvantages of using OpenAI API:**

**Pros**

1. **Easy to use:** The API is well-documented and easy to integrate into various applications.
2. **Versatility**: The API can be used for a wide range of tasks, including text generation, translation, image generation, code explanation, and more.
3. **Continuous Improvement:** OpenAI continually updates and improves its models, so your chatbot can benefit from cutting-edge advancements in AI without needing significant internal resources.

**Cons**

1. **Cost:** Using the API extensively can get expensive. You'll be charged based on the number of tokens or images processed.
2. **No customization options:** You can't control or fine-tune how models are trained. You're limited to the models provided by OpenAI.
3. **Data Privacy Concerns:** Using a third-party API may raise concerns about data privacy and security, particularly in industries that handle sensitive customer information.
4. **Dependence on a third party service:** Relying on an external API means you're dependent on its uptime, updates, and potential changes in pricing or terms of service.

In [None]:
from openai import OpenAI

# Send one user message straight through the OpenAI SDK and print the reply text.
client = OpenAI(api_key=OPENAI_API_KEY)

user_question = {"role": "user", "content": "What is AI?"}
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[user_question],
)

first_choice = chat_completion.choices[0]
print(first_choice.message.content)

If you want to learn more about the OpenAI API, check this [link](https://platform.openai.com/docs/overview).

# Do I need to write utility classes on my own? - Utilise LangChain

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

# Same question as before, this time sent through the LangChain chat-model wrapper.
model = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

question = HumanMessage(content="What is AI?")
response = model.invoke([question])

print(response.content)

In [None]:
# Ask a follow-up in a separate call; only this single message is sent.
follow_up = HumanMessage(content="It is really interesting what you have written. Can you say more about it?")
response = model.invoke([follow_up])
print(response.content)

As you can see - the model by itself doesn't remember the history of messages. We need to pass all messages to get the desired answers.

In [None]:
from langchain_core.messages import AIMessage

# Replay the whole exchange by hand: the question, the earlier answer, then the follow-up.
conversation = [
    HumanMessage(content="What is AI?"),
    AIMessage(
        content="AI, or artificial intelligence, refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI technology aims to mimic human cognitive functions and improve efficiency and accuracy in tasks that were previously only achievable by humans."
    ),
    HumanMessage(content="It is really interesting what you have written. Can you say more about it?"),
]
response = model.invoke(conversation)

print(response.content)

More on integrating with OpenAI using Langchain [here](https://python.langchain.com/v0.2/docs/integrations/chat/openai/).

# Message History

In [None]:
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.runnables.history import RunnableWithMessageHistory

# Maps a session id to the chat history object that belongs to it.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored under ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = InMemoryChatMessageHistory()
        return history


# Wrap the model so that each call looks up its history via get_session_history.
chain_with_history = RunnableWithMessageHistory(model, get_session_history)

In [None]:
# The session id in this config selects which stored history the call uses.
config = {"configurable": {"session_id": "session_1"}}

opening_question = HumanMessage(content="What is AI?")
response = chain_with_history.invoke([opening_question], config=config)

print(response.content)

In [None]:
# Reuse the same config, and therefore the same session, for the follow-up.
follow_up = HumanMessage(content="It is really interesting what you have written. Can you say more about it?")
response = chain_with_history.invoke([follow_up], config=config)

print(response.content)

After a session_id change, another user will not have access to the previous conversation history.

In [None]:
# Switch to a different session id before asking about the earlier conversation.
config = {"configurable": {"session_id": "session_2"}}

recall_question = HumanMessage(content="What was I asking about earlier?")
response = chain_with_history.invoke([recall_question], config=config)

print(response.content)

Read more about message history with LangChain [here](https://python.langchain.com/v0.2/docs/how_to/message_history/).

## Different types of memories

- all messages (presented earlier)
- trim messages
    - (+) helps to reduce context
    - (-) older parts of conversation will be discarded and not passed to LLM
- summarize conversation
    - (+) helps to reduce context
    - (-) some details may be lost during summarization, but older parts of the conversation are preserved better than with trimming

### Trim messages

In [None]:
from langchain_core.runnables import RunnablePassthrough


# Trim to four messages
def trim_messages(chain_input):
    """Keep only the four most recent messages of the current session's history.

    The session is selected through the module-level ``config``; the chain
    input itself is not inspected.

    Args:
        chain_input: Value handed over by the chain; unused.

    Returns:
        True when older messages were dropped, False when the session has no
        stored history or the history holds four messages or fewer.
    """
    max_messages = 4
    current_store = store.get(config["configurable"]["session_id"])
    if current_store is None or len(current_store.messages) <= max_messages:
        return False

    # Copy the tail BEFORE clearing: once the history is cleared there is
    # nothing left to slice, so the whole conversation would be wiped
    # instead of being trimmed.
    recent_messages = list(current_store.messages[-max_messages:])
    current_store.clear()

    for message in recent_messages:
        current_store.add_message(message)

    return True


# Run the trimming step first, then pass the input on to the history-aware chain.
chain_with_trimming = RunnablePassthrough.assign(messages_trimmed=trim_messages) | chain_with_history

trimming_questions = ["My name is Bob.", "What is my name?", "What is my name?"]
for turn, question in enumerate(trimming_questions, start=1):
    response = chain_with_trimming.invoke({"input": question}, config=config)
    print(f"{turn}: {response.content}")

### Summarize conversation

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


def summarize_messages(chain_input):
    """Collapse the current session's history into one model-written summary message.

    The session is selected through the module-level ``config``; ``chain_input``
    is not inspected. Returns False without touching anything when the history
    is empty; otherwise replaces every stored message with the summary and
    returns True.
    """
    session_id = config["configurable"]["session_id"]
    history = store[session_id]
    if not history.messages:
        return False

    instruction = (
        "user",
        "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
    )
    summarization_prompt = ChatPromptTemplate.from_messages(
        [MessagesPlaceholder(variable_name="chat_history"), instruction]
    )

    # Feed the stored messages through prompt -> model to obtain the summary.
    summary_message = (summarization_prompt | model).invoke({"chat_history": history.messages})

    history.clear()
    history.add_message(summary_message)
    return True


# Run the summarization step first, then pass the input on to the history-aware chain.
chain_with_summarization = RunnablePassthrough.assign(messages_summarized=summarize_messages) | chain_with_history

summarization_questions = [
    "My name is Bob.",
    "With what name did I introduce myself?",
    "With what name did I introduce myself?",
]
for turn, question in enumerate(summarization_questions, start=1):
    response = chain_with_summarization.invoke({"input": question}, config=config)
    print(f"{turn}: {response.content}")

# Show what is left in the stored history after the last call.
print(store[config["configurable"]["session_id"]].messages)

# Prompting techniques

- Use delimiters to clearly indicate distinct parts of the input
- Ask for a structured output
- Ask the model to check whether conditions are satisfied
- "Few-shot" prompting
- Specify the steps required to complete a task

## Use delimiters to clearly indicate distinct parts of the input

In [None]:
# Passage the model is asked to summarize.
text = """
You should express what you want a model to do by
providing instructions that are as clear and
specific as you can possibly make them.
This will guide the model towards the desired output,
and reduce the chances of receiving irrelevant
or incorrect responses. Don't confuse writing a
clear prompt with writing a short prompt.
In many cases, longer prompts provide more clarity
and context for the model, which can lead to
more detailed and relevant outputs.
"""

# Triple backticks separate the passage from the instruction itself.
prompt = f"""
Summarize the text delimited by triple backticks \
into a single sentence.
```{text}```
"""

delimited_request = HumanMessage(content=prompt)
response = model.invoke([delimited_request])
print(response.content)

## Ask for a structured output

In [None]:
# Name both the fields and the serialization format expected in the reply.
prompt = """
Generate a list of three made-up book titles along with their number of pages (int), release date (yyyy-mm-dd) and if cover is hard or not (boolean).
Provide them in a yaml structured output format where title name is a key with three elemnts: number of pages, release date and if cover type.
"""

structured_request = HumanMessage(content=prompt)
response = model.invoke([structured_request])
print(response.content)

### Task - try to rewrite this prompt to get the results in JSON format

In [None]:
# Exercise cell: replace the placeholder text with your own prompt.
prompt = """
Place for your prompt
"""

task_request = HumanMessage(content=prompt)
response = model.invoke([task_request])
print(response.content)

## Ask the model to check whether conditions are satisfied

In [None]:
# Input that does contain a sequence of instructions.
text_1 = """
Preheat the oven to 350°F (175°C).
Mix 1 cup of softened butter, 1 cup of sugar, 2 cups of flour, and 1 tsp vanilla extract until combined.
Scoop spoonfuls onto a baking sheet and bake for 10-12 minutes, until golden brown.
Let cool, and enjoy!
"""

# The prompt names the condition to check and what to answer when it is not met.
prompt = f"""
You will be provided with text delimited by triple quotes.
If it contains a sequence of instructions,
re-write those instructions in the following format:

Step 1 - ...
Step 2 - …
…
Step N - …

If the text does not contain a sequence of instructions,
then simply write \"No steps provided.\"

\"\"\"{text_1}\"\"\"
"""

steps_request = HumanMessage(content=prompt)
response = model.invoke([steps_request])
print(response.content)

In [None]:
# Input without any instructions, paired with the same prompt as for text_1.
text_2 = """
A short story is a piece of prose fiction. It can typically be read
in a single sitting and focuses on a self-containedincident or series
of linked incidents, with the intent of evoking a single effect or mood.
"""

prompt = f"""
You will be provided with text delimited by triple quotes.
If it contains a sequence of instructions,
re-write those instructions in the following format:

Step 1 - ...
Step 2 - …
…
Step N - …

If the text does not contain a sequence of instructions,
then simply write \"No steps provided.\"

\"\"\"{text_2}\"\"\"
"""

no_steps_request = HumanMessage(content=prompt)
response = model.invoke([no_steps_request])
print(response.content)

## "Few-shot" prompting

In [None]:
# One worked child/grandparent exchange sets the style for the next answer.
prompt = """
Your task is to answer in a consistent style.

<child>: Teach me about patience.

<grandparent>: The river that carves the deepest
valley flows from a modest spring; the
grandest symphony originates from a single note;
the most intricate tapestry begins with a solitary thread.

<child>: Teach me about perseverance.
"""

few_shot_request = HumanMessage(content=prompt)
response = model.invoke([few_shot_request])
print(response.content)

## Specify the steps required to complete a task

In [None]:
# Recipe used as the input text for the multi-step prompt below.
text = """
Preheat the oven to 350°F (175°C).
Mix 1 cup of softened butter, 1 cup of sugar, 2 cups of flour, and 1 tsp vanilla extract until combined.
Scoop spoonfuls onto a baking sheet and bake for 10-12 minutes, until golden brown.
Let cool, and enjoy!
"""

# The task is broken into numbered steps; ### marks where the input text starts and ends.
prompt = f"""
I will give you text delimited with ###.
Perform the following actions:

1 - If it contains the instruction, rewrite them in the following format:

A - <instruction 1>
B - <instruction 2>
C - <instruction 3>

2 - Translate each instruction to one of the following languages: spanish, french, german, polish, norwegian.
3 - Output a list of objects in a json format. Each object in a list is for one translated sentence and contains the following keys:
original_sentence, translated_sentence, translation_language. There should be as many objects as instructions in the previous step.
4 - If it does not contin instructions output json object with a key \"error" and value for it which will be an error message

###{text}###
"""

stepwise_request = HumanMessage(content=prompt)
response = model.invoke([stepwise_request])
print(response.content)

### Task - Write a prompt that generates three different made-up movies and their descriptions. Each movie name and description should be in a different style (poem, pirate-like, William Shakespeare). The output should be an HTML table. Use all of the aforementioned techniques.

In [None]:
# Exercise cell: replace the placeholder text with your own prompt.
prompt = """
Place for your prompt.
"""

task_request = HumanMessage(content=prompt)
response = model.invoke([task_request])
print(response.content)

If you want to learn more about prompting techniques check [this website](https://www.promptingguide.ai/techniques).

# Prompt templates

In [None]:
# System message that sets the assistant's persona.
pirate_persona = (
    "system",
    "You are a helpful assistant and a pirate. Answer all questions to the best of your ability, but remember to use pirate-like english.",
)
prompt = ChatPromptTemplate.from_messages(
    [
        pirate_persona,
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

chain = prompt | model

# The two keys name the prompt variables for the new input and the stored history.
pirate_chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

pirate_question = {"input": "What is AI?"}
response = pirate_chain_with_history.invoke(pirate_question, config=config)
print(response.content)

Langchain prompt templates [guide](https://python.langchain.com/v0.2/docs/concepts/#prompt-templates).

# Build RAG - add vector storage
## Initialize vector storage

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

# Embedding model used by the vector store.
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding-3-large")

# Chroma collection configured with a local persistence directory.
vector_store = Chroma(
    collection_name="shrek_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

## Add documents

Let's start by creating storage where text chunks are quite big...
Creating larger chunks results in more text being passed as context. It may improve the accuracy of our LLM's answer, but it increases the cost of the API call.

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the PDF, split it into overlapping chunks, and add the chunks to the store.
loader = PyPDFLoader("./docs/shrek-script.pdf")
document = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)
chunked_documents = text_splitter.split_documents(document)

vector_store.add_documents(chunked_documents)

## Ask and wait for answers :)

In [None]:
config = {"configurable": {"session_id": "rag_session_1"}}


def format_docs(docs):
    """Join the page content of every document into one string, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# System instructions; {context} is filled with the formatted retrieved documents.
rag_system_message = (
    "system",
    """You are a helpful assistant and a literature specialist. Answer all questions to the best of your ability.
            During answearing use this context from documents: {context}. If you do not know the answear - do not provide it.
            Answear as short as possible, preferably in one sentence""",
)
prompt = ChatPromptTemplate.from_messages(
    [
        rag_system_message,
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

chain = prompt | model

rag_chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)


retriever = vector_store.as_retriever()

# The question feeds both entries: the retriever (for context) and the prompt input.
retrieval_inputs = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}
rag_chain = retrieval_inputs | rag_chain_with_history

response = rag_chain.invoke("What was the name of the princess?", config=config)
print(response.content)

Let's try to make the chunk size smaller to reduce costs by providing fewer tokens in context. As you will see, larger chunks do not always give better results.
You have to find a chunk size that is not too big (we don't want our context to be too big) and that is sufficient for our LLM to get an answer from it.

In [None]:
# Second collection in the same persistence directory, filled with smaller chunks.
vector_store = Chroma(
    collection_name="shrek_collection_small",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
chunked_documents = text_splitter.split_documents(document)
vector_store.add_documents(chunked_documents)

config = {"configurable": {"session_id": "rag_session_2"}}

# Rebuild the retriever and the chain so they read from the new collection.
retriever = vector_store.as_retriever()
small_chunk_inputs = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}
rag_chain = small_chunk_inputs | rag_chain_with_history

response = rag_chain.invoke("What was the name of the princess shrek wanted to resque?", config=config)
print(response.content)

You can find more about using Chroma with Langchain in [docs](https://python.langchain.com/v0.2/docs/integrations/vectorstores/chroma/).

## Vector storage search types
- similarity (default)
- mmr
- similarity_score_threshold

More on this topic [here](https://python.langchain.com/v0.1/docs/modules/data_connection/vectorstores/).

### Task - set up how many documents (3) should be retrieved from our vector storage using "mmr" search type

Try to set how many documents should be retrieved from our vector storage (and passed to our prompt). It is another hyperparameter which, when well tuned, can help us reduce API call costs without losing answer precision. Also, change the search algorithm to "mmr".
[Link to docs](https://python.langchain.com/v0.2/docs/integrations/vectorstores/chroma/#query-directly)

In [None]:
# Place for your code

If you want to learn more about the components used in this section, check out these links:

- [document loaders](https://python.langchain.com/v0.2/docs/how_to/#document-loaders)
- [text splitters](https://python.langchain.com/v0.2/docs/how_to/#text-splitters)
- [embedding models](https://python.langchain.com/v0.2/docs/how_to/#embedding-models)
- [retrievers](https://python.langchain.com/v0.2/docs/how_to/#retrievers)

# Put it all together - Demo with Frontend

In [None]:
import panel as pn

pn.extension()


async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Stream the RAG chain's answer for one chat message.

    Args:
        contents: Text entered by the user; passed to the chain as the question.
        user: Name of the sending user; unused.
        instance: The chat interface that triggered the callback; unused.

    Yields:
        The answer accumulated so far, growing with every streamed chunk.
    """
    message = ""
    # Iterate the async stream: a plain ``for`` over ``stream()`` inside a
    # coroutine blocks the event loop while waiting for each chunk.
    async for response in rag_chain.astream(contents, config=config):
        message += response.content
        yield message


# Use the second RAG session for the chat UI and register the streaming callback.
config = {"configurable": {"session_id": "rag_session_2"}}
chat_interface = pn.chat.ChatInterface(
    callback=callback,
    callback_user="Shrek director",
)
chat_interface.servable()

# Additional task - use HuggingFace model

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from huggingface_hub import login

# Authenticate against the Hugging Face Hub with the token from the environment.
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
login(token=HF_TOKEN)

# Generation settings passed through to the text-generation pipeline.
generation_settings = {
    "max_new_tokens": 512,
    "return_full_text": False,
    "do_sample": True,
    "repetition_penalty": 1.03,
    "temperature": 0.1,
}
llm = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs=generation_settings,
)

# Same prompt, history wrapper and retriever as before, with the model swapped out.
hg_model = ChatHuggingFace(llm=llm)
chain = prompt | hg_model
hg_chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

hg_inputs = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}
rag_chain = hg_inputs | hg_chain_with_history

response = rag_chain.invoke("Who was Shrek's partner when rescuing Fiona?", config=config)
print(response.content)

If you are interested, check more information about integrating with Hugging Face:

- [Integrating with Hugging Face using Langchain](https://python.langchain.com/v0.1/docs/integrations/platforms/huggingface/)
- [Hugging Face docs](https://huggingface.co/docs)