### Install libraries and download data

In [1]:
!pip install -q llama-index llama-index-llms-openai gradio

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.9/315.9 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.5/142.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m33.4 MB/s[

In [2]:
!mkdir -p 'data/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham_essay.txt'

--2024-05-29 13:34:13--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘data/paul_graham_essay.txt’


2024-05-29 13:34:13 (3.20 MB/s) - ‘data/paul_graham_essay.txt’ saved [75042/75042]



In [4]:
import os
import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.openai import OpenAI

In [5]:
# only for Google Colab; please comment Kaggle part in this case
from google.colab import userdata
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

# only for Kaggle; please comment Google Colab part in this case
#from kaggle_secrets import UserSecretsClient
#user_secrets = UserSecretsClient()
#os.environ["OPENAI_API_KEY"] = user_secrets.get_secret("OPENAI_API_KEY")

## Chatbot with internal knowledge

For each chat interaction:
- first generate a standalone question from conversation context and last message, then
- query the query engine with the condensed question for a response.




In [6]:
llm = OpenAI(model="gpt-3.5-turbo")
data = SimpleDirectoryReader(input_dir="./data/").load_data()
index = VectorStoreIndex.from_documents(data)

In [7]:
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

context_prompt = """
You are a chatbot, able to have normal interactions, as well as talk \n
about an essay discussing Paul Grahams life \n
Here are the relevant documents for the context: \n
{context_str} \n
Instruction: Use the previous chat history, or the context above, to interact and help the user.
"""

chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    llm=llm,
    context_prompt=context_prompt,
    verbose=False,
)

### Chat with your data

In [8]:
response = chat_engine.chat("What did Paul Graham do growing up")
response.response

"Paul Graham had a diverse range of experiences growing up. He was involved in painting and experimenting with new techniques in still life painting. Later on, he delved into the world of web applications, realizing their potential and founding companies like Viaweb and Aspra. Additionally, he explored the realm of programming languages, creating a new dialect of Lisp called Arc. Graham's journey also involved writing essays, working on spam filters, and engaging in various entrepreneurial ventures, ultimately leading to the creation of Y Combinator, an influential startup accelerator."

### Ask follow-up question

In [9]:
response_2 = chat_engine.chat("Can you tell me more?")
response.response

"Paul Graham had a diverse range of experiences growing up. He was involved in painting and experimenting with new techniques in still life painting. Later on, he delved into the world of web applications, realizing their potential and founding companies like Viaweb and Aspra. Additionally, he explored the realm of programming languages, creating a new dialect of Lisp called Arc. Graham's journey also involved writing essays, working on spam filters, and engaging in various entrepreneurial ventures, ultimately leading to the creation of Y Combinator, an influential startup accelerator."

### Use with Gradio

In [12]:
def predict(message, history):
    response = chat_engine.chat(message)
    return response.response

# Launch the Gradio interface with the prediction function
gr.ChatInterface(fn=predict, title="AI Chatbot with custom knowledge", description="Answering questions about Paul Graham essay").launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6f59875242a0de2efe.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




### Reset chat engine

In [13]:
chat_engine.reset()

## Chat Engine - ReAct Agent Mode

ReAct is an agent based chat mode built on top of a query engine over your data.

For each chat interaction, the agent enter a ReAct loop:

- first decide whether to use the query engine tool and come up with appropriate input
- (optional) use the query engine tool and observe its output
- decide whether to repeat or give final response

In [14]:
chat_engine = index.as_chat_engine(chat_mode="react", llm=llm, verbose=True)

In [15]:
response = chat_engine.chat(
    "Use the tool to answer what Graham do in the summer of 1995?"
)
print(response)

Added user message to memory: Use the tool to answer what Graham do in the summer of 1995?
=== Calling Function ===
Calling function: query_engine_tool with args: {"input":"What did Graham do in the summer of 1995?"}
Got output: In the summer of 1995, Graham started working on a new kind of still life painting technique where he would paint one piece in the traditional way, then photograph it, print it on canvas, and use it as the underpainting for a second still life painting.

In the summer of 1995, Graham started working on a new kind of still life painting technique where he would paint one piece in the traditional way, then photograph it, print it on canvas, and use it as the underpainting for a second still life painting.


In [16]:
response = chat_engine.chat("Is there any mention about 1995?")
print(response)

Added user message to memory: Is there any mention about 1995?
=== Calling Function ===
Calling function: query_engine_tool with args: {"input":"Is there any mention about 1995?"}
Got output: No, there is no mention of the year 1995 in the provided context information.

No, there is no mention of the year 1995 in the provided context information.


In [17]:
response = chat_engine.chat("Who is Klaus Iohannis?")
print(response)

Added user message to memory: Who is Klaus Iohannis?
=== Calling Function ===
Calling function: query_engine_tool with args: {"input":"Who is Klaus Iohannis?"}
Got output: I cannot provide an answer to that query as there is no mention or relevance to Klaus Iohannis in the provided context information.

I cannot provide an answer to that query as there is no mention or relevance to Klaus Iohannis in the provided context information.


More examples on https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_condense_plus_context/