In [None]:
from dotenv import load_dotenv
import os
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from llama_index.memory import ChatMemoryBuffer
from llama_index.llms import OpenAI

In [None]:
# Load environment variables from the .env file one directory up
# (presumably the API credentials used by the LLM calls below; verify).
load_dotenv(dotenv_path="../.env")

# Read the documents from the essay directory and build a vector index over them.
reader = SimpleDirectoryReader(input_dir="../data/paul_graham/")
data = reader.load_data()
index = VectorStoreIndex.from_documents(data)

In [None]:
# "condense_question" mode: condense the conversation so far and the latest
# question into a single condensed question.
chat_engine_condense = index.as_chat_engine(
    verbose=True,
    chat_mode="condense_question",
)


In [None]:
# "context" mode: retrieve the context from the question.

# The retrieved context can take up a large share of the available LLM context,
# so configure a smaller token limit for the chat history.
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

context_system_prompt = (
    "You are a chatbot, able to have normal interactions, as well as talk"
    " about an essay discussing Paul Grahams life."
)
chat_engine_context = index.as_chat_engine(
    chat_mode="context",
    system_prompt=context_system_prompt,
    memory=memory,
)

In [None]:
# "condense_plus_context" mode: chat engine configured with a custom context
# prompt; the "{context_str}" placeholder marks where the relevant documents
# are inserted into the prompt.
#
# NOTE(review): `memory` is the same ChatMemoryBuffer instance that is passed to
# `chat_engine_context`, so both engines share one chat history (and a reset of
# one presumably clears the other) -- confirm this is intended.
chat_engine_condense_and_context = index.as_chat_engine(
    chat_mode="condense_plus_context",
    memory=memory,
    context_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about an essay discussing Paul Grahams life."
        # Leading space keeps the two sentences from fusing into "life.Here".
        " Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
    ),
    verbose=False,
)  # closing parenthesis was missing, which made the whole cell a SyntaxError


In [None]:
# "react" mode: launch an action according to the question.
chat_engine_react = index.as_chat_engine(
    verbose=True,
    chat_mode="react",
)

Launch the chatbot

In [None]:
# Select which of the engines built above the following cells talk to.
# Point `chat_engine` at another engine variable to try a different chat mode.
chat_engine = chat_engine_condense_and_context

In [None]:

# Ask an initial question followed by a follow-up on the same engine,
# printing each answer as it comes back.
for question in (
    "What did Paul Graham do after YC?",
    "What about after that?",
):
    response = chat_engine.chat(question)
    print(response)

To reset the chatbot

In [None]:
# Reset the chatbot, then ask the same follow-up question again.
chat_engine.reset()
followup_question = "What about after that?"
response = chat_engine.chat(followup_question)
print(response)

Streaming the response

In [None]:
# Build a service context around an explicitly configured OpenAI chat model.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
service_context = ServiceContext.from_defaults(llm=llm)

# Reload the documents and rebuild the index with that service context.
# Note: this rebinds `data`, `index` and `chat_engine` from the earlier cells.
data = SimpleDirectoryReader(input_dir="../data/paul_graham/").load_data()
index = VectorStoreIndex.from_documents(data, service_context=service_context)

chat_engine = index.as_chat_engine(verbose=True, chat_mode="condense_question")

# Iterate over the response generator and print each piece as it is yielded,
# without inserting newlines between pieces.
response = chat_engine.stream_chat("What did Paul Graham do after YC?")
for token in response.response_gen:
    print(token, end="")