In [35]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import BSHTMLLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch

from config import Paths, openai_api_key

In [14]:
# Location of the source HTML, relative to the project's data directory.
html_relpath = "unzipped/Marcus_Aurelius_Antoninus_-_His_Meditations_concerning_himselfe/index.html"
file_path = str(Paths.data / html_relpath)

# Load the HTML file through LangChain's BeautifulSoup-based loader; `data`
# is what the later splitting cell consumes.
loader = BSHTMLLoader(file_path)
data = loader.load()

In [27]:
# Chunking and indexing settings, collected in one place so they are easy to
# find and tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0
MAX_INDEXED_CHUNKS = 100
ELASTICSEARCH_URL = "http://localhost:9200"
ELASTICSEARCH_INDEX = "elastic-index"

# Split the loaded document(s) into chunks of at most CHUNK_SIZE characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
documents = text_splitter.split_documents(data)

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Only the first MAX_INDEXED_CHUNKS chunks are embedded and stored, so
# retrieval can only surface passages from that leading slice of the text.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# index again -- confirm, and clear the index first if duplicates matter.
db = ElasticVectorSearch.from_documents(
    documents[:MAX_INDEXED_CHUNKS],
    embeddings,
    elasticsearch_url=ELASTICSEARCH_URL,
    index_name=ELASTICSEARCH_INDEX,
)

In [46]:
# Prompt used by the QA chain. It has two placeholders: {context} and
# {question}, both declared in `input_variables` below. The instruction asks
# the model to admit when it does not know rather than invent an answer.
# `.strip()` drops the trailing newline left by the closing triple quote.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
""".strip()

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [40]:
# Build the retrieval QA chain: the vector store's retriever supplies
# passages, and the "stuff" chain type places them into the custom prompt
# passed through `chain_type_kwargs`.
qa = RetrievalQA.from_chain_type(
    # temperature=0 keeps answers as repeatable as the model allows. The API
    # key is passed explicitly, as it is for OpenAIEmbeddings, so the chat
    # model does not depend on OPENAI_API_KEY being set in the environment.
    llm=ChatOpenAI(temperature=0, openai_api_key=openai_api_key),
    chain_type="stuff",
    retriever=db.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

In [48]:
# Ask a single question; the answer string returned by `run` is the cell's
# displayed value.
# `Chain.run` forwards keyword arguments as chain *inputs*, so the previous
# `verbose=True` was passed along as an unused extra input rather than
# switching on logging. To trace the chain, build it with `verbose=True`
# (e.g. in `RetrievalQA.from_chain_type(...)`) instead.
qa.run(query="How should you manage envy?")

"One should not be affected by the envy of others and should focus on their own happiness, as every man's happiness depends on himself. It is important to understand the nature of good and bad and not be distracted by external events or the opinions of others. One should also recognize the shared humanity and reason with others, even those who may exhibit negative qualities. To manage envy, one should meditate on the good qualities of those around them and focus on their own virtues."