In [13]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import os 
from dotenv import load_dotenv
from time import time
import warnings
warnings.filterwarnings('ignore')

In [15]:
loader = TextLoader('The Great Gatsyby.txt')
documents = loader.load()

In [16]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [17]:
len(texts)

3971

In [19]:
texts[10]

Document(page_content='This eBook is made available at no cost and with almost no restrictions', metadata={'source': 'The Great Gatsyby.txt'})

In [20]:
_ = load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

llm=HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta", 
    model_kwargs={"temperature":0.2, "max_length":256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN
    )

In [21]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="BAAI/bge-base-en-v1.5",
                                                      model_kwargs={"device": "cuda"})

load INSTRUCTOR_Transformer
max_seq_length  512


In [22]:
%%time
persist_directory = 'db_HuggingFace'

embedding = instructor_embeddings

vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

CPU times: total: 9.28 s
Wall time: 16 s


In [23]:
retriever = vectordb.as_retriever(search_kwargs={"k": 2})
docs = retriever.get_relevant_documents("The story of The Great Gatsyby")

In [25]:
len(docs)

2

In [26]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)

In [27]:
def process_llm_response(qa_chain, query):
    print(f"Query: {query}\n")
    time_1 = time()
    llm_response = qa_chain(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print(f"\nResult:", llm_response['result'])
    print(f"\nmetadata:", llm_response['source_documents'][0].metadata)

In [28]:
query = "Who is Gatsyby?"
process_llm_response(qa_chain, query)

Query: Who is Gatsyby?

Inference time: 3.959 sec.

Result:  Based on the given context, it is not explicitly stated who Gatsby is, but it seems that the person being asked wants to meet or speak with Gatsby about something important. Without further context, it is unclear who Gatsby is or what his role is in the conversation.

metadata: {'source': 'The Great Gatsyby.txt'}


In [30]:
query = "What is the ending of the Great Gatsyby?"
process_llm_response(qa_chain, query)

Query: What is the ending of the Great Gatsyby?

Inference time: 0.552 sec.

Result:  In the final scene of the novel, Nick Carraway, the narrator, watches as Gatsby's body is taken away in a hearse. Gatsby's dream of recapturing the past and his love for Daisy have come to an end, and he has been killed in a violent confrontation. Nick reflects on the meaning of Gatsby's life and the illusions that have consumed the characters in the novel. The novel ends with Nick's decision to leave the East Egg and the world of the wealthy elite, symbolizing his rejection of the superficial values and corruption that he has witnessed.

metadata: {'source': 'The Great Gatsyby.txt'}


In [31]:
query = "What is the main story of the Great Gatsyby?"
process_llm_response(qa_chain, query)

Query: What is the main story of the Great Gatsyby?

Inference time: 5.55 sec.

Result:  The Great Gatsby is a novel by F. Scott Fitzgerald that tells the story of Jay Gatsby, a wealthy and mysterious man who throws extravagant parties in the hopes of rekindling a past romance with Daisy Buchanan, a married woman. The story is set in the 1920s and explores themes of wealth, social class, and the American Dream.

metadata: {'source': 'The Great Gatsyby.txt'}


In [32]:
query = "How many characters in The Great Gastsyby?"
process_llm_response(qa_chain, query)

Query: How many characters in The Great Gastsyby?

Inference time: 5.57 sec.

Result:  The novel The Great Gatsby, written by F. Scott Fitzgerald, features a cast of several main characters, including Jay Gatsby, Nick Carraway, Daisy Buchanan, Tom Buchanan, and Myrtle Wilson. There are also supporting characters such as George Wilson, Catherine the maid, and Owl Eyes. Overall, the novel has a diverse and complex group of characters that contribute to its themes and plot.

metadata: {'source': 'The Great Gatsyby.txt'}


In [33]:
query = "What is the relationship between Nick Carraway and Jordan Baker?"
process_llm_response(qa_chain, query)

Query: What is the relationship between Nick Carraway and Jordan Baker?

Inference time: 3.47 sec.

Result:  Based on the context provided, it is unclear what the exact nature of the relationship between Nick Carraway and Jordan Baker is. All we know is that they came out together and were sitting at a table with a man. Without further context, it is impossible to determine whether they are friends, acquaintances, or something more.

metadata: {'source': 'The Great Gatsyby.txt'}


In [34]:
query = "What is the relationship between Nick Carraway and Gatsyby?"
process_llm_response(qa_chain, query)

Query: What is the relationship between Nick Carraway and Gatsyby?

Inference time: 3.19 sec.

Result:  Nick Carraway is the narrator of the novel and he becomes friends with Gatsby. Their relationship starts out as acquaintances, but as Nick learns more about Gatsby's past and present, their friendship grows stronger. However, as the story unfolds, the true nature of their relationship becomes more complicated.

metadata: {'source': 'The Great Gatsyby.txt'}


In [36]:
query = "What is the relationship between Nick and Daisy Buchanan?"
process_llm_response(qa_chain, query)

Query: What is the relationship between Nick and Daisy Buchanan?

Inference time: 3.409 sec.

Result:  Nick refers to Daisy as his second cousin once removed, indicating that they are related but not very closely. He also mentions that Daisy invited Tom, whom she knew before Nick and Daisy were married, further suggesting that Nick and Daisy have a familial relationship. However, the exact nature of their relationship beyond being second cousins is not explicitly stated in these pieces of context.

metadata: {'source': 'The Great Gatsyby.txt'}


In [37]:
query = "How does Nick meet Gatsyby?"
process_llm_response(qa_chain, query)

Query: How does Nick meet Gatsyby?

Inference time: 2.524 sec.

Result:  Nick meets Gatsby through his friend, Jordan Baker, who mentions him in conversation. Later, Nick receives an invitation from Gatsby to attend one of his extravagant parties. It is at this party that Nick and Gatsby first meet in person.

metadata: {'source': 'The Great Gatsyby.txt'}


In [39]:
query = "Describe ?"
process_llm_response(qa_chain, query)

Query: What does Gatsyby pursue in his life?

Inference time: 5.249 sec.

Result:  Based on the context provided, it is unclear what Gatsby pursues in his life. The first statement seems to suggest that the speaker wants information from Gatsby, but it does not provide any insight into what Gatsby's life is focused on. The second statement is ambiguous and could mean that Gatsby's name is associated with something in the speaker's office, but again, it does not provide any information about Gatsby's pursuits. Without further context, it is impossible to determine what Gatsby is pursuing in his life.

metadata: {'source': 'The Great Gatsyby.txt'}


In [22]:
query = "Where did the study take place?"
process_llm_response(qa_chain, query)

Query: Where did the study take place?

Inference time: 0.461 sec.

Result:  The study was supported by the Seed Funding Support for Thesis Research 2019–20, Faculty of Social Sciences, which suggests that the study may have taken place in a university or academic setting associated with the Faculty of Social Sciences. However, without further information, it is unclear where the study actually took place.

metadata: {'page': 9, 'source': 'PDF_file\\s41598-023-47912-0.pdf'}


In [23]:
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

('similarity', <langchain.vectorstores.chroma.Chroma at 0x19e73984110>)

In [24]:
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:
