# About
Here, I build a Q&A LLM chain that reads the book "The Time Machine" and answers questions about it.

The LLM I used is the `google/flan-t5-large` model (through Hugging Face).

# Settings

In [1]:
# Langchain related 
from langchain.document_loaders import GutenbergLoader
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma


from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

In [7]:
import os
import sys

# Add the parent directory to the module search path so that the `config`
# package one level up can be imported; this must run before the import below.
sys.path.append('../')
# The Hugging Face API token is kept out of the notebook in a separate file
# (../config/token_access.py), which exposes it as `token_access`.
from config import token_access

# Publish the token through the HUGGINGFACEHUB_API_TOKEN environment variable.
os.environ["HUGGINGFACEHUB_API_TOKEN"] =token_access.token_access


### Variables that require changes

In [None]:
# ---------------------------------------------------------------
# Data
# ---------------------------------------------------------------
# Short author tag; it becomes part of the output directory names.
author = "HGWells"

# Plain-text edition of the book on Project Gutenberg.
book_link = "https://www.gutenberg.org/cache/epub/35/pg35.txt"

# ---------------------------------------------------------------
# Models
# ---------------------------------------------------------------
# Embedding model used to vectorise the text chunks.
embeddings_model_id = "sentence-transformers/all-MiniLM-L6-v2"

# Language model that answers the questions about the book.
llm_model_id = "google/flan-t5-large"

# ---------------------------------------------------------------
# Project root (the data/ and model/ folders are created beneath it)
# ---------------------------------------------------------------
main_Dir = "../book-reader"


### Directories

In [8]:
# ---------------------------------------------------------------
# Data directories
# ---------------------------------------------------------------
data_Dir = os.path.join(main_Dir, "data")

# File-system friendly form of the embeddings model id: both "/" and "-"
# are turned into "_".
embedding_string = str(embeddings_model_id).translate(str.maketrans("/-", "__"))

# Directory in which the vector index for this author/embedding pair is kept.
embedding_Dir = os.path.join(data_Dir, f"{author}_{embedding_string}")

# ---------------------------------------------------------------
# Model directories
# ---------------------------------------------------------------
model_Dir = os.path.join(main_Dir, "model")
cache_Dir = os.path.join(model_Dir, "cache")

# ---------------------------------------------------------------
# Create everything up front; directories that already exist are left alone.
# ---------------------------------------------------------------
for directory in (data_Dir, embedding_Dir, model_Dir, cache_Dir):
    os.makedirs(directory, exist_ok=True)


# Read a book 
### "The Time Machine by H. G. Wells" from Gutenberg Project

In [9]:
# Build a loader for the Project Gutenberg text file at `book_link`.
book_loader = GutenbergLoader(book_link)  

# Fetch the book; `document` is whatever the loader returns and is the input
# to the chunking step that follows.
document = book_loader.load()

## Chunking and Embeddings 
 * split data into chunks using a CharacterTextSplitter
 * use Hugging Face's embedding LLM to embed this data for our vector store

In [10]:
# Split the book into chunks with a target size of 1024 and an overlap of 256
# between neighbouring chunks (presumably measured in characters, the
# splitter's default length measure — confirm for the installed LangChain).
text_splitter = CharacterTextSplitter(chunk_size =1024, chunk_overlap=256) 
# `texts` holds the resulting chunks; they are embedded and indexed next.
texts = text_splitter.split_documents(document)


In [13]:
# Embedding model selected by `embeddings_model_id`; `cache_folder` points the
# model download at the project's cache directory.
embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_model_id,
    cache_folder=cache_Dir,
)

# Embed every chunk and store the vectors in a Chroma index that uses
# embedding_Dir as its persist directory.
# NOTE(review): re-running this cell against an already populated
# embedding_Dir may add the same chunks a second time — confirm.
chromadb_index = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory=embedding_Dir,
)

## Creating a QA LLM Chain
This chain will be used to do QA on the document. We will need
 * A vector database that can perform document retrieval
 * An LLM to do the language interpretation
 * Specification on how to deal with this data


In [15]:
# Wrap the vector index in a retriever, so the chain can search the vector
# store for the chunks most similar to a question and get them back.
retriever = chromadb_index.as_retriever()

# Load the question-answering LLM (`llm_model_id`) as a Hugging Face
# text2text-generation pipeline.
hf_llm = HuggingFacePipeline.from_model_id(
    model_id=llm_model_id,
    task="text2text-generation",
    model_kwargs={
        # NOTE(review): temperature 0 is presumably meant to make answers
        # deterministic — confirm how the installed versions treat it.
        "temperature": 0,
        # presumably caps the length of the generated answer — confirm
        "max_length": 128,
        # keep downloaded model files under the project's cache directory
        "cache_dir": cache_Dir,
    },
)


tokenizer_config.json: 100%|██████████████████████████████████████| 2.54k/2.54k [00:00<00:00, 590kB/s]
spiece.model: 100%|████████████████████████████████████████████████| 792k/792k [00:00<00:00, 4.84MB/s]
tokenizer.json: 100%|████████████████████████████████████████████| 2.42M/2.42M [00:00<00:00, 5.25MB/s]
special_tokens_map.json: 100%|████████████████████████████████████| 2.20k/2.20k [00:00<00:00, 501kB/s]
config.json: 100%|███████████████████████████████████████████████████| 662/662 [00:00<00:00, 56.9kB/s]
model.safetensors: 100%|█████████████████████████████████████████| 3.13G/3.13G [07:43<00:00, 6.75MB/s]
generation_config.json: 100%|████████████████████████████████████████| 147/147 [00:00<00:00, 13.0kB/s]


### Options: stuff, map_reduce, refine, map_rerank
* stuff - Stuffing is the simplest method, whereby you simply stuff all the related data into the prompt as context to pass to the language model.
 * map_reduce - This method involves running an initial prompt on each chunk of data (for summarization tasks, this could be a summary of that chunk; for question-answering tasks, it could be an answer based solely on that chunk).
 * refine - This method involves running an initial prompt on the first chunk of data, generating some output. For the remaining documents, that output is passed in, along with the next document, asking the LLM to refine the output based on the new document.
 * map_rerank - This method involves running an initial prompt on each chunk of data, that not only tries to complete a task but also gives a score for how certain it is in its answer. The responses are then ranked according to this score, and the highest score is returned.

In [25]:
# Build the retrieval QA chain: the retriever supplies the relevant chunks
# and the LLM answers from them.
# "stuff" places all retrieved chunks into a single prompt; the other
# accepted values are map_reduce, refine and map_rerank.
chain_type = "stuff" 
book_qa = RetrievalQA.from_chain_type(
    llm=hf_llm, 
    chain_type=chain_type, 
    retriever=retriever
)

## Q&A

### Did ok

In [33]:
# Factual check: ask for the author while naming the book in the question.
question = "Who is the author of Time Machine?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'H. G. Wells'

In [38]:
# Plot check: ask which character invented the machine.
question = "Who invented the Time Machine?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'The Time Traveller'

In [40]:
# Descriptive check: ask about the appearance of the Eloi.
question = "How do Eloi look like?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'The Eloi, like the Carlovignan kings, had decayed to a mere beautiful futility'

In [41]:
# Descriptive check: ask about the appearance of the Morlocks.
question = "How do Morlock look like?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'half-bleached colour of the worms'

In [45]:
# Concept check: ask how the novella defines the fourth dimension.
question = "In this novella, what is the fourth dimension?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'another way of looking at Time'

In [49]:
# Detail check: ask what temporarily blinds the Morlocks.
question = "What temporarily blinds the Morlocks?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'a fence of fire'

### Did not do well

In [29]:
# Author question again, but without the book title in the prompt; this
# phrasing is listed under "Did not do well" (the recorded answer was 'I').
question = "Who wrote this book?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'I'

In [36]:
# Character question about Filby; listed under "Did not do well" (the
# recorded answer names the Time Traveller instead).
question = "Who is Filby?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'The Time Traveller'

In [51]:
# Interpretive question that needs reasoning beyond a quoted passage; listed
# under "Did not do well" (the recorded answer was the single word 'velocity').
question = "What scientific principle does the Time Traveler's visit to the decaying world thirty millions in the future illustrate?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'velocity'

In [52]:
# Location question about the machine; listed under "Did not do well" (the
# recorded answer was 'the workshop').
question = "Where is the Time Machine held?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)


'the workshop'

In [48]:
# Detail question about the Eloi's diet; listed under "Did not do well" (the
# recorded answer was 'meat').
question = "What do the Eloi eat?"
# Run retrieval + generation through the chain and show the returned answer.
answer = book_qa.run(question)
display(answer)

'meat'

# Save 

In [None]:
# Folder-safe form of the LLM id: both "/" and "-" are turned into "_".
hf_llm_string = str(llm_model_id).translate(str.maketrans("/-", "__"))

# Destination folder for this author/LLM pair, created if it is missing.
hf_llm_Dir = os.path.join(model_Dir, f"{author}_{hf_llm_string}")
os.makedirs(hf_llm_Dir, exist_ok=True)

# Write the pipeline wrapper to <hf_llm_Dir>/<hf_llm_string>.json.
# NOTE(review): this presumably stores the wrapper's settings rather than the
# model weights — confirm against the LangChain `save` documentation.
hf_llm.save(os.path.join(hf_llm_Dir, f"{hf_llm_string}.json"))