In [2]:
# for preprocessing html data  
!pip install beautifulsoup4

# For RAG
!pip install langchain
!pip install langchainhub
!pip install chromadb
!pip install gpt4all

!pip install tqdm

# Needed if using LlamaCpp from LangChain
# !pip install llama-cpp-python




In [3]:
from tqdm import tqdm

# To deal with emails 
import email
from email.policy import default
from bs4 import BeautifulSoup

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain_core.output_parsers import StrOutputParser
from langchain.docstore.document import Document
from langchain import hub
from langchain_core.runnables import RunnablePassthrough




# Preprocess the Promotion Emails

In [6]:
# code from Stack Overflow:
# https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python
class MboxReader:
    def __init__(self, filename):
        self.handle = open(filename, 'rb')
        assert self.handle.readline().startswith(b'From ')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.handle.close()

    def __iter__(self):
        return iter(self.__next__())

    def __next__(self):
        lines = []
        while True:
            line = self.handle.readline()
            if line == b'' or line.startswith(b'From '):
                yield email.message_from_bytes(b''.join(lines), policy=default)
                if line == b'':
                    break
                lines = []
                continue
            lines.append(line)

In [7]:
path = "./Takeout/Mail/Category_promotions.mbox"
mbox = MboxReader(path)
emails_to_process = 10

current_mails = 0
promo_contents = ""
for __, message in tqdm(enumerate(mbox)):
    payload = message.get_payload(decode=True)
    if payload:
        current_mails += 1
        if current_mails > emails_to_process:
            break
        soup = BeautifulSoup(payload, 'html.parser')
        body_text = soup.get_text().replace('"','').replace("\n", "").replace("\t", "").strip()
        promo_contents += body_text + " "

13it [00:00, 20.89it/s]


In [8]:
promo_contents

'autoTRADER | autoHEBDOCould this be your next car? Check it out at AutoTrader \r            Find out what your car is worth instantly.\xa0\xa0Check  NowNew Price Drops for New & Used 2017 - 2024 Cars for sale in Toronto 32019 Chevrolet Cruze Mississauga, ON\xa0  \r59,000 km\xa0 Mississauga, ON\r$11,000View2019 Nissan LEAF Mississauga, ON\xa0  \r49,300 km\xa0 Mississauga, ON\r$12,500View Your Search Criteria:\xa0New & Used 2017 - 2024 Cars for sale in Toronto 3\r\xa0\rMake:Any\rLocation:Toronto, ON\rModel:Any\rSearch Radius:50 km \r\xa0View ListingsDo more on AutoTrader\r    \xa0Latest Reviews and Advice\r            \xa0Check out our editorial page for all of the latest information, advice, and expert reviews on the world of vehicles!\xa0\r            \xa0View Now\r                \xa0\r \xa0\r    \xa0Find Out What Your Car is Worth\xa0There is no better time to sell your car! Use Instant Cash Offer for an accurate trade-in value for your vehicle. Just enter your vehicle details and i

In [9]:
# write the arxiv emails into a txt file
with open("promo_contents.txt", "w", encoding="utf-8") as f:
 f.write(promo_contents)

### convert text document to langchain document format

In [10]:
doc = Document(page_content=promo_contents, 
                metadata={"source": "local"})

# split into different chunks
# chunk_size and chunk_overlap are a hyperparameters we choose 
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)

# split the documents and convert to vector stores
all_splits = text_splitter.split_documents([doc])
vector_store = Chroma.from_documents(documents=all_splits, 
                                     embedding=GPT4AllEmbeddings())

### If using fine-tuned model, quantize to GGUF Format
For fine-tuning a given LLM, checkout [this video](https://youtu.be/_xxGMSVLwU8?feature=shared)

For quantizing, we can either: 
- Use the llama.cpp library written in C,C++ for this. Checkout [this video](https://youtu.be/j7ahltwlFH0?feature=shared)
- Or we can use LangChain's functionality. 

In any case we need the model to be converted to `gguf` format to run on the CPU.

## Load the quantize LLM model 
For the quantized model, we can either, 
- Use the LlamaCpp class from LangChain
- Use the GPT4All library

In any case, the models need to be quantized models

### Either use LlamaCpp as below

In [62]:
n_gpu_layers = 1 

n_batch = 512
quantized_gguf_model = "../generative-ai-course/quantized_models/ft-Q8_K_M.gguf"

# Initiate the LlamaCpp class to run the LLM
llm = LlamaCpp(
    model_path=quantized_gguf_model,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=1024,
    f16_kv=True,
    verbose=True,
)

ImportError: Could not import llama-cpp-python library. Please install the llama-cpp-python library to use this embedding model: pip install llama-cpp-python

### Or use GPT4All as below

In [11]:
from langchain_community.llms import GPT4All
#from gpt4all import GPT4All

llm = GPT4All(
    model="orca-mini-3b-gguf2-q4_0.gguf",
    max_tokens=2048, allow_download = True
)
# model = GPT4All(
#     "orca-mini-3b-gguf2-q4_0.gguf"
# )

#llm.invoke("what is Retrieval Augmented Generation?")

In [12]:
llm.invoke("What is the price of mazda?")

'\nWhat is the price of Mazda3?'

## Create the LangChain with and without RAG
For a given prompt, 
- Create a langchain without retrieval and see the response
- Create a langchain with the retrieval object

And see how the response differs

### LLM response without RAG

In [13]:
# retrieve relevant docs
rag_prompt = hub.pull("rlm/rag-prompt")
retriever = vector_store.as_retriever()

# Create the langchain with retriever
qa_chain = (
    {"context": {}, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
qa_chain.invoke("what is the value of a mazda3?")

  prompt = loads(json.dumps(prompt_object.manifest))


' I am sorry, but I do not have enough information to provide an accurate answer to your question. Can you please provide me with more context or specific details about the Mazda3 that you are referring to?'

### LLM response with RAG

In [14]:
# retrieve relevant docs
rag_prompt = hub.pull("rlm/rag-prompt")
rag_prompt.messages



[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [15]:
def format_documents(documents):
    return "\n\n".join(doc.page_content for doc in documents)

In [16]:
retriever = vector_store.as_retriever()

# Create the langchain with retriever,
# prompt template and LLM
qa_chain = (
    {"context": retriever | format_documents, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
qa_chain.invoke("what is the average price of Mazda3?")

' The average price of Mazda3 is $19,500.'