There are 3 models.
- Base pretrained model with One Shot Promt
- Semantic Vector Base
- RAG connected LLM Flaten Base

In [None]:
!pip install langchain==0.1.5
!pip install --quiet langchain_experimental
!pip install torch
!pip install transformers
!pip install faiss-cpu
!pip install pypdf
!pip install sentence-transformers
!pip install unstructured==0.12.3
!pip install unstructured[pdf]==0.12.3
!pip install tiktoken
!pip install huggingface_hub
from huggingface_hub import login

hf_token = "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"
login(token=hf_token, add_to_git_credential=True)

# Getting the model and tokenizer

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
import torch
#,GenerationConfig, TrainingArguments, Trainer

original_model = AutoModelForSeq2SeqLM.from_pretrained('google/flan-t5-base', torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base')

# Zero Shot LLM with a Promt Template

In [3]:
def get_base_pretrained_output(text_customer):
    prompt = f"""
    'Answer the question based on the context below. Keep the answer short. Respond 'Unsure about answer' if not sure about the answer.\n\n'

    {text_customer}

    response:
    """
    inputs = tokenizer(prompt, return_tensors='pt')
    output = tokenizer.decode(
        original_model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True
    )
    return output

# Getting The Data

pdf's collected from Amazon and ebay

In [None]:
! git clone https://github.com/djmjs/RAG.git

In [None]:
from langchain.document_loaders import DirectoryLoader
loader = DirectoryLoader('RAG/Pdf_s', glob="**/*.pdf", show_progress=True)
pdfs = loader.load()

In [6]:
pdfs[:1]

[Document(metadata={'source': 'RAG/Pdf_s/Amazon Cancellation Policies.pdf'}, page_content="Cancellation Policies on Amazon About Cancelling an Item\n\nYou can cancel items fulfilled by Amazon or shipped and sold by Amazon that have\n\nnot entered the shipping process.\n\nIf the item is sold and shipped by a third-party seller, orders can typically be\n\ncanceled within one business day.\n\nOnce an order is in fulfillment, sellers are required to approve cancellation. To cancel\n\nan order that has not entered the shipping process, follow these steps:\n\n1. Go to Your Orders and select the order you want to cancel. 2. Check box of the item that you want to cancel from the order. To cancel the entire order, select all of the items.\n\n3. Select the reason for cancellation and click Cancel selected items in this order\n\nwhen finished.\n\nCancellation Confirmation\n\nAfter submitting the cancellation, we will send you a confirmation message to the email address on your account. To see the

# Splitting Doc's into Chunks

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(pdfs)

# MiniLM for Embedding phase

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
modelPath = "sentence-transformers/all-MiniLM-l6-v2"
model_kwargs = {'device':'cpu'}
encode_kwargs = {'normalize_embeddings':False}
embeddings = HuggingFaceEmbeddings(
  model_name = modelPath,
  model_kwargs = model_kwargs,
  encode_kwargs=encode_kwargs
)

# Vector Database with Faiss from facebook AI

it is a similarity search using Vector DB

In [9]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)
question = "Can I cancel my order?"

In [10]:
def get_semantic_vector_output(question):
    searchDocs = db.similarity_search(question)
    return searchDocs[0].page_content

In [11]:
get_semantic_vector_output(question)

"Orders That Cannot be Canceled\n\nIf your order is shipped directly from Amazon and you cannot change it, you may\n\nrefuse the package or return it using our Online Returns Center.\n\nIf your order is shipped directly from a third-party seller and can't be changed,\n\ncontact the seller. For more information on how to contact the seller, go to Contact Third-Party Sellers.\n\nYou May Be Interested In…\n\nChecking the Status of Your Order ● Late Deliveries ● Undeliverable Orders ● Checking the Status of Your Refund ● Canceling Your Amazon Prime Membership"

# LLM with Vector DB

now using flaten as a part of the pipeline.

In [25]:
from langchain import HuggingFacePipeline

pipe = pipeline("text2text-generation", model=original_model, tokenizer=tokenizer)
llm = HuggingFacePipeline(
   pipeline = pipe,
   model_kwargs={"temperature": 0, "max_length": 2048},
)

# LangChain Connection

In [13]:
from langchain.prompts import PromptTemplate

template = """I want you to use context to answer the customer query. Answer only if you know the answer if you don't know the answer, just say "I dont know the answer". Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [14]:
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
  llm=llm,
  chain_type="stuff",
  retriever=db.as_retriever(),
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [15]:
def get_rag_output(question):
    result = qa_chain({"query": question})
    return result["result"]

In [16]:
question="Can I cancel my order?"
get_rag_output(question)

  result = qa_chain({"query": question})
Token indices sequence length is longer than the specified maximum sequence length for this model (823 > 512). Running this sequence through the model will result in indexing errors


'Yes'

# Test Cases

In [17]:
questions=[]
model_outputs=[]
# Sample Questions
questions = [
    "Can I cancel my order?",
    "How can I cancel my order?",
    "My order is not delivered. What should I do?",
    "The item is out of stock. What should I do?"
]

# Initializing empty dictionary to hold model outputs
model_outputs = {
    "Base Pretrained Model": [],
    "Semantic Vector Base": [],
    "RAG Connected LLM": []
}

In [18]:
for question in questions:
    # Get Base Pretrained Model output
    base_pretrained_output = get_base_pretrained_output(question)
    model_outputs["Base Pretrained Model"].append(base_pretrained_output)

    # Get Semantic Vector Base output
    semantic_vector_output = get_semantic_vector_output(question)
    model_outputs["Semantic Vector Base"].append(semantic_vector_output)

    # Get RAG Connected LLM output
    rag_output = get_rag_output(question)
    model_outputs["RAG Connected LLM"].append(rag_output)



In [23]:
import pandas as pd
# Create a Pandas DataFrame for organized table format
df = pd.DataFrame({
    "Question": questions,
    "Base Pretrained Model with a Promt Template": model_outputs["Base Pretrained Model"],
    "Semantic Vector Base FAISS": model_outputs["Semantic Vector Base"],
    "RAG Connected LLM": model_outputs["RAG Connected LLM"]
})

In [26]:
df

Unnamed: 0,Question,Base Pretrained Model with a Promt Template,Semantic Vector Base,RAG Connected LLM
0,Can I cancel my order?,"Yes, please.",Orders That Cannot be Canceled\n\nIf your orde...,Yes
1,How can I cancel my order?,"Click on the ""Cancel Order"" button and then cl...",Cancellation Policies on Amazon About Cancelli...,Cancel an order by selecting the button below....
2,My order is not delivered. What should I do?,I will try to get it delivered.,Orders That Cannot be Canceled\n\nIf your orde...,I dont know the answer.
3,The item is out of stock. What should I do?,I'll try to order it.,Unpaid item policy\n\nCanceling an order becau...,Cancel an order
