In [None]:
# Query that drives both the retrieval step and the final generation.
question = 'Where are paint bubbles appearing?'

In [None]:
from langchain_community.document_loaders import PyMuPDFLoader
# Read the sample PDF into LangChain documents and show them as the cell output.
pdf_path = "input_files/sample.pdf"
loader = PyMuPDFLoader(pdf_path)
docs = loader.load()
docs

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk size and overlap handed to the splitter.
CHUNK_SIZE = 200
CHUNK_OVERLAP = 30

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunked_docs = splitter.split_documents(docs)

print(f"Got {len(chunked_docs):d} chunks")

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embed every chunk with the BGE base English model and index the vectors in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5')
db = FAISS.from_documents(chunked_docs, embedding_model)

In [None]:
# Similarity-search retriever over the FAISS index, returning the 4 best matches.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)

# Sanity check: show what the retriever returns for the notebook's question.
print(f"Question: {question}")
# `invoke` is the Runnable entry point; `get_relevant_documents` is deprecated
# in langchain-core and emits a deprecation warning.
for doc in retriever.invoke(question):
    print(doc)
    print("Doc: ", doc.page_content[:100], "...")

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Checkpoint selection: the second assignment overrides the first, so 'gpt2'
# is the model that actually gets loaded below.
model_name = 'HuggingFaceH4/zephyr-7b-beta'
model_name = 'gpt2'

# 4-bit NF4 quantization settings with double quantization and bfloat16 compute.
# Only the commented-out load below references this config; the active load
# does not pass it.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Quantized variant, currently disabled:
# model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
# Active load: unquantized causal LM plus its matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Wrap the loaded causal LM and tokenizer in a transformers generation pipeline.
# The task has to be "text-generation": the model is loaded with
# AutoModelForCausalLM, the keyword arguments below are generation settings,
# and the result is read through the `generated_text` key further down.
# "question-answering" selects the extractive QA pipeline, which matches none
# of that.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.1,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,  # returned text includes the prompt
    max_new_tokens=100,
    truncation=True,
)

# Plain Context / Question / Answer prompt with two placeholders.
# The template is spelled out line by line so the exact newlines are visible.
prompt_template = PromptTemplate(
    template=(
        "\n"
        "Context:\n"
        "{context}\n"
        "\n"
        "Question: {question}\n"
        "\n"
        "Answer:\n"
    ),
    input_variables=["context", "question"],
)

def apply_chat_template_with_retriever(question, retriever=retriever):
    """Retrieve context for ``question`` and render it into the prompt.

    Despite the name, no tokenizer chat template is applied: the text is
    produced by ``prompt_template.format``, where ``prompt_template`` is the
    module-level object of that name looked up at call time.

    Args:
        question: The user question; used both as the retrieval query and as
            the ``question`` field of the prompt.
        retriever: Object whose ``invoke(question)`` returns documents exposing
            ``page_content``. The default is bound once, when this ``def``
            executes, so rebinding the global ``retriever`` afterwards does
            not change it.

    Returns:
        The formatted prompt string, with the retrieved page contents joined
        by newlines as the context.
    """
    # `invoke` replaces the deprecated `get_relevant_documents`.
    context_docs = retriever.invoke(question)
    context = "\n".join(doc.page_content for doc in context_docs)
    return prompt_template.format(context=context, question=question)

# Rebind `retriever` to one created with as_retriever()'s default arguments.
retriever = db.as_retriever()

# Retrieve context for the question and render the full prompt text.
formatted_prompt = apply_chat_template_with_retriever(question, retriever)

# Generate text using the formatted prompt
output = text_generation_pipeline(formatted_prompt)
print(output[0]['generated_text'])

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Chain: question -> prompt string -> LLM -> plain string.
# NOTE(review): this relies on LangChain coercing the plain function on the
# left of `|` into a runnable — confirm for the installed version. Called this
# way the function presumably receives only the question and therefore uses
# its default retriever.
llm_chain = apply_chat_template_with_retriever | llm | StrOutputParser()


In [None]:
# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt text using <|system|> / <|user|> / <|assistant|> role markers with
# </s> separators. NOTE(review): this looks like the Zephyr chat format
# (zephyr-7b-beta is named in the model cell), while the model cell currently
# ends up selecting 'gpt2' — confirm the markers suit the active model.
# This rebinds `prompt_template` from the PromptTemplate object defined
# earlier to a plain str; the PromptTemplate built from it is named `prompt`.
prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Prompt -> LLM -> plain string; expects a dict with "context" and "question".
llm_chain = prompt | llm | StrOutputParser()

# RAG variant: the incoming question is sent to the retriever for "context"
# and passed through unchanged as "question".
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

In [None]:
# Baseline: run the prompt -> LLM chain with an empty context, i.e. no retrieval.
no_context_inputs = {"context": "", "question": question}
result_without_context = llm_chain.invoke(no_context_inputs)
print(result_without_context)

In [None]:
def format_docs(docs):
    """Join the retrieved documents' text into one newline-separated string.

    Args:
        docs: Iterable of documents exposing ``page_content``.

    Returns:
        The page contents joined by newlines ("" when ``docs`` is empty).
    """
    return "\n".join(doc.page_content for doc in docs)


# Feeding the retriever's output straight into the prompt would insert the
# repr of a list of Document objects (metadata included). Piping it through
# format_docs gives the prompt clean text, joined the same way
# apply_chat_template_with_retriever builds its context.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | llm_chain
)

result_with_rag = rag_chain.invoke(question)
print(result_with_rag)