In [33]:
# !pip install langchain_community
# !pip install pypdf
# !pip install chromadb

In [31]:
from langchain_community.document_loaders import PyPDFLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from langchain.chains import RetrievalQA
from langchain.chains import retrieval_qa
import shutil

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline


In [32]:
file_path = "/content/drive/MyDrive/transformer.pdf"

In [39]:
class RagChatPrompt:
  SINGLE_LINE_TEMPLATE = """You are a Deep Learning expert.You know everything about transformers in Deep Learning. Provide answer in clear,concise, meaningful Use this context:
    {context}

    Answer the question in one line: {input}"""

  MULTI_LINE_TEMPLATE =    """You are a Deep Learning expert and you can give all the answers related to transfromer of Deeplearning.
                            You have to give clear, concise, meaningful answers of the question in maximum of three to four line
                            In the first line ### Give the definitation of the question###
                            In the second line ### Tell why it is important###
                            In the third line ### Descirbe in short in a simple meaning, so that a begineer guy can understand###"""

In [40]:
class RagImplementation:
  def __init__(self, file):
    self.file = file

  def file_reader(self):
    loader = PyPDFLoader(self.file)
    docs = loader.load()
    return docs

  def split_into_chunks(self):
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    document = self.file_reader()
    splitted_text = splitter.split_documents(document)
    return splitted_text

  def embedding_vectorstore(self):
    embedding_func = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # vectorstore =
    return Chroma.from_documents(self.split_into_chunks(), embedding_func)

  def retrieve_docs(self):

    # shutil.rmtree("./chroma_db", ignore_errors=True)
    embedded_store_text = self.embedding_vectorstore()
    retriever = embedded_store_text.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 3, "score_threshold": 0.25}
    )
    # ans = retriever.invoke("What is Transformer?")
    # return ans
    return retriever


In [26]:
# class TunedChatGeneration:
#   def __init__(self,rag_system, model):
#     self.rag_system = rag_system
#     self.model = model
#     # tokenizer = AutoTokenizer.from_pretrained(self.model)

#   def create_pipline(self):
#     pipe = pipeline("text-generation",
#                 model=self.model,
#                 # tokenizer=self.tokenizer,
#                 max_new_tokens=100,
#                 temperature=0.7)
#     hf_llm = HuggingFacePipeline(pipeline=pipe)
#     return hf_llm

#   def define_prompt(self, user_q):
#     prompt = ChatPromptTemplate.from_messages(
#     [
#         ("system", RagChatPrompt.SINGLE_LINE_TEMPLATE),
#         ("human", f"{user_q}")
#     ]
#         )
#     return prompt

#   def generate_tuned_output(self):
#     user_q = "What is attention in python?"
#     question_answer_chain = create_stuff_documents_chain(self.create_pipline(), self.define_prompt(user_q))
#     rag_chain = create_retrieval_chain(RagImplementation.retrieve_docs(), question_answer_chain)
#     response = rag_chain.invoke({"input":"What is attention in python?"})
#     return response["answer"]


In [27]:
# obj = RagImplementation(file=file_path)
# obj2 = TunedChatGeneration(rag_system=obj, model = "gpt2")
# # obj.retrieve_docs()

In [28]:
# obj.generate_tuned_output()
# obj2.generate_tuned_output()

In [41]:


class TunedChatGeneration:
    def __init__(self, rag_system, model):
        self.rag_system = rag_system
        self.model = model
        self.tokenizer = AutoTokenizer.from_pretrained(model)

    def create_pipeline(self):  # Fixed spelling
        pipe = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=100,
            temperature=0.7
        )
        return HuggingFacePipeline(pipeline=pipe)

    def define_prompt(self):
        return ChatPromptTemplate.from_template(RagChatPrompt.SINGLE_LINE_TEMPLATE)

    def generate_tuned_output(self, question: str):
        retriever = self.rag_system.retrieve_docs()
        llm = self.create_pipeline()
        prompt = self.define_prompt()

        question_answering_chain = create_stuff_documents_chain(llm, prompt)
        rag_chain = create_retrieval_chain(retriever, question_answering_chain)

        return rag_chain.invoke({"input": question})["answer"]




In [42]:
# Usage
rag = RagImplementation(file=file_path)
chat_bot = TunedChatGeneration(rag, "gpt2")
print(chat_bot.generate_tuned_output("What is attention in transformers?"))

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Human: You are a Deep Learning expert.You know everything about transformers in Deep Learning. Provide answer in clear,concise, meaningful Use this context:
    Figure 1: The Transformer - model architecture.
The Transformer follows this overall architecture using stacked self-attention and point-wise, fully
connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,
respectively.
3.1 Encoder and Decoder Stacks
Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two
sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position-

Figure 1: The Transformer - model architecture.
The Transformer follows this overall architecture using stacked self-attention and point-wise, fully
connected layers for both the encoder and decoder, shown in the left and right halves of Figure 1,
respectively.
3.1 Encoder and Decoder Stacks
Encoder: The encoder is composed of a stack of N = 6 identic