<a href="https://colab.research.google.com/github/harnalashok/deeplearning-sequences/blob/main/chatbot_withg_langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# https://www.linkedin.com/pulse/get-insight-from-your-business-data-build-llm-application-jain/

In [3]:
!git lfs install
!git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
!git clone https://huggingface.co/google/flan-t5-large

Git LFS initialized.
Cloning into 'all-MiniLM-L6-v2'...
remote: Enumerating objects: 52, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 52 (delta 2), reused 0 (delta 0), pack-reused 46[K
Unpacking objects: 100% (52/52), 317.58 KiB | 2.06 MiB/s, done.
Filtering content: 100% (3/3), 260.15 MiB | 50.87 MiB/s, done.
Cloning into 'flan-t5-large'...
remote: Enumerating objects: 110, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 110 (delta 0), reused 0 (delta 0), pack-reused 107[K
Receiving objects: 100% (110/110), 635.37 KiB | 1.42 MiB/s, done.
Resolving deltas: 100% (58/58), done.
Filtering content: 100% (5/5), 11.91 GiB | 43.50 MiB/s, done.


In [None]:
!pip install langchain
!pip install torch
!pip install transformers
!pip install faiss-cpu
!pip install pypdf
!pip install sentence-transformers

In [7]:
from langchain.document_loaders import PyPDFLoader
pdfLoader = PyPDFLoader("/content/sample_data/large_language_models.pdf")
documents = pdfLoader.load()

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(documents)

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings
modelPath = "/content/all-MiniLM-L6-v2"
model_kwargs = {'device':'cpu'}
encode_kwargs = {'normalize_embeddings':False}
embeddings = HuggingFaceEmbeddings(
  model_name = modelPath,
  model_kwargs = model_kwargs,
  encode_kwargs=encode_kwargs
)

In [None]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)

In [None]:
question = "Discuss the variants of the transformer architectures used in LLMs"
searchDocs = db.similarity_search(question)
print(searchDocs[0].page_content)

In [31]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline
from langchain import HuggingFacePipeline

tokenizer = AutoTokenizer.from_pretrained("/content/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("/content/flan-t5-large")
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(
    pipeline = pipe,
    model_kwargs={"temperature": 0, "max_length": 1512, "max_new_tokens" : 1500},
)

In [32]:
from langchain.prompts import PromptTemplate

template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [33]:
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
  llm=llm,
  chain_type="stuff",
  retriever=db.as_retriever(),
  chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)


In [35]:
qa_chain

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. \n{context}\nQuestion: {question}\nHelpful Answer:"), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text2text_generation.Text2TextGenerationPipeline object at 0x7f89ce4903d0>, model_kwargs={'temperature': 0, 'max_length': 1512, 'max_new_tokens': 1500})), document_variable_name='context'), retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f8af181b760>))

In [36]:
result = qa_chain({ "query" : question })
print(result["result"])


Token indices sequence length is longer than the specified maximum sequence length for this model (1036 > 512). Running this sequence through the model will result in indexing errors


Here we discuss the variants of the transformer architectures used in LLMs.
