In [None]:
!pip install langchain #for langchain
!pip install huggingface_hub #for loading LLM model
!pip install sentence_transformers #for changing sentences to LLM understandable tokens
!pip install faiss-cpu #vector database lib
!pip install PyPDF2 PyPDF

In [None]:
from langchain.document_loaders import TextLoader  #for textfiles
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.document_loaders import UnstructuredPDFLoader  #load pdf
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader  #load urls into docoument-loader

#LLM
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain import HuggingFaceHub #for loading LLM
from langchain.embeddings import HuggingFaceEmbeddings #for loading embeddings

#Document Loader
from langchain.document_loaders import PyPDFLoader #for loading pdf files in langchain
from langchain.text_splitter import RecursiveCharacterTextSplitter #for splitting text

#Vector DB
from langchain.vectorstores import FAISS  #facebook AI Similarity Search for vectorization

#Chains
from langchain.chains.question_answering import load_qa_chain #for setting up QnA chain
from langchain.chains import ConversationalRetrievalChain #for setting the conversation chain

from langchain.memory import ConversationBufferMemory #for keeping the context memory in conversation


In [None]:
#from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("/content/webscrap_hyryder.pdf")

pages = loader.load_and_split()
print(pages[1].page_content)

Vid6639 Team-BHP Support Toyota Urban Cruiser Hyryder Exterior Images Front fascia looks clean and butch thanks to the massive grill. While the car has road presence, it's definitely not as much as the Creta or Seltos: Rear end looks are acceptable to most. Silver skid plates add to the SUV look. This car is a prime candidate for some debadging: It's easy to see that the Hyryder is one of the longer cars in the segment at 4365mm. It's 50mm longer than the Seltos and 65mm more than the Creta. The cladding runs all around the car and definitely enhances its looks. However, the wheel wells appear a little too large: The overall design is simple and not overdone and hence, quite elegant: At first glance, the Hyryder resembles the Brezza from this angle: DRLs are reasonably bright and double up as turn-indicators: Front end has just the right amount of chrome in the form of a strip merging with the two headlamps and the Toyota logo in the centre. Bumper sports a huge air dam with a honeycom

In [16]:
import os

In [17]:
# Never store an API token in the notebook itself: any token that was ever
# committed or shared in a notebook must be treated as leaked and revoked at
# https://huggingface.co/settings/tokens.
# Use the token already present in the environment if there is one; otherwise
# prompt for it without echoing the input.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    from getpass import getpass  # local import: only needed for the interactive fallback

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

In [None]:
# Splitter producing chunks of at most 1000 characters (length measured with len)
# where consecutive chunks share up to 100 characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
)

# Break the loaded documents into the smaller sections that will be embedded.
sections = text_splitter.split_documents(pages)

In [None]:
# Embedding model used to vectorize the text sections and, later, the queries.
# The model is named explicitly (it is the library's default) so the FAISS index
# saved to disk is always queried with the same embeddings it was built with,
# even if the library default changes in a future release.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every section and build a FAISS vector store from the resulting vectors.
faiss_index = FAISS.from_documents(documents=sections, embedding=embeddings)

# Persist the vector store to the "faiss_index" folder so it can be reloaded later.
faiss_index.save_local(folder_path="faiss_index")

In [11]:
# Reload the index from the same relative folder that save_local("faiss_index")
# wrote to, rather than an absolute Colab-only path, so the notebook works from
# any working directory.
# NOTE: allow_dangerous_deserialization=True permits unpickling of the stored
# index metadata. Only load index folders produced by this notebook; never
# point this at files from an untrusted source.
faiss_index = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

In [12]:
# Wrap the FAISS index in a retriever object (default settings) so it can be
# passed to a chain as its document source.
retriever = faiss_index.as_retriever()

In [13]:
# Conversation memory: the running history is stored under 'chat_history' as
# message objects, and the 'answer' output of the chain is what gets recorded.
memory = ConversationBufferMemory(memory_key='chat_history', output_key='answer', return_messages=True)


In [34]:
# LLM served through the Hugging Face Hub; generation settings are passed via model_kwargs.
llm = HuggingFaceHub(
    repo_id="declare-lab/flan-alpaca-large",
    model_kwargs={"temperature": 0.1, "max_length": 512},
)


In [35]:
# Assemble the conversational QA chain from the LLM, the FAISS-backed retriever
# and the conversation memory defined in the earlier cells.
chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [36]:
# Ask the chain what customers dislike about the product.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "What customer don't like about the product??"
result = chain.invoke({"question": query})
print(result["answer"])

The customer's main complaint about the product is that the claim process was not smooth.


In [24]:
# Ask which sunroof issues are reported.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "What are sunroof issues the product have??"
result = chain.invoke({"question": query})
print(result["answer"])

The sunroof issues the product has are the panes moving backwards, the glass being well tinted, and the thin shade.


In [25]:
# Ask for the number of sunroof issues.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "How many sunroof issues the product have??"
result = chain.invoke({"question": query})
print(result["answer"])

The product has two sunroof issues. The first is that the panes move backwards and the glass is well tinted. The second issue is that the shade is too light and the sunroof is too thin. The third issue is that the sunroof is too expensive and the sunroof is not as good as expected.


In [38]:
# Ask for the number of engine-related complaints.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "How many engine issue complaints are present in the whole document??"
result = chain.invoke({"question": query})
print(result["answer"])

There are 4 engine issue complaints present in the whole document.


In [39]:
# Follow-up question: it refers back to the previous answer, so it relies on the chain's memory.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Can those 4 engine isuues be listed??"
result = chain.invoke({"question": query})
print(result["answer"])

Yes, the four engine issue complaints can be listed.


In [40]:
# Ask for the engine issues to be listed explicitly.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Please list the engine issues"
result = chain.invoke({"question": query})
print(result["answer"])

The four engine issue complaints present in the whole document are: 1. Quality feels cheap 2. Performance not adequate for Highway Drives 3. Engine sounds louder in hilly areas and sometimes struggle 4. Engine sounds louder in hilly areas and sometimes struggle


In [43]:
# Ask which usernames reported each engine issue.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "List the username against the complaints for all 4 engine issues along with issues"
result = chain.invoke({"question": query})
print(result["answer"])

mave547 BHPian


In [44]:
# Ask for the issues reported about voice activation.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Please list the issues regarding voice activation"
result = chain.invoke({"question": query})
print(result["answer"])

The issues regarding voice activation are that it activates itself without any input, it is highly annoying and complete nuisance, it needs to be shut down, and it is not able to be switched off.


In [45]:
# Ask who raised the voice-activation complaints.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Who raised complaints regarding voice actiation"
result = chain.invoke({"question": query})
print(result["answer"])

Ralags Newbie.


In [46]:
# Ask for the number of voice-activation issues.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "How many voice activation issues are there in the whole document?"
result = chain.invoke({"question": query})
print(result["answer"])

There are two voice activation issues present in the whole document.


In [50]:
# Ask whether any sunroof/moonroof issues are reported at all.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Is there any Sunroof moonroof issues"
result = chain.invoke({"question": query})
print(result["answer"])

Yes, there are some sunroof moonroof issues.


In [51]:
# Follow-up question: "those issues" only makes sense with the previous turn, so it relies on the chain's memory.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "What are those issues??"
result = chain.invoke({"question": query})
print(result["answer"])

The sunroof moonroof issues are caused by the panes moving backwards and the glass being well tinted. The panes can be replaced with a darker shade to reduce the heat and reduce the issue.


In [54]:
# Ask for negative feedback about mileage.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "What are the bad reviews about vehicle mileage?"
result = chain.invoke({"question": query})
print(result["answer"])

The bad reviews about vehicle mileage are that it is not as fuel efficient as other cars and that it has a tendency to be noisy at idle.


In [55]:
# Ask which product the document reviews.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "What is the product name that is reviewed?"
result = chain.invoke({"question": query})
print(result["answer"])

Toyota Hyryder.


In [56]:
# Ask for a description of the product.
# invoke() is used because calling the chain object directly is deprecated in LangChain 0.1+.
query = "Give the description of the product"
result = chain.invoke({"question": query})
print(result["answer"])

The description of the Toyota Hyryder is "A powerful and efficient hybrid vehicle with a range of features that make it an ideal choice for anyone looking for a reliable and efficient vehicle."
