In [1]:
# Install necessary packages
! pip install langchain
!pip install chromadb
! pip install openai
! pip install tiktoken
! pip install langchain_openai
! pip install langchain-chroma
!pip install langchain_community
! pip install pypdf

Collecting langchain
  Downloading langchain-0.2.0-py3-none-any.whl (973 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.7/973.7 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.0-py3-none-any.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.9/307.9 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.0-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.59-py3-none-any.whl (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.2/121.2 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7

In [2]:
# Set the OpenAI API key as an environment variable
import os  # imported here so the cell works on a fresh kernel (the main import cell runs later)
from getpass import getpass

# Never store a literal key in the notebook: reuse a key that is already
# exported in the environment, otherwise prompt for it without echoing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [22]:
# Import necessary modules from langchain and related packages

import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import LLMChain
from langchain import PromptTemplate

In [40]:
# Read the review PDF with PyPDFLoader and keep the resulting documents

loader = PyPDFLoader("TATA Safari Reviews.pdf")
pages = loader.load_and_split()
# Flatten the text of every loaded document into one space-separated string
All_Text = " ".join([page.page_content for page in pages])
type(pages)

list

In [41]:
# Break the loaded documents into smaller, overlapping chunks for retrieval

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(pages)
chunks[0]

Document(page_content="The famed Tata Safari is back!! The car was once the most aspired vehicle for many a middle -class \nbuyer. Its good looks and large proportions won it a legion of fans - since its debut over two decades \nago. We last saw a Safari on sale in a Tata Motors show room back in 2019... And now it has returned \nwith an all -new generation. The car that's ready for launch has had an interesting metamorphosis - \nespecially with its name. The 'three -row Harrier' or H7X, started as the Buzzard at the Geneva Motor \nshow in 201 9, became the Gravitas when it debuted in India at the Delhi Auto Expo a year later. And \nnow after almost another year, it's market -ready with another rechristening! Now I have to say, I'm \nnot one of the people who buys the theory that this car was always supposed to be named the \nSafari! If you ask me, the Harrier should have been the Safari, and I have said that now for some", metadata={'source': 'TATA Safari Reviews.pdf', 'page': 0})

In [42]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma vector store

embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings)
vector_store

<langchain_chroma.vectorstores.Chroma at 0x7d86e5c85060>

In [43]:
# Define a function that builds the history-aware (context-aware) retriever chain

def get_context_retriever_chain(vector_store):
    """Build a history-aware retriever chain on top of ``vector_store``.

    The query-rewriting prompt consists of the prior conversation
    (``chat_history``), the latest user message (``input``) and an
    instruction to turn the conversation into a search query.

    Per the LangChain documentation for ``create_history_aware_retriever``,
    the prompt is only run through the LLM when a non-empty ``chat_history``
    is supplied; otherwise ``input`` is sent to the retriever unchanged.

    Args:
        vector_store: A vector store exposing ``as_retriever()``.

    Returns:
        The runnable produced by ``create_history_aware_retriever``.
    """
    llm = ChatOpenAI()
    retriever = vector_store.as_retriever()
    prompt = ChatPromptTemplate.from_messages([
        # Without this placeholder the instruction below refers to an
        # "above conversation" that is never actually put into the prompt.
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation"),
    ])

    return create_history_aware_retriever(llm, retriever, prompt)

In [44]:
# Define a function to get the conversational RAG chain

def get_conversational_rag_chain(retriever_chain):
    """Build the retrieval-augmented answering chain.

    Documents returned by ``retriever_chain`` are stuffed into the system
    prompt as ``{context}``; the chat model then answers ``{input}``.

    Args:
        retriever_chain: Runnable that returns the documents to answer from.

    Returns:
        The runnable produced by ``create_retrieval_chain``; this notebook
        reads its ``"answer"`` key.
    """
    llm = ChatOpenAI()
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Answer the user's questions based on the below context:\n\n{context}"),
        # Let the model see earlier turns so follow-up questions can be
        # answered. Optional, so callers that pass no history keep working.
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("user", "{input}"),
    ])
    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(retriever_chain, stuff_documents_chain)


In [45]:
# Define a function to get a response to user input

def get_response(user_input, chat_history=None):
    """Answer ``user_input`` using the documents indexed in ``vector_store``.

    The retriever and answering chains are rebuilt on every call from the
    module-level ``vector_store``.

    Args:
        user_input: The question to answer.
        chat_history: Optional list of earlier conversation messages
            (e.g. ``HumanMessage`` / ``AIMessage``). Defaults to no history,
            which matches the previous single-argument behaviour.

    Returns:
        The value stored under the ``"answer"`` key of the chain's response.
    """
    retriever_chain = get_context_retriever_chain(vector_store)
    conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
    response = conversation_rag_chain.invoke({
        # An empty history makes the history-aware retriever use the raw input.
        "chat_history": chat_history or [],
        "input": user_input,
    })
    return response["answer"]

In [46]:
# Define a function to perform summarization and aspect-based sentiment analysis

def summarized_ABSA(All_Text):
    """Summarize ``All_Text`` and run aspect-based sentiment analysis on it.

    Both steps send the complete text to the same ``gpt-3.5-turbo`` chat
    model, so the text must fit in that model's context window.

    Args:
        All_Text: The text to analyse, as a single string.

    Returns:
        A ``(summary, absa)`` tuple holding the model's reply to the
        summarization prompt and its reply to the aspect/sentiment prompt.
    """
    llm = ChatOpenAI(model_name='gpt-3.5-turbo')

    # Prepare messages for summarization
    chat_messages_sum = [
        SystemMessage(content='You are an expert assistant with expertize in summarizing speeches'),
        HumanMessage(content=f'Please provide a short and concise summary of the following speech:\n TEXT: {All_Text}'),
    ]
    # `.invoke` replaces the deprecated direct call `llm(messages)`.
    res_sum = llm.invoke(chat_messages_sum).content

    # Define a template for aspect-based sentiment analysis
    template = """
              Recognize all aspect terms with their corresponding sentiment polarity in the given review delimited by triple quotes. The aspect terms are nouns or phrases appearing in the review that indicate specific aspects or features of the product/service. Determine the sentiment polarity from the options [\"positive\", \"negative\", \"neutral\"]. Answer in the format [\"aspect\", \"sentiment\"] without any explanation. If no aspect term exists, then only answer \"[]\"."
               ```{content1}```
           """
    # The declared input variable must match the `{content1}` placeholder
    # used in the template (it was previously declared as "All_Text").
    prompt = PromptTemplate(template=template, input_variables=["content1"])

    # Pipe the prompt into the chat model instead of the deprecated
    # `LLMChain(...).run(...)`; `.content` is the reply text.
    res_ABSA = (prompt | llm).invoke({"content1": All_Text}).content

    return res_sum, res_ABSA

In [56]:
# Run summarization and aspect-based sentiment analysis on the full text,
# then print the summary followed by the aspect/sentiment result

final_res = summarized_ABSA(All_Text)
print(final_res[0], final_res[1], sep="\n")

Summary:
The Tata Safari, a beloved vehicle making a comeback after two decades, has been redesigned with a new generation. This new model, built on the Land Rover platform, shares similarities with the Harrier but offers subtle differences in design. The interior boasts an upmarket feel with advanced tech features like Apple CarPlay and Android Auto. The car provides ample space with a third row, though access can be cumbersome. The Safari offers a spacious cargo area when the second and third-row seats are folded. It is powered by a 2.0-liter turbo-diesel engine available in manual and automatic transmissions. The driving experience is smooth, with notable differences between Eco and Sport modes. Safety features include dual airbags, ABS, hill hold control, and traction control, with top variants offering 6 airbags and more. The Safari is expected to be competitively priced against rivals like the MG Hector Plus and Toyota Innova Crysta.
["Tata Safari", "neutral"], ["car", "positive"

In [58]:
# Example query: ask the RAG chain about safety features

user_input = "What are the Safety features of tata safri ?"
get_response(user_input=user_input)

'The Tata Safari comes with a comprehensive list of safety features. Standard safety features include dual airbags, ABS (antilock brakes) with ESP (electronic stability program), disc brakes on all wheels, hill hold control, and traction control. The top trim offers additional safety features such as 6 airbags, hill descent control, ISOFIX child seat mounts, a tyre pressure monitoring system, cruise control, and a reverse parking camera.'

In [59]:
# Example query: ask the RAG chain for a price comparison
user_input = "price comparision between Toyota Crysta and TATA Safari ?"
get_response(user_input=user_input)

'The diesel variant of the Tata Safari is expected to be priced lower than the diesel Toyota Innova Crysta. The Toyota Innova Crysta is priced between Rs. 16.64 lakh to Rs. 24.33 lakh, while the Tata Safari is expected to be priced starting from under Rs. 14 lakh. Therefore, the Tata Safari is likely to be more competitively priced compared to the Toyota Innova Crysta.'

In [57]:
# Example query: ask the RAG chain to describe the vehicle by its nickname
user_input = "Describe about 'three-row Harrier'  ?"
get_response(user_input=user_input)

"The 'three-row Harrier,' also known as the Tata Safari, is an all-new generation vehicle that has been designed to offer three rows of seating for passengers. It is built on the Land Rover D8 platform derived OMEGARC architecture, just like the Harrier. The Safari is 4661mm long, 1786mm high (80mm more than the Harrier due to its stepped roof design), 1894mm wide, and has a wheelbase of 2741mm, which is the same as the Harrier. The design cues are similar to the Harrier, but the Safari has subtle differences like a distinct front grille with Tata's 'tri-arrow' motif and more chrome detailing. The Safari is available in three color options - Royale Blue, Daytona Grey, and Orcus White. The interior is upmarket with an 'Oyster White' theme and features a spacious cabin with a massive panoramic sunroof. The cabin also includes a dual-tone black and white dash and an 8.8-inch floating island touchscreen system that is compatible with Apple CarPlay and Android Auto."