In [1]:
import os
import time
import pickle
import streamlit as st

from dotenv import load_dotenv
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import OllamaEmbeddings
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain

In [2]:
# LangChain Ollama LLM wrapper configured for the "llama3" model; it is shared
# by both chains built later in the notebook.
llm = Ollama(model="llama3")
# Bare expression so the notebook displays the wrapper's repr.
llm

Ollama(model='llama3')

#### Load data

In [3]:
# The two Moneycontrol articles that serve as the knowledge base.
article_urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
]

# Download the pages and parse them into LangChain documents.
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()

# Number of documents that were loaded.
len(data)

2

#### Split data to create chunks

In [4]:
# Break the loaded pages into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)

In [5]:
len(docs)

34

In [6]:
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, Login Hello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistFREE Credit Score₹100 Cash RewardMy AlertsMy MessagesPrice AlertsMy Profile My PROMy PortfolioMy WatchlistFREE Credit Score₹100 Cash RewardMy AlertsMy MessagesPrice AlertsLogoutChat with UsDownload AppFollow us on:\n\nGo Ad-Free\n\nMy Alerts', metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'})

#### Create embeddings for these chunks and save them to a FAISS index

In [7]:
# Embedding model: LangChain's Ollama embeddings wrapper configured for "llama3".
ollama_emb = OllamaEmbeddings(model="llama3")
# Pass the documents and embeddings in order to create FAISS vector index
# (left commented out: a later cell reads the index from a pickle file instead)
# vectorindex_ollama = FAISS.from_documents(docs, ollama_emb)

In [8]:
# Store the created vector index in a local file
# file_path="vector_index.pkl"
# with open(file_path, "wb") as f:
#     pickle.dump(vectorindex_ollama, f)

In [9]:
# Load the cached FAISS index if it exists; otherwise build it from the chunks
# and cache it, so that `vectorIndex` is always defined for the cells below
# (previously a missing cache file left it undefined and later cells failed
# with NameError).
file_path="vector_index.pkl"
if os.path.exists(file_path):
    # SECURITY: pickle.load can execute arbitrary code contained in the file.
    # Only load an index file that this notebook itself produced.
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)
else:
    # Cache miss: embed every chunk (slow, calls the embedding model) and
    # persist the resulting index for subsequent runs.
    vectorIndex = FAISS.from_documents(docs, ollama_emb)
    with open(file_path, "wb") as f:
        pickle.dump(vectorIndex, f)

#### Retrieval_chain

In [13]:
# Create the retriever in this cell instead of relying on the
# RetrievalQAWithSourcesChain cell further down: on a fresh
# "Restart Kernel -> Run All" that cell has not executed yet, so `retriever`
# would be undefined here and the cell would raise NameError.
retriever = vectorIndex.as_retriever()

# Prompt with two placeholders: {context} receives the retrieved documents and
# {input} receives the user's question.
prompt = ChatPromptTemplate.from_template("""
         Answer the following question based only on the provided context. 
         Think step by step before providing a detailed answer. 
         I will tip you $1000 if the user finds the answer helpful. 
         <context>
         {context}
         </context>
         Question: {input}""")

# Combine the LLM and prompt into a document chain, then put the retriever in
# front of it so that each question is answered from retrieved chunks.
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Uncomment to trace what the chain does underneath:
# import langchain
# langchain.debug=True

response = retrieval_chain.invoke({"input":"what is the price of Tiago iCNG?"})
# The response is a dict; display only the generated answer.
response['answer']

"Based on the provided context, it is not possible to determine the specific price of the Tiago iCNG model as this information is not mentioned in the given text. However, we can infer that Tata Motors has introduced the twin-cylinder technology on its Tiago and Tigor models, but no specific prices are mentioned for these models.\n\nIf you're looking for a helpful answer, I'd be happy to provide more general information or help with any other question you may have!"

In [15]:
response

{'input': 'what is the price of Tiago iCNG?',
 'context': [Document(page_content="Set Alert\n\nlive\n\nbselive\n\nnselive\n\nVolume \n\nTodays L/H \n\nMore\n\nTata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\n\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement.\n\nStory continues below Advertisement\n\nRemove Ad\n\nIt is also equipped with other features such as voice assisted electric sunroof, automatic projector headlamps, LED DRLs, 16-inch diamond cut alloy wheels, 7-inch infotainment system by Harman that supports Android Auto and Apple Carplay connectivity, rain sensing wipers and height adjustable driver seat.\n\nThe com

#### RetrievalQAWithSourcesChain

In [10]:
# Expose the vector index as a retriever and combine it with the LLM into a
# question-answering chain that also reports its sources.
retriever = vectorIndex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

# Display the assembled chain.
chain



In [12]:
# Question to put to the sources-aware chain.
query = "what is the price of Tiago iCNG?"

# Uncomment the next two lines to see what is going on underneath:
# import langchain
# langchain.debug=True

# The question is passed to the chain under the "question" key.
input_data = {"question": query}
# return_only_outputs=True: the result shown below holds only the chain's
# outputs ('answer' and 'sources'), without the input echoed back.
result = chain.invoke(input_data, return_only_outputs=True)

  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (1988 > 1024). Running this sequence through the model will result in indexing errors


In [14]:
result

{'answer': "I don't know.\n\n", 'sources': ''}