In [2]:
import getpass
import os
import pickle
import time

import langchain
import streamlit
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [3]:
# Never hardcode or echo the API key: notebooks and their saved outputs get
# shared and committed. Reuse a key that is already present in the environment
# and only prompt for one (without echoing it) when it is missing or blank.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')




<h3>Initialise the LLM with the required parameters</h3>

In [46]:
# Generation settings for the completion model used to answer questions.
# NOTE(review): 0.9 is a fairly high sampling temperature, so answers may
# differ from run to run — confirm this is intended for factual Q&A.
LLM_TEMPERATURE = 0.9
LLM_MAX_TOKENS = 500

llm = OpenAI(max_tokens=LLM_MAX_TOKENS, temperature=LLM_TEMPERATURE)

<h3>Step 1: Load data</h3>

In [59]:
# Articles to index. The commented-out entry is an optional extra page that can
# be re-enabled to index more than one article.
article_urls = [
    # "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/earnings/tata-motors-q3-consolidated-net-profit-jumps-2x-to-rs-7025-crore-beats-estimates-12181411.html",
]

# Fetch the listed pages and load them as documents.
loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()

# Cell result: number of documents that were loaded.
len(data)

1

<h3>Step 2: Split data into chunks</h3>

In [60]:
# Splitter settings: target size of each chunk and the overlap shared by
# neighbouring chunks.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=CHUNK_OVERLAP, chunk_size=CHUNK_SIZE)

# `data` already holds Document objects, so split_documents() is called on it
# directly instead of going through split_text().
docs = text_splitter.split_documents(data)

In [61]:
# Number of chunks the splitter produced from the loaded documents.
len(docs)

35

In [62]:
# Peek at the first chunk to sanity-check the split. In the recorded run this
# chunk is site navigation text rather than article body.
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll', metadata={'source': 'https://www.moneycontrol.com/news/business/earnings/tata-motors-q3-consolidated-net-profit-jumps-2x-to-rs-7025-crore-beats-estimates-12181411.html'})

<h3>Step 3: Create embeddings for these chunks and store them in a FAISS index</h3>

In [63]:
# Name of the OpenAI embedding model used to vectorise the chunks.
EMBEDDING_MODEL = "text-embedding-3-small"

embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Embed every chunk in `docs` and collect the vectors in a FAISS vector store.
vectorindex_openai = FAISS.from_documents(docs, embeddings)

In [64]:
# Display the vector store object to confirm it was created.
vectorindex_openai

<langchain_community.vectorstores.faiss.FAISS at 0x293d41790>

In [40]:
# Storing the vector index locally (currently disabled).
# NOTE(review): this would pickle the whole vector store object. LangChain's
# FAISS wrapper also provides save_local()/load_local(), which may be the safer
# way to persist it — confirm against the installed version before re-enabling.
# file_path = 'vector_index.pkl'
# with open(file_path, 'wb') as f:
#     pickle.dump(vectorindex_openai, f)

In [65]:
# Expose the FAISS store through its retriever interface, then build the
# question-answering-with-sources chain on top of the LLM and that retriever.
retriever = vectorindex_openai.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(retriever=retriever, llm=llm)

In [66]:
# Display the assembled chain object for inspection.
chain



In [67]:
# Turn on LangChain's global debug flag so the chain run below logs its
# intermediate steps.
langchain.debug = True

query = 'What was the profit earned by Tata Motors for the october-decemeber quarter?'
chain_inputs = {'question': query}

# Cell result: only the chain's outputs are returned.
chain(chain_inputs, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What was the profit earned by Tata Motors for the october-decemeber quarter?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Go Ad-Free\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHome\n\nNews\n\nBusiness\n\nEarnings\n\nTata Motors Q3 consolidated net profit jumps 2x to Rs 7,025 crore, beats estimates\n\nTata Motors’ total vehicle sales across segment in the domestic & international market for Q3 stood at 234,981 vehicles, compared to 228,169 units during the year-ago period.\n\nMoneycontrol News\n\nF

{'answer': ' The profit earned by Tata Motors for the October-December quarter was Rs 7,025 crore.\n',
 'sources': 'https://www.moneycontrol.com/news/business/earnings/tata-motors-q3-consolidated-net-profit-jumps-2x-to-rs-7025-crore-beats-estimates-12181411.html'}