In [15]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain.chains import RetrievalQAWithSourcesChain, RetrievalQA
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS
from langchain_openai import OpenAI, OpenAIEmbeddings
import configparser

In [2]:
# Read the OpenAI API key from a local config file (rather than hardcoding it
# in the notebook) and export it as the OPENAI_API_KEY environment variable.
config = configparser.RawConfigParser()
config_path = '../../config.config'
# RawConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed; an empty list means nothing was loaded.
# Fail here with the offending path instead of a later, less obvious
# NoSectionError from config.get().
if not config.read(config_path):
    raise FileNotFoundError(f"Config file not found or unreadable: {config_path}")
openapi_key = config.get('Keys', 'openapi_key')
os.environ['OPENAI_API_KEY'] = openapi_key

In [21]:
# Completion model used by the question-answering chain further down.
llm = OpenAI(temperature=0.9, max_tokens=500)

# News articles to ingest.
article_urls = [
    "https://www.moneycontrol.com/news/business/personal-finance/hdfc-bank-share-crash-and-the-perils-of-equity-funds-that-hug-their-benchmarks-12079281.html",
    "https://www.moneycontrol.com/news/business/earnings/tata-communications-q3-profit-tumbles-88-6-12079871.html",
]
loaders = UnstructuredURLLoader(urls=article_urls)

# Fetch the pages; the cell's output is the number of loaded documents.
data = loaders.load()
len(data)

2

In [22]:
# Break the loaded pages into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece can be embedded on its own.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
docs = text_splitter.split_documents(data)

In [23]:
# Number of chunks produced by the splitter.
len(docs)

38

In [24]:
# Inspect the first chunk. In the captured output it is site navigation text
# rather than article content, i.e. the loader keeps page boilerplate.
docs[0]

Document(page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nMoneycontrol Trending Stock\n\nInfosys\xa0INE009A01021, INFY, 500209\n\nState Bank of India\xa0INE062A01020, SBIN, 500112\n\nYes Bank\xa0INE528G01027, YESBANK, 532648\n\nBank Nifty\n\nNifty 500\n\nQuotes\n\nMutual Funds\n\nCommodities\n\nFutures & Options\n\nCurrency\n\nNews\n\nCryptocurrency\n\nForum\n\nNotices\n\nVideos\n\nGlossary\n\nAll\n\nHello, LoginHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy Credit Score₹100 CashbackMy FeedMy MessagesMy AlertsMy Profile My PROMy PortfolioMy WatchlistMy Credit Score₹100 CashbackMy FeedMy MessagesMy AlertsLogoutChat with UsDownload AppFollow us on:\n\nPremium\n\nMy Feed', metadata={'source': 'https://www.moneycontrol.com/news/business/personal-finance/hdfc-bank-share-crash-and-the-perils-of-equity-funds-that-hug-their-benchmarks-12079281.html'})

In [25]:
# Embed every chunk with OpenAI embeddings and build a FAISS index over them.
embeddings = OpenAIEmbeddings()
vector_index_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [34]:
# Persist the FAISS index to the local folder 'blah' using the vector store's
# own save_local() method.
vector_index_openai.save_local(folder_path='blah')

# Note: an earlier attempt wrote the index to "vector_index.pkl" via
# pickle.dump(); it was left disabled and save_local() is used instead.

In [None]:
# Removed: `del vector_index_openai['lock_object']`.
# The FAISS vector store is not a mapping, so item deletion raises TypeError
# and would halt a Restart & Run All (this cell was never executed).
# Presumably a leftover from trying to make the index picklable; it is not
# needed because the index is persisted with save_local().

In [26]:
# Expose the FAISS index as a retriever, then combine it with the LLM into a
# question-answering chain that also reports the sources it used.
retriever = vector_index_openai.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)

In [27]:
# Display the assembled chain object to inspect its configuration.
chain



In [28]:
# Enable LangChain's debug mode so the chain's intermediate steps are printed
# (the [chain/start] trace in this cell's output).
langchain.debug = True

# Ask the question; return_only_outputs=True limits the result to the
# chain's outputs (the 'answer' and 'sources' keys shown below).
query = 'Which segment contributes more for Tata Communications?'
question_payload = {'question': query}
chain(question_payload, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "Which segment contributes more for Tata Communications?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Go Ad-Free\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHome\n\nNews\n\nBusiness\n\nEarnings\n\nTata Communications posts fastest quarterly revenue growth in 9 years\n\nRevenue from the data services segment, which contributes more than 80% to the total, climbed 28.3% on robust demand in its core connectivity and digital portfolio segments.\n\nReuters\n\nJanuary 18, 2024 / 07:53 PM IST\n\n\n\n\n\n\n\

{'answer': ' The data services segment contributes more than 80% to Tata Communications.\n',
 'sources': 'https://www.moneycontrol.com/news/business/earnings/tata-communications-q3-profit-tumbles-88-6-12079871.html'}