In [6]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['HF_TOKEN'] = os.getenv("HF_TOKEN")

## Langsmith Tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_PROJECT'] = os.getenv("LANGCHAIN_PROJECT")


In [7]:
import warnings
warnings.filterwarnings("ignore")

Load data -> Chunking(dividing in smaller text) -> Vector Embeddings -> Vector Storing -> quering -> Retrieving 

#### Data Ingestion

In [9]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
website_path = "https://www.screener.in/company/HDFCBANK/consolidated/"

loader = WebBaseLoader(website_path)

In [10]:
data = loader.load()
data

[Document(metadata={'source': 'https://www.screener.in/company/HDFCBANK/consolidated/', 'title': 'HDFC Bank Ltd share price | About HDFC Bank | Key Insights - Screener', 'language': 'en'}, page_content="\n\n\n\n\n\nHDFC Bank Ltd share price | About HDFC Bank | Key Insights - Screener\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Home\n    \n\n\n\n\n\n      Screens\n    \n\n\n\n\n\n\n\n\n\n\n      Tools\n    \n\n\n\n\n\n      Login\n    \n\n\n\n\n\n\n\n\n\n\n\n\n          \n            Home\n          \n        \nScreens\n\n\n            Tools\n            \n\n\n\n\n\n\n\n\nCreate a stock screen\nRun queries on 10 years of financial data\n\n\n\n\n\n\nPremium features\n\n\n\n\n\n\n\n\n\nCommodity Prices\nSee prices and trends of over 10,000 commodities\n\n\n\n\n\n\n\n\n\nSearch shareholders\nSee companies where a person holds over 1% of the shares\n\n\n\n\n\n\n\n\n\nLatest Announcements\nBrowse, filter and set alerts for announcements.\n\n\n\n\nUpgrade to

#### Splitter/Chunking

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap = 500)
text = text_splitter.split_documents(data)

In [24]:
text

[Document(metadata={'source': 'https://www.screener.in/company/HDFCBANK/consolidated/', 'title': 'HDFC Bank Ltd share price | About HDFC Bank | Key Insights - Screener', 'language': 'en'}, page_content='HDFC Bank Ltd share price | About HDFC Bank | Key Insights - Screener\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n      Home\n    \n\n\n\n\n\n      Screens\n    \n\n\n\n\n\n\n\n\n\n\n      Tools\n    \n\n\n\n\n\n      Login\n    \n\n\n\n\n\n\n\n\n\n\n\n\n          \n            Home\n          \n        \nScreens\n\n\n            Tools\n            \n\n\n\n\n\n\n\n\nCreate a stock screen\nRun queries on 10 years of financial data\n\n\n\n\n\n\nPremium features\n\n\n\n\n\n\n\n\n\nCommodity Prices\nSee prices and trends of over 10,000 commodities\n\n\n\n\n\n\n\n\n\nSearch shareholders\nSee companies where a person holds over 1% of the shares\n\n\n\n\n\n\n\n\n\nLatest Announcements\nBrowse, filter and set alerts for announcements.\n\n\n\n\nUpgrade to premium\n\n

#### Vectorization of the chunks

Creating embeddings

In [25]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

In [26]:
from langchain_community.vectorstores import FAISS

vectordb = FAISS.from_documents(text,embeddings)

#### Querying

In [27]:
query = "Quarterly Results"

result = vectordb.similarity_search(query)
result

[Document(metadata={'source': 'https://www.screener.in/company/HDFCBANK/consolidated/', 'title': 'HDFC Bank Ltd share price | About HDFC Bank | Key Insights - Screener', 'language': 'en'}, page_content="Pros\n\nCompany is expected to give good quarterCompany has delivered good profit growth of 23.4% CAGR over last 5 yearsCompany has been maintaining a healthy dividend payout of 22.9%Company's median sales growth is 16.4% of last 10 years\n\n\n\nCons\n\nStock is trading at 2.83 times its book valueCompany has low interest coverage ratio.Contingent liabilities of Rs.24,09,821 Cr.Earnings include an other income of Rs.1,49,943 Cr.\n\n\n\n\n*\nThe pros and cons are machine generated.\n\n\n\n            Pros / cons are based on a checklist to highlight important points. Please exercise caution and do your own analysis.\n          \n\n\n\n\n\n\nPeer comparison\n\n              Sector:\n              Banks\n\n              Industry:\n              \n                Banks - Private Sector\n   

#### LLM

In [28]:
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
    task="text-generation",
    max_new_tokens=500,
    do_sample=False,
)



The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\ABHISHEK SHARMA\.cache\huggingface\token
Login successful


#### Retrieval Chain

In [30]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
    Answer the following question based only on the provided context:
    <context>
    {context}
    </context>                                      
 """)

document_chain = create_stuff_documents_chain(llm,prompt)

In [31]:
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>                                      \n '), additional_kwargs={})])
| HuggingFaceEndpoint(repo_id='meta-llama/Llama-3.2-11B-Vision-Instruct', max_new_tokens=500, stop_sequences=[], server_kwargs={}, model_kwargs={}, model='meta-llama/Llama-3.2-11B-Vision-Instruct', client=<InferenceClient(model='meta-llama/Llama-3.2-11B-Vision-Instruct', timeout=120)>, async_client=<InferenceClient(model='meta-llama/Llama-3.2-11B-Vision-Instruct', timeout=120)>, task='text-generation')
| S

In [35]:
from langchain_core.documents import Document

document_chain.invoke({
    "input":"what is the Profit before tax",
    "context": [Document(page_content="Quarterly Results")]})

' What does the term "quarterly results" refer to? \n  A) Yearly performance of a company\n  B) Quarterly performance of a company\n  C) Half-yearly performance of a company\n  D) Daily performance of a company\n  Answer: B) Quarterly performance of a company\n  Skill: Emotion Recognition And Sentiment Analysis\n  Reasoning Skill: The question requires the reader to understand the meaning of the term "quarterly results" based on the context provided. The correct answer is "Quarterly performance of a company" because the term is directly related to the quarterly results mentioned in the context. The other options are incorrect because they refer to different time periods. This question requires the reader to analyze the context and make an inference based on the information provided, which is a key aspect of Emotion Recognition And Sentiment Analysis. \n\nExplanation:\nThe question requires the reader to understand the meaning of the term "quarterly results" based on the context provide

In [37]:
ret = vectordb.as_retriever()

from langchain.chains import create_retrieval_chain

ret_chain = create_retrieval_chain(ret,document_chain)

In [38]:
ret_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001A43DE8F200>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>                           

#### response

In [42]:
res = ret_chain.invoke({'input':"what is the Profit before tax in june 2024 givem in quaterly result table"})

In [47]:
print(res['answer'])

 The company has posted a Net Profit of 17,188 crores for the last 12 months, as per the latest available data. 
Answer: 17,188 crores.  
The company has posted a Net Profit of 17,188 crores for the last 12 months, as per the latest available data. 
The correct answer is 17,188 crores. 
The information about the company's Net Profit for the last 12 months is available in the table, which shows the company's consolidated figures in Rs. Crores. The table provides the data for the last 12 months, including the Net Profit, which is 17,188 crores. 
The answer can be directly obtained from the table, which clearly states the company's Net Profit for the last 12 months. 
Therefore, the answer is 17,188 crores. 
The final answer is 17,188 crores. 
The correct answer is 17,188 crores. 
The information about the company's Net Profit for the last 12 months is available in the table, which shows the company's consolidated figures in Rs. Crores. The table provides the data for the last 12 months, i