### simple gen ai using langchain

In [None]:
# app build based on website 
# https://www.moneycontrol.com/news/gold-rates-today/


In [3]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ["LANGCHAIN_API_KEY"]=os.getenv('LANGCHAIN_API_KEY')
os.environ["LANGCHAIN_TRACKING_V2"]='true' # to track langsmith
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT2")

In [7]:
# data ingestin to scrap the data from website

# to use webbase loader we need to have bs4 installed
from langchain_community.document_loaders import WebBaseLoader
web_loader = WebBaseLoader("https://www.moneycontrol.com/news/gold-rates-today/")
docs_from_web = web_loader.load()
docs_from_web

[Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/', 'title': 'Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com', 'description': 'Gold Rate in India Today: Check latest gold rate in India today by the Moneycontrol. Get latest 22 & 24 carat gold rate in india, gold rate news, gold rate updates, videos and more', 'language': 'en'}, page_content='    Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com      \n\n  \n\n      \n\n   \n\n  EnglishHindiGujaratiSpecialsSearch Quotes, News, Mutual Fund NAVsTrending StocksBajaj Housing INE377Y01014, BAJAJHFL, 544252Ola Electric INE0LXG01040, OLAELEC, 544225Suzlon Energy INE040H01021, SUZLON, 532667Tata Motors INE155A01022, TATAMOTORS, 500570BSE Limited INE118H01025, BSE, 0QuotesMutual FundsCommoditiesFutures & OptionsCurrencyNewsCryptocurrencyForumNoticesVideosGlossaryAll  Hello, Login Hello, LoginLog-inor Sign-UpMy Acco

In [15]:
# converting the large docs to chunks : docs transformation
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=200,chunk_overlap=10)
splitted_docs = splitter.split_documents(docs_from_web)

# splitted docs to be converted to vectors and storing in vectordbs
from langchain_community.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

# storing in faiss-cpu db

from langchain_community.vectorstores import FAISS
database = FAISS.from_documents(splitted_docs,embeddings)
database

<langchain_community.vectorstores.faiss.FAISS at 0x294c00fec90>

In [22]:
# to query from the webpage
query = "what is the rate of gold 22 carats today"
result = database.similarity_search_with_score(query) # low score is better
result

[(Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/', 'title': 'Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com', 'description': 'Gold Rate in India Today: Check latest gold rate in India today by the Moneycontrol. Get latest 22 & 24 carat gold rate in india, gold rate news, gold rate updates, videos and more', 'language': 'en'}, page_content='gold.22 Carat Gold Rate (Yesterday & Sep 30, 2024)GramYesterdaySep 30, 2024Price Change1 Gram₹ 7,140₹ 7,170₹ 3010 Gram₹ 71,400₹ 71,700₹ 300100 Gram₹ 714,000₹ 717,000₹ 3,00024 Carat Gold Rate'),
  0.26260042),
 (Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/', 'title': 'Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com', 'description': 'Gold Rate in India Today: Check latest gold rate in India today by the Moneycontrol. Get latest 22 & 24 carat gold rate in india, gold rate news, gol

In [23]:
# making a llm model
from langchain_openai import ChatOpenAI
gpt_model_2 = ChatOpenAI(model="gpt-4o-mini")


In [28]:
# if we want to ask meaningful qstns  and get answers from db 
# we use retrieval chain

# to create a chain for passing a list of docs to a model
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
answer the questions based on provided context
<context>
{context}
</context>

"""
)
# here doc_chain is responsible in providing the context into prompt template
document_chain = create_stuff_documents_chain(gpt_model_2,prompt)
document_chain
# here we  an see the chain prompttemplate | chatopeai model | strparser for output

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nanswer the questions based on provided context\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000295271C84D0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000295284FC890>, root_client=<openai.OpenAI object at 0x00000295286750D0>, root_async_client=<openai.AsyncOpenAI object at 0x00000295284212D0>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'},

In [29]:
# we can get o/p from the about built doc chain
from langchain_core.documents import Document
document_chain.invoke({
    "input":"The gold rate of today compared to yesterday",
    "context":[Document(page_content="the gold rate is 7200 and yesterdays rate is 7100")]

})


'Sure! Please provide the questions you would like me to answer based on the context given.'

In [30]:
### retriever
# interface which is responsible for getting the data from vectorstore db
retriever = database.as_retriever()
from langchain.chains import create_retrieval_chain

# create retriever chain takes retriever and document_chain as inputs
retriever_chain  = create_retrieval_chain(retriever,document_chain)
retriever_chain
# context is given by document_chain to retriever to create retriever chain



RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000294C00FEC90>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nanswer the questions based on provided context\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
            | ChatOpenAI(client=<op

In [34]:
# to get the response from retriever chain
response_from_retriever = retriever_chain.invoke({"input":"what is the rate of gold 22 carats today"})
print(response_from_retriever)
response_from_retriever['answer']

{'input': 'what is the rate of gold 22 carats today', 'context': [Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/', 'title': 'Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com', 'description': 'Gold Rate in India Today: Check latest gold rate in India today by the Moneycontrol. Get latest 22 & 24 carat gold rate in india, gold rate news, gold rate updates, videos and more', 'language': 'en'}, page_content='gold.22 Carat Gold Rate (Yesterday & Sep 30, 2024)GramYesterdaySep 30, 2024Price Change1 Gram₹ 7,140₹ 7,170₹ 3010 Gram₹ 71,400₹ 71,700₹ 300100 Gram₹ 714,000₹ 717,000₹ 3,00024 Carat Gold Rate'), Document(metadata={'source': 'https://www.moneycontrol.com/news/gold-rates-today/', 'title': 'Gold Rate in India Today: Check 22 & 24 Carat Gold Rate in India | MoneyControl - Moneycontrol.com', 'description': 'Gold Rate in India Today: Check latest gold rate in India today by the Moneycontrol. Get latest 22 & 2

'Please provide the questions you would like to ask based on the provided context.'

In [None]:
# end to end project
# data ingestion-->data chunks-->data to vectors-->vectors to store in database--->  llm model--> prompt template-->retrieve data from vector database--> set context of llm model as context from retriever---> get response from model
