In [71]:
from dotenv import load_dotenv
import os
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")  

In [72]:
from langchain_groq import ChatGroq
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser

llm=ChatGroq(model="gemma2-9b-it")
embeddings=OpenAIEmbeddings(model="text-embedding-3-small")
parser=StrOutputParser()


chain = llm | parser

chain.invoke("What is the capital of France?")



'The capital of France is **Paris**.\n'

In [73]:
embeddings.embed_query("What is the capital of France?")

[0.04169800877571106,
 0.0158005952835083,
 0.028160491958260536,
 0.024351144209504128,
 -0.023142803460359573,
 -0.002739247865974903,
 -0.014223608188331127,
 0.01433624979108572,
 0.010834109038114548,
 -0.010199218057096004,
 0.006942841224372387,
 -0.024043940007686615,
 -0.06164587661623955,
 -0.01508378330618143,
 -0.014233848080039024,
 0.023163283243775368,
 -0.006625395733863115,
 0.019446099177002907,
 0.07241854071617126,
 -0.024392105638980865,
 0.003002932295203209,
 -0.010091695934534073,
 -0.04100167378783226,
 0.011970768682658672,
 0.06209644302725792,
 0.0070964437909424305,
 -0.04554831609129906,
 -0.007347328122705221,
 0.00364038348197937,
 0.03942468762397766,
 0.04214857518672943,
 -0.0251498781144619,
 -0.0019558740314096212,
 0.04309067130088806,
 -0.024535467848181725,
 -0.03995717689394951,
 -0.03764289617538452,
 -0.039342764765024185,
 0.021320052444934845,
 0.029676036909222603,
 -0.003136054612696171,
 -0.01302550733089447,
 0.00684555945917964,
 0.0132

## 1. Data Ingestion


In [74]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
import os

file_path = os.path.join(os.getcwd(), "data", "Monthly Economic Review February 2024.pdf")
loader = PyPDFLoader(file_path)

In [75]:
documents = loader.load()

In [76]:
len(documents)

27

In [77]:
documents

[Document(metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 0, 'page_label': '1'}, page_content='1 \n \nEconomic'),
 Document(metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 1, 'page_label': '2'}, page_content='2'),
 Document(metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.ch

In [9]:
len(documents)

27

In [78]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, 
    chunk_overlap=150,
    length_function=len)
text_splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x1e0dac1f5e0>

In [79]:
docs = text_splitter.split_documents(documents)


In [80]:
len(docs)

152

In [46]:
docs[14].metadata

{'producer': 'Microsoft® Word 2021',
 'creator': 'Microsoft® Word 2021',
 'creationdate': '2024-03-22T13:36:26+05:30',
 'author': 'sonali.chowdhry.312@gmail.com',
 'moddate': '2024-03-22T13:36:26+05:30',
 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf',
 'total_pages': 27,
 'page': 4,
 'page_label': '5'}

In [41]:
len(documents)

27

In [42]:
len(docs)

152

In [81]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

embedding_model=OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = FAISS.from_documents(docs, embedding_model)

In [52]:
len(embedding_model.embed_documents(docs[0].page_content))

13

1. In Memory ( FAISS is in memory vector store, chroma)
2. on disk ( one can store it on the disk)
3. Cloud storage(pinecone,weaviate,milvus,mongodbvectorsearch,astra)


This is the Retrieval process( from Vector DB we are going to fetch the most appropriate result)


In [82]:
relevant_doc = vectorstore.similarity_search("What was india's GDP growth in 2024?", k=5)

In [None]:
relevant_doc = vectorstore.similarity_search_with_relevance_scores("What was india's GDP growth in 2024?", k=5)

[(Document(id='cc2269b1-5d0c-45ce-84fa-b55bb5be8971', metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 5, 'page_label': '6'}, page_content='6 \nContinuing growth momentum  \n \n1. The GDP growth estimate for FY24 has been revised upwards from 7.3 per cent to 7.6 \nper cent in the second advance estimates, highlighting the enduring strength of the Indian \neconomy. India grew above 8 per cent for three consecutive quarters, reaffirming her position \nas a standout performer amidst sluggish global growth trends. Various agencies echo a similar \nsentiment revising the growth estimates of India FY24 closer to 8 percent.'),
  np.float32(0.519111)),
 (Document(id='16f9c085-e97e-4a1e

In [64]:
relevant_doc

[Document(id='cc2269b1-5d0c-45ce-84fa-b55bb5be8971', metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 5, 'page_label': '6'}, page_content='6 \nContinuing growth momentum  \n \n1. The GDP growth estimate for FY24 has been revised upwards from 7.3 per cent to 7.6 \nper cent in the second advance estimates, highlighting the enduring strength of the Indian \neconomy. India grew above 8 per cent for three consecutive quarters, reaffirming her position \nas a standout performer amidst sluggish global growth trends. Various agencies echo a similar \nsentiment revising the growth estimates of India FY24 closer to 8 percent.'),
 Document(id='16f9c085-e97e-4a1e-9d3a-315d0c5ffd99', metada

In [63]:
for i in relevant_doc:
    print(i)


page_content='6 
Continuing growth momentum  
 
1. The GDP growth estimate for FY24 has been revised upwards from 7.3 per cent to 7.6 
per cent in the second advance estimates, highlighting the enduring strength of the Indian 
economy. India grew above 8 per cent for three consecutive quarters, reaffirming her position 
as a standout performer amidst sluggish global growth trends. Various agencies echo a similar 
sentiment revising the growth estimates of India FY24 closer to 8 percent.' metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 5, 'page_label': '6'}
page_content='sentiment revising the growth estimates of India FY24 closer to 8 percent. 
 
In line with the official stat

In [83]:
retriever  = vectorstore.as_retriever()

In [84]:
retriever.invoke("What was india's GDP growth in 2024?")

[Document(id='7598ea99-fb87-448e-8f25-5ec3e03de27c', metadata={'producer': 'Microsoft® Word 2021', 'creator': 'Microsoft® Word 2021', 'creationdate': '2024-03-22T13:36:26+05:30', 'author': 'sonali.chowdhry.312@gmail.com', 'moddate': '2024-03-22T13:36:26+05:30', 'source': 'd:\\LLM Study\\LLMOps\\LLMOPS-PROJECT\\document_portal\\notebook\\data\\Monthly Economic Review February 2024.pdf', 'total_pages': 27, 'page': 5, 'page_label': '6'}, page_content='6 \nContinuing growth momentum  \n \n1. The GDP growth estimate for FY24 has been revised upwards from 7.3 per cent to 7.6 \nper cent in the second advance estimates, highlighting the enduring strength of the Indian \neconomy. India grew above 8 per cent for three consecutive quarters, reaffirming her position \nas a standout performer amidst sluggish global growth trends. Various agencies echo a similar \nsentiment revising the growth estimates of India FY24 closer to 8 percent.'),
 Document(id='085dcbf7-d0c2-488f-b722-fa3761efc286', metada

Question : user question

Context : based on the question retrieve the info from the vector DB


In [68]:
prompt_template = """
    Answer the question based on the context provided below.
    If the context does not provide an answer, say "I don't know".

    Context: {context}

    Question: {question}

    Answer:"""

In [85]:
# import Prompt Template
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

question = "What was india's GDP growth in 2024?"
context = relevant_doc
formatted_prompt = prompt.format(context=context, question=question)




In [86]:
rag_chain = prompt | llm | parser

In [92]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [96]:
# import RunnablePassThrough
from langchain_core.runnables import RunnablePassthrough

rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm | parser

In [105]:
res = rag_chain.invoke("" \
"What was india's GDP growth in 2024?"
)
print(res)

The text says that the GDP growth estimate for FY24 has been revised upwards from 7.3 per cent to 7.6 per cent.  It also states that various agencies expect GDP growth for FY24 to be 8 percent. 


Let me know if you have any other questions. 



In [106]:
res = rag_chain.invoke("""
What are the main challenges India might face in FY25 according to the review?
                       """)
print(res)

According to the review, India might face the following challenges in FY25:

* **Hardening crude oil prices:** This will impact import costs.
* **Global supply chain bottlenecks:** This could affect the price competitiveness of Indian exports.
* **Slower growth in key trading partners:**  This will dampen the demand for Indian exports.
* **Sectoral impact:** Specific sectors like agricultural commodities, marine products, textiles and chemicals, capital goods, and petroleum products may be negatively affected. 


The review also highlights the need to **diversify trade routes and transportation options** to address these challenges, although this may increase transit costs.  



Take 10 PDFS keep it in same directory dn create a RAG on top of it
