In [1]:
from uuid import uuid4
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import faiss

In [None]:
%pip install faiss-cpu #importing faiss

In [3]:
# 1. Read the source PDF; the result is split into chunks in a later cell
pdf_path = "tcsreport.pdf"
loader = PyPDFLoader(pdf_path)
raw_documents = loader.load()

In [4]:
# 2. Configure the splitter that will cut the loaded PDF into chunks
splitter_settings = {
    "chunk_size": 2500,
    "chunk_overlap": 50,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
# NOTE(review): the embedding model selected further down
# (sentence-transformers/all-MiniLM-L6-v2) is documented to truncate long
# inputs -- confirm that chunks of this size fit within its input limit.

In [5]:

# Run the configured splitter over the loaded PDF; `chunks` is what gets
# embedded and indexed in the cells below.
chunks = text_splitter.split_documents(raw_documents)

In [6]:
# 3. One random UUID string per chunk (passed as `ids` when the vector store is built)
uuids = [str(uuid4()) for _ in chunks]

In [7]:
# 4. Embedding model, built through the langchain_huggingface wrapper
#    (the updated, non-deprecated one)
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# 5. Build the FAISS vector store from the chunks, keyed by the generated UUIDs.
#    No index object is constructed here; FAISS.from_documents handles that internally.
vector_store = FAISS.from_documents(chunks, embedding, ids=uuids)

chunk_count = len(chunks)
print("Vector store created successfully with", chunk_count, "documents.")

In [None]:
# Sanity check of retrieval: fetch the top 3 results for the question and show them.
query = "What are the key financial highlights mentioned in the report?"
docs = vector_store.similarity_search(query, k=3)

for i, doc in enumerate(docs):
    # Header line and chunk text, each on its own line.
    print(f"\n--- Document {i+1} ---", doc.page_content, sep="\n")

In [None]:
# Retrieve only the top result (k=1); its text becomes the context placed in the prompt.
query = "What are the key financial highlights mentioned in the report?"
context = vector_store.similarity_search(query, k=1)

top_hit = context[0]
actual_context = top_hit.page_content
print(actual_context)

In [11]:
# The question sent to the LLM. It is the same text as the `query` used for
# retrieval, so the retrieved context matches what is being asked.
our_question = "What are the key financial highlights mentioned in the report?"

# Prompt template: instructs the model to answer only from the retrieved
# context, then interpolates `actual_context` (set by the retrieval cell) and
# the question. `actual_context` must already exist when this cell runs.
our_prompt = f"""
I am asking a question. Please answer only from the given context.
Do not guess or make up an answer if it's not found in the context.

Context:
{actual_context}

Question:
{our_question}
"""

In [None]:
import os
from getpass import getpass

# Provide the OpenAI API key through the environment without ever storing it in
# the notebook. A key that is already exported is left untouched (the previous
# unconditional assignment replaced it with an empty string); otherwise the
# user is prompted, and the input is not echoed.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ").strip()

In [13]:
from openai import OpenAI

# No credentials are passed explicitly; presumably the client picks up
# OPENAI_API_KEY from the environment set in the previous cell.
client = OpenAI()

# Send the assembled prompt (context + question) to the model.
request_args = {
    "model": "gpt-4o-mini",
    "input": our_prompt,
}
response = client.responses.create(**request_args)

print(response.output_text)

The report highlights the following key financial figures:

- Profit for the year: `48,797 crore` for 2025 and `46,099 crore` for 2024.
- Operating profit before working capital changes: `67,853 crore` for 2025 and `63,709 crore` for 2024.
- Cash flows generated from operations: `64,494 crore` for 2025 and `56,827 crore` for 2024.
- Net cash flows generated from operating activities: `48,908 crore` for 2025 and `44,338 crore` for 2024.
