### **PART 1 RAG_MODEL**

#### Python Libraries

In [5]:
import PyPDF2
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai

#### Configure Gemini API

In [22]:
# Configure Gemini API
# Prefer a key supplied through the environment so a real credential never has
# to be written into (and shared with) the notebook. The literal placeholder is
# kept only as a fallback so the cell still runs when no variable is set.
import os

genai.configure(
    api_key=os.environ.get("GEMINI_API_KEY")
    or os.environ.get("GOOGLE_API_KEY")
    or "Your_API_Key"
)

#### Step 1: Extract P&L Data from PDF

In [23]:
# Step 1: Extract P&L Data from PDF
def extract_pdf_data(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, concatenated.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        str: The extracted text of all pages, joined in page order with no
        separator added between pages.

    Raises:
        OSError: If ``pdf_path`` cannot be opened.
    """
    # Open the file the caller asked for, in binary mode as PDF parsing requires.
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # ``or ""`` keeps a page that yields no text from breaking the join.
        return "".join(page.extract_text() or "" for page in reader.pages)

#### Step 2: Preprocess Data (Convert to Key-Value Pairs)

In [24]:
def preprocess_data(text):
    """Parse ``text`` into a dict of key/value pairs, one candidate per line.

    A line contributes an entry only if it contains a colon; it is split at
    the first colon and both halves are stripped of surrounding whitespace.
    When the same key appears more than once, the last occurrence wins.
    Customize this parsing to match the structure of your PDF.

    Args:
        text: Newline-separated text to scan.

    Returns:
        dict: Mapping of stripped key to stripped value.
    """
    pairs = {}
    for raw_line in text.split("\n"):
        label, colon, remainder = raw_line.partition(":")
        if not colon:
            # No separator on this line, so there is nothing to record.
            continue
        pairs[label.strip()] = remainder.strip()
    return pairs

#### Step 3: Generate Embeddings and Store in FAISS

In [25]:
def create_faiss_index(data):
    """Embed the key/value pairs in ``data`` and load them into a FAISS index.

    Each pair becomes one index row made of the key embedding followed by the
    value embedding, so the index dimension is twice the model's output size.

    Args:
        data: Mapping of key string to value string.

    Returns:
        tuple: ``(index, keys, values, model)`` where ``index`` is the populated
        ``faiss.IndexFlatL2``, ``keys`` and ``values`` are lists aligned with
        the index rows, and ``model`` is the ``SentenceTransformer`` used.

    Raises:
        ValueError: If ``data`` is empty, since there is nothing to embed.
    """
    if not data:
        raise ValueError(
            "Cannot build a FAISS index from empty data: "
            "no key/value pairs were extracted."
        )

    model = SentenceTransformer('all-MiniLM-L6-v2')  # Embedding model
    keys = list(data.keys())
    values = list(data.values())

    # Generate embeddings for keys and values
    key_embeddings = model.encode(keys)
    value_embeddings = model.encode(values)

    # Combine embeddings side by side; FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        np.hstack((key_embeddings, value_embeddings)), dtype=np.float32
    )

    # Create FAISS index
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, keys, values, model

#### Step 4: Retrieve Relevant Data

In [26]:
def retrieve_data(query, index, keys, values, model, top_k=3):
    """Return up to ``top_k`` ``(key, value)`` pairs nearest to ``query``.

    Args:
        query: Question text to embed and search for.
        index: FAISS index whose rows are ``[key_embedding | value_embedding]``.
        keys: Keys aligned with the index rows.
        values: Values aligned with the index rows.
        model: Embedding model exposing ``encode(list_of_str)``.
        top_k: Maximum number of neighbours to return.

    Returns:
        list: ``(key, value)`` tuples in the order the index returned them.
        Fewer than ``top_k`` items are returned when fewer entries are indexed;
        the list is empty when nothing is indexed or ``top_k`` is not positive.
    """
    # Never request more neighbours than exist: FAISS pads a shortfall with -1,
    # which as a Python list index would silently select the last entry.
    k = min(top_k, len(keys))
    if k <= 0:
        return []

    query_embedding = model.encode([query])[0]
    # Duplicate the query vector so its width matches the key+value rows.
    query_vector = np.hstack((query_embedding, query_embedding))
    query_matrix = np.ascontiguousarray(query_vector, dtype=np.float32).reshape(1, -1)

    # Search FAISS index
    _, indices = index.search(query_matrix, k)
    # Drop any "no neighbour" (-1) or out-of-range ids defensively.
    return [(keys[i], values[i]) for i in indices[0] if 0 <= i < len(keys)]

#### Step 5: Generate Response Using Gemini API

In [27]:
def generate_response(query, context):
    """Ask the Gemini model to answer ``query`` given ``context``.

    Args:
        query: The question to be answered.
        context: Text placed ahead of the question in the prompt.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.
    """
    # Context first, then the question, ending with an open "Answer:" cue.
    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    return gemini.generate_content(prompt).text

#### Example Usage

In [28]:
# Example Usage
# Run the pipeline stages in order: extract text, parse key/value pairs,
# then embed the pairs and build the FAISS index.
pdf_path = "/content/SampleFinancial.pdf"
text = extract_pdf_data(pdf_path)
data = preprocess_data(text)
# index, keys, values and model are reused by the query cell that follows.
index, keys, values, model = create_faiss_index(data)

#### Query the RAG Model

In [51]:
# The whole multi-question block below is passed as a single string, so all
# questions share one embedding and one retrieval call.
# NOTE(review): retrieve_data is called with its default top_k, so only a few
# key/value pairs back every question — consider querying per question.
query = """What was the gross book value of the assets?
    What specific assets and liabilities are measured at fair value on a recurring basis as of March 31, 2024?
Why do the carrying amounts of Fortrade receivables, trade payables, and other assets/payables maturing within one year approximate fair value?
Are there any assets or liabilities not measured at fair value on a recurring basis as of March 31, 2024? If so, how are they valued?
What was the gross book value of the assets retired during the three months and year ended March 31, 2024? What was their net book value?
What is the significance of the net book value being nil for the retired assets? Does this suggest they were fully depreciated?
    """
# Look up the indexed pairs closest to the query.
results = retrieve_data(query, index, keys, values, model)
# Flatten the retrieved pairs into "key: value" lines used as prompt context.
context = "\n".join([f"{k}: {v}" for k, v in results])
response = generate_response(query, context)
print("Response:", response)

2025-01-28 00:22:31.045 200 POST /v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 7686.67ms


Response: Here are the answers based on the provided text:

**1. What was the gross book value of the assets?**

The text mentions several gross book values for retired assets:

* **Year ended March 31, 2023:** ₹1,918 crore
* **Three months ended March 31, 2024:** ₹181 crore
* **Year ended March 31, 2024:** ₹775 crore

The total gross book value of *retired* assets across both periods is not explicitly stated, but it can be calculated by summing these figures: ₹1,918 + ₹181 + ₹775 = ₹2,874 crore.  The total gross book value of *all* assets is not provided.


**2. What specific assets and liabilities are measured at fair value on a recurring basis as of March 31, 2024?**

Fortrade receivables, trade payables, and other assets and payables maturing within one year from the balance sheet date (March 31, 2024).


**3. Why do the carrying amounts of Fortrade receivables, trade payables, and other assets/payables maturing within one year approximate fair value?**

The text states this is due

**Interactive QA Bot Interface**

In [42]:
import streamlit as st
import PyPDF2
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai

# Configure Gemini API
# Prefer a key supplied through the environment so a real credential never has
# to be written into the app source. The literal placeholder is kept only as a
# fallback so the script still starts when no variable is set.
import os

genai.configure(
    api_key=os.environ.get("GEMINI_API_KEY")
    or os.environ.get("GOOGLE_API_KEY")
    or "Your_API_Key"
)

# Step 1: Extract and Preprocess Data
def extract_and_preprocess(pdf_file):
    """Read ``pdf_file`` with PyPDF2 and parse its text into key/value pairs.

    The extracted text of every page is concatenated and split on newlines.
    Each line containing a colon is split at the first colon into a stripped
    key and a stripped value; a repeated key keeps its last value.

    Args:
        pdf_file: Object handed to ``PyPDF2.PdfReader`` (the app passes the
            uploaded file).

    Returns:
        dict: Mapping of stripped key to stripped value.
    """
    pdf = PyPDF2.PdfReader(pdf_file)
    full_text = "".join(page.extract_text() for page in pdf.pages)

    pairs = {}
    for row in full_text.split("\n"):
        label, colon, rest = row.partition(":")
        if colon:
            pairs[label.strip()] = rest.strip()
    return pairs

# Step 2: Create FAISS Index
def create_faiss_index(data):
    """Embed the key/value pairs in ``data`` and load them into a FAISS index.

    Each pair becomes one index row made of the key embedding followed by the
    value embedding, so the index dimension is twice the model's output size.

    Args:
        data: Mapping of key string to value string.

    Returns:
        tuple: ``(index, keys, values, model)`` where ``index`` is the populated
        ``faiss.IndexFlatL2``, ``keys`` and ``values`` are lists aligned with
        the index rows, and ``model`` is the ``SentenceTransformer`` used.

    Raises:
        ValueError: If ``data`` is empty, since there is nothing to embed.
    """
    if not data:
        raise ValueError(
            "Cannot build a FAISS index from empty data: "
            "no key/value pairs were extracted."
        )

    model = SentenceTransformer('all-MiniLM-L6-v2')
    keys = list(data.keys())
    values = list(data.values())
    key_embeddings = model.encode(keys)
    value_embeddings = model.encode(values)
    # Rows are [key | value]; FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        np.hstack((key_embeddings, value_embeddings)), dtype=np.float32
    )
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, keys, values, model

# Step 3: Retrieve Data
def retrieve_data(query, index, keys, values, model, top_k=3):
    """Return up to ``top_k`` ``(key, value)`` pairs nearest to ``query``.

    Args:
        query: Question text to embed and search for.
        index: FAISS index whose rows are ``[key_embedding | value_embedding]``.
        keys: Keys aligned with the index rows.
        values: Values aligned with the index rows.
        model: Embedding model exposing ``encode(list_of_str)``.
        top_k: Maximum number of neighbours to return.

    Returns:
        list: ``(key, value)`` tuples in the order the index returned them.
        Fewer than ``top_k`` items are returned when fewer entries are indexed;
        the list is empty when nothing is indexed or ``top_k`` is not positive.
    """
    # Never request more neighbours than exist: FAISS pads a shortfall with -1,
    # which as a Python list index would silently select the last entry.
    k = min(top_k, len(keys))
    if k <= 0:
        return []

    query_embedding = model.encode([query])[0]
    # Duplicate the query vector so its width matches the key+value rows.
    query_vector = np.hstack((query_embedding, query_embedding))
    query_matrix = np.ascontiguousarray(query_vector, dtype=np.float32).reshape(1, -1)

    _, indices = index.search(query_matrix, k)
    # Drop any "no neighbour" (-1) or out-of-range ids defensively.
    return [(keys[i], values[i]) for i in indices[0] if 0 <= i < len(keys)]

# Step 4: Generate Response Using Gemini API
def generate_response(query, context):
    """Ask the Gemini model to answer ``query`` given ``context``.

    Args:
        query: The question to be answered.
        context: Text placed ahead of the question in the prompt.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.
    """
    # Context first, then the question, ending with an open "Answer:" cue.
    prompt = f"Context: {context}\n\nQuestion: {query}\nAnswer:"
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    return gemini.generate_content(prompt).text

# Streamlit App
st.title("Financial QA Bot")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF with P&L Data", type="pdf")

# Input Query
query = st.text_input("Ask a question about the financial data:")

# Generate Button
if st.button("Generate"):
    if uploaded_file and query:
        # Extract and preprocess data
        data = extract_and_preprocess(uploaded_file)

        if not data:
            # A PDF with no "key: value" lines leaves nothing to embed; report
            # that to the user instead of failing while building the index.
            st.error(
                "No 'key: value' lines were found in the uploaded PDF, "
                "so there is nothing to search."
            )
        else:
            index, keys, values, model = create_faiss_index(data)

            # Retrieve relevant data
            results = retrieve_data(query, index, keys, values, model)
            context = "\n".join([f"{k}: {v}" for k, v in results])

            # Generate and display response
            response = generate_response(query, context)
            st.write("### Answer:")
            st.write(response)
            st.write("### Relevant Data:")
            st.write(context)
    else:
        st.warning("Please upload a PDF and enter a query.")

