
**Installs the necessary Python packages for the project, including libraries for AI, vector storage, and PDF handling.**

In [None]:
!pip install -U -q "google-generativeai>=0.7.2"
!pip install faiss-cpu
!pip install langchain-community
!pip install langchain
!pip install PyPDF2
!pip install faiss-cpu
!pip install langchain_google_genai



**Imports the required libraries. The `warnings.filterwarnings("ignore")` call suppresses all Python warnings (not only non-critical ones) so they do not clutter the output.**

In [None]:
# Standard library
import os
import warnings

# Third-party
import google.generativeai as genai
from langchain.text_splitter import CharacterTextSplitter
# FAISS is implemented in langchain_community (installed above); importing it
# from there avoids the legacy langchain.vectorstores re-export path.
from langchain_community.vectorstores import FAISS
from PyPDF2 import PdfReader

# Suppress all Python warnings so library notices do not clutter cell output.
warnings.filterwarnings("ignore")



In [None]:
# Retrieves the API key without hard-coding it in the notebook.
# In Colab the key is read from the Secrets panel (secret name: API_KEY).
# Outside Colab, google.colab cannot be imported, so fall back to the
# GOOGLE_API_KEY environment variable and, failing that, a hidden prompt.

try:
    from google.colab import userdata
except ImportError:
    import os
    from getpass import getpass

    API_KEY = os.environ.get('GOOGLE_API_KEY') or getpass('Google API key: ')
else:
    API_KEY = userdata.get('API_KEY')


In [None]:
# Exports the API key as the GOOGLE_API_KEY environment variable and configures
# the google.generativeai library with it. Later cells construct the LangChain
# Google classes without passing a key, so they presumably pick it up from this
# environment variable — confirm against the langchain_google_genai docs.

# Fail early with a clear message: assigning None to os.environ raises an opaque
# TypeError, and an empty key would only surface later as an API error.
if not API_KEY:
    raise ValueError("API_KEY is empty - add the secret and re-run the previous cell.")

os.environ['GOOGLE_API_KEY'] = API_KEY
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('gemini-1.5-flash')

In [None]:
# Renders a single-file upload widget restricted to PDFs.
# The selected file is exposed through `upload.value`.

import io
import ipywidgets as widgets
from IPython.display import display
from PyPDF2 import PdfReader

upload = widgets.FileUpload(
    accept='.pdf',
    multiple=False,
)
display(upload)



FileUpload(value={}, accept='.pdf', description='Upload')

In [None]:
# Reads the uploaded PDF into memory, opens it with PyPDF2 and reports the page count.

uploaded_files = upload.value
# ipywidgets 7 exposes the uploads as a dict keyed by filename, while
# ipywidgets 8 exposes a tuple of per-file dicts; normalise to a sequence.
if isinstance(uploaded_files, dict):
    uploaded_files = list(uploaded_files.values())

# Give a clear error instead of a bare IndexError when the cell is run
# before a file has been selected in the widget.
if not uploaded_files:
    raise ValueError("No PDF uploaded yet - choose a file in the upload widget, then re-run this cell.")

uploaded_file = uploaded_files[0]
pdf_content = uploaded_file['content']

# Wrap the raw content in BytesIO so PdfReader gets a seekable file-like object.
pdf_reader = PdfReader(io.BytesIO(pdf_content))
num_pages = len(pdf_reader.pages)
print(f"Uploaded PDF has {num_pages} pages.")

Uploaded PDF has 19 pages.


In [None]:
# Extracts the text of every page and concatenates it into one string (raw_text).
# Pages whose extract_text() result is empty are skipped.
# str.join builds the result in one pass instead of repeated `+=` on a growing string.

page_texts = (page.extract_text() for page in pdf_reader.pages)
raw_text = ''.join(text for text in page_texts if text)

In [None]:
# Displays a preview of the extracted text for review.
# Only the first 1,000 characters are shown so the notebook output stays readable.

raw_text[:1000]

" \n \nAugust 03, 2023 \nTo, \nNational Stock Exchange of India Limited  \nExchange Plaza, 5th Floor, \nPlot No. C/1, G Block, \nBandra – Kurla Complex, \nBandra (E), Mumbai -400 051 \n \nSymbol: SJS  BSE Limited  \nCorporate Relationship Department, \n2nd Floor, New Trading Wing, \nRotunda Building, P.J. Towers, \nDalal Street, Mumbai – 400 001 \n \nScrip Code: 543387  \n \nISIN: INE284S01014 \n \nDear Sir/Madam,   \n \nSubject: Transcripts of Analysts/Investor Meet/ Earnings Call of the Company pertaining to Q1 of FY \n2023-24 \nPlease find enclosed the transcripts of the Analysts/Investor Meet/ Earnings Call of Q1 FY 2023-24 held \non July 27, 2023. \n \nYou are requested to kindly take the same on record. \n \nThanking you. \nYours faithfully, \nFor S.J.S. Enterprises Limited \n \n \n \n_______________________ \nThabraz Hushain W.  \nCompany Secretary and Compliance Officer \nMembership No.: A51119 \n \nEncl: As above \n  \n \nSJS Enterprises Limited  \nQ1 FY202 4 Earnings Conferen

In [None]:
# Splits the PDF text into overlapping chunks small enough for embedding and retrieval.
# The extracted text uses " \n" line breaks and never contains a blank-line
# separator ("\n\n"), so splitting on "\n\n" left the whole document as a single
# chunk. Splitting on single newlines lets the splitter honour chunk_size.
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 800,       # maximum chunk length, measured with length_function
    chunk_overlap  = 200,   # characters shared between neighbouring chunks
    length_function = len,
)
texts = text_splitter.split_text(raw_text)

In [None]:
# Number of chunks the splitter produced.
len(texts)

1

In [None]:
# Imports additional libraries for embeddings and language model processing.
# NOTE(review): GooglePalm is not referenced by any cell visible in this
# notebook — candidate for removal; confirm before deleting.
from langchain.llms import GooglePalm
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# Embeds every text chunk and stores the vectors in a FAISS index so the
# chunks can be retrieved by similarity to a question.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
document_search = FAISS.from_texts(
    texts=texts,
    embedding=embeddings,
)
document_search


<langchain_community.vectorstores.faiss.FAISS at 0x7918037c20e0>

In [None]:
# Initializes the chat model (ChatGoogleGenerativeAI) and loads a "stuff"
# question-answering chain over it.
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.chains.question_answering import load_qa_chain

# `embeddings` is deliberately not re-created here. The FAISS index was built
# with the embedding model chosen when the index was created; rebinding the
# variable to a different embedding model would leave it out of sync with the
# index while having no effect on retrieval.

llm = ChatGoogleGenerativeAI(model="gemini-pro", convert_system_message_to_human=True)
chain = load_qa_chain(llm, chain_type="stuff")




In [None]:
# Accepts a user question, retrieves the most similar text chunks from the
# FAISS index, and answers the question with the language model.

query = input("Please enter question: ").strip()
# Reject blank input up front rather than sending an empty string to the
# embedding and chat models.
if not query:
    raise ValueError("Please enter a non-empty question.")

docs = document_search.similarity_search(query)
chain.run(input_documents=docs, question=query)

Please enter question: tell me management name 


'- K.A. Joseph\n- Sanjay Thapar\n- Mahendra Naredi\n- Devanshi Dhruva'

**Note**


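1.   In Colab, open the **Secrets** panel (key icon in the left sidebar).
2.   Click **Add new secret**.
3.   Set the secret name to `API_KEY`.
4.   Set the value to your own Gemini API key and enable notebook access for it. Never paste a real key into the notebook itself; a key that has been published must be revoked and regenerated.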



