In [1]:
!pip --quiet install pinecone pymongo transformers pypdf langchain-community langchain langchain_core google-generativeai langchain-google-genai python-dotenv PyPDF2

In [2]:
import pymongo
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from langchain_community.document_loaders import PyPDFLoader
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
uri = ""
# Create a new client and connect to the server
client = MongoClient(uri)
# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [4]:
DB_NAME = ""
db = client[DB_NAME]

In [5]:
COLLECTION_NAME = ""
collection = db[COLLECTION_NAME]

ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_index"

In [17]:
GOOGLE_API_KEY = ""

In [18]:
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [8]:
loader = PyPDFLoader("Constitution-of-Nepal_2072_Eng_www.moljpa.gov_.npDate-72_11_16.pdf")
data = loader.load()

In [9]:
# Split the data into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=20)
docs = text_splitter.split_documents(data)

In [57]:
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
vector_search = MongoDBAtlasVectorSearch.from_documents(
                                documents=docs,
                                embedding= embeddings,
                                collection=collection,
                                index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME
                                                    )

In [69]:
from langchain_core.runnables import RunnablePassthrough

# Create prompt and RAG workflow
prompt = PromptTemplate.from_template("""
    Answer the question  from the provided context in as details as possible in bullet points, make sure to provide the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
""")


model = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0.4)

#prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])

retriever = vector_search.as_retriever(
                        search_type = "similarity",
                        search_kwargs = {"k": 1}
)

In [70]:
retrieved_docs = retriever.invoke("Agriculture")

In [71]:
print(retrieved_docs)

[Document(metadata={'_id': ObjectId('66e3dcf64f83eab26dc3ef59'), 'embedding': [0.04362678527832031, -0.0013764278264716268, -0.07246433198451996, 0.022717710584402084, 0.04777597635984421, 0.05904601141810417, 0.023352304473519325, -0.014690672978758812, 0.02882307954132557, 0.07329129427671432, -0.04267140105366707, 0.00979599542915821, -0.03556458279490471, 0.04755822941660881, -0.0003133683931082487, -0.021175513043999672, 0.03383037447929382, 0.04851040244102478, 0.000774181738961488, -0.05524386465549469, 0.02793959714472294, 0.02852463163435459, 0.00022388229263015091, 0.0008089441689662635, 0.021841498091816902, -0.01748763769865036, 0.010773986577987671, -0.060149185359478, -0.020066479220986366, 0.02672751061618328, -0.026434335857629776, 0.016210177913308144, -0.08172263205051422, 0.016354776918888092, 0.02550257369875908, -0.04222916066646576, -0.012258462607860565, 0.0010909774573519826, -0.01940288580954075, 0.028447167947888374, 0.0016348928911611438, -0.01257173810154199

In [72]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate

# Assuming prompt template requires a 'context' and 'question'
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="Based on the following context:\n{context}\nAnswer the question: {question}"
)

rag_chain = (
   {
      "context": retriever,
      "question": RunnablePassthrough()
   }
   | prompt_template
   | model
   | StrOutputParser()
)

In [74]:
question = "what is about Media?"
answer = rag_chain.invoke(question)
print(answer)

The context mentions that the state should make necessary provisions to make mass media fair, healthy, impartial, decent, responsible and professional.
