In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# Extract text from a PDF file
def load_pdf(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents    

In [4]:
import os
os.chdir("../")

In [5]:
extracted_data = load_pdf("data")

In [6]:
extracted_data

[Document(metadata={'producer': 'Acrobat Distiller 7.0 (Windows)', 'creator': 'PageMaker 7.0', 'creationdate': '2022-04-26T11:41:40+05:30', 'author': 'Kishore', 'moddate': '2022-04-27T16:31:02+05:30', 'title': 'book a4.pmd', 'rgid': 'PB:360539346_AS:11431281444442311@1747552340804', 'source': 'data\\VegetableCompendium.pdf', 'total_pages': 129, 'page': 0, 'page_label': '123'}, page_content='See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/360539346\nCompendium of Vegetable Crops Diseases\nBook · March 2022\nCITATIONS\n0\nREADS\n358\n12 authors, including:\nPratibha Sharma\n101 PUBLICATIONS\xa0\xa0\xa01,481 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nDinesh Singh\nICAR-Indian Institute of Sugarcane Research\n251 PUBLICATIONS\xa0\xa0\xa03,905 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nSanjeev Sharma\nICAR-Central Potato Research Institute\n231 PUBLICATIONS\xa0\xa0\xa02,913 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nPranab Dutta\nCentral Agricultural

In [7]:
len(extracted_data)

129

In [8]:
# Split the documents into smaller chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
    )
    texts_chunk = text_splitter.split_documents(extracted_data)
    return texts_chunk

texts_chunks = text_split(extracted_data)

In [1]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embeddings(model_name= "sentence-transformers/all-MiniLM-L6-v2"):
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return embeddings

embeddings = download_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name=model_name)
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
vector = embeddings.embed_query("What is the impact of climate change on agriculture?")
vector

  return forward_call(*args, **kwargs)


[-0.005426445975899696,
 0.06126094609498978,
 0.03586757928133011,
 0.021354349330067635,
 0.09003327786922455,
 0.0006526417564600706,
 -0.07494443655014038,
 -0.04775404930114746,
 -0.007960768416523933,
 0.025862468406558037,
 0.08441580086946487,
 -0.03246045857667923,
 -0.026120813563466072,
 -0.007448591757565737,
 -0.024101344868540764,
 0.0033107884228229523,
 -0.041675325483083725,
 0.0443352572619915,
 -0.05857554078102112,
 -0.11892368644475937,
 -0.014727404341101646,
 0.04543473199009895,
 -0.07884199917316437,
 -0.017768539488315582,
 -0.04913901910185814,
 -0.04881063476204872,
 -0.06332499533891678,
 0.0250377357006073,
 -0.10646843910217285,
 0.018779331818223,
 -0.0796935111284256,
 0.1182025820016861,
 0.03861185908317566,
 0.03420533239841461,
 -0.06550925970077515,
 0.03129710629582405,
 0.10800140351057053,
 -0.005137470550835133,
 -0.013726222328841686,
 -0.0028595654293894768,
 -0.036526888608932495,
 -0.0893489345908165,
 0.05133294314146042,
 -0.0756948441267

In [11]:
len(vector)

384

In [20]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

import os
# Replace with your OpenRouter API key
load_dotenv()  # Load from .env file
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0.7)


In [4]:
from pinecone import Pinecone
pinecone_api_key = PINECONE_API_KEY

pc = Pinecone(api_key=PINECONE_API_KEY)

In [14]:
pc

<pinecone.pinecone.Pinecone at 0x1cc3d70ec40>

In [16]:
from pinecone import ServerlessSpec 

index_name = "agri-bot"

if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        dimension=384,  # Dimension of the embeddings
        metric= "cosine",  # Cosine similarity
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )


index = pc.Index(index_name)

In [17]:
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore.from_documents(
    documents=texts_chunks,
    embedding=embeddings,
    index_name=inde_name,
)


In [5]:
# for loading the index from existing data
# Uncomment the following lines if you want to load an existing index instead of creating a new one
inde_name = "agri-bot"  # Ensure this matches the index name you created    
from langchain_pinecone import PineconeVectorStore
vector_store = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=inde_name,
)


In [None]:
# add more data in existing index
# def add_data_to_index(data):
#     vector_store.add_documents(data=[])

In [6]:
# add the vector store to the retriever
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3}) 

In [7]:
retriever.invoke(" What is the impact of climate change on agriculture?")  # Example query

  return forward_call(*args, **kwargs)


[Document(id='b1c7c766-2572-41e5-ab6e-97967dc6c635', metadata={'author': 'Kishore', 'creationdate': '2022-04-26T11:41:40+05:30', 'creator': 'PageMaker 7.0', 'moddate': '2022-04-27T16:31:02+05:30', 'page': 7.0, 'page_label': '2', 'producer': 'Acrobat Distiller 7.0 (Windows)', 'rgid': 'PB:360539346_AS:11431281444442311@1747552340804', 'source': 'data\\VegetableCompendium.pdf', 'title': 'book a4.pmd', 'total_pages': 129.0}, page_content='management has been taken in real application in agriculture particularly for vegetable production.\nI'),
 Document(id='686a1515-2d81-4067-a1b2-0cea7bc321f5', metadata={'author': 'Kishore', 'creationdate': '2022-04-26T11:41:40+05:30', 'creator': 'PageMaker 7.0', 'moddate': '2022-04-27T16:31:02+05:30', 'page': 8.0, 'page_label': '3', 'producer': 'Acrobat Distiller 7.0 (Windows)', 'rgid': 'PB:360539346_AS:11431281444442311@1747552340804', 'source': 'data\\VegetableCompendium.pdf', 'title': 'book a4.pmd', 'total_pages': 129.0}, page_content='management has b

In [8]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [9]:
system_prompt = (
    "You are a helpful Agriculture assistant that provides information based on the provided documents. "
    "Use the information from the documents to answer the user's questions accurately and concisely."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [33]:
answer_chain = create_stuff_documents_chain(llm,prompt)
rag_chain = create_retrieval_chain(retriever,answer_chain)


In [11]:
# Example usage of the RAG chain
query = "What is damping-off in agriculture?"
response = rag_chain.invoke({"input": query})
print(response['answer'])  # Print the response from the RAG chain

  return forward_call(*args, **kwargs)


Damping-off is a disease that causes the death of seedlings before or after they emerge from the soil.  It's a significant problem for farmers, particularly those raising tomato seedlings, with losses ranging from 5% to 80%.


In [17]:
from langchain.memory import ConversationBufferWindowMemory, ChatMessageHistory

In [36]:
async def conversation_QAchain():
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    message_history = ChatMessageHistory()
    chat_memory = ConversationBufferWindowMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True
        )
    conversation_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=answer_chain
        )

In [None]:
chain = await conversation_QAchain()
query = "What is damping-off in agriculture?"
response = rag_chain.invoke({"input": query})
print(response['answer']) 

  return forward_call(*args, **kwargs)


Damping-off is a disease that causes the death of seedlings, before or after they emerge from the soil.  It's a significant problem for farmers, particularly those raising tomato seedlings.  The disease can affect 5-80% of seedlings.


In [39]:
query = "How do I manage it?"
response = rag_chain.invoke({"input": query})
print(response['answer'])

  return forward_call(*args, **kwargs)


Damping-off management requires an integrated disease management (IDM) approach using both preventive and curative methods.  Preventive measures include sowing seeds in well-drained, prepared raised seedbeds; avoiding overcrowding, excessive shade, overwatering, over-fertilization, and excessive farm yard manure or nitrogen fertilizer; and transplanting disease-free seedlings.
