In [None]:
!pip install -q langchain==0.1.7 faiss-cpu openai pypdf tiktoken fsspec==2024.10.0

In [None]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

In [None]:


# ✅ Set OpenAI API Key
# The key must already be present in the process environment (e.g. exported
# from a Kaggle secret). The previous `os.environ[k] = os.getenv(k)` was a no-op
# when the key existed and raised "TypeError: str expected, not NoneType" when
# it did not, so validate explicitly and fail fast with an actionable message.
# (`os` is imported in the notebook's import cell.)
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set in the environment. Export it (for example "
        "from your Kaggle secret) before running this notebook."
    )

# ✅ Load and split PDFs
def load_and_split_pdfs(pdf_paths, chunk_size=800, chunk_overlap=100):
    """Load PDFs and split the loaded documents into chunks.

    Args:
        pdf_paths: Iterable of PDF file paths. A single path passed as a plain
            string is treated as a one-element list instead of being iterated
            character by character.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size`` (default 800, the value that used to be hard-coded).
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap`` (default 100).

    Returns:
        The result of ``splitter.split_documents`` applied to every document
        loaded from ``pdf_paths``, in input order.
    """
    # Guard against the easy mistake load_and_split_pdfs("file.pdf").
    if isinstance(pdf_paths, str):
        pdf_paths = [pdf_paths]

    pages = []
    for path in pdf_paths:
        pages.extend(PyPDFLoader(path).load())

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(pages)

# ✅ Build vector store
def build_vector_store(documents, index_path="faiss_index"):
    """Embed ``documents`` into a FAISS store and save it to disk.

    Args:
        documents: Non-empty sequence of documents to index (as produced by
            ``load_and_split_pdfs``).
        index_path: Folder passed to ``save_local`` (default ``"faiss_index"``,
            the value that used to be hard-coded).

    Returns:
        The in-memory store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``documents`` is empty.
    """
    # Fail early with a clear message instead of handing an empty batch to the
    # embedding / index-building layer.
    if not documents:
        raise ValueError(
            "No documents to index; check that the PDFs were loaded and contain text."
        )

    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(index_path)
    return vectorstore

# ✅ Load RAG chain
def load_rag_chain(index_path="faiss_index"):
    """Load the saved FAISS index and wrap it in a RetrievalQA chain.

    Args:
        index_path: Folder passed to ``FAISS.load_local`` (default
            ``"faiss_index"``, the value that used to be hard-coded).

    Returns:
        A ``RetrievalQA`` chain built with ``from_chain_type`` over a
        similarity retriever (``k=3``) and a ``gpt-3.5-turbo`` chat model at
        temperature 0, with ``return_source_documents=True``.
    """
    import inspect  # local import: only needed for the version check below

    # NOTE(security): FAISS.load_local unpickles data from disk. Releases of
    # langchain-community that expose `allow_dangerous_deserialization` refuse
    # to load unless the caller opts in, while older releases reject the
    # keyword. The package version is not pinned, so detect support at runtime.
    # Opting in is acceptable here only because the index is written by this
    # notebook itself; do not point this at an index from an untrusted source.
    load_kwargs = {}
    if "allow_dangerous_deserialization" in inspect.signature(FAISS.load_local).parameters:
        load_kwargs["allow_dangerous_deserialization"] = True

    vectorstore = FAISS.load_local(index_path, OpenAIEmbeddings(), **load_kwargs)
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
    return qa_chain

# ✅ Ask bot
def ask_bot(query, qa_chain=None):
    """Answer ``query`` with the RAG chain and return the answer text.

    Args:
        query: The question to ask.
        qa_chain: Optional, already-built chain to reuse. When omitted, a new
            chain is created with ``load_rag_chain()`` (which reloads the index
            from disk), matching the previous behavior.

    Returns:
        The ``"result"`` entry of the chain's response. Any other entries in
        the response are discarded.
    """
    if qa_chain is None:
        qa_chain = load_rag_chain()
    # `invoke` replaces calling the chain object directly, which LangChain has
    # deprecated; the response dict is the same.
    response = qa_chain.invoke({"query": query})
    return response["result"]

# ✅ Run everything
# Source PDFs to index — change to your actual filenames.
pdf_files = [
    "pmkisan.pdf",
    "crop_insurance.pdf",
]
docs = load_and_split_pdfs(pdf_files)
# The returned store is not kept; the index is saved to disk by the builder.
build_vector_store(docs)

# ✅ Ask a question
query = "How much subsidy is provided for wheat crop insurance?"
print(f"Q: {query}")
print(f"A: {ask_bot(query)}")


In [None]:
# ✅ Set your OpenAI API key (Add this to Kaggle Secrets!)
# The key must already be present in the process environment. Re-assigning
# `os.environ[k] = os.getenv(k)` does nothing when the key exists and raises
# "TypeError: str expected, not NoneType" when it is missing, so check
# explicitly and fail fast with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set in the environment. Export it (for example "
        "from your Kaggle secret) before running this notebook."
    )

# ✅ Load PDFs and split
def load_and_split_pdfs(pdf_paths):
    """Load every PDF in ``pdf_paths`` and return the split chunks.

    Each path is read with ``PyPDFLoader``; the loaded documents are gathered
    in input order and passed through a ``RecursiveCharacterTextSplitter``
    configured with ``chunk_size=800`` and ``chunk_overlap=100``.

    Note: this redefines the function of the same name from the previous cell.
    """
    pages = [page for pdf in pdf_paths for page in PyPDFLoader(pdf).load()]
    chunker = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    return chunker.split_documents(pages)

# ✅ Build and save vector store
def build_vector_store(documents):
    """Index ``documents`` in FAISS, save the index, and return the store.

    The documents are embedded with ``OpenAIEmbeddings`` and the resulting
    store is saved to the ``"faiss_index"`` folder before being returned.

    Note: this redefines the function of the same name from the previous cell.
    """
    embedder = OpenAIEmbeddings()
    store = FAISS.from_documents(documents, embedder)
    store.save_local("faiss_index")
    return store

# ✅ Load RAG chain
def load_rag_chain():
    """Load the saved ``"faiss_index"`` store and build a RetrievalQA chain.

    Returns:
        A ``RetrievalQA`` chain built with ``from_chain_type`` over a
        similarity retriever (``k=3``) and a ``gpt-3.5-turbo`` chat model at
        temperature 0.

    Note: this redefines the function of the same name from the previous cell
    (that version additionally requests source documents).
    """
    import inspect  # local import: only needed for the version check below

    # NOTE(security): FAISS.load_local unpickles data from disk. Releases of
    # langchain-community that expose `allow_dangerous_deserialization` refuse
    # to load unless the caller opts in, while older releases reject the
    # keyword. The package version is not pinned, so detect support at runtime.
    # Opting in is acceptable here only because the index is written by this
    # notebook itself; do not load an index from an untrusted source this way.
    load_kwargs = {}
    if "allow_dangerous_deserialization" in inspect.signature(FAISS.load_local).parameters:
        load_kwargs["allow_dangerous_deserialization"] = True

    db = FAISS.load_local("faiss_index", OpenAIEmbeddings(), **load_kwargs)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# ✅ Ask question
def ask_bot(question, chain=None):
    """Answer ``question`` with the RAG chain and return the answer text.

    Args:
        question: The question to ask.
        chain: Optional, already-built chain to reuse. When omitted, a new
            chain is created with ``load_rag_chain()`` (which reloads the index
            from disk), matching the previous behavior.

    Returns:
        The ``"result"`` entry of the chain's response.

    Note: this redefines the function of the same name from the previous cell.
    """
    if chain is None:
        chain = load_rag_chain()
    # `invoke` replaces `Chain.run`, which LangChain has deprecated; the answer
    # is the "result" entry of the response dict.
    return chain.invoke({"query": question})["result"]

# ✅ Run everything
# Source PDFs to index — upload these to your Kaggle Notebook.
pdf_files = ["pmkisan.pdf", "crop_insurance.pdf"]
docs = load_and_split_pdfs(pdf_files)
# The returned store is not kept; the index is saved to disk by the builder.
build_vector_store(docs)

# ✅ Test it
# Bind the question once so the printed text and the text sent to the bot
# cannot drift apart (the literal used to be duplicated).
question = "What is the eligibility for PM Kisan?"
print("Q:", question)
print("A:", ask_bot(question))