In [13]:
import getpass
import os
import shutil
from typing import List, Optional

import google.generativeai as genai
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings


In [14]:
# SECURITY: never store an API key in the notebook itself. The key that was
# previously hard-coded on this line has been exposed and should be revoked
# and rotated.
# Reuse GOOGLE_API_KEY when the environment already provides it; otherwise
# prompt for it without echoing, so the value never lands in the saved file.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")
def load_pdf(file_path: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List:
    """Load a PDF and split its pages into overlapping text chunks.

    Args:
        file_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: Maximum chunk length passed to the splitter, measured with
            ``len``. Defaults to 1000, the value that used to be hard-coded.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 200, the value that used to be hard-coded.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_documents``
        for the loaded pages.
    """
    pages = PyPDFLoader(file_path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_documents(pages)


In [15]:
def create_vector_store(chunks: List) -> FAISS:
    """Embed ``chunks`` into a FAISS index and save it to ``VECTOR_STORE_PATH``.

    Args:
        chunks: Documents to index (for example the output of ``load_pdf``).

    Returns:
        The newly built FAISS vector store, already saved to disk.

    Raises:
        ValueError: If ``chunks`` is empty. There is nothing to embed, and
            failing here gives a clearer message than an error raised from
            inside the index construction.
    """
    if not chunks:
        raise ValueError("Cannot build a vector store from an empty list of chunks.")

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_documents(chunks, embeddings)
    # Persist the index so load_vector_store() can reuse it on later runs.
    vector_store.save_local(VECTOR_STORE_PATH)
    return vector_store

def load_vector_store() -> Optional[FAISS]:
    """Load the FAISS index saved at ``VECTOR_STORE_PATH``, if it is usable.

    Returns:
        The loaded vector store, or ``None`` when nothing exists at
        ``VECTOR_STORE_PATH`` or when loading fails with ``KeyError`` or
        ``AttributeError``. In the failure case the saved store is deleted so
        the caller can build a fresh one.
    """
    if not os.path.exists(VECTOR_STORE_PATH):
        # Nothing saved yet; skip building the embeddings client entirely.
        return None

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    try:
        # NOTE(security): this flag opts in to pickle-based deserialization of
        # the saved store, which can execute arbitrary code. Keep it only while
        # VECTOR_STORE_PATH is written exclusively by create_vector_store().
        return FAISS.load_local(
            VECTOR_STORE_PATH,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except (KeyError, AttributeError) as e:
        print(f"Error loading vector store: {e}")
        print("Deleting incompatible vector store and recreating...")
        # Only the deletion happens here; rebuilding is left to the caller.
        shutil.rmtree(VECTOR_STORE_PATH)
        return None

In [16]:
def setup_rag():
    """Build the retrieval-augmented QA chain over the PDF's vector store.

    The store returned by ``load_vector_store`` is reused when there is one;
    otherwise the PDF at ``PDF_PATH`` is chunked and embedded into a new store.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type and ``return_source_documents=True``.
    """
    store = load_vector_store()
    if store is None:
        # Nothing usable on disk: index the PDF from scratch.
        store = create_vector_store(load_pdf(PDF_PATH))

    # Gemini chat model, with system messages converted to human messages.
    chat_model = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.7,
        convert_system_message_to_human=True,
    )

    # Wire the model and the store's retriever into a single QA chain.
    retriever = store.as_retriever()
    return RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )

In [17]:
def query_rag(qa_chain, query: str):
    """Run ``query`` through the RAG chain and return the answer.

    Args:
        qa_chain: Chain built by ``setup_rag``. Objects exposing ``invoke`` are
            run through it; plain callables are called directly.
        query: The question to ask.

    Returns:
        The value stored under the ``"result"`` key of the chain's response.
    """
    # Calling a chain object directly (``qa_chain({...})``) is deprecated in
    # LangChain in favour of ``invoke``. Fall back to the direct call only for
    # objects that do not provide ``invoke``.
    run = getattr(qa_chain, "invoke", qa_chain)
    response = run({"query": query})
    return response["result"]

In [18]:
# Constants for file paths
VECTOR_STORE_PATH = "./vector_store"
PDF_PATH = "comprehensive-clinical-nephrology.pdf"

# When True (the original behaviour) any saved index is discarded and the PDF
# is re-embedded on every run. Set to False to let setup_rag() reuse the index
# saved at VECTOR_STORE_PATH.
FORCE_REBUILD = True

if FORCE_REBUILD:
    print("Creating new vector store...")
    # Delete existing vector store if it exists
    if os.path.exists(VECTOR_STORE_PATH):
        shutil.rmtree(VECTOR_STORE_PATH)

# Initialize RAG system
qa_chain = setup_rag()

# Example query
query = "What are the common causes of chronic kidney disease?"
result = query_rag(qa_chain, query)
print(result)

Creating new vector store...
Based on the provided text, common causes of chronic kidney disease (CKD) include systemic hypertension, diabetes mellitus, cardiovascular disease, dyslipidemia, smoking, obesity/metabolic syndrome, hyperuricemia, and low socioeconomic status.  Exposure to nephrotoxins (NSAIDs, analgesics, traditional herbal remedies, heavy metals, lead) and older age are also listed as initiation factors.

Based on the provided text, common causes of chronic kidney disease (CKD) include systemic hypertension, diabetes mellitus, cardiovascular disease, dyslipidemia, smoking, obesity/metabolic syndrome, hyperuricemia, and low socioeconomic status.  Exposure to nephrotoxins (NSAIDs, analgesics, traditional herbal remedies, heavy metals, lead) and older age are also listed as initiation factors.



In [15]:
!pip install langchain-google-genai



In [None]:
def get_retriever_from_vector_store():
    """Return a retriever over the saved FAISS store, or ``None`` if it is absent.

    The retriever is created with ``search_kwargs={"k": 5}``. When nothing
    exists at ``VECTOR_STORE_PATH`` a hint is printed and ``None`` is returned.
    """
    embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    if not os.path.exists(VECTOR_STORE_PATH):
        print("Vector store not found. Please create it first.")
        return None

    # The flag opts in to loading the store's serialized data from disk.
    store = FAISS.load_local(
        VECTOR_STORE_PATH,
        embedding_model,
        allow_dangerous_deserialization=True,
    )
    return store.as_retriever(search_kwargs={"k": 5})

def retrieve_documents(query: str):
    """Fetch the documents relevant to ``query`` and print a preview of each.

    Args:
        query: Search text passed to the retriever.

    Returns:
        The retrieved documents, or an empty list when no retriever is
        available.
    """
    retriever = get_retriever_from_vector_store()
    if not retriever:
        return []

    matches = retriever.get_relevant_documents(query)
    print(f"Found {len(matches)} relevant documents for: '{query}'")
    for position, document in enumerate(matches, start=1):
        print(f"\nDocument {position}:")
        # Preview only the first 300 characters of each document.
        print(f"Content: {document.page_content[:300]}...")
        if hasattr(document, 'metadata'):
            print(f"Metadata: {document.metadata}")
    return matches

In [None]:
# Smoke-test retrieval on its own, without going through the QA chain
test_query = "chronic kidney disease causes"
retrieved_docs = retrieve_documents(query=test_query)