In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Read the PDF via a path relative to the working directory; the loaded
# documents are kept in `safe_pdf_docs` for the splitting step.
loader = PyPDFLoader(file_path="vivek_personal.pdf")
safe_pdf_docs = loader.load()

In [None]:
# Show a compact summary instead of echoing every page: the PDF holds personal
# data, and a full dump would be stored verbatim in the notebook's saved output.
{
    "pages": len(safe_pdf_docs),
    # Guard the empty case so the cell still renders if nothing was loaded.
    "first_page_metadata": safe_pdf_docs[0].metadata if safe_pdf_docs else None,
}

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Split the loaded documents with a chunk size of 500 and an overlap of 50
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
pdf_text_splits = text_splitter.split_documents(safe_pdf_docs)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embedding client handed to the vector store below.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed a throwaway string once: its length is the dimensionality that the
# flat L2 index is created with.
probe_vector = embeddings.embed_query("hello world")
index = faiss.IndexFlatL2(len(probe_vector))

# Start from an empty store; documents are added in a later cell.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [None]:
# Add the split chunks to the vector store; the call's return value is shown
# as the cell output.
# NOTE(review): re-running this cell presumably adds the same chunks a second
# time — recreate `vector_store` before re-running; confirm.
vector_store.add_documents(documents=pdf_text_splits)

In [None]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Smoke-test the retriever with a sample query; the result is the cell output.
retriever.invoke("what is viveks job description?")

In [None]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

# Two-message prompt: the `context` variable fills the whole system message and
# the `question` variable fills the user message.
prompt = ChatPromptTemplate.from_messages(
    [("system", "{context}"), ("user", "{question}")]
)
prompt

In [None]:
# Groq-hosted chat model with temperature 0.0 and max_tokens set to 5000.
# No API key is passed here, so credentials presumably come from the
# environment — TODO confirm.
model = ChatGroq(
    model="gemma2-9b-it",
    temperature=0.0,
    max_tokens=5000,
)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Output parser used as the last stage of the chain; presumably reduces the
# chat model's reply to a plain string — confirm against the library docs.
parser = StrOutputParser()

In [None]:
def format_docs(docs):
    """Merge documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        Every ``page_content`` in input order, separated by a blank line;
        an empty string when ``docs`` is empty.
    """
    page_texts = (doc.page_content for doc in docs)
    separator = "\n\n"
    return separator.join(page_texts)

In [None]:
# Context branch: retrieve documents for the incoming question and join them
# into one string with `format_docs`.
retrieve_context = retriever | format_docs

# Full pipeline: build the prompt inputs (the question is passed through
# unchanged), render the prompt, call the model, then parse the output.
rag_chain = (
    {"context": retrieve_context, "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

In [None]:
# Run the whole chain on a sample question; the returned answer is displayed
# as the cell output.
rag_chain.invoke("How much vivek contributes as SIPs")