### RAG Application

In [None]:
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Load key/value pairs from a .env file into the environment before any client
# is built. load_dotenv is imported above but was never called, so keys such as
# PINECONE_API_KEY (read from os.environ further down) were only available when
# already exported in the shell.
load_dotenv()

# Chat model shared by the smoke test below and the RAG chain built later.
model = ChatGoogleGenerativeAI(
    model='gemini-1.5-flash'
)

# Smoke test: one trivial prompt to confirm the model can be invoked.
# The bare `response` expression displays the result when run as a notebook cell.
response = model.invoke('Hello')
response

In [None]:
from langchain_community.document_loaders.pdf import PyPDFLoader

# Loader for the company-info PDF; the path is relative to the working directory.
pdf_loader = PyPDFLoader(file_path='Arrora_Company_Internal_Info.pdf')

# Load the PDF into `docs`, then echo the result as the cell output.
docs = pdf_loader.load()
docs

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 500 characters, with a
# 50-character overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model handed to the Pinecone vector store further down.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [None]:
from pinecone import Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import os


# Pinecone client; the key is taken from the PINECONE_API_KEY environment
# variable (None when the variable is unset).
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

index_name = "arrora-company-internal-info"  # change if desired

# Create the serverless index only when it does not exist yet.
# NOTE(review): dimension must equal the embedding model's vector size —
# confirm 3072 is what "models/gemini-embedding-001" produces.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        dimension=3072,
        metric="cosine",
    )

index = pc.Index(index_name)

# Vector store wrapping the index, using `embeddings` as its embedding model.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Ingestion step, left disabled as in the original.
# NOTE(review): presumably run once to upload `chunks` and then commented out
# to avoid re-uploading on every run — confirm before re-enabling.
# vector_store.add_documents(chunks)

In [None]:
# Similarity-search retriever over the vector store, returning the top 3 matches.
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 3},
)

In [None]:
# Try the retriever on its own with a sample question, before it is wired
# into the full chain.
response = retriever.invoke(
    "What is the leave policy?"
)

In [None]:
# Show the text of every retrieved document, separated by blank lines.
print('\n\n'.join([hit.page_content for hit in response]))

In [None]:
from langchain.prompts import PromptTemplate

# Prompt for the answer-generation step. It has two placeholders, `context`
# and `query`, and tells the model to reply "I don't know" when the provided
# context does not contain the answer.
prompt = PromptTemplate(
    # The literal must open with exactly three quotes: a fourth quote becomes
    # a stray '"' character at the very start of every rendered prompt.
    template="""
    You are an helpful assistant.
    Answer the user's query from the given documents or provided context.
    If the answer not present inside the provided text then instead of giving random answer just say "I don't know".
    
    Provided Context: 
    {context}
    
    User's query:
    {query}
    """,
    input_variables=['context', 'query']
)

In [None]:
from langchain.schema.runnable import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_context(reletative_docs):
    """Return the page_content of every document, joined by blank lines."""
    texts = [doc.page_content for doc in reletative_docs]
    return '\n\n'.join(texts)


# Build the prompt inputs from a single question string:
#   context -> retriever output passed through format_context
#   query   -> the input itself, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_context),
    query=RunnablePassthrough(),
)

In [None]:
parser = StrOutputParser()

# Full pipeline: build prompt inputs -> fill the prompt -> call the chat
# model -> pass the model output through the string output parser.
rag_chain = (
    parallel_chain
    | prompt
    | model
    | parser
)

In [None]:
# Run the whole chain on a sample question and print its output.
response = rag_chain.invoke(
    "Tell me more about this company"
)
print(response)