# 1) Ollama LLM and HuggingFace embeddings: mistralai/Mistral-7B-Instruct-v04

## Document Loader
    PDF loader: LangChain's built-in document loader (`PyPDFLoader`)

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Source PDF to index, relative to the notebook's working directory.
file_path = "Leave_Policy_2024.pdf"
loader = PyPDFLoader(file_path)
# load() returns one Document per PDF page.  The deprecated load_and_split()
# would pre-chunk the text with a default splitter, and the "Split" step
# below would then split those chunks a second time.
pages = loader.load()
len(pages)


## Split
    Split the loaded pages into smaller chunks

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured for 1000-character chunks with a 200-character overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

# Break the loaded pages into chunks and report how many were produced.
splits = text_splitter.split_documents(pages)
len(splits)


In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

## Create vector store
    Stores the embeddings of the Documents (the chunks produced by the split step)

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
## Loading the embedding model from a local folder
# TODO: still pending -- loading the embedding model from a local folder is not done yet.
# new_embeddings = HuggingFaceEmbeddings(cache_folder= r"C:\Users\30078206\Downloads\sentence-transformersall-mpnet-base-v2")


In [None]:
## Not local: the embedding model is referenced by its model name
new_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Embed every chunk and index the result in a Chroma vector store.
vectorstore = Chroma.from_documents(
    embedding=new_embeddings,
    documents=splits,
)
vectorstore

## Create retriever

In [None]:
# Expose the vector store as a retriever that uses similarity search.
retriever = vectorstore.as_retriever(
    search_type="similarity",
)

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [None]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

### LLM Used:

In [None]:
# !ollama pull mistral

In [None]:
# Chat model served through Ollama, selected by model name.
local_model = "mistral"
llm = ChatOllama(
    model=local_model,
)

### Creating Prompt

In [None]:

# Instruction text asking the model for five rephrasings of the user question,
# one per line.  {question} is filled in with the original question.
_multi_query_template = """You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=_multi_query_template,
    input_variables=["question"],
)


### New Terms:
- MultiQueryRetriever:
    - automates the process of prompt tuning
    - uses an LLM to generate multiple queries from different perspectives for a given user question
    - for each query, retrieves the relevant documents, then takes the unique union across all queries
    - overcomes some of the limitations of distance-based retrieval


In [None]:
# Build the multi-query retriever on top of the plain similarity retriever.
# If this cell is re-run, `retriever` is already a MultiQueryRetriever; unwrap
# it first so the wrappers do not stack on top of each other (every extra
# layer would generate its own set of query variants through the LLM).
base_retriever = (
    retriever.retriever
    if isinstance(retriever, MultiQueryRetriever)
    else retriever
)
retriever = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
    prompt=QUERY_PROMPT,
)

# RAG prompt: the model must answer from the retrieved context only.
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [None]:
import logging

# Configure the root logger with the default handler.
logging.basicConfig()

# Let INFO-level records from the multi-query retriever module through.
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [None]:
# Inputs for the RAG prompt: "context" comes from the retriever, while
# "question" is the caller's input passed through unchanged.
rag_inputs = {"context": retriever, "question": RunnablePassthrough()}

# Pipeline: gather inputs -> fill the prompt -> call the LLM -> plain string.
chain = rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Ask the chain to propose questions that the document can answer.
question_request = "Make a list of questions from the document"
questions = chain.invoke(question_request)

In [None]:
import re

In [None]:
# Strip only the leading list numbering ("1. ", "2) ") from each line.  Removing
# every digit would also corrupt numbers inside the questions themselves
# (for example a year) and would leave the ". " after the number behind.
answer = re.sub(r'^[ \t]*\d+[.)][ \t]*', '', questions, flags=re.MULTILINE)

# One question per line; blank lines in the model output are dropped so that
# question_list[0] is always an actual question.
question_list = [line.strip() for line in answer.split('\n') if line.strip()]
question_list

In [None]:
# Try the chain on the first generated question only.
first_question = question_list[0]
chain.invoke(first_question)

In [None]:
''' Taking too much time'''
# Disabled: this loop would invoke the chain once for every entry in
# question_list and collect the answers in answer_list.
# answer_list = []
# for question in question_list:
#     ans = chain.invoke(question)
#     answer_list.append(ans)

In [None]:
print("ANSWERS-----")
# Printing the answers is disabled: answer_list is only built by the
# commented-out loop in the previous cell.
# print(answer_list)

In [None]:
# Canned prompts for exercising the chain against the document.
question_bank = [
    "Give me a summary of the document",
    "Tell me all about the leaves",
    "Make a list of questions from the document",
]

--------------------------------------------(2)--------------------------------------------