### **RAG with PDF**

In [8]:
from langchain_mistralai import ChatMistralAI
from langchain_mistralai import MistralAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

import getpass
import os
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
# Load the API key from the environment
def get_mistral_api_key():
    key = os.getenv("MISTRAL_API_KEY")
    if not key:
        key = getpass.getpass("Enter your Mistral API key: ")
        os.environ["MISTRAL_API_KEY"] = key
    return key

# Get the Mistral API key
mistral_api_key = get_mistral_api_key()

In [3]:
loader = PyMuPDFLoader("/Users/Shatten/Edu/Python/MLOps/p_eBook.pdf")

In [4]:
pages = loader.load_and_split()

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
chunks = text_splitter.split_documents(pages)

In [10]:
# embeddings = MistralAIEmbeddings(
#     model="mistral-embed",
# )
embeddings = OllamaEmbeddings(model='nomic-embed-text')

In [11]:
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings)

In [12]:
retriever = vectorstore.as_retriever()

In [13]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [14]:
llm = ChatMistralAI(model="mistral-small-2501", temperature=0, max_retries=2)

In [15]:
template = """SYSTEM: You are a question answer bot.
                Be factual in your response.
                Respond to the following question: {question} only from 
                the below context : {context}.
                If you don't know the answer, just say that you don't know.
          """
prompt = PromptTemplate.from_template(template)

In [16]:
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser ()
)

In [17]:
chain.invoke("What is the key steps of career growth?")

'Based on the provided context, the key steps of career growth related to "meaningful projects" are:\n\n1. **Working on projects**: This step involves deepening your skills, building a portfolio, and creating impact.'

### **RAG with webpages**

In [18]:
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [19]:
URL = "https://www.theverge.com/2024/4/18/24133808/meta-ai-assistant-llama-3-chatgpt-openai-rival"

In [20]:
loader = WebBaseLoader(URL)

In [23]:
chain.invoke("What is company’s next major AI model?")

"I don't know. The provided context does not mention any specific information about a company's next major AI model."