In [11]:
import os
import dotenv
from pathlib import Path

from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders import (
    WebBaseLoader,
    PyPDFLoader,
    Docx2txtLoader,
)
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, WebBaseLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.embeddings import HuggingFaceEmbeddings

In [6]:
# Local documents to ingest. Forward slashes are understood by pathlib on
# every OS, whereas the previous Windows-only "\" separators made the files
# impossible to find on Linux/macOS.
doc_paths = [
    "docs/test_rag.docx",
    "docs/test_rag.pdf",
]

# Loader class to use for each supported (lower-case) file extension.
loaders_by_suffix = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".txt": TextLoader,
    ".md": TextLoader,
}

docs = []
for doc_file in doc_paths:
    file_path = Path(doc_file)

    # Compare the extension case-insensitively so e.g. "REPORT.PDF" is accepted.
    loader_cls = loaders_by_suffix.get(file_path.suffix.lower())
    if loader_cls is None:
        print(f"Unsupported document type: {file_path}")
        continue

    try:
        docs.extend(loader_cls(file_path).load())
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining files.
        print(f"Error loading document {file_path}: {e}")

# Load URL document
url = "https://docs.streamlit.io/develop/quick-reference/release-notes"
try:
    docs.extend(WebBaseLoader(url).load())
except Exception as e:
    # Same best-effort policy as for local files.
    print(f"Error loading document from {url}: {e}")


In [7]:
# Inspect everything that was loaded: the local files first, then the web page.
docs

[Document(metadata={'source': 'docs\\test_rag.docx'}, page_content='My favorite food is margarita pizza.\n\nThere are 47588 bottles in the truck.'),
 Document(metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2024-09-15T19:40:36+02:00', 'msip_label_1cf2ba15-c468-47c8-b178-cba8acf110ec_siteid': 'eb25818e-5bd5-49bf-99de-53e3e7b42630', 'msip_label_1cf2ba15-c468-47c8-b178-cba8acf110ec_method': 'Standard', 'msip_label_1cf2ba15-c468-47c8-b178-cba8acf110ec_enabled': 'True', 'author': 'Domingo Domènech Enric (ERNI)', 'moddate': '2024-09-15T19:40:36+02:00', 'source': 'docs\\test_rag.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='My favorite food is margarita pizza. \nThere are 47588 bottles in the truck.'),

In [8]:
# Break the loaded documents into overlapping chunks

# Chunk length and the overlap shared by neighbouring chunks
# (units as defined by RecursiveCharacterTextSplitter).
splitter_options = {"chunk_size": 5000, "chunk_overlap": 1000}

text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
document_chunks = text_splitter.split_documents(docs)

In [12]:
# Embed the document chunks and index them in a Chroma vector store

# Embedding model, loaded through the Hugging Face embeddings wrapper
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Build the vector store from the chunks
vector_db = Chroma.from_documents(document_chunks, embedding=embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [13]:
# Retrieve

def _get_context_retriever_chain(vector_db, llm):
    """Build a history-aware retriever on top of ``vector_db``.

    The query-generation prompt receives the earlier conversation through the
    ``messages`` placeholder and the newest user turn through ``input``.

    ``create_history_aware_retriever`` only runs that prompt when the chain
    input contains a non-empty ``chat_history`` key; otherwise it sends
    ``input`` straight to the retriever. This notebook passes the history as
    ``messages``, so the returned chain copies it into ``chat_history`` first.
    Without that alias the query-rewriting step is silently skipped.

    Args:
        vector_db: Vector store; only its ``as_retriever()`` method is used.
        llm: Chat model used to generate the search query.

    Returns:
        A runnable taking a dict with ``input`` and ``messages`` (or
        ``chat_history``) and producing the retriever's output.
    """
    # Local import keeps this cell self-contained; langchain_core is already a
    # dependency of the notebook.
    from langchain_core.runnables import RunnablePassthrough

    retriever = vector_db.as_retriever()
    prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
        ("user", "Generate a search query for document retrieval."),
    ])
    history_aware_retriever = create_history_aware_retriever(llm, retriever, prompt)

    # Expose the conversation under the key the history-aware branch checks.
    # An explicitly supplied ``chat_history`` takes precedence over ``messages``.
    expose_history = RunnablePassthrough.assign(
        chat_history=lambda inputs: inputs.get("chat_history") or inputs.get("messages", [])
    )
    return expose_history | history_aware_retriever

In [14]:
def get_conversational_rag_chain(llm, vector_store=None):
    """Assemble the conversational RAG chain.

    The chain retrieves documents with the history-aware retriever and then
    answers with ``llm``, using a prompt that receives the retrieved documents
    as ``context``, the earlier conversation as ``messages`` and the newest
    user turn as ``input``.

    Args:
        llm: Chat model used for both query generation and answering.
        vector_store: Vector store to retrieve from. Defaults to the
            notebook-level ``vector_db`` so existing calls keep working.

    Returns:
        The runnable produced by ``create_retrieval_chain``.
    """
    if vector_store is None:
        # Backward-compatible default: the store built earlier in the notebook.
        vector_store = vector_db

    retriever_chain = _get_context_retriever_chain(vector_store, llm)

    # The second sentence previously read "but now always would be ...", a typo
    # that told the model the context is always relevant.
    prompt = ChatPromptTemplate.from_messages([
        ("system",
        """You are a helpful assistant. You will have to answer to user's queries.
        You will have some context to help with your answers, but it will not always be completely related or helpful.
        You can also use your knowledge to assist answering the user's queries.\n
        {context}"""),
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
    ])
    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)

    return create_retrieval_chain(retriever_chain, stuff_documents_chain)

In [16]:
# Augmented Generation

# Load variables from a local .env file (if one exists) so the API key lookup
# below also works on a fresh kernel; variables that are already set are kept.
dotenv.load_dotenv()

llm_stream = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=os.getenv("GOOGLE_GEMINI_API"),
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

# Conversation so far as plain role/content records; the last entry is the
# question to answer. Kept under its own name so `messages` always holds
# LangChain message objects.
raw_messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hi there! How can I assist you today?"},
    {"role": "user", "content": "What is the latest version of Streamlit?"},
]
messages = [
    HumanMessage(content=m["content"]) if m["role"] == "user" else AIMessage(content=m["content"])
    for m in raw_messages
]

conversation_rag_chain = get_conversational_rag_chain(llm_stream)

# Stream the answer: everything before the last message is history, the last
# message is the current question.
response_message = "*(RAG Response)*\n"
for chunk in conversation_rag_chain.pick("answer").stream({
    "messages": messages[:-1],
    "input": messages[-1].content
}):
    response_message += chunk
    print(chunk, end="", flush=True)

# Record the reply as an AIMessage so the list stays homogeneous; appending a
# plain dict here would break a later `messages[-1].content` access.
messages.append(AIMessage(content=response_message))

The latest version of Streamlit is 1.49.0, released on August 26, 2025.


In [17]:
# Query the model directly (no retrieval) and print the reply as it streams in
prompt = "Tell me something about Google Gemini"

for piece in llm_stream.stream(prompt):
    print(piece.content, end="", flush=True)

Okay, here's a summary of key things to know about Google Gemini:

**What it is:**

*   **Google's most advanced AI model:** Gemini is a family of multimodal AI models developed by Google AI. It's designed to be more powerful and versatile than previous models like LaMDA.
*   **Multimodal from the ground up:** Unlike some models that add multimodality as an afterthought, Gemini is natively multimodal. This means it's trained to understand and reason across different types of information, including text, code, audio, images, and video, simultaneously.
*   **Three versions:**
    *   **Gemini Ultra:** The largest and most capable model, intended for highly complex tasks.
    *   **Gemini Pro:** A more balanced model, designed for a wide range of tasks and scalable deployment. Powers Google's Bard (now Gemini) chatbot.
    *   **Gemini Nano:** A smaller, efficient model designed for on-device tasks on smartphones and other devices.

**Key Capabilities and Features:**

*   **Advanced Reaso