In [6]:
!pip install langchain faiss-cpu chromadb sentence-transformers
!pip install ollama
!pip install langchain chromadb sentence-transformers ollama
!pip list | grep langchain
!pip install --upgrade langchain-ollama langchain langchain-community
!pip install -U langchain-huggingface


Collecting langchain
  Downloading langchain-0.3.24-py3-none-any.whl.metadata (7.8 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp312-cp312-macosx_14_0_arm64.whl.metadata (4.8 kB)
Collecting chromadb
  Downloading chromadb-1.0.7-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.9 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting langchain-core<1.0.0,>=0.3.55 (from langchain)
  Downloading langchain_core-0.3.56-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith<0.4,>=0.1.17 (from langchain)
  Downloading langsmith-0.3.37-py3-none-any.whl.metadata (15 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp312-cp312-macos

In [4]:
# %% [markdown]
# ## 1. Ingest PDFs, Embed & Build RetrievalQA (with forced-rebuild support)

# %%
import sys
import logging
from pathlib import Path
from shutil import rmtree
from tqdm.auto import tqdm

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

# ——— Logging & Debug Info ———
# Configure root logging, then record the interpreter and LangChain versions
# so saved outputs show which environment produced them.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logging.info(f"Python    : {sys.version.split()[0]}")
try:
    import langchain
except ImportError:
    logging.warning("LangChain not installed or version unknown")
else:
    logging.info(f"LangChain : {langchain.__version__}")

# ——— Configuration ———
# Directory globbed for "*.pdf" files, and directory holding the Chroma store.
PDF_DIR: Path = Path("./")
CHROMA_DIR: Path = Path("./chroma_db_sha")

# Passed to RecursiveCharacterTextSplitter as chunk_size / chunk_overlap.
CHUNK_SIZE: int = 200
CHUNK_OVERLAP: int = 50

# Model name for HuggingFaceEmbeddings and model tag for the Ollama LLM.
EMBED_MODEL_NAME: str = "all-MiniLM-L6-v2"
LLM_MODEL: str = "deepseek-r1:1.5b-qwen-distill-q4_K_M"

# "k" handed to the retriever's search_kwargs.
RETRIEVE_K: int = 4

# %% 
def _load_pdf_chunks():
    """Load every PDF found directly in PDF_DIR and split its pages into chunks.

    Returns:
        The list of chunk documents produced by RecursiveCharacterTextSplitter.

    Raises:
        FileNotFoundError: if PDF_DIR contains no ``*.pdf`` files.
    """
    pdf_files = sorted(PDF_DIR.glob("*.pdf"))
    if not pdf_files:
        raise FileNotFoundError(f"No PDFs found in {PDF_DIR.resolve()!s}")
    logging.info(f"Found {len(pdf_files)} PDF(s) to load.")

    # PyPDFLoader.load() returns one document per page.
    documents = []
    for pdf in tqdm(pdf_files, desc="Loading PDFs"):
        documents.extend(PyPDFLoader(str(pdf)).load())
    logging.info(f"Total pages loaded: {len(documents)}")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = splitter.split_documents(documents)
    logging.info(f"Split into {len(chunks)} chunks (size={CHUNK_SIZE}, overlap={CHUNK_OVERLAP})")
    return chunks


def ingest_and_build(rerun: bool = False):
    """Build a RetrievalQA chain backed by a persisted Chroma store.

    When the store has to be (re)built -- ``rerun=True`` or CHROMA_DIR does not
    exist -- the PDFs in PDF_DIR are loaded, split, embedded and written to
    CHROMA_DIR. Otherwise the existing store is opened as-is and the PDFs are
    not read at all.

    Args:
        rerun: If True, delete any existing CHROMA_DIR and re-embed everything.

    Returns:
        A RetrievalQA ("stuff") chain that also returns its source documents.

    Raises:
        FileNotFoundError: if a (re)build is needed and PDF_DIR holds no PDFs.
    """
    rebuild = rerun or not CHROMA_DIR.exists()

    # Load and split *before* touching the old store, so a missing-PDF error
    # never leaves us with the previous index already deleted.
    chunks = _load_pdf_chunks() if rebuild else None

    embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)

    if rebuild:
        if CHROMA_DIR.exists():
            logging.info(f"Removing old Chroma directory at {CHROMA_DIR!s}")
            rmtree(CHROMA_DIR)

        logging.info("Creating new Chroma store & embedding…")
        # No explicit db.persist(): Chroma >= 0.4 writes to persist_directory
        # automatically, and LangChain deprecates the manual call.
        db = Chroma.from_documents(
            chunks,
            embedding=embeddings,
            persist_directory=str(CHROMA_DIR),
        )
    else:
        logging.info("Loading existing Chroma store.")
        db = Chroma(
            persist_directory=str(CHROMA_DIR),
            embedding_function=embeddings,
        )

    # Best-effort size report; it relies on a private attribute, so any
    # failure is downgraded to "unknown" rather than aborting the build.
    try:
        count = db._collection.count()
    except Exception:
        count = "unknown"
    logging.info(f"Chroma contains ~{count} vectors")

    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": RETRIEVE_K})
    llm = Ollama(model=LLM_MODEL, temperature=0.1)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
    )
    logging.info("RetrievalQA chain ready.")
    return qa_chain

# %%
# Usage example: build the chain and bind it to `qa` for the cells below.
# rerun=True deletes any existing Chroma directory and re-embeds every PDF;
# pass rerun=False to reuse a store that is already on disk.
qa = ingest_and_build(rerun=True)


2025-05-11 21:33:32,294 INFO Python    : 3.12.7
2025-05-11 21:33:32,295 INFO LangChain : 0.3.24
2025-05-11 21:33:32,296 INFO Found 13 PDF(s) to load.


Loading PDFs:   0%|          | 0/13 [00:00<?, ?it/s]

2025-05-11 21:33:35,041 INFO Total pages loaded: 369
2025-05-11 21:33:35,051 INFO Split into 790 chunks (size=200, overlap=50)
2025-05-11 21:33:39,028 INFO Use pytorch device_name: mps
2025-05-11 21:33:39,028 INFO Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2025-05-11 21:33:42,737 INFO Creating new Chroma store & embedding…
2025-05-11 21:33:43,181 INFO Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
  db.persist()
2025-05-11 21:33:47,905 INFO Chroma contains ~790 vectors
2025-05-11 21:33:47,907 INFO RetrievalQA chain ready.


In [None]:
import html
import time
from IPython.display import display, Markdown, HTML, clear_output
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama

# ——— 1) Build the LLM Instance ———
# Reuse the model tag from the configuration cell so this chain and the one
# built by ingest_and_build() cannot drift apart.
llm = Ollama(model=LLM_MODEL, temperature=0.1)

# ——— 2) Define System + User Prompt ———
# The "stuff" chain fills {context} with the retrieved chunks and {question}
# with the user's query.
system_message = SystemMessagePromptTemplate.from_template(
    "You are a helpful AI assistant. Use ONLY the provided context to answer the user's question. "
    "If the answer is not in the context, say \"I don't know.\" Do not hallucinate."
)
user_message = HumanMessagePromptTemplate.from_template(
    """
Context:
---------------------
{context}
---------------------

Question: {question}

<think>
Explain how you arrive at the answer using step-by-step reasoning based on the context.
</think>

Answer:
Provide the final concise answer based only on the reasoning above.
"""
)
chat_prompt = ChatPromptTemplate.from_messages([system_message, user_message])

# ——— 3) Build or rebuild RetrievalQA chain ———
# Requires `qa` from the ingest cell: its retriever is carried over and only
# the prompt changes. Re-running this cell is safe because the rebuilt chain
# exposes the same retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=qa.retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": chat_prompt},
)

# ——— 4) Spinner HTML ———
# Inline HTML/CSS for a rotating ring next to a bold "Thinking…" label.
# answer_question() displays it while the chain runs, then replaces it with
# empty HTML.
spinner_html = """
<div style="display:flex;align-items:center">
  <div class="loader" style="
      border: 8px solid #f3f3f3;
      border-top: 8px solid #3498db;
      border-radius: 50%;
      width: 40px;
      height: 40px;
      animation: spin 1s linear infinite;
      margin-right:10px;
    "></div>
  <div><b>Thinking…</b></div>
</div>
<style>
@keyframes spin {
  0%   { transform: rotate(0deg); }
  100% { transform: rotate(360deg); }
}
</style>
"""

# ——— 5) Q&A helper with styled separation of reasoning & final answer ———
def _html_lines(text: str) -> str:
    """Escape ``text`` for HTML and turn newlines into ``<br>`` tags."""
    return html.escape(text).replace("\n", "<br>")


def _callout(title: str, body: str, background: str, border: str, padding: int) -> str:
    """Render ``body`` as a tinted ``<div>`` with a bold ``title`` line."""
    return f"""
        <div style="background:{background};padding:{padding}px;border-left:5px solid {border};margin:10px 0;">
          <strong>{title}</strong><br>
          {_html_lines(body)}
        </div>
        """


def _split_reasoning(raw: str):
    """Split model output into ``(reasoning, answer)``.

    ``reasoning`` is the text between the first ``<think>`` and the next
    ``</think>``; ``answer`` is what follows, minus a leading "Answer:" label.
    If no such ordered pair exists, returns ``(None, raw)``.
    """
    _, opened, rest = raw.partition("<think>")
    reasoning, closed, after = rest.partition("</think>")
    if not (opened and closed):
        return None, raw

    answer = after.strip()
    if answer.lower().startswith("answer:"):
        answer = answer[len("answer:"):].strip()
    return reasoning, answer


def answer_question(qa_chain, question: str):
    """Run ``question`` through ``qa_chain`` and render the result in the cell.

    Clears the cell output, echoes the question, shows a spinner while the
    chain runs, then displays the reasoning block (when the output contains a
    ``<think>…</think>`` section), the answer, and a snippet of each returned
    source document.

    Args:
        qa_chain: Object whose ``invoke({"query": ...})`` returns a mapping
            with "result" and, optionally, "source_documents".
        question: The user's question.

    Returns:
        None. All output goes through IPython ``display``.
    """
    clear_output(wait=True)
    display(Markdown(f"**Q:** {html.escape(question)}\n"))

    # Clear the spinner even when the chain raises, so a failed call does not
    # leave "Thinking…" spinning above the traceback.
    handle = display(HTML(spinner_html), display_id="spinner")
    try:
        resp = qa_chain.invoke({"query": question})
    finally:
        handle.update(HTML(""))

    raw = resp["result"]
    srcs = resp.get("source_documents", [])

    # Newline handling lives in _html_lines(): a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12.
    reasoning, answer_text = _split_reasoning(raw)
    if reasoning is not None:
        display(HTML(_callout("🤔 Reasoning:", reasoning, "#f5f5f5", "#999", 10)))
        display(HTML(_callout("✅ Final Answer:", answer_text, "#e6f7ff", "#1890ff", 15)))
    else:
        # Fallback: show everything as the answer.
        display(HTML(_callout("✅ Answer:", answer_text, "#e6f7ff", "#1890ff", 15)))

    if srcs:
        display(Markdown("### 📚 Sources"))
        for i, doc in enumerate(srcs, 1):
            page = doc.metadata.get("page", "unknown")
            content = doc.page_content
            snippet = html.escape(content[:200]).replace("\n", " ")
            # Only mark the snippet as elided when text was actually cut off.
            ellipsis = "…" if len(content) > 200 else ""
            display(
                Markdown(
                    f"- **Doc {i}** (page {page}): “{snippet}{ellipsis}”"
                )
            )

# ——— 6) CLI Loop ———
# Read questions until the user types stop/exit/quit. Blank input is ignored.
print("Type questions below (or 'stop' to exit):")
while True:
    try:
        query = input("▶ ").strip()
    except (EOFError, KeyboardInterrupt):
        # Kernel interrupt or closed stdin at the prompt: exit cleanly, the
        # same way as an explicit 'stop', instead of ending in a traceback.
        query = "stop"
    if query.lower() in ("stop", "exit", "quit"):
        clear_output(wait=True)
        print("Goodbye! 👋")
        break
    if not query:
        continue
    answer_question(qa, query)


**Q:** what are the main compontn of cloud computing


### 📚 Sources

- **Doc 1** (page 1): “Cloud.&quot; CSP is in charge of the infrastructure, including hardware,  software, networking, and physical security. • Customer Responsibility: Termed &quot;Security in the Cloud.&quot; Customers…”

- **Doc 2** (page 1): “customers are responsible for managing their data and applications. • Cloud Service Provider (CSP) Responsibility: Known as &quot;Security of the…”

- **Doc 3** (page 21): “cripple its performance. • Multiple cloud service requests are sent, each of which is designed to consume excessive memory and processing resources.…”

- **Doc 4** (page 6): “Reference: Above the Clouds: A Berkeley View of Cloud Computing, 2009. Resource Provisioning…”