In [3]:
%pip install --upgrade llama-index        # core
%pip install --upgrade llama-index-llms-google-genai  # Google / Gemini LLM integration
%pip install --upgrade llama-index-embeddings-google-genai  # embeddings via Google GenAI
%pip install --upgrade google-generativeai  # underlying Google z

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [6]:
import os, textwrap
from pinecone import Pinecone
from llama_index.core import Settings, VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

In [8]:
# ============================ 1) EMBEDDING / LLM LAYER ============================
# Credentials are read from the environment so no key is ever stored in the notebook.
# A missing GOOGLE_API_KEY raises KeyError here, at setup time, rather than surfacing
# later as an authentication error on the first API call.

# Match Pinecone vectors: text-embedding-004 → 768-d
Settings.embed_model = GoogleGenAIEmbedding(
    model_name="models/text-embedding-004",
    api_key=os.environ["GOOGLE_API_KEY"],
)

# Low temperature and a bounded output length for short, grounded answers.
Settings.llm = GoogleGenAI(
    model="gemini-2.5-flash",
    api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.2,
    max_tokens=600,
)

In [16]:
# ============================ 2) DATA / INDEX LAYER (Pinecone) ============================
import nest_asyncio

# The Pinecone key comes from the environment; a missing PINECONE_API_KEY raises
# KeyError here instead of leaving an empty credential in the notebook.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
pc_index = pc.Index("coffeeindex")

# text_key names the metadata field that holds each chunk's text.
vstore = PineconeVectorStore(pinecone_index=pc_index, text_key="text")  # change if you used "page_content"

# Wrap the existing Pinecone index; nothing is re-ingested here.
index = VectorStoreIndex.from_vector_store(vstore)  # uses Settings.embed_model

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

In [17]:
# ============================ 3) QUERY LAYER (prompt templating AFTER retrieval) ============================
def ask_with_grounded_prompt(question: str, k: int = 5, per_source_chars: int = 900):
    """
    Answer `question` using only retrieved context, with inline [S#] citations.

    Flow:
    - Retrieval first (no LLM): top-k results from the module-level `index`.
    - If retrieval returns nothing, the fallback sentence is returned directly
      and the LLM is NOT called (there is no context to ground an answer on).
    - Otherwise ONE `Settings.llm.complete` call with a strict, grounded prompt:
        * "Use ONLY the CONTEXT"
        * fallback: "No support in retrieved context."
        * inline citations [S#] after claims

    Args:
        question: The user question; used for retrieval and embedded in the prompt.
        k: Number of results requested from the retriever (`similarity_top_k`).
        per_source_chars: Maximum characters kept from each retrieved snippet.
            None, 0 or a negative value disables trimming.

    Returns:
        (answer_text, sources_map) where
        sources_map = {"S#": {"heading": ..., "source": ...}}; it is empty when
        nothing was retrieved.
    """
    fallback = "No support in retrieved context."

    # 3.1 Retrieve nodes (semantic) — no LLM involved at this stage
    hits = index.as_retriever(similarity_top_k=k).retrieve(question)

    # Nothing retrieved: answer deterministically instead of asking the LLM to
    # reason over an empty context.
    if not hits:
        return fallback, {}

    # Only a strictly positive limit trims; a negative value used as a slice end
    # would silently drop characters from the END of the snippet instead.
    trim = bool(per_source_chars) and per_source_chars > 0

    # 3.2 Build enumerated CONTEXT and source map for your UI
    parts = []
    sources_map = {}

    for i, hit in enumerate(hits, start=1):
        tag = f"S{i}"
        md = hit.node.metadata or {}

        # First non-empty metadata field wins; fixed placeholders otherwise.
        heading = md.get("heading") or md.get("title") or "Untitled"
        src = (
            md.get("url")
            or md.get("source")
            or md.get("file_path")
            or md.get("doc_id")
            or "N/A"
        )

        snippet = hit.node.get_content()
        if trim:
            snippet = snippet[:per_source_chars]

        # Tagged block for the prompt, plus the matching UI entry
        parts.append(f"[{tag}] {heading}\n{snippet}")
        sources_map[tag] = {"heading": heading, "source": src}

    # Join all parts into a single context block
    context = "\n\n---\n\n".join(parts)

    # 3.3 Strict, hallucination-resistant prompt
    prompt = f"""
You are a concise specialty-coffee expert.

RULES:
- Use ONLY the CONTEXT below.
- If the answer is not fully supported by the CONTEXT, reply exactly:
  "{fallback}"
- Write 4–6 sentences max.
- After each factual claim, add an inline citation like [S1], [S2].
- Do NOT use outside knowledge.

QUESTION:
{question}

CONTEXT (numbered sources):
{context}

ANSWER (with inline [S#] citations):
""".strip()

    # 3.4 One-shot synthesis (ONE LLM call)
    answer = Settings.llm.complete(prompt).text.strip()

    return answer, sources_map


# ---------------------------- EXAMPLE ----------------------------
# Run one sample question end to end, then show the answer (wrapped to 100 columns)
# followed by one "tag: heading | source" line per cited source.
example_question = "What is turmeric coffee and ideal brew temperature?"
ans, srcs = ask_with_grounded_prompt(example_question)

wrapped_answer = textwrap.fill(ans, width=100)
print("\n=== ANSWER ===\n", wrapped_answer)

print("\n=== SOURCES MAP (for UI) ===")
for tag in srcs:
    meta = srcs[tag]
    print(f"{tag}: {meta['heading']} | {meta['source']}")


=== ANSWER ===
 No support in retrieved context.

=== SOURCES MAP (for UI) ===
