In [1]:
%pip install --upgrade llama-index        # core
%pip install --upgrade llama-index-llms-google-genai  # Google / Gemini LLM integration
%pip install --upgrade llama-index-embeddings-google-genai  # embeddings via Google GenAI
%pip install --upgrade google-generativeai  # underlying Google SDK


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A n

In [2]:
%pip install pinecone 


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
# ---------------- IMPORTS (all in one place) ----------------
import os
from pinecone import Pinecone
from llama_index.core import Settings, VectorStoreIndex, get_response_synthesizer
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.vector_stores.types import MetadataFilters, ExactMatchFilter

In [9]:
# ================= 1) EMBEDDING / LLM LAYER =================
# Must match your Pinecone dimensions (004 → 768-d)

Settings.embed_model = GoogleGenAIEmbedding(
    model_name="models/text-embedding-004",
    api_key="",
)

Settings.llm = GoogleGenAI(
    model="gemini-2.5-flash",
    api_key="",
    temperature=0.2,
    max_tokens=1024,
)

In [10]:
# =================== 2) DATA / INDEX LAYER (Pinecone only) ===================

INDEX_NAME = "coffeeindex"
TEXT_KEY   = "text"       # change to "page_content" if you stored text under that
HEADING_KEY = "heading"   # or "headings" if that's your metadata key

pc = Pinecone(api_key="")
pc_index = pc.Index(INDEX_NAME)

vstore = PineconeVectorStore(
    pinecone_index=pc_index,
    text_key=TEXT_KEY,
)

index = VectorStoreIndex.from_vector_store(vstore)   # uses Settings.embed_model
import nest_asyncio
nest_asyncio.apply()


# =================== 3) QUERY LAYER ===================

# --- A) Multi-query: expand into paraphrases, fuse, single-shot synthesis ---
def rag_multiqueury(question: str):
    sem_ret = index.as_retriever(similarity_top_k=5)

    fused = QueryFusionRetriever(
        retrievers=[sem_ret],
        similarity_top_k=5,
        mode="reciprocal_rerank",
        num_queries=4,      # LLM creates ~3 paraphrases + original
        use_async=False,
    )

    synth = get_response_synthesizer(
        llm=Settings.llm,
        response_mode="compact"
    )

    qe = RetrieverQueryEngine.from_args(
        retriever=fused,
        response_synthesizer=synth
    )

    return qe.query(question)

In [15]:
# --- B) SelfQuery (headings): infer a heading term → filter by Pinecone metadata, then answer ---

def rag_selfquery_by_heading(question: str):
    # Ask LLM for a concise heading/section term (plain text, minimal cost)
    prompt = f"From this question, guess the most relevant document heading in 2-4 words (no quotes):\n{question}"
    heading_term = Settings.llm.complete(prompt).text.strip()
    print(heading_term)

    filters = MetadataFilters(filters=[
        ExactMatchFilter(key=HEADING_KEY, value=heading_term)
    ])

    ret = index.as_retriever(similarity_top_k=5, filters=filters)

    synth = get_response_synthesizer(
        llm=Settings.llm,
        response_mode="compact",
    )

    qe = RetrieverQueryEngine.from_args(
        retriever=ret,
        response_synthesizer=synth
    )

    return qe.query(question)


# --------------------------- EXAMPLES ---------------------------
print(rag_multiqueury("What is Ashwagandha Coffee?"))
print(rag_selfquery_by_heading("Ashwagandha about it and benefits?"))

Empty Response
Ashwagandha Benefits
Empty Response
