In [None]:
%pip install --upgrade llama-index        # core
%pip install --upgrade llama-index-llms-google-genai  # Google / Gemini LLM integration
%pip install --upgrade llama-index-embeddings-google-genai  # embeddings via Google GenAI
%pip install --upgrade google-generativeai  # underlying Google SDK

In [None]:
import os
from pinecone import Pinecone
from llama_index.core import (
 Settings,
 VectorStoreIndex,
 SimpleDirectoryReader,
 KeywordTableIndex,)
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.query_engine import RouterQueryEngine,SubQuestionQueryEngine
from llama_index.core.question_gen import LLMQuestionGenerator
from llama_index.core.query_engine import SubQuestionQueryEngine

In [None]:
# ============================ 1) EMBEDDING / LLM LAYER ============================
# Set the embedding model BEFORE attaching to the Pinecone index.
# SECURITY: credentials are read from environment variables (GOOGLE_API_KEY,
# PINECONE_API_KEY) and must never be pasted into the notebook. Any key that was
# ever committed in a notebook should be treated as compromised and rotated.
Settings.embed_model = GoogleGenAIEmbedding(
    model_name="models/text-embedding-004",
    api_key=os.environ.get("GOOGLE_API_KEY", ""),  # stays "" when the variable is unset
)

# Keep the LLM OFF while loading documents (prevents accidental calls)
Settings.llm = None

# ============================ 2) DATA / INDEX LAYER ============================
# Local HTML pages used for the keyword index; only *.html files are loaded.
COFFEE_PAGES_DIR = "./GenAI/GenAI-NoteBooks/coffee_pages"
docs = SimpleDirectoryReader(COFFEE_PAGES_DIR, required_exts=[".html"]).load_data()

# NOTE(review): this index is built while Settings.llm is None. If
# KeywordTableIndex relies on the configured LLM for keyword extraction, the
# extracted keywords may be poor -- confirm, or consider an LLM-free keyword index.
kw_idx = KeywordTableIndex.from_documents(docs)

# Pinecone semantic index (attaches to the existing "coffeeindex" index)
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])  # KeyError if the variable is missing
pc_index = pc.Index("coffeeindex")
vstore = PineconeVectorStore(pinecone_index=pc_index, text_key="text")  # change if you used "page_content"
sem_idx = VectorStoreIndex.from_vector_store(vstore)

In [None]:
# Turn LLM ON now
# The API key is taken from the GOOGLE_API_KEY environment variable so that no
# credential is stored in the notebook; when the variable is unset the value
# passed is still the empty string.
# NOTE(review): max_tokens=2024 may be a typo for 2048 -- confirm the intended limit.
Settings.llm = GoogleGenAI(
 model="gemini-2.5-flash",
 api_key=os.environ.get("GOOGLE_API_KEY", ""),
 temperature=0.2,
 max_tokens=2024,
)

In [None]:
# Expose both indexes as query engines.
# The synthesis options (response_mode="compact" plus the active LLM) are shared;
# only the semantic engine additionally sets similarity_top_k=5.
_synthesis_opts = {"response_mode": "compact", "llm": Settings.llm}
qe_sem = sem_idx.as_query_engine(similarity_top_k=5, **_synthesis_opts)
qe_kw = kw_idx.as_query_engine(**_synthesis_opts)

In [None]:
# Wrap each query engine as a tool; the name/description go in a ToolMetadata
# object (not a plain dict).
semantic_tool = QueryEngineTool(
    query_engine=qe_sem,
    metadata=ToolMetadata(name="semantic", description="General coffee KB(Pinecone)"),
)
keyword_tool = QueryEngineTool(
    query_engine=qe_kw,
    metadata=ToolMetadata(name="keyword", description="PDF keyword/headings lookup"),
)
tools = [semantic_tool, keyword_tool]

In [None]:
# ============================ 3) QUERY LAYER============================
# (A) RouterQueryEngine -- route whole query to the best engine
import nest_asyncio

# Applied once, before any query is issued from this notebook.
nest_asyncio.apply()

router = RouterQueryEngine.from_defaults(
    query_engine_tools=tools,
    selector=LLMSingleSelector.from_defaults(llm=Settings.llm),
)

# Run the demo questions in order and print each routed answer.
for router_question in (
    "Find entries mentioning ashwagandha in titles or headings.",  # likely routes to keyword
    "What is turmeric coffee?",
):
    print(router.query(router_question))

In [None]:
# NOTE(review): LLMQuestionGenerator is already imported from
# llama_index.core.question_gen in the import cell, so this extra install may be
# unnecessary -- confirm the package name exists on PyPI and, if it is required,
# move it to the install cell at the top so a fresh Run All works in order.
%pip install llama-index-question-gen-llm

In [None]:
# (B) SubQuestionQueryEngine -- split, route parts, then merge
# The question generator and the engine both use the currently active LLM.
question_gen = LLMQuestionGenerator.from_defaults(llm=Settings.llm)
subq = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=tools,
    question_gen=question_gen,
    llm=Settings.llm,
)

comparison_prompt = "Compare turmeric coffee vs saffron coffee and give a one-paragraph verdict."
comparison_answer = subq.query(comparison_prompt)
print(comparison_answer)