<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/AAI_RAG_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain_core langchain_community langgraph langchain_google_genai -q
!pip install chromadb tiktoken -q

In [None]:
import google.generativeai as genai
import time
import json
from google.colab import userdata # Keep this as per your instruction

# --- Gemini credentials: read the 'GEMINI' Colab secret and configure the SDK ---
GOOGLE_API_KEY = userdata.get('GEMINI')
if not GOOGLE_API_KEY:
    # No usable key: warn, but let the rest of the cell run unconfigured.
    print("WARNING: GOOGLE_API_KEY not found in Colab Secrets. Please ensure 'GEMINI' secret is set.")
    print("API calls will likely fail. Proceeding with unconfigured API.")
else:
    genai.configure(api_key=GOOGLE_API_KEY)
    print("Google Generative AI configured successfully using Colab Secrets.")

# --- Agent Configuration ---
class AgentConfig:
    # Name of the Gemini model used for both the routing and the answering role.
    LLM_MODEL_NAME: str = "gemini-2.5-flash"

# Build one model handle per role (router and responder). Both are bound together,
# so a failure on either construction leaves both names set to None.
try:
    AGENTIC_MODEL, RESPONDER_MODEL = [
        genai.GenerativeModel(AgentConfig.LLM_MODEL_NAME) for _ in range(2)
    ]
    print(f"Gemini model '{AgentConfig.LLM_MODEL_NAME}' initialized for agentic and response generation.")
except Exception as init_error:
    print(f"ERROR: Failed to initialize Gemini model. Please check your API key and model name. Error: {init_error}")
    # Keep the names defined (as None) so later cells can still reference them.
    AGENTIC_MODEL = RESPONDER_MODEL = None

Google Generative AI configured successfully using Colab Secrets.
Gemini model 'gemini-2.5-flash' initialized for agentic and response generation.


In [None]:
# Assuming you have the following setup already executed in your Colab notebook:
from google.colab import userdata
import google.generativeai as genai
import os # Import os module to set environment variables

# --- API Key Setup ---
# Read the Gemini key from the 'GEMINI' Colab secret. The name differs from the
# GOOGLE_API_KEY environment variable to avoid confusion with the search API key.
GOOGLE_API_KEY_GEMINI = userdata.get('GEMINI')
if GOOGLE_API_KEY_GEMINI:
    genai.configure(api_key=GOOGLE_API_KEY_GEMINI)
    print("Google Generative AI configured successfully using Colab Secrets.")

    # --- IMPORTANT: Set GOOGLE_API_KEY for GoogleSearchAPIWrapper ---
    # Use the same API key if it's a general Google Cloud API key, or a separate one if needed.
    # For simplicity, we'll assume your 'GEMINI' key is also valid for Google Search API.
    # If you have a separate key for search, retrieve it from userdata.get('YOUR_SEARCH_API_KEY')
    # The export happens only when a key exists: os.environ accepts only strings,
    # so assigning a missing (None) key would raise TypeError instead of letting
    # the cell "proceed with unconfigured API" as the warning below promises.
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY_GEMINI
else:
    print("WARNING: GOOGLE_API_KEY (for Gemini) not found in Colab Secrets. Please ensure 'GEMINI' secret is set.")
    print("API calls for Gemini will likely fail. Proceeding with unconfigured API.")

# You also need a Google Custom Search Engine ID (CSE ID)
# Get this from https://programmablesearchengine.google.com/
# Add this to your Colab Secrets as 'GOOGLE_CSE_ID'
# https://programmablesearchengine.google.com/controlpanel/overview?cx=514fb1ae50d034b58
GOOGLE_CSE_ID = userdata.get('GOOGLE_CSE_ID')
if GOOGLE_CSE_ID:
    os.environ["GOOGLE_CSE_ID"] = GOOGLE_CSE_ID
    print("Google CSE ID configured successfully from Colab Secrets.")
else:
    print("WARNING: GOOGLE_CSE_ID not found in Colab Secrets. Web search will fail.")
    print("Please ensure 'GOOGLE_CSE_ID' secret is set.")


# --- Agent Configuration ---
class AgentConfig:
    # Gemini model name shared by the router and the responder.
    LLM_MODEL_NAME: str = "gemini-2.5-flash"

# Create the router and responder handles in one step; if either construction
# fails, report the error and leave both names bound to None.
try:
    AGENTIC_MODEL, RESPONDER_MODEL = [
        genai.GenerativeModel(AgentConfig.LLM_MODEL_NAME) for _ in range(2)
    ]
    print(f"Gemini model '{AgentConfig.LLM_MODEL_NAME}' initialized for agentic and response generation.")
except Exception as init_error:
    print(f"ERROR: Failed to initialize Gemini model. Please check your API key and model name. Error: {init_error}")
    AGENTIC_MODEL = RESPONDER_MODEL = None

# Later cells can now create `GoogleSearchAPIWrapper()`: this cell exports the
# GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables when the secrets exist.

Google Generative AI configured successfully using Colab Secrets.
Google CSE ID configured successfully from Colab Secrets.
Gemini model 'gemini-2.5-flash' initialized for agentic and response generation.


In [None]:
from google.colab import userdata
import google.generativeai as genai
import os
from typing import List, Dict, Any

# Required for AgentState class (even if not strictly used in this simplified demo's flow)
from pydantic import BaseModel

# Required for Embeddings and Vector Store
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

# Required for Web Search Tool
from langchain_community.utilities import GoogleSearchAPIWrapper

# Required for LLM Prompts
from langchain_core.prompts import ChatPromptTemplate

# --- API Key and Environment Setup ---

# 1. Gemini API key from Colab Secrets. This cell cannot do anything useful
#    without it, so a missing key aborts immediately.
GOOGLE_API_KEY_GEMINI = userdata.get('GEMINI')
if not GOOGLE_API_KEY_GEMINI:
    print("CRITICAL ERROR: 'GEMINI' secret not found in Colab. API calls will fail.")
    raise ValueError("GEMINI API key is missing. Please set it in Colab Secrets.")

genai.configure(api_key=GOOGLE_API_KEY_GEMINI)
# The same key is exported as GOOGLE_API_KEY for GoogleSearchAPIWrapper.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY_GEMINI
print("Google Generative AI configured successfully using Colab Secrets.")

# 2. Google Custom Search Engine ID (CSE ID).
# WARNING: the ID is hardcoded here only to sidestep persistent userdata.get
# timeouts; this is not appropriate for production. For secure use, store it
# as the 'GOOGLE_CSE_ID' Colab secret and read it from there.
HARDCODED_CSE_ID = "514fb1ae50d034b58"
os.environ["GOOGLE_CSE_ID"] = HARDCODED_CSE_ID
print(f"Google CSE ID set (hardcoded for demo): {HARDCODED_CSE_ID}. Please remove hardcoding for secure deployment.")

# --- Agent LLM Configuration ---
class AgentConfig:
    # Single Gemini model name used for both the routing and answering roles.
    LLM_MODEL_NAME: str = "gemini-2.5-flash"

# One handle per role. Unlike a soft fallback, a construction failure here is
# logged and then re-raised so the cell stops.
try:
    AGENTIC_MODEL = genai.GenerativeModel(AgentConfig.LLM_MODEL_NAME)
    RESPONDER_MODEL = genai.GenerativeModel(AgentConfig.LLM_MODEL_NAME)
    print(f"Gemini model '{AgentConfig.LLM_MODEL_NAME}' initialized for agentic and response generation.")
except Exception as init_error:
    print(f"CRITICAL ERROR: Failed to initialize Gemini model. Check API key/model name. Error: {init_error}")
    raise

# --- Helper Functions and Tools ---

def get_llm_response(model, prompt_text: str) -> str:
    """Send ``prompt_text`` to ``model`` and return the reply text, stripped.

    Any exception raised while generating or reading the reply is printed and
    turned into an ``"Error communicating with LLM: ..."`` string, so this
    function always returns a string instead of raising.
    """
    try:
        reply = model.generate_content(prompt_text)
        cleaned_text = reply.text.strip()
    except Exception as llm_error:
        print(f"LLM API Call Error: {llm_error}")
        return f"Error communicating with LLM: {llm_error}"
    else:
        return cleaned_text

# 1. Embedding Model
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# 2. Vector Database for "Self Data" (pre-indexed knowledge)
VECTOR_DB_PATH = "./chroma_db_simple_rag" # Use a different path to avoid conflicts
dummy_documents = [
    Document(page_content="Retrieval Augmented Generation (RAG) combines large language models with external knowledge retrieval systems to generate more informed and accurate responses by fetching relevant documents or data before generating the answer."),
    Document(page_content="The primary goal of an AI agent for flight planning is to optimize routes for fuel efficiency and safety, considering factors like weather, airspace restrictions (NOTAMs), aircraft performance, and air traffic control requirements."),
    Document(page_content="Mount Everest is Earth's highest mountain above sea level, located in the Mahalangur Himal sub-range of the Himalayas."),
    Document(page_content="Paris is the capital and most populous city of France."),
    Document(page_content="A key challenge in flight planning is balancing minimum flight time with optimal fuel consumption, while adhering to all regulatory and safety standards."),
    Document(page_content="The current weather in Montreal, Quebec, Canada, typically experiences four distinct seasons. Summers are warm and humid, while winters are cold with significant snowfall. Spring and autumn are mild and pleasant."),
]
# Stable, position-based ids make this cell idempotent: the store is persisted on
# disk, so without explicit ids every re-run would append another copy of each
# document and the k=2 retriever could return the same passage twice.
# NOTE(review): copies already written to VECTOR_DB_PATH by earlier runs (with
# auto-generated ids) are not removed by this change; delete the directory once.
DUMMY_DOCUMENT_IDS = [f"dummy-doc-{index}" for index in range(len(dummy_documents))]
vectorstore = Chroma.from_documents(
    documents=dummy_documents,
    embedding=embeddings,
    ids=DUMMY_DOCUMENT_IDS,
    persist_directory=VECTOR_DB_PATH,
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2}) # Retrieve top 2 documents

# 3. Web Search Tool
# Credentials are set elsewhere in this notebook via the GOOGLE_API_KEY and
# GOOGLE_CSE_ID environment variables.
web_search_tool = GoogleSearchAPIWrapper()


# --- Simple Agentic RAG Demo Function ---

def _parse_routing_decision(raw_reply: str):
    """Map the router LLM's raw reply onto a routing label, or None if unclear.

    Surrounding quotes, backticks, markdown emphasis and punctuation are ignored,
    so replies such as ``'self_data'`` or ``Self_data.`` are still recognised.
    If the reply is a longer sentence, it is accepted only when it mentions
    exactly one of the labels.
    """
    valid_options = ("self_data", "web_search", "direct_answer")
    cleaned = raw_reply.strip().lower().strip("\"'`*.,:;!()[]{}<> \t\r\n")
    if cleaned in valid_options:
        return cleaned
    mentioned = [option for option in valid_options if option in cleaned]
    return mentioned[0] if len(mentioned) == 1 else None


def simple_agentic_rag_demo(query: str) -> str:
    """Route ``query`` to a knowledge source, then answer it with the LLM.

    The routing model picks one of ``self_data`` (retrieve from the vector
    store), ``web_search`` (call the web search tool) or ``direct_answer``
    (ask the responder model with no extra context). An unrecognisable routing
    reply falls back to ``web_search``. Progress and the final answer are printed.

    Args:
        query: The user's question.

    Returns:
        The responder model's answer. Retrieval, search and LLM errors are
        folded into the context or answer text instead of being raised.
    """
    print(f"\n--- Simple Agentic RAG Demo for Query: '{query}' ---")
    final_answer = "I'm sorry, I couldn't process your request." # Default fallback

    # Step 1: Agent decides the best approach (using AGENTIC_MODEL)
    decision_prompt = ChatPromptTemplate.from_template(
        """You are an intelligent AI router. Based on the user's query, decide the best way to get the information.

        Options:
        - 'self_data': If the query is about common knowledge, definitions, or topics likely present in an internal, pre-indexed knowledge base (e.g., "what is RAG?", "tell me about Mount Everest", "flight planning basics").
        - 'web_search': If the query requires current, real-time information, very specific facts not in a general knowledge base, or broad external details (e.g., "current weather in Montreal", "latest news on X", "who is the current PM of Canada").
        - 'direct_answer': If the query is a simple greeting or can be answered directly by the LLM's general knowledge without any retrieval (e.g., "hello", "what is 2+2").

        Provide only your chosen option as a single word: self_data, web_search, or direct_answer.

        User Query: {query}
        """
    )

    try:
        raw_decision = get_llm_response(AGENTIC_MODEL, decision_prompt.format(query=query)).strip().lower()
        # Tolerate quotes/punctuation around the label instead of requiring an
        # exact match, which would misroute e.g. "'self_data'" to web search.
        decision = _parse_routing_decision(raw_decision)
        if decision is None:
            print(f"Agent's decision was unclear ('{raw_decision}'). Defaulting to web_search.")
            decision = "web_search" # Fallback if LLM doesn't follow instructions
    except Exception as e:
        print(f"Error getting agent decision from LLM: {e}. Defaulting to web_search.")
        decision = "web_search" # Robust fallback

    print(f"Agent Decision: {decision}")

    context = ""

    # Step 2: Execute based on decision and get context
    if decision == "self_data":
        print("Retrieving from internal knowledge base (Self-Data)...")
        try:
            docs = retriever.invoke(query)
            context = "\n\n".join([d.page_content for d in docs])
            if not context.strip(): # Check if context is effectively empty
                 context = "No relevant internal data found."
                 print("No relevant self-data found.")
            else:
                 print(f"Self-Data Context (partial): {context[:200]}...")
        except Exception as e:
            # The error text becomes the context so the responder can report it.
            context = f"Error retrieving self-data: {e}"
            print(context)

        # Step 3 (for self_data): Generate answer with retrieved context
        generation_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful AI assistant. Use the following context to answer the user's question. If the context is empty or irrelevant, state that you cannot answer comprehensively based on the provided information."),
            ("human", "Context:\n{context}\n\nQuestion: {query}")
        ])
        final_answer = get_llm_response(RESPONDER_MODEL, generation_prompt.format(context=context, query=query))

    elif decision == "web_search":
        print("Performing web search...")
        try:
            search_results = web_search_tool.run(query)
            context = search_results
            if not context.strip(): # Check if context is effectively empty
                context = "No relevant web search results found."
                print("No relevant web search results found.")
            else:
                print(f"Web Search Context (partial): {context[:200]}...")
        except Exception as e:
            # As above, the error text is passed on as context rather than raised.
            context = f"Error during web search: {e}. (This often indicates Google Custom Search API is not enabled for your GCP project, or billing is not active)."
            print(context)

        # Step 3 (for web_search): Generate answer with search results
        generation_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful AI assistant. Use the following web search results to answer the user's question. If results are empty or irrelevant, state that you cannot answer comprehensively based on the provided information."),
            ("human", "Web Search Results:\n{context}\n\nQuestion: {query}")
        ])
        final_answer = get_llm_response(RESPONDER_MODEL, generation_prompt.format(context=context, query=query))

    elif decision == "direct_answer":
        print("Answering directly from LLM's general knowledge...")
        final_answer = get_llm_response(RESPONDER_MODEL, query)

    print(f"\nFinal Answer: {final_answer}")
    print("-" * 50)
    return final_answer

# --- Demo Usage ---
if __name__ == "__main__":
    print(f"--- Simple Agentic RAG Demo with Gemini {AgentConfig.LLM_MODEL_NAME} ---")

    # Queries are run in order; the comment on each names the route it is
    # expected to take.
    demo_queries = (
        # Example 1: should use self_data (defined concept)
        "Explain Retrieval Augmented Generation (RAG).",
        # Example 2: should use self_data (factual knowledge in dummy data)
        "What is the highest mountain on Earth?",
        # Example 3: should use direct_answer (simple query)
        "Hello, how are you today?",
        # Example 4: flight planning query (should use self_data due to dummy content)
        "What are the essential elements for effective flight planning?",
    )
    for demo_query in demo_queries:
        simple_agentic_rag_demo(demo_query)


Google Generative AI configured successfully using Colab Secrets.
Google CSE ID set (hardcoded for demo): 514fb1ae50d034b58. Please remove hardcoding for secure deployment.
Gemini model 'gemini-2.5-flash' initialized for agentic and response generation.
--- Simple Agentic RAG Demo with Gemini gemini-2.5-flash ---

--- Simple Agentic RAG Demo for Query: 'Explain Retrieval Augmented Generation (RAG).' ---
Agent Decision: self_data
Retrieving from internal knowledge base (Self-Data)...
Self-Data Context (partial): Retrieval Augmented Generation (RAG) combines large language models with external knowledge retrieval systems to generate more informed and accurate responses by fetching relevant documents or data be...

Final Answer: Retrieval Augmented Generation (RAG) combines large language models with external knowledge retrieval systems. This allows it to generate more informed and accurate responses by fetching relevant documents or data before generating the answer.
--------------------