<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/RAG_GOOGLE_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# 1. IMPORTS
import os
import time
import requests
from google import genai
from google.genai import types

# --- Secure API Client Initialization using Userdata/Environment Variables ---
# Key lookup order: the Colab secret named 'GEMINI' first, then the
# GEMINI_API_KEY environment variable.
GEMINI_API_KEY = None
try:
    from google.colab import userdata
    GEMINI_API_KEY = userdata.get('GEMINI')
except Exception:
    # Outside Colab the import raises ImportError. Inside Colab a missing or
    # access-denied secret raises Colab-specific exceptions that are not
    # KeyError subclasses (NOTE(review): confirm against google.colab.userdata),
    # so catching only (ImportError, KeyError) would skip the fallback below.
    GEMINI_API_KEY = None

if not GEMINI_API_KEY:
    # Also covers a Colab secret that exists but is empty.
    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

REQUESTED_MODEL_ID = 'gemini-3-pro-preview'
client = None

if GEMINI_API_KEY:
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
    except Exception as e:
        print(f"❌ Client initialization failed: {e}")
        client = None
    else:
        # Reached only when the constructor returned without raising.
        print(f"✅ Gemini client configured for **{REQUESTED_MODEL_ID}**.")
else:
    print("❌ API Key not found. Please ensure your key is set up.")

if not client:
    # Nothing below can run without a client. `exit()` is a helper injected by
    # the `site` module for interactive use and is not guaranteed to exist;
    # raising SystemExit works everywhere and reports a non-zero status.
    raise SystemExit(1)


# --- 2. AGENT CONFIGURATIONS ---
def get_low_think_config():
    """Build a generation config that requests the "low" thinking level.

    Meant for quick tasks where latency matters more than reasoning depth.
    """
    thinking = types.ThinkingConfig(thinking_level="low")
    return types.GenerateContentConfig(thinking_config=thinking)

def get_high_think_config():
    """Build a generation config that requests the "high" thinking level.

    Meant for complex tasks where answer quality outweighs latency. The
    config also sets ``include_thoughts=True`` on the thinking settings.
    """
    thinking = types.ThinkingConfig(
        thinking_level="high",
        include_thoughts=True,
    )
    return types.GenerateContentConfig(thinking_config=thinking)

# --- 3. RAG DEMO CONFIGURATION ---
# Source document: remote location and the local path it is saved to.
PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf"
FILE_PATH = "Attention_Is_All_You_Need.pdf"

# Display names: one for the File Search store, one for the uploaded file
# (the latter is also printed next to each citation).
STORE_DISPLAY_NAME = "Transformer_RAG_Store"
FILE_DISPLAY_NAME = "Attention Is All You Need Paper"

# Question sent to the model.
USER_QUERY = "Summarize the major components of the model as described in the abstract."

# --- 4. CORE FUNCTIONS ---

def download_pdf(url, filepath, timeout=30):
    """Download the file at ``url`` and save it to ``filepath``.

    The response body is streamed to disk in 8 KiB chunks.

    Args:
        url: Address of the file to fetch.
        filepath: Local path to write to; an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            cannot block the call indefinitely.

    Returns:
        True if the file was written completely. False if the request or the
        local write failed; an error message is printed and any partially
        written file is removed.
    """
    print(f"Attempting to download PDF from: {url}")
    started_writing = False
    try:
        # The context manager releases the streamed connection even when the
        # transfer aborts part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            started_writing = True
            with open(filepath, 'wb') as pdf_file:
                for chunk in response.iter_content(chunk_size=8192):
                    pdf_file.write(chunk)
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"[ERROR] Failed to download file: {e}")
        # Only remove the file if this call truncated/created it, so a failed
        # request never deletes a file that was already there.
        if started_writing and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                # Best effort: the download failure is already reported.
                pass
        return False

    print(f"Successfully downloaded and saved to: {filepath}")
    return True

def _wait_for_indexing(operation, poll_seconds=5, timeout_seconds=600):
    """Poll the upload operation until it is done; raise on timeout or error."""
    deadline = time.monotonic() + timeout_seconds
    while not operation.done:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Indexing did not finish within {timeout_seconds} seconds."
            )
        time.sleep(poll_seconds)
        operation = client.operations.get(operation)
        print("Indexing in progress...")

    # A finished operation may still have failed.
    error = getattr(operation, 'error', None)
    if error:
        raise RuntimeError(f"Indexing failed: {error}")
    return operation


def _print_thoughts(response):
    """Print the thought summaries of the first candidate, if any."""
    thoughts = []
    candidates = getattr(response, 'candidates', None) or []
    if candidates:
        content = getattr(candidates[0], 'content', None)
        # Thought summaries are content parts flagged with `thought`.
        for part in getattr(content, 'parts', None) or []:
            if getattr(part, 'thought', False) and getattr(part, 'text', None):
                thoughts.append(part.text)

    if not thoughts:
        # Kept for compatibility with the attribute checked previously.
        legacy = getattr(response, 'thinking_output', None)
        if legacy:
            thoughts.append(str(legacy))

    if thoughts:
        print("\n--- Model's Internal Thoughts ---")
        print("\n".join(thoughts))
    else:
        print("\n--- Model Thoughts ---")
        print("Model thoughts were requested but not returned by the current API structure.")


def _chunk_page(chunk):
    """Return a page label for a grounding chunk, or 'N/A' when unknown."""
    # NOTE(review): page info for File Search chunks is expected under
    # retrieved_context.rag_chunk.page_span - confirm against the installed
    # SDK. The flat attributes checked previously are still honoured.
    retrieved = getattr(chunk, 'retrieved_context', None)
    holders = (
        chunk,
        getattr(chunk, 'rag_chunk', None),
        getattr(retrieved, 'rag_chunk', None),
    )
    for holder in holders:
        if holder is None:
            continue
        page = getattr(holder, 'page_number', None)
        if page is not None:
            return page
        span = getattr(holder, 'page_span', None)
        first = getattr(span, 'first_page', None)
        if first is not None:
            last = getattr(span, 'last_page', None)
            return first if last in (None, first) else f"{first}-{last}"
    return 'N/A'


def _print_citations(response):
    """Print one citation line per grounding chunk of the first candidate."""
    candidates = getattr(response, 'candidates', None) or []
    grounding_metadata = (
        getattr(candidates[0], 'grounding_metadata', None) if candidates else None
    )

    print("\n--- RAG Citations (Source Document) ---")
    if grounding_metadata is None:
        print("No grounding metadata was returned with this response.")
        return

    # getattr(..., None) lets the second name act as a real fallback when the
    # first attribute exists but is empty.
    chunks = (
        getattr(grounding_metadata, 'grounding_chunks', None)
        or getattr(grounding_metadata, 'retrieval_chunks', None)
    )
    if not chunks:
        print("Grounding metadata was found, but the structure of the citation chunks is not recognized by this SDK version.")
        return

    print("The concepts were grounded by retrieving the following chunks from the indexed PDF:")
    for chunk in chunks:
        print(f"- Source: {FILE_DISPLAY_NAME} (Page: {_chunk_page(chunk)})")


def run_rag_demo():
    """Run the File Search RAG demo end to end.

    Steps: download PDF_URL to FILE_PATH, create a File Search store, upload
    and index the PDF, send USER_QUERY to REQUESTED_MODEL_ID with the
    high-thinking config plus the File Search tool, then print the answer,
    any thought summaries and the citations.

    Errors raised after the download are caught and printed rather than
    propagated. The store (if created) and the local PDF are always cleaned
    up. Returns None.
    """
    # Attempt to download the file first
    if not download_pdf(PDF_URL, FILE_PATH):
        print("Exiting RAG demo due to download failure.")
        return

    file_search_store = None

    try:
        # --- RAG STEP 1: Create a FileSearchStore ---
        print("\nCreating File Search Store...")
        file_search_store = client.file_search_stores.create(
            config={"display_name": STORE_DISPLAY_NAME}
        )
        print(f"Store created: {file_search_store.name}")

        # --- RAG STEP 2: Upload File and Import into Store ---
        print(f"Uploading and indexing file: {FILE_PATH}...")
        uploaded_op = client.file_search_stores.upload_to_file_search_store(
            file=FILE_PATH,
            file_search_store_name=file_search_store.name,
            config={"display_name": FILE_DISPLAY_NAME}
        )

        print("Waiting for indexing to complete (This may take a moment)...")
        _wait_for_indexing(uploaded_op)
        print("Indexing complete.")

        # --- RAG STEP 3: Query the Model using the FileSearchTool ---
        print("\nConfiguring model with RAG tool...")
        high_think_config = get_high_think_config()
        high_think_config.tools = [
            types.Tool(
                file_search=types.FileSearch(
                    file_search_store_names=[file_search_store.name]
                )
            )
        ]

        print(f"\nUser Query (using high thinking level): {USER_QUERY}")
        response = client.models.generate_content(
            model=REQUESTED_MODEL_ID,
            contents=USER_QUERY,
            config=high_think_config
        )

        print("\n--- AI Response (Retrieval Augmented Generation) ---")
        print(response.text)

        _print_thoughts(response)
        _print_citations(response)

    except Exception as e:
        print(f"\n[ERROR] An API error occurred during the process: {e}")

    finally:
        # --- 5. Clean up ---
        print("\n--- Cleanup ---")

        if file_search_store:
            try:
                print(f"Forcing deletion of File Search Store: {file_search_store.name}")
                client.file_search_stores.delete(
                    name=file_search_store.name,
                    config={'force': True}
                )
            except Exception as e:
                print(f"Failed to delete store (Manual deletion may be required): {e}")

        if os.path.exists(FILE_PATH):
            print(f"Deleting local file: {FILE_PATH}")
            try:
                os.remove(FILE_PATH)
            except OSError as e:
                # An exception escaping `finally` would abort the cleanup.
                print(f"Failed to delete local file: {e}")

        print("Cleanup complete.")

# Run the demo only when this code executes as the main module
# (a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    run_rag_demo()

✅ Gemini client configured for **gemini-3-pro-preview**.
Attempting to download PDF from: https://arxiv.org/pdf/1706.03762.pdf
Successfully downloaded and saved to: Attention_Is_All_You_Need.pdf

Creating File Search Store...
Store created: fileSearchStores/transformerragstore-wy6tksi3ftzd
Uploading and indexing file: Attention_Is_All_You_Need.pdf...
Waiting for indexing to complete (This may take a moment)...
Indexing in progress...
Indexing complete.

Configuring model with RAG tool...

User Query (using high thinking level): Summarize the major components of the model as described in the abstract.

--- AI Response (Retrieval Augmented Generation) ---
Based on the abstract of the paper "Attention Is All You Need," the major components and characteristics of the model are summarized as follows:

*   **Transformer Architecture:** The abstract introduces a new, simple network architecture called the **Transformer**.
*   **Sole Reliance on Attention Mechanisms:** Unlike dominant sequence

## Full Code with Complex Query

In [10]:
# 1. IMPORTS
import os
import time
import requests
from google import genai
from google.genai import types

# --- Secure API Client Initialization using Userdata/Environment Variables ---
# Key lookup order: the Colab secret named 'GEMINI' first, then the
# GEMINI_API_KEY environment variable.
GEMINI_API_KEY = None
try:
    from google.colab import userdata
    GEMINI_API_KEY = userdata.get('GEMINI')
except Exception:
    # Outside Colab the import raises ImportError. Inside Colab a missing or
    # access-denied secret raises Colab-specific exceptions that are not
    # KeyError subclasses (NOTE(review): confirm against google.colab.userdata),
    # so catching only (ImportError, KeyError) would skip the fallback below.
    GEMINI_API_KEY = None

if not GEMINI_API_KEY:
    # Also covers a Colab secret that exists but is empty.
    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

REQUESTED_MODEL_ID = 'gemini-3-pro-preview'
client = None

if GEMINI_API_KEY:
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
    except Exception as e:
        print(f"❌ Client initialization failed: {e}")
        client = None
    else:
        # Reached only when the constructor returned without raising.
        print(f"✅ Gemini client configured for **{REQUESTED_MODEL_ID}**.")
else:
    print("❌ API Key not found. Please ensure your key is set up.")

if not client:
    # Nothing below can run without a client. `exit()` is a helper injected by
    # the `site` module for interactive use and is not guaranteed to exist;
    # raising SystemExit works everywhere and reports a non-zero status.
    raise SystemExit(1)


# --- 2. AGENT CONFIGURATIONS ---
def get_low_think_config():
    """Build a generation config that requests the "low" thinking level.

    Meant for quick tasks where latency matters more than reasoning depth.
    """
    thinking = types.ThinkingConfig(thinking_level="low")
    return types.GenerateContentConfig(thinking_config=thinking)

def get_high_think_config():
    """Build a generation config that requests the "high" thinking level.

    Meant for complex tasks where answer quality outweighs latency. The
    config also sets ``include_thoughts=True`` on the thinking settings.
    """
    thinking = types.ThinkingConfig(
        thinking_level="high",
        include_thoughts=True,
    )
    return types.GenerateContentConfig(thinking_config=thinking)

# --- 3. RAG DEMO CONFIGURATION ---
# Source document: remote location and the local path it is saved to.
PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf"
FILE_PATH = "Attention_Is_All_You_Need.pdf"

# Display names: one for the File Search store, one for the uploaded file
# (the latter is also printed next to each citation).
STORE_DISPLAY_NAME = "Transformer_RAG_Store"
FILE_DISPLAY_NAME = "Attention Is All You Need Paper"

# Question sent to the model (the more complex query used by this cell).
USER_QUERY = "What is the complexity per layer for the self-attention mechanism, and how does this compare to recurrent layers, as described in Section 3.1 and 4?"

# --- 4. CORE FUNCTIONS ---

def download_pdf(url, filepath, timeout=30):
    """Download the file at ``url`` and save it to ``filepath``.

    The response body is streamed to disk in 8 KiB chunks.

    Args:
        url: Address of the file to fetch.
        filepath: Local path to write to; an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            cannot block the call indefinitely.

    Returns:
        True if the file was written completely. False if the request or the
        local write failed; an error message is printed and any partially
        written file is removed.
    """
    print(f"Attempting to download PDF from: {url}")
    started_writing = False
    try:
        # The context manager releases the streamed connection even when the
        # transfer aborts part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            started_writing = True
            with open(filepath, 'wb') as pdf_file:
                for chunk in response.iter_content(chunk_size=8192):
                    pdf_file.write(chunk)
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"[ERROR] Failed to download file: {e}")
        # Only remove the file if this call truncated/created it, so a failed
        # request never deletes a file that was already there.
        if started_writing and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                # Best effort: the download failure is already reported.
                pass
        return False

    print(f"Successfully downloaded and saved to: {filepath}")
    return True

def _wait_for_indexing(operation, poll_seconds=5, timeout_seconds=600):
    """Poll the upload operation until it is done; raise on timeout or error."""
    deadline = time.monotonic() + timeout_seconds
    while not operation.done:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Indexing did not finish within {timeout_seconds} seconds."
            )
        time.sleep(poll_seconds)
        operation = client.operations.get(operation)
        print("Indexing in progress...")

    # A finished operation may still have failed.
    error = getattr(operation, 'error', None)
    if error:
        raise RuntimeError(f"Indexing failed: {error}")
    return operation


def _print_thoughts(response):
    """Print the thought summaries of the first candidate, if any."""
    thoughts = []
    candidates = getattr(response, 'candidates', None) or []
    if candidates:
        content = getattr(candidates[0], 'content', None)
        # Thought summaries are content parts flagged with `thought`.
        for part in getattr(content, 'parts', None) or []:
            if getattr(part, 'thought', False) and getattr(part, 'text', None):
                thoughts.append(part.text)

    if not thoughts:
        # Kept for compatibility with the attribute checked previously.
        legacy = getattr(response, 'thinking_output', None)
        if legacy:
            thoughts.append(str(legacy))

    if thoughts:
        print("\n--- Model's Internal Thoughts ---")
        print("\n".join(thoughts))
    else:
        print("\n--- Model Thoughts ---")
        print("Model thoughts were requested but not returned by the current API structure.")


def _chunk_page(chunk):
    """Return a page label for a grounding chunk, or 'N/A' when unknown."""
    # NOTE(review): page info for File Search chunks is expected under
    # retrieved_context.rag_chunk.page_span - confirm against the installed
    # SDK. The flat attributes checked previously are still honoured.
    retrieved = getattr(chunk, 'retrieved_context', None)
    holders = (
        chunk,
        getattr(chunk, 'rag_chunk', None),
        getattr(retrieved, 'rag_chunk', None),
    )
    for holder in holders:
        if holder is None:
            continue
        page = getattr(holder, 'page_number', None)
        if page is not None:
            return page
        span = getattr(holder, 'page_span', None)
        first = getattr(span, 'first_page', None)
        if first is not None:
            last = getattr(span, 'last_page', None)
            return first if last in (None, first) else f"{first}-{last}"
    return 'N/A'


def _print_citations(response):
    """Print one citation line per grounding chunk of the first candidate."""
    candidates = getattr(response, 'candidates', None) or []
    grounding_metadata = (
        getattr(candidates[0], 'grounding_metadata', None) if candidates else None
    )

    print("\n--- RAG Citations (Source Document) ---")
    if grounding_metadata is None:
        print("No grounding metadata was returned with this response.")
        return

    # getattr(..., None) lets the second name act as a real fallback when the
    # first attribute exists but is empty.
    chunks = (
        getattr(grounding_metadata, 'grounding_chunks', None)
        or getattr(grounding_metadata, 'retrieval_chunks', None)
    )
    if not chunks:
        print("Grounding metadata was found, but the structure of the citation chunks is not recognized by this SDK version.")
        return

    print("The concepts were grounded by retrieving the following chunks from the indexed PDF:")
    for chunk in chunks:
        print(f"- Source: {FILE_DISPLAY_NAME} (Page: {_chunk_page(chunk)})")


def run_rag_demo():
    """Run the File Search RAG demo end to end.

    Steps: download PDF_URL to FILE_PATH, create a File Search store, upload
    and index the PDF, send USER_QUERY to REQUESTED_MODEL_ID with the
    high-thinking config plus the File Search tool, then print the answer,
    any thought summaries and the citations.

    Errors raised after the download are caught and printed rather than
    propagated. The store (if created) and the local PDF are always cleaned
    up. Returns None.
    """
    # Attempt to download the file first
    if not download_pdf(PDF_URL, FILE_PATH):
        print("Exiting RAG demo due to download failure.")
        return

    file_search_store = None

    try:
        # --- RAG STEP 1: Create a FileSearchStore ---
        print("\nCreating File Search Store...")
        file_search_store = client.file_search_stores.create(
            config={"display_name": STORE_DISPLAY_NAME}
        )
        print(f"Store created: {file_search_store.name}")

        # --- RAG STEP 2: Upload File and Import into Store ---
        print(f"Uploading and indexing file: {FILE_PATH}...")
        uploaded_op = client.file_search_stores.upload_to_file_search_store(
            file=FILE_PATH,
            file_search_store_name=file_search_store.name,
            config={"display_name": FILE_DISPLAY_NAME}
        )

        print("Waiting for indexing to complete (This may take a moment)...")
        _wait_for_indexing(uploaded_op)
        print("Indexing complete.")

        # --- RAG STEP 3: Query the Model using the FileSearchTool ---
        print("\nConfiguring model with RAG tool...")
        high_think_config = get_high_think_config()
        high_think_config.tools = [
            types.Tool(
                file_search=types.FileSearch(
                    file_search_store_names=[file_search_store.name]
                )
            )
        ]

        print(f"\nUser Query (using high thinking level): {USER_QUERY}")
        response = client.models.generate_content(
            model=REQUESTED_MODEL_ID,
            contents=USER_QUERY,
            config=high_think_config
        )

        print("\n--- AI Response (Retrieval Augmented Generation) ---")
        print(response.text)

        _print_thoughts(response)
        _print_citations(response)

    except Exception as e:
        print(f"\n[ERROR] An API error occurred during the process: {e}")

    finally:
        # --- 5. Clean up ---
        print("\n--- Cleanup ---")

        if file_search_store:
            try:
                print(f"Forcing deletion of File Search Store: {file_search_store.name}")
                client.file_search_stores.delete(
                    name=file_search_store.name,
                    config={'force': True}
                )
            except Exception as e:
                print(f"Failed to delete store (Manual deletion may be required): {e}")

        if os.path.exists(FILE_PATH):
            print(f"Deleting local file: {FILE_PATH}")
            try:
                os.remove(FILE_PATH)
            except OSError as e:
                # An exception escaping `finally` would abort the cleanup.
                print(f"Failed to delete local file: {e}")

        print("Cleanup complete.")

# Run the demo only when this code executes as the main module
# (a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    run_rag_demo()

✅ Gemini client configured for **gemini-3-pro-preview**.
Attempting to download PDF from: https://arxiv.org/pdf/1706.03762.pdf
Successfully downloaded and saved to: Attention_Is_All_You_Need.pdf

Creating File Search Store...
Store created: fileSearchStores/transformerragstore-c6v2y1jzwm1p
Uploading and indexing file: Attention_Is_All_You_Need.pdf...
Waiting for indexing to complete (This may take a moment)...
Indexing in progress...
Indexing complete.

Configuring model with RAG tool...

User Query (using high thinking level): What is the complexity per layer for the self-attention mechanism, and how does this compare to recurrent layers, as described in Section 3.1 and 4?

--- AI Response (Retrieval Augmented Generation) ---
Based on **Section 4** (specifically Table 1 and the accompanying text) of the "Attention Is All You Need" paper, here is the complexity per layer for the self-attention mechanism and its comparison to recurrent layers:

### Complexity per Layer
*   **Self-Attent

## Full Code with Multi-Head Attention Query

In [11]:
# 1. IMPORTS
import os
import time
import requests
from google import genai
from google.genai import types

# --- Secure API Client Initialization using Userdata/Environment Variables ---
# Key lookup order: the Colab secret named 'GEMINI' first, then the
# GEMINI_API_KEY environment variable.
GEMINI_API_KEY = None
try:
    from google.colab import userdata
    GEMINI_API_KEY = userdata.get('GEMINI')
except Exception:
    # Outside Colab the import raises ImportError. Inside Colab a missing or
    # access-denied secret raises Colab-specific exceptions that are not
    # KeyError subclasses (NOTE(review): confirm against google.colab.userdata),
    # so catching only (ImportError, KeyError) would skip the fallback below.
    GEMINI_API_KEY = None

if not GEMINI_API_KEY:
    # Also covers a Colab secret that exists but is empty.
    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

REQUESTED_MODEL_ID = 'gemini-3-pro-preview'
client = None

if GEMINI_API_KEY:
    try:
        client = genai.Client(api_key=GEMINI_API_KEY)
    except Exception as e:
        print(f"❌ Client initialization failed: {e}")
        client = None
    else:
        # Reached only when the constructor returned without raising.
        print(f"✅ Gemini client configured for **{REQUESTED_MODEL_ID}**.")
else:
    print("❌ API Key not found. Please ensure your key is set up.")

if not client:
    # Nothing below can run without a client. `exit()` is a helper injected by
    # the `site` module for interactive use and is not guaranteed to exist;
    # raising SystemExit works everywhere and reports a non-zero status.
    raise SystemExit(1)


# --- 2. AGENT CONFIGURATIONS ---
def get_low_think_config():
    """Build a generation config that requests the "low" thinking level.

    Meant for quick tasks where latency matters more than reasoning depth.
    """
    thinking = types.ThinkingConfig(thinking_level="low")
    return types.GenerateContentConfig(thinking_config=thinking)

def get_high_think_config():
    """Build a generation config that requests the "high" thinking level.

    Meant for complex tasks where answer quality outweighs latency. The
    config also sets ``include_thoughts=True`` on the thinking settings.
    """
    thinking = types.ThinkingConfig(
        thinking_level="high",
        include_thoughts=True,
    )
    return types.GenerateContentConfig(thinking_config=thinking)

# --- 3. RAG DEMO CONFIGURATION ---
# Source document: remote location and the local path it is saved to.
PDF_URL = "https://arxiv.org/pdf/1706.03762.pdf"
FILE_PATH = "Attention_Is_All_You_Need.pdf"

# Display names: one for the File Search store, one for the uploaded file
# (the latter is also printed next to each citation).
STORE_DISPLAY_NAME = "Transformer_RAG_Store"
FILE_DISPLAY_NAME = "Attention Is All You Need Paper"

# Question sent to the model (the Multi-Head Attention query used by this cell).
USER_QUERY = "What is the motivation behind using Multi-Head Attention instead of single-head attention, and what are the specific benefits of dividing the attention space? (Reference sections 3.2.2 and 3.2.3)"

# --- 4. CORE FUNCTIONS ---

def download_pdf(url, filepath, timeout=30):
    """Download the file at ``url`` and save it to ``filepath``.

    The response body is streamed to disk in 8 KiB chunks.

    Args:
        url: Address of the file to fetch.
        filepath: Local path to write to; an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            cannot block the call indefinitely.

    Returns:
        True if the file was written completely. False if the request or the
        local write failed; an error message is printed and any partially
        written file is removed.
    """
    print(f"Attempting to download PDF from: {url}")
    started_writing = False
    try:
        # The context manager releases the streamed connection even when the
        # transfer aborts part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            started_writing = True
            with open(filepath, 'wb') as pdf_file:
                for chunk in response.iter_content(chunk_size=8192):
                    pdf_file.write(chunk)
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"[ERROR] Failed to download file: {e}")
        # Only remove the file if this call truncated/created it, so a failed
        # request never deletes a file that was already there.
        if started_writing and os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                # Best effort: the download failure is already reported.
                pass
        return False

    print(f"Successfully downloaded and saved to: {filepath}")
    return True

def _wait_for_indexing(operation, poll_seconds=5, timeout_seconds=600):
    """Poll the upload operation until it is done; raise on timeout or error."""
    deadline = time.monotonic() + timeout_seconds
    while not operation.done:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Indexing did not finish within {timeout_seconds} seconds."
            )
        time.sleep(poll_seconds)
        operation = client.operations.get(operation)
        print("Indexing in progress...")

    # A finished operation may still have failed.
    error = getattr(operation, 'error', None)
    if error:
        raise RuntimeError(f"Indexing failed: {error}")
    return operation


def _print_thoughts(response):
    """Print the thought summaries of the first candidate, if any."""
    thoughts = []
    candidates = getattr(response, 'candidates', None) or []
    if candidates:
        content = getattr(candidates[0], 'content', None)
        # Thought summaries are content parts flagged with `thought`.
        for part in getattr(content, 'parts', None) or []:
            if getattr(part, 'thought', False) and getattr(part, 'text', None):
                thoughts.append(part.text)

    if not thoughts:
        # Kept for compatibility with the attribute checked previously.
        legacy = getattr(response, 'thinking_output', None)
        if legacy:
            thoughts.append(str(legacy))

    if thoughts:
        print("\n--- Model's Internal Thoughts ---")
        print("\n".join(thoughts))
    else:
        print("\n--- Model Thoughts ---")
        print("Model thoughts were requested but not returned by the current API structure.")


def _chunk_page(chunk):
    """Return a page label for a grounding chunk, or 'N/A' when unknown."""
    # NOTE(review): page info for File Search chunks is expected under
    # retrieved_context.rag_chunk.page_span - confirm against the installed
    # SDK. The flat attributes checked previously are still honoured.
    retrieved = getattr(chunk, 'retrieved_context', None)
    holders = (
        chunk,
        getattr(chunk, 'rag_chunk', None),
        getattr(retrieved, 'rag_chunk', None),
    )
    for holder in holders:
        if holder is None:
            continue
        page = getattr(holder, 'page_number', None)
        if page is not None:
            return page
        span = getattr(holder, 'page_span', None)
        first = getattr(span, 'first_page', None)
        if first is not None:
            last = getattr(span, 'last_page', None)
            return first if last in (None, first) else f"{first}-{last}"
    return 'N/A'


def _print_citations(response):
    """Print one citation line per grounding chunk of the first candidate."""
    candidates = getattr(response, 'candidates', None) or []
    grounding_metadata = (
        getattr(candidates[0], 'grounding_metadata', None) if candidates else None
    )

    print("\n--- RAG Citations (Source Document) ---")
    if grounding_metadata is None:
        print("No grounding metadata was returned with this response.")
        return

    # getattr(..., None) lets the second name act as a real fallback when the
    # first attribute exists but is empty.
    chunks = (
        getattr(grounding_metadata, 'grounding_chunks', None)
        or getattr(grounding_metadata, 'retrieval_chunks', None)
    )
    if not chunks:
        print("Grounding metadata was found, but the structure of the citation chunks is not recognized by this SDK version.")
        return

    print("The concepts were grounded by retrieving the following chunks from the indexed PDF:")
    for chunk in chunks:
        print(f"- Source: {FILE_DISPLAY_NAME} (Page: {_chunk_page(chunk)})")


def run_rag_demo():
    """Run the File Search RAG demo end to end.

    Steps: download PDF_URL to FILE_PATH, create a File Search store, upload
    and index the PDF, send USER_QUERY to REQUESTED_MODEL_ID with the
    high-thinking config plus the File Search tool, then print the answer,
    any thought summaries and the citations.

    Errors raised after the download are caught and printed rather than
    propagated. The store (if created) and the local PDF are always cleaned
    up. Returns None.
    """
    # Attempt to download the file first
    if not download_pdf(PDF_URL, FILE_PATH):
        print("Exiting RAG demo due to download failure.")
        return

    file_search_store = None

    try:
        # --- RAG STEP 1: Create a FileSearchStore ---
        print("\nCreating File Search Store...")
        file_search_store = client.file_search_stores.create(
            config={"display_name": STORE_DISPLAY_NAME}
        )
        print(f"Store created: {file_search_store.name}")

        # --- RAG STEP 2: Upload File and Import into Store ---
        print(f"Uploading and indexing file: {FILE_PATH}...")
        uploaded_op = client.file_search_stores.upload_to_file_search_store(
            file=FILE_PATH,
            file_search_store_name=file_search_store.name,
            config={"display_name": FILE_DISPLAY_NAME}
        )

        print("Waiting for indexing to complete (This may take a moment)...")
        _wait_for_indexing(uploaded_op)
        print("Indexing complete.")

        # --- RAG STEP 3: Query the Model using the FileSearchTool ---
        print("\nConfiguring model with RAG tool...")
        high_think_config = get_high_think_config()
        high_think_config.tools = [
            types.Tool(
                file_search=types.FileSearch(
                    file_search_store_names=[file_search_store.name]
                )
            )
        ]

        print(f"\nUser Query (using high thinking level): {USER_QUERY}")
        response = client.models.generate_content(
            model=REQUESTED_MODEL_ID,
            contents=USER_QUERY,
            config=high_think_config
        )

        print("\n--- AI Response (Retrieval Augmented Generation) ---")
        print(response.text)

        _print_thoughts(response)
        _print_citations(response)

    except Exception as e:
        print(f"\n[ERROR] An API error occurred during the process: {e}")

    finally:
        # --- 5. Clean up ---
        print("\n--- Cleanup ---")

        if file_search_store:
            try:
                print(f"Forcing deletion of File Search Store: {file_search_store.name}")
                client.file_search_stores.delete(
                    name=file_search_store.name,
                    config={'force': True}
                )
            except Exception as e:
                print(f"Failed to delete store (Manual deletion may be required): {e}")

        if os.path.exists(FILE_PATH):
            print(f"Deleting local file: {FILE_PATH}")
            try:
                os.remove(FILE_PATH)
            except OSError as e:
                # An exception escaping `finally` would abort the cleanup.
                print(f"Failed to delete local file: {e}")

        print("Cleanup complete.")

# Run the demo only when this code executes as the main module
# (a script or notebook cell), not when it is imported.
if __name__ == "__main__":
    run_rag_demo()

✅ Gemini client configured for **gemini-3-pro-preview**.
Attempting to download PDF from: https://arxiv.org/pdf/1706.03762.pdf
Successfully downloaded and saved to: Attention_Is_All_You_Need.pdf

Creating File Search Store...
Store created: fileSearchStores/transformerragstore-0hrq1vopweyt
Uploading and indexing file: Attention_Is_All_You_Need.pdf...
Waiting for indexing to complete (This may take a moment)...
Indexing in progress...
Indexing complete.

Configuring model with RAG tool...

User Query (using high thinking level): What is the motivation behind using Multi-Head Attention instead of single-head attention, and what are the specific benefits of dividing the attention space? (Reference sections 3.2.2 and 3.2.3)

--- AI Response (Retrieval Augmented Generation) ---
Based on **Section 3.2.2 (Multi-Head Attention)** of the "Attention Is All You Need" paper, the motivation and benefits are as follows:

### Motivation
The primary motivation for using Multi-Head Attention instead of