Combination of python and bash files.

Program vector search using Bigquery & RAG

Create faq embedding model

In [87]:
!gcloud auth application-default login


You are running on a Google Compute Engine virtual machine.
The service credentials associated with this virtual machine
will automatically be used by Application Default
Credentials, so it is not necessary to use this command.

If you decide to proceed anyway, your user credentials may be visible
to others with access to this virtual machine. Are you sure you want
to authenticate with your personal account?

Do you want to continue (Y/n)?  Y

Go to the following link in your browser, and complete the sign-in prompts:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fsdk.cloud.google.com%2Fapplicationdefaultauthcode.html&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=9BHAu8SiriNcrdNCiTLtc9iRSp4u5h&prompt=consent&token_

In [85]:
# You might already have google-cloud-bigquery, but you need this for the LLM
!pip install google-cloud-aiplatform
!pip install google-auth



In [None]:
%%bigquery
-- 1. Create the remote model
CREATE OR REPLACE MODEL
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.faq_embedding_model`
REMOTE WITH CONNECTION
  `US.vertex-rag-connector` -- üö® Try using the multi-region prefix "US"
OPTIONS (
  ENDPOINT = 'text-embedding-005'
);



Query is running:   0%|          |

Create table with embeddings

In [None]:
%%bigquery
CREATE OR REPLACE TABLE
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.embedded_aurora_faqs` AS
SELECT
  t.question,
  t.answer,
  -- Combine question and answer for a richer chunk to embed, and alias it as 'content'
  CONCAT('Question: ', t.question, '. Answer: ', t.answer) AS content,
  -- This column contains the generated vector (ARRAY<FLOAT64>)
  ml_generate_embedding_result AS embedding
FROM
  ML.GENERATE_EMBEDDING(
    MODEL `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.faq_embedding_model`,
    ( -- Inner query must select the text column to embed and alias it as 'content'
      SELECT
        CONCAT('Question: ', question, '. Answer: ', answer) AS content
      FROM
        `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.aurora-bay-faqs`
    )
  ) AS e
-- Join back to the original table to keep the original question/answer text
JOIN
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.aurora-bay-faqs` AS t
ON
  e.content = CONCAT('Question: ', t.question, '. Answer: ', t.answer);

Query is running:   0%|          |

Create the gemini model

In [14]:
%%bigquery
CREATE OR REPLACE MODEL
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.gemini_model`
REMOTE WITH CONNECTION
  `US.vertex-rag-connector` -- Use the region prefix that worked (US)
OPTIONS (
  ENDPOINT = 'gemini-2.5-flash' -- The LLM used for generating the final answer
);

Query is running:   0%|          |

Try it out to make sure it works

In [44]:
%%bigquery
DECLARE user_question STRING DEFAULT 'What is the population of Aurora Bay?';

-- Step 1: Get the embedding for the user's question
CREATE TEMP TABLE query_emb AS
SELECT ml_generate_embedding_result AS embedding
FROM ML.GENERATE_EMBEDDING(
  MODEL `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.faq_embedding_model`,
  (SELECT user_question AS content)
);

-- Step 2: Find similar documents
CREATE TEMP TABLE similar_docs AS
SELECT
  t.content,
  ML.DISTANCE(t.embedding, q.embedding, 'COSINE') AS distance
FROM
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.embedded_aurora_faqs` t,
  query_emb q
ORDER BY distance ASC
LIMIT 5;

-- Step 3: Generate the answer
SELECT
  ml_generate_text_result
FROM ML.GENERATE_TEXT(
  MODEL `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.gemini_model`,
  (SELECT
    CONCAT(
      'Answer the question based *only* on the following context. If the context does not contain the answer, state that you cannot answer.\n\n',
      'Question: ', user_question, '\n\n',
      'Context:\n',
      STRING_AGG(content, '\n')
    ) AS prompt
  FROM similar_docs),
  STRUCT(
    512 AS max_output_tokens
  )
);

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,ml_generate_text_result
0,"{""candidates"":[{""avg_logprobs"":-0.370920838980..."


In [48]:
%%bigquery
DECLARE user_question STRING DEFAULT 'What is the population of Aurora Bay?';

-- Step 1: Get the embedding for the user's question (No Change)
CREATE TEMP TABLE query_emb AS
SELECT ml_generate_embedding_result AS embedding
FROM ML.GENERATE_EMBEDDING(
  MODEL `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.faq_embedding_model`,
  (SELECT user_question AS content)
);

-- Step 2: Find similar documents (No Change)
CREATE TEMP TABLE similar_docs AS
SELECT
  t.content,
  ML.DISTANCE(t.embedding, q.embedding, 'COSINE') AS distance
FROM
  `qwiklabs-gcp-03-b295c10c44aa.rag_dataset.embedded_aurora_faqs` t,
  query_emb q
ORDER BY distance ASC
LIMIT 5;

-- Step 3: Use REGEXP_EXTRACT to output ONLY the text following "Answer: "
SELECT
  REGEXP_EXTRACT(content, r'Answer: (.*)') AS extracted_answer
FROM similar_docs
ORDER BY distance ASC
LIMIT 1;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,extracted_answer
0,Aurora Bay has a population of approximately 3...


In [49]:
from google.cloud import bigquery
import json

def get_rag_answer(user_question: str) -> str:
    # Initialize the BigQuery Client
    client = bigquery.Client()

    # ‚ö†Ô∏è Replace these with your actual project and model IDs
    PROJECT_ID = "qwiklabs-gcp-03-b295c10c44aa"
    DATASET_ID = "rag_dataset"
    FAQ_TABLE = f"`{PROJECT_ID}.{DATASET_ID}.embedded_aurora_faqs`"
    EMBEDDING_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.faq_embedding_model`"
    GEMINI_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.gemini_model`"

    # The full RAG SQL query
    rag_query = f"""
    DECLARE user_question STRING DEFAULT '{user_question}';

    -- Step 1: Get the embedding for the user's question
    CREATE TEMP TABLE query_emb AS
    SELECT ml_generate_embedding_result AS embedding
    FROM ML.GENERATE_EMBEDDING(
        MODEL {EMBEDDING_MODEL},
        (SELECT user_question AS content)
    );

    -- Step 2: Find similar documents
    CREATE TEMP TABLE similar_docs AS
    SELECT
        t.content,
        ML.DISTANCE(t.embedding, q.embedding, 'COSINE') AS distance
    FROM
        {FAQ_TABLE} t,
        query_emb q
    ORDER BY distance ASC
    LIMIT 5;

    -- Step 3: Generate the answer using the retrieved context
    SELECT
        ml_generate_text_result
    FROM ML.GENERATE_TEXT(
        MODEL {GEMINI_MODEL},
        (SELECT
            CONCAT(
                'Answer the question based *only* on the following context. If the context does not contain the answer, state that you cannot answer.\n\n',
                'Question: ', user_question, '\n\n',
                'Context:\n',
                STRING_AGG(content, '\n')
            ) AS prompt
        FROM similar_docs),
        STRUCT(
            512 AS max_output_tokens
        )
    );
    """

    # Execute the query
    query_job = client.query(rag_query)

    # Get the final result
    results = query_job.result()

    # The result is a JSON string containing the model's output
    for row in results:
        # Load the JSON string from the result
        response_json = json.loads(row[0])
        # Extract the text part from the Gemini response structure
        return response_json['candidates'][0]['content']['parts'][0]['text']

    return "Sorry, I couldn't find an answer."

In [88]:
# Install the necessary library (if not already installed)
!pip install google-cloud-bigquery

# --- Authenticate Colab to use your GCP credentials ---
from google.colab import auth
try:
    auth.authenticate_user()
    print("Authentication successful.")
except Exception as e:
    print(f"Authentication failed: {e}. Please ensure you are logged into GCP.")

Authentication successful.


In [76]:
from google.cloud import bigquery
import json
# Assuming necessary imports for an external Gemini LLM call (e.g., from google import genai)
# and configuration are set up outside this snippet.

# Define your project and model IDs here (keep this outside the function)
PROJECT_ID = "qwiklabs-gcp-03-b295c10c44aa"
DATASET_ID = "rag_dataset"
FAQ_TABLE = f"`{PROJECT_ID}.{DATASET_ID}.embedded_aurora_faqs`"
EMBEDDING_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.faq_embedding_model`"
# We will no longer use GEMINI_MODEL for the final generation in BQ

client = bigquery.Client(project=PROJECT_ID)

def get_context_from_bq(user_question: str) -> list[str]:
    """
    Executes the Retrieval (RAG) step in BigQuery to get the relevant context documents.
    Returns a list of content strings.
    """
    # üí• FIX: First, strip any leading/trailing quotes from the user input.
    cleaned_question = user_question.strip("'").strip('"')

    # Second, escape any remaining single quotes within the string for safe SQL insertion
    sql_question = cleaned_question.replace("'", "\\'")

    # --- SQL Query to get ONLY the top 5 context documents ---
    retrieval_query = f"""
    SELECT
      t.content
    FROM
      {FAQ_TABLE} t,
      ( -- Step 1: Get the embedding for the user's question
        SELECT ml_generate_embedding_result AS embedding
        FROM ML.GENERATE_EMBEDDING(
          MODEL {EMBEDDING_MODEL},
          (SELECT '{sql_question}' AS content)
        )
      ) q
    ORDER BY ML.DISTANCE(t.embedding, q.embedding, 'COSINE') ASC
    LIMIT 5;
    """

    context_list = []
    try:
        query_job = client.query(retrieval_query)

        # Get the context results
        for row in query_job.result():
            context_list.append(row[0])

        return context_list

    except Exception as e:
        print(f"[ERROR] BigQuery Retrieval Query Failed: {e}")
        return []

In [110]:
from google.cloud import bigquery

import json



# Define your project and model IDs here (keep this outside the function)

PROJECT_ID = "qwiklabs-gcp-03-b295c10c44aa"

DATASET_ID = "rag_dataset"

FAQ_TABLE = f"`{PROJECT_ID}.{DATASET_ID}.embedded_aurora_faqs`"

EMBEDDING_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.faq_embedding_model`"

GEMINI_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.gemini_model`"



client = bigquery.Client(project=PROJECT_ID)



def get_rag_answer_from_bq(user_question: str) -> str:

    """

    Executes the full RAG workflow in BigQuery.

    """



    # üí• FIX: First, strip any leading/trailing quotes from the user input.

    cleaned_question = user_question.strip("'").strip('"')



    # Second, escape any remaining single quotes within the string for safe SQL insertion

    sql_question = cleaned_question.replace("'", "\\'")



    # --- The Full RAG SQL Query ---

    rag_query = f"""

    SELECT

      ml_generate_text_result

    FROM ML.GENERATE_TEXT(

      MODEL {GEMINI_MODEL},

      (SELECT

        CONCAT(

          'Answer the question based *only* on the following context. If the context does not contain the answer, state that you cannot answer.\\n\\n',

          'Question: ', '{sql_question}', '\\n\\n',

          'Context:\\n',

          STRING_AGG(content, '\\n')

        ) AS prompt

      FROM (

        SELECT

          t.content

        FROM

          {FAQ_TABLE} t,

          ( -- Step 1: Get the embedding for the user's question

            SELECT ml_generate_embedding_result AS embedding

            FROM ML.GENERATE_EMBEDDING(

              MODEL {EMBEDDING_MODEL},

              (SELECT '{sql_question}' AS content)

            )

          ) q

        ORDER BY ML.DISTANCE(t.embedding, q.embedding, 'COSINE') ASC

        LIMIT 5

      )),

      STRUCT(

        512 AS max_output_tokens

      )

    );

    """



    # ... (rest of the try/except block to execute the query and parse results) ...

    # Execute the query

    try:

        query_job = client.query(rag_query)



        # Get the final result from the job

        for row in query_job.result():

            raw_result = row[0]



            # üí• FIX: Safely check the type of the result before loading JSON

            if isinstance(raw_result, str):

                # If it's a string, load the JSON structure

                response_json = json.loads(raw_result)

            elif isinstance(raw_result, dict):

                # If it's already a dict (pre-parsed), use it directly

                response_json = raw_result

            else:

                return f"[ERROR] Unexpected BigQuery result type: {type(raw_result)}"



            # Extract the final answer text

            return response_json['candidates'][0]['content']['parts'][0]['text']



    except Exception as e:

        return f"[ERROR] BigQuery Query Failed: {e}"



    return "Sorry, I couldn't find an answer using the RAG model."

This is just the RAG component. The RAG + LLM combination is next.

Ask the chatbot a question about Aurora Bay: What is the population of Aurora Bay?

In [111]:
def run_chatbot():

    print("ü§ñ Chatbot Initialized. Ask about Aurora Bay. Type 'quit' or 'exit' to end.")

    while True:

        user_input = input("üë§ You: ")



        if user_input.lower() in ('quit', 'exit'):

            print("ü§ñ Goodbye!")

            break



        if not user_input.strip():

            continue



        print("ü§ñ Thinking... (Running RAG in BigQuery)")



        # Call the BigQuery RAG function

        answer = get_rag_answer_from_bq(user_input)



        print(f"ü§ñ Chatbot: {answer}\n")



# Start the chatbot!

run_chatbot()

ü§ñ Chatbot Initialized. Ask about Aurora Bay. Type 'quit' or 'exit' to end.
üë§ You: What is the population of Aurora Bay?
ü§ñ Thinking... (Running RAG in BigQuery)
ü§ñ Chatbot: Aurora Bay has a population of approximately 3,200 residents, although it can fluctuate seasonally due to temporary fishing and tourism workforces.



KeyboardInterrupt: Interrupted by user

Now the goal is to combine the output from the RAG to the user's question and feed that into the LLM and return the output of the LLM to the user.

In [108]:
# --- BigQuery RAG Setup ---
from google.cloud import bigquery
import json
# üéØ CHANGE 1: Import the correct SDK for Vertex AI
from google.cloud import aiplatform
import requests
from google.auth.transport.requests import AuthorizedSession
from google.auth import default

# Define your project and model IDs/locations (MANDATORY for Vertex AI)
PROJECT_ID = "qwiklabs-gcp-03-b295c10c44aa"
# üéØ IMPORTANT: Set the region where your Vertex AI project/model exists
REGION = "us-central1"
DATASET_ID = "rag_dataset"
FAQ_TABLE = f"`{PROJECT_ID}.{DATASET_ID}.embedded_aurora_faqs`"
EMBEDDING_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.faq_embedding_model`"
# The model name for Vertex AI is the public name
VERTEX_MODEL_NAME = "gemini-2.5-flash"

client = bigquery.Client(project=PROJECT_ID)

# üéØ CHANGE 2: Initialize the Vertex AI client using PROJECT_ID and REGION
try:
    aiplatform.init(project=PROJECT_ID, location=REGION)
    print(f"‚úÖ Vertex AI Client initialized for project {PROJECT_ID} in {REGION}")
except Exception as e:
    print(f"[ERROR] Could not initialize Vertex AI client: {e}")
    # Set to None if initialization fails
    aiplatform = None


# --- LLM Call Function (Using Vertex AI SDK) ---
# --- Replacement for call_external_gemini_llm (Using REST API) ---
def call_external_gemini_llm(prompt: str) -> str:
    """
    Calls the Gemini LLM via the Vertex AI REST API using ADC for authentication.
    Bypasses problematic Protobuf serialization in the SDK.
    """
    # 1. Get authenticated credentials
    credentials, project = default()
    authed_session = AuthorizedSession(credentials)

    # 2. Define the REST API endpoint
    # Note: The API path uses the publisher model name
    url = (
        f"https://{REGION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{REGION}/"
        f"publishers/google/models/{VERTEX_MODEL_NAME}:generateContent"
    )

    # 3. Define the request body (JSON payload)
    request_body = {
        "contents": [{"role": "user", "parts": [{"text": prompt}]}],
        # üéØ FIX: Changed 'parameters' to the standard 'generationConfig' for REST API
        "generationConfig": {
            "maxOutputTokens": 512,
            "temperature": 0.2
        }
    }

    try:
        # 4. Make the POST request
        response = authed_session.post(url, json=request_body)
        response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)

        # 5. Parse the JSON response
        response_data = response.json()

        # Extract the text from the standard Gemini JSON structure
        return response_data['candidates'][0]['content']['parts'][0]['text']

    except requests.exceptions.HTTPError as http_err:
        return f"[ERROR] Vertex AI LLM Call Failed (HTTP): {http_err} - {response.text}"
    except Exception as e:
        return f"[ERROR] Vertex AI LLM Call Failed (General): {e}"


# --- RAG Retrieval Function (Placeholder for context) ---
# NOTE: You MUST have your get_context_from_bq function defined above this point.
# It should look similar to the one provided in the previous turn.

# Example placeholder for demonstration purposes
def get_context_from_bq(user_question: str) -> list[str]:
    # Placeholder to simulate a successful BigQuery retrieval
    print(f"    (BigQuery: Simulating retrieval for '{user_question}')")
    if "population" in user_question.lower():
        return [
            "The population of Aurora Bay is 25,000 residents as of the latest census.",
            "Aurora Bay is a town in the Northern region and is known for its size."
        ]
    return ["The context is about a large ship.", "The ship is 500 meters long."]

# --- Main RAG Function (Uses Retrieval and LLM Call) ---
def get_rag_answer_external_llm(user_question: str) -> str:
    # ... (This function remains unchanged from your original block) ...

    # 1. RETRIEVAL: Get context documents from BigQuery
    context_docs = get_context_from_bq(user_question)

    if not context_docs:
        return "Sorry, I couldn't retrieve any relevant context documents."

    # 2. PROMPT CONSTRUCTION: Combine question and context
    context_text = "\n".join(context_docs)

    final_prompt = f"""
    Answer the question based **only** on the following context. If the context does not contain the answer, state that you cannot answer.

    Question: {user_question}

    Context:
    {context_text}
    """

    # 3. GENERATION: Call the external Gemini LLM
    try:
        llm_answer = call_external_gemini_llm(final_prompt)
        return llm_answer
    except Exception as e:
        return f"[ERROR] External LLM Call Failed: {e}"

# --- The Chatbot Loop ---
def run_chatbot():
    print("ü§ñ Chatbot Initialized. Ask about Aurora Bay. Type 'quit' or 'exit' to end.")
    while True:
        user_input = input("üë§ You: ")

        if user_input.lower() in ('quit', 'exit'):
            print("ü§ñ Goodbye!")
            break

        if not user_input.strip():
            continue

        print("ü§ñ Thinking... (Retrieving context from BigQuery and calling external LLM)")

        # Call the new RAG function that uses the external LLM
        answer = get_rag_answer_external_llm(user_input)

        print(f"ü§ñ Chatbot: {answer}\n")

‚úÖ Vertex AI Client initialized for project qwiklabs-gcp-03-b295c10c44aa in us-central1


Ask the chatbot a question about Aurora Bay: What is the population of Aurora Bay?

In [94]:
import vertexai
import google.cloud.aiplatform as aiplatform

print("vertexai:", vertexai.__version__)
print("aiplatform:", aiplatform.__version__)

vertexai: 1.120.0
aiplatform: 1.120.0


In [115]:
from google.cloud import bigquery
import json

# Define your project and model IDs here
PROJECT_ID = "qwiklabs-gcp-03-b295c10c44aa"
DATASET_ID = "rag_dataset"
FAQ_TABLE = f"`{PROJECT_ID}.{DATASET_ID}.embedded_aurora_faqs`"
EMBEDDING_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.faq_embedding_model`"
GEMINI_MODEL = f"`{PROJECT_ID}.{DATASET_ID}.gemini_model`"

client = bigquery.Client(project=PROJECT_ID)


def retrieve_relevant_context(user_question: str, top_k: int = 5) -> str:
    """
    Step 1: RAG Retrieval - Get relevant context from BigQuery vector search.
    """
    cleaned_question = user_question.strip("'").strip('"')
    sql_question = cleaned_question.replace("'", "\\'")

    retrieval_query = f"""
    SELECT
      STRING_AGG(content, '\\n\\n') AS context
    FROM (
      SELECT
        t.content
      FROM
        {FAQ_TABLE} t,
        (
          SELECT ml_generate_embedding_result AS embedding
          FROM ML.GENERATE_EMBEDDING(
            MODEL {EMBEDDING_MODEL},
            (SELECT '{sql_question}' AS content)
          )
        ) q
      ORDER BY ML.DISTANCE(t.embedding, q.embedding, 'COSINE') ASC
      LIMIT {top_k}
    )
    """

    try:
        query_job = client.query(retrieval_query)
        for row in query_job.result():
            return row[0] if row[0] else ""
    except Exception as e:
        raise Exception(f"RAG retrieval failed: {e}")

    return ""


def generate_llm_response(user_question: str, context: str) -> str:
    """
    Step 2: LLM Generation - Feed the context and question to Gemini.
    Uses parameterized query to avoid SQL injection and escaping issues.
    """
    # Build the prompt combining user question + RAG context
    prompt = f"""Answer the question based *only* on the following context. If the context does not contain the answer, state that you cannot answer.

Question: {user_question}

Context:
{context}"""

    # Use parameterized query to avoid SQL escaping issues
    generation_query = f"""
    SELECT
      ml_generate_text_result
    FROM ML.GENERATE_TEXT(
      MODEL {GEMINI_MODEL},
      (SELECT @prompt AS prompt),
      STRUCT(
        2048 AS max_output_tokens,
        0.2 AS temperature
      )
    )
    """

    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("prompt", "STRING", prompt)
        ]
    )

    try:
        query_job = client.query(generation_query, job_config=job_config)

        for row in query_job.result():
            raw_result = row[0]

            if isinstance(raw_result, str):
                response_json = json.loads(raw_result)
            elif isinstance(raw_result, dict):
                response_json = raw_result
            else:
                raise Exception(f"Unexpected result type: {type(raw_result)}")

            return response_json['candidates'][0]['content']['parts'][0]['text']

    except Exception as e:
        raise Exception(f"LLM generation failed: {e}")

    return "Sorry, I couldn't generate a response."


def get_rag_answer_from_bq(user_question: str) -> str:
    """
    Combined RAG workflow: Retrieve context, then generate answer with LLM.

    Flow:
    1. User Question ‚Üí RAG Retrieval (find relevant FAQs)
    2. User Question + RAG Context ‚Üí Gemini LLM
    3. LLM Response ‚Üí User
    """
    try:
        # Step 1: Retrieve relevant context using RAG
        print("  ‚Üí Retrieving relevant context...")
        context = retrieve_relevant_context(user_question, top_k=5)

        if not context:
            return "No relevant context found in the knowledge base."

        print(f"  ‚Üí Found context (preview): {context[:150]}...")

        # Step 2: Generate answer using LLM with the retrieved context
        print("  ‚Üí Generating response with LLM...")
        answer = generate_llm_response(user_question, context)

        return answer

    except Exception as e:
        return f"[ERROR] {e}"


def run_chatbot():
    print("ü§ñ Chatbot Initialized. Ask about Aurora Bay. Type 'quit' or 'exit' to end.")
    while True:
        user_input = input("üë§ You: ")

        if user_input.lower() in ('quit', 'exit'):
            print("ü§ñ Goodbye!")
            break

        if not user_input.strip():
            continue

        print("ü§ñ Thinking... (Running RAG in BigQuery)")

        # Call the BigQuery RAG function
        answer = get_rag_answer_from_bq(user_input)

        print(f"ü§ñ Chatbot: {answer}\n")


# Start the chatbot!
run_chatbot()

ü§ñ Chatbot Initialized. Ask about Aurora Bay. Type 'quit' or 'exit' to end.
üë§ You: What is the best way to travel to Aurora Bay?
ü§ñ Thinking... (Running RAG in BigQuery)
  ‚Üí Retrieving relevant context...
  ‚Üí Found context (preview): Question: What is the best way to travel to Aurora Bay?. Answer: Most visitors arrive via regional flights into Aurora Bay Airport or by ferry from ne...
  ‚Üí Generating response with LLM...
ü§ñ Chatbot: Most visitors arrive via regional flights into Aurora Bay Airport or by ferry from nearby coastal towns. Small cruise ships also make seasonal stops.

üë§ You: What is the population of Aurora Bay?
ü§ñ Thinking... (Running RAG in BigQuery)
  ‚Üí Retrieving relevant context...
  ‚Üí Found context (preview): Question: What is the population of Aurora Bay?. Answer: Aurora Bay has a population of approximately 3,200 residents, although it can fluctuate seaso...
  ‚Üí Generating response with LLM...
ü§ñ Chatbot: Aurora Bay has a population of ap

KeyboardInterrupt: Interrupted by user

Ask the chatbot a question about Aurora Bay: What is the population of Aurora Bay?