In [1]:
# Install or upgrade the Vertex AI and BigQuery client libraries into the user site-packages.
# NOTE(review): versions are not pinned, so a re-run may pull different releases;
# a kernel restart may be required before the upgraded packages are importable — confirm.
!pip install --upgrade --user google-cloud-aiplatform google-cloud-bigquery

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.118.0-py2.py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting google-cloud-bigquery
  Downloading google_cloud_bigquery-3.38.0-py3-none-any.whl.metadata (8.0 kB)
Collecting google-genai<2.0.0,>=1.37.0 (from google-cloud-aiplatform)
  Downloading google_genai-1.39.1-py3-none-any.whl.metadata (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.4/45.4 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Downloading google_cloud_aiplatform-1.118.0-py2.py3-none-any.whl (8.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading google_cloud_bigquery-3.38.0-py3-none-any.whl (259 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.3/259.3 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[?25hD

In [None]:
# Get project ID
# The `!` prefix runs the shell command and captures its output as a list of lines,
# so the first element is taken as the project ID.
# NOTE(review): if gcloud prints a warning first or no project is configured,
# index 0 may not hold a usable project ID — confirm for the target environment.
PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]
# Region reused later for Vertex AI initialization and for the BigQuery dataset location.
LOCATION = "us-central1" # @param {type:"string"}
print(PROJECT_ID)

qwiklabs-gcp-03-43ab15a95217


In [2]:
from google.cloud import aiplatform
# Configure the Vertex AI SDK with the project and region defined earlier in the notebook.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

# Simple confirmation that the initialization call returned without raising.
print("Initialized")

Initialized


In [None]:
from google.cloud import bigquery
from google.api_core.exceptions import NotFound

# Names of the BigQuery resources used throughout the notebook.
DATASET_ID_REQUESTED = "genai-skills-workshop"
# The requested name contains hyphens; they are swapped for underscores here
# to obtain the dataset ID that is actually used.
DATASET_ID = "_".join(DATASET_ID_REQUESTED.split("-"))
TABLE_ID = "aurora_bay_faqs"
GCS_URI = "gs://labs.roitraining.com/aurora-bay-faqs/aurora-bay-faqs.csv"

# Client bound to the project discovered earlier in the notebook.
bq_client = bigquery.Client(project=PROJECT_ID)

# Dataset description (ID plus location) used for both the lookup and the create call.
dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET_ID}")
dataset_ref.location = LOCATION

# Get-or-create: only a NotFound from the lookup triggers creation.
try:
  bq_client.get_dataset(dataset_ref)
except NotFound:
  bq_client.create_dataset(dataset_ref)
  print(f"Created dataset `{PROJECT_ID}.{DATASET_ID}` in {LOCATION}.")
else:
  print(f"Dataset `{PROJECT_ID}.{DATASET_ID}` already exists.")


In [None]:
# Destination table, written as project.dataset.table.
table_id = ".".join((PROJECT_ID, DATASET_ID, TABLE_ID))

# Load settings: CSV input with one header row, schema autodetection,
# and truncation of whatever the table held before.
load_job_config = bigquery.LoadJobConfig()
load_job_config.source_format = bigquery.SourceFormat.CSV
load_job_config.skip_leading_rows = 1
load_job_config.autodetect = True
load_job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Start loading the CSV from Cloud Storage into the destination table.
load_job = bq_client.load_table_from_uri(
  GCS_URI,
  table_id,
  job_config=load_job_config,
)
print(f"Starting load job {load_job.job_id}")

# Block until the job finishes, then re-read the table metadata to report the row count.
load_job.result()
table = bq_client.get_table(table_id)
print(f"Loaded {table.num_rows} rows into {table.full_table_id}.")


In [None]:
from google.cloud import bigquery_connection_v1

# Connection configuration that allows BigQuery to call Vertex AI embeddings.
CONNECTION_LOCATION = "us"
CONNECTION_ID = "vertex_ai_text_embeddings"
connection_parent = f"projects/{PROJECT_ID}/locations/{CONNECTION_LOCATION}"
connection_name = f"{connection_parent}/connections/{CONNECTION_ID}"
# NOTE(review): the dataset is created in LOCATION ("us-central1") while this
# connection lives in "us" — confirm BigQuery accepts that pairing for remote models.

connection_client = bigquery_connection_v1.ConnectionServiceClient()

# Reuse the connection when it already exists, otherwise create a new Cloud resource connection.
try:
  connection = connection_client.get_connection(name=connection_name)
  print(f"Connection `{connection.name}` already exists.")
except NotFound:
  # A Cloud resource connection takes no input settings: CloudResourceProperties
  # only carries the output-only service account that BigQuery provisions, so an
  # empty message is what selects the "Cloud resource" connection type.
  connection = bigquery_connection_v1.types.Connection(
      cloud_resource=bigquery_connection_v1.types.CloudResourceProperties()
  )
  connection = connection_client.create_connection(
      parent=connection_parent,
      connection_id=CONNECTION_ID,
      connection=connection,
  )
  print(f"Created connection `{connection.name}` for Vertex AI embeddings.")

# Surface the connection's service account: it must be allowed to call Vertex AI
# (for example via the Vertex AI User role) before the remote model can be used.
print(f"Connection service account: {connection.cloud_resource.service_account_id}")


In [None]:
# Define a remote BigQuery ML model that proxies requests to the Vertex AI text-embedding-005 endpoint.
# The model is created inside the notebook's dataset and is bound to the BigQuery
# connection identified by CONNECTION_LOCATION and CONNECTION_ID.
# NOTE(review): confirm that `location` is an accepted OPTIONS key for remote models
# and that a connection in CONNECTION_LOCATION can serve a dataset in LOCATION.
remote_model_sql = f"""
CREATE OR REPLACE MODEL `{PROJECT_ID}.{DATASET_ID}.embedding_model`
REMOTE WITH CONNECTION `{PROJECT_ID}.{CONNECTION_LOCATION}.{CONNECTION_ID}`
OPTIONS (
  endpoint = 'text-embedding-005',
  location = '{LOCATION}'
)
"""

# Execute the DDL so the model can be used in subsequent ML.GENERATE_TEXT_EMBEDDING calls.
# `.result()` waits for the DDL job to finish before the confirmation is printed.
query_job = bq_client.query(remote_model_sql)
query_job.result()
print(f"Created or updated remote model `{PROJECT_ID}.{DATASET_ID}.embedding_model` targeting text-embedding-005.")


In [None]:
# Inspect the newly loaded table to understand available fields for downstream processing.
# The table metadata is fetched again here; the load job used schema autodetection,
# so the columns listed below are whatever was inferred from the CSV.
table = bq_client.get_table(table_id)
print("Schema for aurora_bay_faqs:")
# One line per column: the column name followed by its field type.
for field in table.schema:
  print(f"  {field.name}: {field.field_type}")


In [None]:
# Materialize a table that stores concatenated Q/A text alongside its embedding vector.
EMBEDDING_TABLE_ID = "aurora_bay_faqs_with_embeddings"
embedding_table_ref = f"{PROJECT_ID}.{DATASET_ID}.{EMBEDDING_TABLE_ID}"

# ML.GENERATE_TEXT_EMBEDDING is a table-valued function: it must appear in the FROM
# clause and receives its input rows from a subquery that exposes the text to embed
# in a column named `content`. With flatten_json_output enabled the embedding is
# returned in the `text_embedding` column, which is renamed to `qa_embedding` so the
# vector search step can reference it by that name.
embedding_sql = f"""
CREATE OR REPLACE TABLE `{embedding_table_ref}` AS
SELECT
  question,
  answer,
  content AS qa_text,
  text_embedding AS qa_embedding
FROM ML.GENERATE_TEXT_EMBEDDING(
  MODEL `{PROJECT_ID}.{DATASET_ID}.embedding_model`,
  (
    SELECT
      question,
      answer,
      CONCAT(question, ': ', answer) AS content
    FROM `{table_id}`
    WHERE question IS NOT NULL AND answer IS NOT NULL
  ),
  STRUCT(TRUE AS flatten_json_output)
);
"""

# Run the transformation query and wait for completion.
embedding_job = bq_client.query(embedding_sql)
embedding_job.result()
print(f"Created table `{embedding_table_ref}` with concatenated QA embeddings.")


In [None]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part

def answer_question_gemini(prompt, model_name="gemini-2.5-flash-lite"):
  """Send a prompt to Gemini with fixed generation settings and return the reply text.

  Args:
    prompt: The full prompt passed to the model.
    model_name: Name of the Gemini model to call. Defaults to the model the
      notebook originally hard-coded.

  Returns:
    The text of the model response. If the text cannot be read from the
    response, a fixed error message is returned instead of raising.
  """
  model = GenerativeModel(model_name)
  response = model.generate_content(
    prompt,
    generation_config={
        "max_output_tokens": 8192,
        "temperature": 0.5,
        "top_p": 0.5,
        "top_k": 10,
    },
    stream=False,
  )
  try:
    return response.text
  except Exception as err:
    # Best-effort behaviour is kept (the caller still gets a string back), but
    # KeyboardInterrupt/SystemExit are no longer swallowed and the cause is shown.
    message = "An error occurred while generating the answer"
    print(f"{message}: {err}")
    return message

In [None]:
def run_search(question, top_k=5):
  """Retrieve the FAQ entries closest to a natural-language question.

  The question is embedded with the notebook's remote embedding model and
  compared against the `qa_embedding` column of the embeddings table.

  Args:
    question: The user's question as plain text.
    top_k: How many nearest FAQ entries to retrieve. Defaults to 5.

  Returns:
    A string with one "Q: ...\\nA: ..." paragraph per match, separated by blank
    lines, nearest match first. Empty when nothing is returned.
  """
  from google.cloud import bigquery

  # Use the same project as the rest of the notebook rather than whatever the
  # environment's default project happens to be.
  client = bigquery.Client(project=PROJECT_ID)

  # Perform a vector search over the FAQ embedding table, using the remote model
  # to embed the incoming natural language question on the fly. The query side
  # exposes its vector as `text_embedding`, while the table column is
  # `qa_embedding`, so the query column has to be named explicitly.
  sql = f"""
      SELECT base.question, base.answer
      FROM VECTOR_SEARCH(
      TABLE `{embedding_table_ref}`, 'qa_embedding',
      (
      SELECT text_embedding, content AS query
      FROM ML.GENERATE_TEXT_EMBEDDING(MODEL `{PROJECT_ID}.{DATASET_ID}.embedding_model`,
          (SELECT @question AS content))),
      query_column_to_search => 'text_embedding',
      top_k => {int(top_k)})
      ORDER BY distance
      """

  # Bind the user-entered question as a parameter to avoid SQL injection.
  job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("question", "STRING", question),
    ]
  )

  query_job = client.query(sql, job_config=job_config)

  # Format retrieved Q/A pairs as plain text paragraphs for downstream prompting.
  return "\n\n".join(
    f"Q: {row.question}\nA: {row.answer}" for row in query_job
  )

In [None]:
def build_prompt(data, question):
  """Assemble the prompt text: an instruction line, the context, then the question."""
  # Each entry is one line of the final prompt; the empty first entry and the
  # two-space last entry reproduce the leading newline and trailing indent.
  prompt_lines = (
    "",
    "    Instructions: Answer the question using the following Context.",
    "",
    f"    Context: {data}",
    "",
    f"    Question: {question}",
    "  ",
  )
  return "\n".join(prompt_lines)

In [None]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

def answer_question(question):
  """Answer a question with retrieved FAQ context.

  Runs the vector search for the question, displays the retrieved context
  inline, wraps context and question into a prompt, and asks Gemini.

  Args:
    question: The user's question as plain text.

  Returns:
    The string returned by `answer_question_gemini` for the built prompt.
  """
  data = run_search(question)
  # Show the retrieved context so the reader can see what the answer is based on.
  display("Retrieved Data:")
  display(data)
  display(" . . . ")
  prompt = build_prompt(data, question)
  answer_gemini = answer_question_gemini(prompt)

  return answer_gemini

In [9]:
# Example question used to exercise the retrieval + generation pipeline end to end.
QUESTION = "Tell me about the US Economy"

answer_gemini = answer_question(QUESTION)

# display() renders each positional argument in turn, so a single call shows
# the question block followed by the answer block.
display(
  "User Question:",
  QUESTION,
  "--------------------------------",
  "Gemini Answer:",
  answer_gemini,
)

'Retrieved Data:'

'Most areas of the US saw their economy continue to expand in December and early January, the US Federal Reserve said in its latest Beige Book report.\n\nOf the 12 US regions it identifies for the study, 11 showed stronger economic growth, with only the Cleveland area falling behind with a "mixed" rating. Consumer spending was higher in December than November, and festive sales were also up on 2003. The employment picture also improved, the Fed said.\n\n"Labour markets firmed in a number of districts, but wage pressures generally remained modest," the Beige Book said. "Several districts reported higher prices for building materials and manufacturing inputs, but most reported steady or only slightly higher overall price levels." The report added that residential real estate activity remained strong and that commercial real estate activity strengthened in most districts. "Office leasing was especially brisk in Washington DC, and New York City, two of the nation\'s strongest commercial ma

' . . . '

'User Question:'

'Tell me about the US Economy'

'--------------------------------'

'Gemini Answer:'

'Based on the context provided, here is a summary of the US Economy:\n\n**Overall Growth and Consumer Spending:**\n*   The US economy continued to expand in December and early January, with 11 of the 12 regions identified by the Federal Reserve showing stronger economic growth.\n*   Consumer spending was higher in December than in November, and festive sales were up compared to 2003.\n*   Treasury Secretary John Snow stated the economy was "growing at such a fast rate that it is generating lots of disposable income."\n\n**Employment:**\n*   The employment picture improved, with labor markets firming up in a number of districts.\n*   October saw a significant and better-than-expected addition of 337,000 jobs, a seven-month high.\n*   January saw a smaller-than-expected gain of 146,000 jobs, but the unemployment rate fell to 5.2%, its lowest level in three years.\n*   Analysts described the job growth as "moderate but not a satisfying amount."\n\n**Trade and the Dollar:**\n*   The trade 