In [None]:
!pip install --upgrade google-cloud-bigquery pandas

In [49]:
#Necessary Libraries to import
from google.cloud import bigquery
import pandas as pd
from google import genai
from google.genai import types
import base64

In [31]:
# Google Cloud project the BigQuery client is bound to.
project_id = "qwiklabs-gcp-00-171b5867e51b"
# BigQuery client used by every function below that runs a query.
client = bigquery.Client(project=project_id)

In [61]:
def create_embedding_model():
  """Create (or replace) the remote embedding model `AuroraFAQ.Embeddings`.

  Submits a `CREATE OR REPLACE MODEL` statement through the module-level
  BigQuery `client`, binding the model to the `us.aurora-bay-connection`
  connection and the `text-embedding-005` endpoint.

  The call blocks until the job has finished: `client.query()` only submits
  the job, so without waiting the success message would be printed (and later
  cells would run) while the model might still be missing or the job failing.

  Raises:
    Whatever `QueryJob.result()` raises when the statement fails.
  """
  query = """
    CREATE OR REPLACE MODEL `AuroraFAQ.Embeddings`
    REMOTE WITH CONNECTION `us.aurora-bay-connection`
    OPTIONS (ENDPOINT = 'text-embedding-005');
  """
  job = client.query(query)
  job.result()  # wait for completion; job errors surface here instead of being lost
  print(f"Embedding model created {job}")

In [62]:
def load_faq_data():
  """Load the FAQ CSV from Cloud Storage into the `AuroraFAQ.faqs` table.

  Runs a `LOAD DATA OVERWRITE` statement through the module-level BigQuery
  `client`, so any existing rows in `AuroraFAQ.faqs` are replaced. The table
  has two STRING columns: `question` and `answer`.

  The call blocks until the load job has finished: `client.query()` only
  submits the job, so without waiting the success message would be printed
  before the data is actually there and load errors would go unnoticed.

  Raises:
    Whatever `QueryJob.result()` raises when the statement fails.
  """
  query = """
    LOAD DATA OVERWRITE AuroraFAQ.faqs
    (
        question STRING,
        answer STRING
    )
    FROM FILES (
        format = 'CSV',
        uris = ['gs://labs.roitraining.com/aurora-bay-faqs/aurora-bay-faqs.csv']
    );
  """
  job = client.query(query)
  job.result()  # wait for completion; job errors surface here instead of being lost
  print(f"Data loaded {job}")

In [63]:
def create_embeddings():
  """Build the `AuroraFAQ.faqs_embeddings` table from `AuroraFAQ.faqs`.

  For every FAQ row the statement passes `question`, `answer` and a `content`
  column (`question: answer`) to `ML.GENERATE_TEXT_EMBEDDING` with the
  `AuroraFAQ.Embeddings` model, and stores the function's full output
  (`SELECT *`) as the new table.

  The call blocks until the job has finished: `client.query()` only submits
  the job, so without waiting the success message would be printed before the
  table exists and a following vector search could hit a missing or stale
  table.

  Raises:
    Whatever `QueryJob.result()` raises when the statement fails.
  """
  query = """
    CREATE OR REPLACE TABLE `AuroraFAQ.faqs_embeddings` AS
    SELECT
        *
    FROM
        ML.GENERATE_TEXT_EMBEDDING(
            MODEL `AuroraFAQ.Embeddings`,
            (SELECT
                question,
                answer,
                CONCAT(question, ': ', answer) AS content
            FROM
                `AuroraFAQ.faqs`
            )
        ) as e;
  """
  job = client.query(query)
  job.result()  # wait for completion; job errors surface here instead of being lost
  print(f"Embeddings created {job}")

In [40]:
# Builds the VECTOR_SEARCH statement that matches a user question against the embeddings table
def vector_search_query(user_query, top_k):
  """Return the SQL text of a vector search for `user_query`.

  The statement embeds `user_query` with the `AuroraFAQ.Embeddings` model and
  searches the `text_embedding` column of `AuroraFAQ.faqs_embeddings`,
  selecting `query.query`, `base.content`, `base.question` and `base.answer`.

  Args:
    user_query: Free-text question. It is embedded in the SQL as a
      single-quoted string literal, so backslashes, single quotes and line
      breaks are escaped first; otherwise an input such as "what's open?"
      would terminate the literal early and break (or alter) the statement.
    top_k: Number of nearest neighbours to request. Coerced with `int()` so
      only an integer can reach the SQL text.

  Returns:
    The SQL statement as a string; the caller is responsible for running it.

  Raises:
    ValueError, TypeError: If `top_k` cannot be converted to an integer.
  """
  # Backslashes are escaped first so the escapes added afterwards are not
  # themselves doubled.
  escaped_query = (
    str(user_query)
    .replace("\\", "\\\\")
    .replace("'", "\\'")
    .replace("\r", "\\r")
    .replace("\n", "\\n")
  )
  neighbour_count = int(top_k)

  return f"""
  SELECT query.query, base.content, base.question, base.answer
  FROM VECTOR_SEARCH(
    TABLE `AuroraFAQ.faqs_embeddings`,
    'text_embedding',
    (
      SELECT ml_generate_embedding_result, content AS query
      FROM ML.GENERATE_EMBEDDING(
        MODEL `AuroraFAQ.Embeddings`,
        (SELECT '{escaped_query}' AS content)
      )
    ),
    top_k => {neighbour_count},
    options => '{{"fraction_lists_to_search": 0.01}}'
  ) AS search_result
  """

In [32]:
#Gen AI Client
# Vertex AI-backed Gen AI client. Reuses `project_id` instead of repeating the
# literal so the BigQuery and Gemini calls cannot drift onto different projects.
genai_client = genai.Client(
      vertexai=True,
      project=project_id,
      location="global",
)

In [33]:
#Gen AI Model
# Model ID that `generate()` passes to `generate_content_stream`.
# NOTE(review): this looks like a dated preview ID — confirm it is still served before re-running.
model = "gemini-2.5-pro-preview-06-05"

In [34]:
#Gen AI Content generation function
def generate(system_prompt, user_input):
  """Stream a model answer for `user_input` to stdout.

  Sends `user_input` as a single user turn and `system_prompt` as the system
  instruction to the module-level `genai_client`, using the module-level
  `model`, and prints the streamed text as it arrives (no trailing newline).

  Args:
    system_prompt: Text used as the system instruction.
    user_input: Text of the user's message.

  Returns:
    None. The answer is written to stdout only.
  """
  contents = [
    types.Content(
      role="user",
      parts=[
        types.Part.from_text(text=user_input)
      ]
    ),
  ]

  generate_content_config = types.GenerateContentConfig(
    temperature = 0.7, # Control the content creativity
    top_p = 1, #control the probability of token selection
    max_output_tokens = 65535,
    system_instruction=[types.Part.from_text(text=system_prompt)],
    thinking_config=types.ThinkingConfig(
      thinking_budget=-1,
    ),
  )

  response_stream = genai_client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
  )
  for chunk in response_stream:
    # A streamed chunk may carry no text at all, in which case `chunk.text` is
    # None; printing it unguarded would write the literal "None" into the answer.
    chunk_text = chunk.text
    if chunk_text:
      print(chunk_text, end="")

In [50]:
def faq_chatbot(user_input, top_k=5):
  """Answer `user_input` from the FAQ embeddings and print the reply.

  Runs the vector search built by `vector_search_query`, lists the returned
  FAQ rows in a system prompt and hands that prompt plus the question to
  `generate`, which prints the model's answer.

  Args:
    user_input: The user's question.
    top_k: Number of FAQ matches to request from the vector search. Defaults
      to 5, the value that was previously hard-coded.

  Returns:
    None. All output is printed.
  """
  fallback_message = "I can't help with this."

  # Nothing to embed: skip the BigQuery and model round trips entirely.
  if not user_input or not user_input.strip():
    print(fallback_message)
    return

  query = vector_search_query(user_input, top_k) # build the vector search SQL
  matches = client.query(query).to_dataframe() # run it and fetch the rows

  if matches.empty:
    # when no matching result found in DB
    print(fallback_message)
    return

  system_prompt = """You are an expert FAQ assistant. Based on the following FAQs, provide the most relevant and human-like answer to the user's question.
If none of the answers are relevant, respond with 'I can't help with this.'\n\n"""
  # Number the FAQs by position rather than by DataFrame index label, so the
  # numbering is always 1..n.
  system_prompt += "".join(
    f"FAQ {rank}:\nQ: {row.question}\nA: {row.answer}\n\n"
    for rank, row in enumerate(matches.itertuples(index=False), start=1)
  )

  # Sending the searched result to Gemini to get proper humanlike response
  generate(system_prompt, user_input)


In [69]:
# Create (or replace) the remote embedding model `AuroraFAQ.Embeddings`.
create_embedding_model()

Embedding model created QueryJob<project=qwiklabs-gcp-00-171b5867e51b, location=US, id=826d454b-7120-46a3-b627-f55e47bdf806>


In [65]:
# Load the FAQ CSV from Cloud Storage into the `AuroraFAQ.faqs` table (existing rows are overwritten).
load_faq_data()

Data loaded QueryJob<project=qwiklabs-gcp-00-171b5867e51b, location=US, id=f62b5b7a-5866-4748-af91-56b61911bba7>


In [66]:
# Build `AuroraFAQ.faqs_embeddings` by embedding each FAQ's "question: answer" text.
create_embeddings()

Embeddings created QueryJob<project=qwiklabs-gcp-00-171b5867e51b, location=US, id=2709d85f-9cf7-4d49-8cb7-2bab312e31ac>


In [67]:
# Example 1: a question about Aurora Bay itself.
faq_chatbot("aurora bay population?")

Aurora Bay has a population of approximately 3,200 residents, although it can fluctuate seasonally due to temporary fishing and tourism workforces.

In [68]:
# Example 2: an off-topic question; the system prompt tells the model to decline when no FAQ is relevant.
faq_chatbot("who is U.S president?")

I can't help with this.