In [None]:
#Necessary Libraries to import
from google.cloud import bigquery
import pandas as pd
from vertexai import init
from google import genai
from google.genai import types
import base64
# Google Cloud project shared by the BigQuery client and the Vertex AI SDK.
PROJECT_ID = "qwiklabs-gcp-02-8fc93094313c"

# BigQuery client through which the SQL helpers in this notebook run their statements.
client = bigquery.Client(project=PROJECT_ID)

# Initialise the Vertex AI SDK for the same project in the "global" location.
init(location="global", project=PROJECT_ID)

In [None]:
def create_embedding_model():
  """Create or replace the BigQuery ML remote model `AuroraDS.Embeddings`.

  The model is declared over the `us.embedding_conn` connection with the
  `text-embedding-005` endpoint. The statement is submitted through the
  module-level `client`; the call blocks until the job finishes and then
  prints the job's result object.
  """
  ddl = """
    CREATE OR REPLACE MODEL `AuroraDS.Embeddings`
    REMOTE WITH CONNECTION `us.embedding_conn`
    OPTIONS (ENDPOINT = 'text-embedding-005');
  """
  job = client.query(ddl)
  outcome = job.result()  # wait for the DDL job to complete
  print(f"Embedding model created {outcome}")

In [None]:
def load_faq_data():
  """Load the FAQ CSV from Cloud Storage into `AuroraDS.qas`.

  Uses `LOAD DATA OVERWRITE`, so any rows already in the table are replaced.
  The table is given two STRING columns, `question` and `answer`. The call
  blocks until the load job finishes and then prints the job's result object.
  """
  load_sql = """
    LOAD DATA OVERWRITE AuroraDS.qas
    (
        question STRING,
        answer STRING
    )
    FROM FILES (
        format = 'CSV',
        uris = ['gs://labs.roitraining.com/alaska-dept-of-snow/alaska-dept-of-snow-faqs.csv']
    );
  """
  job = client.query(load_sql)
  outcome = job.result()  # wait for the load job to complete
  print(f"Data loaded {outcome}")

In [None]:
def create_embeddings():
  """Build `AuroraDS.qas_embeddings` from the FAQ rows in `AuroraDS.qas`.

  For every row, `question` and `answer` are joined as "question: answer"
  into a `content` column, which is passed to ML.GENERATE_TEXT_EMBEDDING with
  the `AuroraDS.Embeddings` model. Every column returned by that function is
  stored in the new table (the table is replaced if it already exists). The
  call blocks until the job finishes and then prints the job's result object.
  """
  # NOTE(review): ML.GENERATE_TEXT_EMBEDDING is the older BigQuery ML embedding
  # function; per its documentation the vector is written to a column named
  # `text_embedding` -- confirm that any search over this table targets it.
  embed_sql = """
    CREATE OR REPLACE TABLE `AuroraDS.qas_embeddings` AS
    SELECT
        *
    FROM
        ML.GENERATE_TEXT_EMBEDDING(
            MODEL `AuroraDS.Embeddings`,
            (SELECT
                question,
                answer,
                CONCAT(question, ': ', answer) AS content
            FROM
                `AuroraDS.qas`
            )
        ) as e;
  """
  job = client.query(embed_sql)
  outcome = job.result()  # wait for the CTAS job to complete
  print(f"Embeddings created {outcome}")

In [None]:
def vector_search_data(user_query):
    """Return the FAQ rows most similar to `user_query`.

    The question is embedded with the `AuroraDS.Embeddings` model and compared
    against the stored vectors in `AuroraDS.qas_embeddings` using BigQuery's
    VECTOR_SEARCH. The matches are returned directly instead of being written
    to a table, so the FAQ source table `AuroraDS.qas` is never overwritten.

    Args:
        user_query: Free-text question from the end user. It is sent as a
            query parameter, never spliced into the SQL text, so quotes or SQL
            fragments in the question cannot alter the statement.

    Returns:
        pandas.DataFrame with at most five rows, closest match first, and the
        columns `query`, `question`, `answer`, `content` and `distance`.
    """
    # The embeddings table is produced by ML.GENERATE_TEXT_EMBEDDING (see
    # create_embeddings), whose vector column is `text_embedding`. The query
    # side uses ML.GENERATE_EMBEDDING, whose vector column is
    # `ml_generate_embedding_result`, so it is aliased to the same name: by
    # default VECTOR_SEARCH looks for the identically named column on both sides.
    search_sql = """
    SELECT
        query.query,
        base.question,
        base.answer,
        base.content,
        distance
    FROM
        VECTOR_SEARCH(
            TABLE `AuroraDS.qas_embeddings`,
            'text_embedding',
            (
                SELECT
                    ml_generate_embedding_result AS text_embedding,
                    content AS query
                FROM
                    ML.GENERATE_EMBEDDING(
                        MODEL `AuroraDS.Embeddings`,
                        (SELECT @user_query AS content)
                    )
            ),
            top_k => 5,
            options => '{"fraction_lists_to_search": 1.0}'
        )
    ORDER BY
        distance;
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("user_query", "STRING", user_query),
        ]
    )
    return client.query(search_sql, job_config=job_config).to_dataframe()


In [None]:
# Gen AI SDK client configured with vertexai=True for PROJECT_ID in the
# "global" location; generate() streams model output through it.
genai_client = genai.Client(vertexai=True, project=PROJECT_ID, location="global")

In [None]:
# Gemini model ID that generate() passes to the streaming API.
# NOTE(review): this is a dated preview model ID -- confirm it is still served.
model = "gemini-2.5-pro-preview-06-05"

In [None]:
# Setup step: create (or replace) the remote embedding model in BigQuery.
create_embedding_model()

Embedding model created <google.cloud.bigquery.table._EmptyRowIterator object at 0x7cf15d273190>


In [None]:
def generate(system_prompt, user_input):
  """Send one user turn to Gemini and return the complete reply as text.

  The reply is streamed through the module-level `genai_client` using the
  module-level `model` ID, and the streamed pieces are concatenated exactly
  as received.

  Args:
    system_prompt: Text supplied as the system instruction.
    user_input: Text supplied as the single user message.

  Returns:
    The full reply with leading and trailing whitespace removed; an empty
    string if the stream carried no text.
  """
  contents = [
    types.Content(
      role="user",
      parts=[
        types.Part.from_text(text=user_input)
      ]
    ),
  ]

  generate_content_config = types.GenerateContentConfig(
    temperature = 0.9,
    top_p = 1,
    max_output_tokens = 65535,
    system_instruction=[types.Part.from_text(text=system_prompt)],
    # NOTE(review): -1 is understood to request a dynamic thinking budget --
    # confirm against the SDK documentation.
    thinking_config=types.ThinkingConfig(
      thinking_budget=-1,
    ),
  )

  pieces = []
  for chunk in genai_client.models.generate_content_stream(
    model = model,
    contents = contents,
    config = generate_content_config,
    ):
    piece = chunk.text
    # A chunk can carry no text at all; skip it rather than fail on
    # str + None.
    if piece:
      pieces.append(piece)

  # Chunk boundaries fall at arbitrary positions, so the pieces are joined
  # without a separator; inserting a space would corrupt the text (for
  # example, splitting a one-word verdict or doubling an existing space).
  return "".join(pieces).strip()

In [None]:
# Setup step: load the FAQ CSV into AuroraDS.qas (replaces existing rows).
load_faq_data()

Data loaded <google.cloud.bigquery.table._EmptyRowIterator object at 0x7cf162e9bbd0>


In [None]:
# Setup step: embed the FAQ rows into AuroraDS.qas_embeddings. This reads
# AuroraDS.qas and the AuroraDS.Embeddings model, so both must exist first.
create_embeddings()


Embeddings created <google.cloud.bigquery.table._EmptyRowIterator object at 0x7cf15d381310>


In [None]:
def gemini_self_check(user_query, response_text):
    """Ask Gemini to review a chatbot answer before it is shown to the user.

    The review instructions, which embed both the original question and the
    candidate answer, are sent as the system prompt; the candidate answer is
    sent again as the user message.

    Args:
        user_query: The question the user asked.
        response_text: The assistant answer to be reviewed.

    Returns:
        The model's reply as returned by generate(). The instructions ask for
        a single word, VALID or BLOCK.
    """
    review_instructions = f"""
    You are a content safety reviewer for a government chatbot.

    Evaluate the following assistant response:
    - Is it relevant to the user's question?
    - Is it fact-based and appropriate?
    - Is it free from harmful or misleading language?

    Respond ONLY with one word:
    - VALID → if the answer is acceptable
    - BLOCK → if it should not be shown to users

    Question: {user_query}
    Response: {response_text}
        """
    verdict = generate(review_instructions, response_text)
    return verdict

In [None]:
def do_prompt_filtering(input):
  """Ask Gemini whether a user question may be handled by the chatbot.

  The screening instructions, which embed the question, are sent as the
  system prompt; the question is sent again as the user message.

  Args:
    input: The raw question typed by the user.

  Returns:
    The model's reply as returned by generate(). The instructions ask for
    either "ALLOW" or "BLOCK".
  """
  screening_instructions = f"""You are a content safety reviewer for a public-facing government chatbot for the Alaska Department of Snow (ADS).
  Your job is to evaluate if a user’s question is appropriate and relevant to ADS services. Allow only safe, relevant questions

  Flag and reject questions that:
  - Include harmful, offensive, or political content
  - Ask for private or personal information
  - Are not related to ADS services

  Evaluate the following question. Respond ONLY with one of:

  - "ALLOW" (if the question is appropriate)
  - "BLOCK" (if the question should be rejected)

  Question: "{input}"
  """
  verdict = generate(screening_instructions, input)
  return verdict

In [None]:
def _verdict_token(raw_verdict):
  """Reduce a classifier reply to its upper-case letters (' "allow". ' -> 'ALLOW')."""
  return "".join(ch for ch in str(raw_verdict).upper() if ch.isalpha())


def ads_chatbot(user_input):
  """Answer a user question from the FAQ data, with safety checks on both ends.

  Steps:
    1. Screen the question with do_prompt_filtering().
    2. Retrieve FAQ context for the question.
    3. Ask Gemini to answer using only that context.
    4. Review the answer with gemini_self_check() before returning it.

  Both safety gates fail closed: the question is processed only when the
  filter verdict starts with ALLOW, and the answer is returned only when the
  review verdict starts with VALID. Anything else, including an unexpected or
  malformed verdict, is treated as a rejection.

  Args:
    user_input: The raw question typed by the user.

  Returns:
    The generated answer, 'Invalid prompt' when the question is rejected, or
    'Response is invalid' when the answer fails the review.
  """
  # Verdicts are compared on letters only so that stray quotes, punctuation,
  # whitespace or casing in the model's reply cannot bypass the gate.
  if not _verdict_token(do_prompt_filtering(user_input)).startswith("ALLOW"):
    return 'Invalid prompt'

  # Prefer the rows handed back by the vector search. When the search helper
  # does not return a table with question/answer columns, fall back to the
  # full FAQ table.
  retrieved = vector_search_data(user_input)
  if isinstance(retrieved, pd.DataFrame) and {"question", "answer"}.issubset(retrieved.columns):
    faq_df = retrieved
  else:
    faq_df = client.query("SELECT * FROM `AuroraDS.qas`").to_dataframe()

  system_prompt = (
    "You are an assistant for the Alaska Department of Snow (ADS). "
    "Answer clearly and professionally using only the provided context. "
    "If the answer isn’t supported by the context, respond with: "
    "“Not able to answer your query.” "
    "Always greet the user with hello or hi, then say “I am getting information”, "
    "and end your reply with “Anything else I can help you with?”\n\n"
  )
  system_prompt += "".join(
    f"FAQ {number}:\nQ: {question}\nA: {answer}\n\n"
    for number, (question, answer) in enumerate(
      zip(faq_df["question"], faq_df["answer"]), start=1
    )
  )

  response = generate(system_prompt, user_input)
  if not _verdict_token(gemini_self_check(user_input, response)).startswith("VALID"):
    return 'Response is invalid'
  return response


In [None]:
  # Smoke check: a question the prompt filter is expected to reject.
  print(ads_chatbot("where is bay located"))

Invalid prompt


In [None]:
# Smoke check: a question about ADS that should be answered from the FAQ context.
ads_chatbot('How many people does ADS serve?')

'Hi, i am getting information.\n\nADS serves approximately 750,000 people across Alaska’s widely distributed communities and remote areas.\n\nanything else i can  help you with?'

In [None]:
import pytest

In [None]:
def test_do_prompt_filtering():
    """A question that should be rejected gets the BLOCK verdict."""
    rejected_question = "where is bay located"
    verdict = do_prompt_filtering(rejected_question)
    assert verdict == "BLOCK"

def test_do_prompt_filtering_postive_flow():
    """A question about ADS gets the ALLOW verdict."""
    accepted_question = "How many people does ADS serve?"
    verdict = do_prompt_filtering(accepted_question)
    assert verdict == "ALLOW"

def test_gemini_self_check():
    """A factual, on-topic answer passes the response review."""
    response = "ADS serves approximately 750,000 people across Alaska’s widely distributed communities and remote areas."
    category = gemini_self_check("How many people does ADS serve?", response)
    # gemini_self_check is prompted to answer VALID or BLOCK; "ALLOW" belongs
    # to do_prompt_filtering and can never be returned here.
    assert category.strip().upper() == "VALID"

In [None]:
# Run the checks directly in the notebook; each one raises AssertionError on
# failure, and each one makes live model calls through generate().
test_do_prompt_filtering()
test_do_prompt_filtering_postive_flow()
test_gemini_self_check()

AssertionError: 