In [1]:
!pip install --upgrade --force-reinstall google-cloud-bigquery google-cloud-bigquery-storage google-cloud-aiplatform pyarrow pandas google-cloud-aiplatform langchain-google-genai langchain-google-vertexai langchain-google-community langchain-core --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires google-auth==2.38.0, but you have google-auth 2.43.0 which is incompatible.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.
google-adk 1.17.0 requires google-cloud-storage<3.0.0,>=2.18.0, but you have google-cloud-storage 3.6.0 which is incompatible.
google-adk 1.17.0 requires tenacity<9.0.0,>=8.0.0, but you have tenacity 9.1.2 which is incompatible.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you have langchain-core 1.1.0 which is incompatible.
langchain 0.3.27 requires langchain-text-splitters<1.0.0,>=0.3.9, but you have langchain-text-splitters 1.0.0 which is incompatible.
pylibcudf-cu12 25.6.0 requires pyarrow<20.0.0a0,>=

Imports

In [2]:
from google.cloud import bigquery
from google.cloud import aiplatform
from vertexai.preview.language_models import TextEmbeddingModel
import pandas as pd
import numpy as np
import json

Create table from CSV

In [3]:

# Configuration shared by the later cells (dataset/table names are interpolated
# into the SQL statements further down).
PROJECT_ID = "qwiklabs-gcp-02-9a09090caa33"
DATASET = "aurora_bay"
TABLE = "faqs"

bq = bigquery.Client(project=PROJECT_ID)

# Create dataset if not exists
dataset_ref = bigquery.Dataset(f"{PROJECT_ID}.{DATASET}")
dataset_ref.location = "US"
bq.create_dataset(dataset_ref, exists_ok=True)

df = pd.read_csv("aurora-bay-faqs.csv")

# Upload to BigQuery.
# Load jobs append to an existing table by default, so re-running this cell
# would duplicate every FAQ row. WRITE_TRUNCATE replaces the table contents,
# which keeps the cell idempotent under "Restart & Run All".
load_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE
)
bq.load_table_from_dataframe(
    df, f"{PROJECT_ID}.{DATASET}.{TABLE}", job_config=load_config
).result()  # .result() blocks until the load job has finished

print("Loaded", len(df), "rows into BigQuery table.")

Loaded 50 rows into BigQuery table.


Create BQ connection

In [4]:
# Create a CLOUD_RESOURCE connection named `embedding_conn` in the US location;
# the remote-model cell refers to it as `us.embedding_conn`.
# When the connection already exists, `bq mk` prints an "Already Exists" error
# (see the recorded output); the later cells in that run still succeeded.
!bq mk --connection --location=US --connection_type=CLOUD_RESOURCE embedding_conn

BigQuery error in mk operation: Already Exists: Connection
projects/666996842470/locations/us/connections/embedding_conn


Create embeddings model

In [5]:
# DDL that (re)creates a BigQuery remote model `<DATASET>.embeddings`, reached
# through the `us.embedding_conn` connection and pointing at the
# `text-embedding-005` endpoint. CREATE OR REPLACE makes the cell re-runnable.
embedding_model_sql = f"""
CREATE OR REPLACE MODEL `{DATASET}.embeddings`
REMOTE WITH CONNECTION `us.embedding_conn`
OPTIONS (ENDPOINT = 'text-embedding-005');
"""

# Submit the statement and wait for the query job before reporting success.
job = bq.query(embedding_model_sql)
job.result()
print("Remote embedding model created.")

Remote embedding model created.


Create embeddings table

In [6]:
# Build `<DATASET>.faqs_with_embedding` by running ML.GENERATE_EMBEDDING with
# the remote `embeddings` model over the FAQ rows. The text that gets embedded
# is the `content` column (question + ' ' + answer); `question` and `answer`
# are carried through as separate columns. SELECT * keeps every column the
# function returns, including `ml_generate_embedding_result`, which the vector
# store cell uses as its embedding field.
generate_embeddings_sql = f"""
CREATE OR REPLACE TABLE
`{DATASET}.faqs_with_embedding` AS
SELECT *
FROM ML.GENERATE_EMBEDDING(
 MODEL `{DATASET}.embeddings`,
 (SELECT CONCAT(question, ' ', answer) AS content, question, answer FROM
`{DATASET}.faqs`));
"""
# Submit the statement and wait for the query job before reporting success.
job = bq.query(generate_embeddings_sql)
job.result()
print("Embedding table created successfully.")

Embedding table created successfully.


Using LangChain, create an embedding class and a vector store

In [14]:
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_google_community import BigQueryVectorStore

# Embedding client used by the vector store. The model name matches the
# endpoint configured for the BigQuery remote model (`text-embedding-005`).
embedding = VertexAIEmbeddings(
    model_name="text-embedding-005", project=PROJECT_ID
)

# Vector store backed by the `faqs_with_embedding` table created earlier.
# `embedding_field` names the column written by ML.GENERATE_EMBEDDING.
# NOTE(review): the other column mappings (document text, id) are left at the
# library defaults - confirm they line up with this table's schema.
store = BigQueryVectorStore(
    project_id=PROJECT_ID,
    dataset_name=DATASET,
    table_name="faqs_with_embedding",
    location="US",
    embedding=embedding,
    embedding_field="ml_generate_embedding_result",
)

INFO:langchain_google_community.bq_storage_vectorstores._base:BigQuery table qwiklabs-gcp-02-9a09090caa33.aurora_bay.faqs_with_embedding initialized/validated as persistent storage. Access via BigQuery console:
 https://console.cloud.google.com/bigquery?project=qwiklabs-gcp-02-9a09090caa33&ws=!1m5!1m4!4m3!1sqwiklabs-gcp-02-9a09090caa33!2saurora_bay!3sfaqs_with_embedding


Create a chain

In [15]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_vertexai import VertexAI


def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of LangChain ``Document`` objects returned by the retriever.

    Returns:
        The documents' ``page_content`` values separated by blank lines. An
        empty iterable yields an empty string.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# See full prompt at https://smith.langchain.com/hub/langchain-ai/retrieval-qa-chat
prompt = hub.pull("langchain-ai/retrieval-qa-chat")
llm = VertexAI(model_name="gemini-2.0-flash")

# The dict fans the user's question out to two branches:
#   - "context": retrieve matching FAQ rows and flatten them to plain text, so
#     the prompt receives only the FAQ text instead of the repr of a list of
#     Document objects (which also carries each row's metadata);
#   - "input": the question itself, passed through unchanged.
qa_chain = (
    {
        "context": store.as_retriever() | format_docs,
        "input": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
# One-off check of the chain with a single question. In the recorded run the
# model replied that the answer was not available from the retrieved context.
qa_chain.invoke("what is the capitol of Alaska?")

'Based on the context provided, the answer to your question is not available.\n'

Chat loop

In [19]:
# Interactive chat loop: read a question, answer it with the RAG chain, repeat.
# Typing "exit" or "quit" (any case, surrounding whitespace ignored) ends it.
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the session cleanly
        # instead of leaving a traceback in the notebook.
        print("Chat session ended")
        break

    command = user_input.strip()
    if command.lower() in ['exit', 'quit']:
        print("Chat session ended")
        break
    if not command:
        # Nothing was typed; don't spend a retrieval + LLM call on it.
        continue

    print("Bot:")
    print(qa_chain.invoke(user_input))

You: Where can I go to watch whales
Bot:
The Aurora Bay Harbor area offers whale watching tours, especially during spring and summer when humpbacks and orcas frequent the region.
You: Is there public transity
Bot:
Yes. Aurora Bay operates a limited bus service on weekdays from 6 AM to 8 PM, servicing main routes including downtown, the airport, and residential neighborhoods.
You: exit
Chat session ended
