In [1]:
from dotenv import load_dotenv
load_dotenv()
import os

In [2]:
# Configuration: every value is read from the environment, which the first cell
# populates via load_dotenv(). os.getenv returns None for a variable that is not set.
PROJECT_ID = os.getenv("PROJECT_ID")
PROJECT_NUMBER = os.getenv("PROJECT_NUMBER")
BUCKET_NAME = os.getenv("BUCKET_NAME")
DATASET_ID = os.getenv("DATASET_ID")
CONNECTION_ID = os.getenv("CONNECTION_ID")
SPEECH_MODEL = os.getenv("SPEECH_MODEL")
GENERATIVE_AI_MODEL = os.getenv("GENERATIVE_AI_MODEL")
DATASET_LOCATION = os.getenv("DATASET_LOCATION")
AUDIO_OBJECT_TABLE_ID = os.getenv("AUDIO_OBJECT_TABLE_ID")
MAIN_ACCOUNT_ID = os.getenv("MAIN_ACCOUNT_ID")
SERVICE_ACCOUNT_KEY_ID = os.getenv("SERVICE_ACCOUNT_KEY_ID")
SERVICE_ACCOUNT_KEY_FILE_PATH = os.getenv("SERVICE_ACCOUNT_KEY_FILE_PATH")

# Report unset or empty values up front: later cells interpolate these names into
# shell commands, where a missing value would be passed as the literal text "None".
# This only warns (it does not raise) because not every variable is used by every cell.
_settings = {
    "PROJECT_ID": PROJECT_ID,
    "PROJECT_NUMBER": PROJECT_NUMBER,
    "BUCKET_NAME": BUCKET_NAME,
    "DATASET_ID": DATASET_ID,
    "CONNECTION_ID": CONNECTION_ID,
    "SPEECH_MODEL": SPEECH_MODEL,
    "GENERATIVE_AI_MODEL": GENERATIVE_AI_MODEL,
    "DATASET_LOCATION": DATASET_LOCATION,
    "AUDIO_OBJECT_TABLE_ID": AUDIO_OBJECT_TABLE_ID,
    "MAIN_ACCOUNT_ID": MAIN_ACCOUNT_ID,
    "SERVICE_ACCOUNT_KEY_ID": SERVICE_ACCOUNT_KEY_ID,
    "SERVICE_ACCOUNT_KEY_FILE_PATH": SERVICE_ACCOUNT_KEY_FILE_PATH,
}
_missing = [name for name, value in _settings.items() if not value]
if _missing:
    print("WARNING: environment variables not set:", ", ".join(_missing))

# Quick check
print("Bucket:", BUCKET_NAME)
print("Service Account Key File Path:", SERVICE_ACCOUNT_KEY_FILE_PATH)

Bucket: speak-aura-ai-audio
Service Account Key File Path: ../credentials/bhack-471114-2b12cc8d7377.json


In [None]:
# ============================================
# Notebook: BigQuery & Vertex AI Service Setup
# ============================================

# Authenticate to Google Cloud (manual login).
# Only needed if you are running this locally, not in Colab Enterprise.
!gcloud auth login

In [None]:
# Select the gcloud account given by MAIN_ACCOUNT_ID (read from the environment).
!gcloud config set account {MAIN_ACCOUNT_ID}

In [None]:
# Step 1: Set the gcloud project to PROJECT_ID (read from the environment).
!gcloud config set project {PROJECT_ID}

In [None]:
# Cell 3: Activate service account using key file
# Make sure you have the JSON key at the specified path
!gcloud auth activate-service-account --key-file=$SERVICE_ACCOUNT_KEY_FILE_PATH

In [None]:

# Step 2: Create a BigQuery Cloud resource connection
# This allows BigQuery to talk to Vertex AI
!bq mk --connection --location=us \
    --connection_type=CLOUD_RESOURCE {CONNECTION_ID}

In [4]:
# Step 3: Get the Service Account for the connection
SERVICE_ACCT = !bq show --format=prettyjson --connection us.{CONNECTION_ID} | grep "serviceAccountId" | cut -d '"' -f 4
SERVICE_ACCT_EMAIL = SERVICE_ACCT[-1]
print("Service Account for connection:", SERVICE_ACCT_EMAIL)

Service Account for connection: bqcx-962614496383-lb5t@gcp-sa-bigquery-condel.iam.gserviceaccount.com


In [None]:
# Grant IAM permission to the service account on your GCS bucket
# This allows BigQuery/Vertex AI to access audio files
!gsutil iam ch serviceAccount:{SERVICE_ACCT_EMAIL}:roles/storage.objectViewer gs://speak-aura-ai-audio

In [None]:
# Step 4: Grant the necessary IAM roles for Vertex AI access

!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role='roles/bigquery.connectionUser'

In [8]:
# Enable the Document AI API (documentai.googleapis.com).
!gcloud services enable documentai.googleapis.com


Operation "operations/acat.p2-962614496383-1b0e7614-834b-41c5-bad7-20076319a41a" finished successfully.


In [None]:
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role='roles/documentai.viewer'

In [None]:
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role="roles/documentai.admin"

In [None]:
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role='roles/storage.objectViewer'

In [None]:
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role='roles/speech.editor'

In [None]:
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:{SERVICE_ACCT_EMAIL}" \
    --role='roles/aiplatform.user'

In [None]:
# Step 5: Wait for IAM propagation
# Pause after the role bindings above before continuing. The wait length lives in a
# single named constant that drives both the message and the sleep, so the two
# cannot drift apart when the value is tuned.
import time

IAM_PROPAGATION_WAIT_SECONDS = 60

print(f"Waiting {IAM_PROPAGATION_WAIT_SECONDS} seconds for IAM propagation...")
time.sleep(IAM_PROPAGATION_WAIT_SECONDS)
print("Setup complete. You can now create remote models pointing to Vertex AI.")

In [None]:
# Optional: list the credentialed gcloud accounts to verify which one is active.
!gcloud auth list

In [None]:
# Optional: print the project's IAM policy to verify the role bindings added above.
!gcloud projects get-iam-policy {PROJECT_ID}

## Cleaning Up
To clean up the BigQuery resources used in this project, uncomment and run the commands in the cell below.

In [None]:
# The cleanup commands below are intentionally commented out so that running the
# whole notebook never deletes anything; uncomment the ones you need.
# NOTE: none of these commands remove the IAM role bindings granted earlier in this
# notebook, and none of them delete the GCS bucket.

# # Delete the BigQuery tables
# ! bq rm --table -f {PROJECT_ID}:{DATASET_ID}.{AUDIO_OBJECT_TABLE_ID}

# # Delete the remote model
# ! bq rm --model -f {PROJECT_ID}:{DATASET_ID}.{SPEECH_MODEL}

# # Delete the remote connection
# ! bq rm --connection --project_id={PROJECT_ID} --location=us {CONNECTION_ID}

# # Delete the BigQuery dataset
# ! bq rm -r -f {PROJECT_ID}:{DATASET_ID}